#!/usr/bin/env python # coding: utf-8 # In[1]: with open('input.txt','r',encoding='utf-8') as f: text = f.read() # In[2]: #Uncomment this to generate the windows automaticaly #window_length = [x for x in range(10,len(text),10)] window_length = [10, 20, 40, 60] # In[3]: root = [] # In[4]: def grow_tree(root,text,limit): if len(text) == 0: return node = [node for node in root if node[0] == text[0]] if not node: root.append([text[0],1,False,[]]) node = root[-1][3] elif node[0][2] == True: return else: node = node[0] if len(text) <= limit: node[1] += 1 node = node[3] grow_tree(node,text[1:],limit) # In[5]: def put_blocks(node): if not node: return elif node[2] == True: return elif node[1] == 1: node[2] = True return elif not node[3]: return else: for subnode in node[3]: put_blocks(subnode) # In[6]: def get_tree_text(node,value,text,ls): if not node: return else: if node[1] < value: ls.append([text,value]) value = node[1] text = text + node[0] if not node[3]: return else: for subnode in node[3]: get_tree_text(subnode,value,text,ls) # In[7]: window_num = 0 for window in window_length: #Grow tree, using different window lengths for i in range(len(text) - (window-1)): text_window = text[i:i+window] if window_num == 0: limit = window else: limit = window - window_length[window_num-1] grow_tree(root,text_window,limit) #Put blocks on nodes which dec for node in root: put_blocks(node) window_num += 1 # In[8]: def print_tree(node,value,text): if not node: return else: if node[1] < value: print(f'{text} ({value})') value = node[1] text = text + node[0] if not node[3]: return else: for subnode in node[3]: print_tree(subnode,value,text) # In[9]: print() print('Proper printed version:') for node in root: value = node[1] print_tree(node,value,'') # In[10]: tree_data = [] for node in root: value = node[1] get_tree_text(node,value,'',tree_data) # In[11]: tree_data_set = set([tuple(x) for x in tree_data]) # In[12]: tree_data = list(tree_data_set) # In[13]: sorted_tree_data = [] for value in tree_data: bits_saved = (len(value[0]) * value[1])*8 cost = len(value[0]) + 2*value[1] sorted_tree_data.append([value[0],bits_saved,cost,bits_saved/cost]) sorted_tree_data = sorted(sorted_tree_data, key=lambda x: x[3],reverse=True) # In[14]: print('Sorted values..') # In[15]: for data in sorted_tree_data: print(data) # In[ ]: # In[ ]: