#!/usr/bin/env python # coding: utf-8 # In[1]: text = 'this is a test this test is a test' # In[2]: text = 'simple text is best text' # In[3]: text = """putt text hereeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee""" # In[4]: #Uncomment this to generate the windows automaticaly #window_length = [x for x in range(10,len(text),10)] window_length = [25,59] # In[5]: root = [] # In[6]: def grow_tree(root,text,limit): if len(text) == 0: return node = [node for node in root if node[0] == text[0]] if not node: root.append([text[0],1,False,[]]) node = root[-1][3] elif node[0][2] == True: return else: node = node[0] if len(text) <= limit: node[1] += 1 node = node[3] grow_tree(node,text[1:],limit) # In[7]: def put_blocks(node): if not node: return elif node[2] == True: return elif node[1] == 1: node[2] = True return elif not node[3]: return else: put_blocks(node[3][0]) # In[8]: def check_blocks(node): if not node: return False elif node[2] == True: return True elif not node[3]: return False else: return check_blocks(node[3][0]) # In[9]: def get_tree_text(node,value,text,ls): if not node: return else: if node[1] < value: ls.append([text,value]) value = node[1] text = text + node[0] if not node[3]: return else: get_tree_text(node[3][0],value,text,ls) # In[10]: window_num = 0 for window in window_length: #Grow tree, using different window lengths for i in range(len(text) - (window-1)): text_window = text[i:i+window] if window_num == 0: limit = window else: limit = window - window_length[window_num-1] grow_tree(root,text_window,limit) #Put blocks on nodes which dec for node in root: put_blocks(node) #Find number of unblocked nodes unblocked_nodes = [] for node in root: if check_blocks(node): pass else: unblocked_nodes.append(node) window_num += 1 # In[11]: def print_tree(node,value,text): if not node: return else: if node[1] < value: print(f'{text} ({value})') value = node[1] text = text + node[0] if not node[3]: return else: print_tree(node[3][0],value,text) # In[12]: print() print('Proper printed version:') for node in root: value = node[1] print_tree(node,value,'') # In[13]: tree_data = [] for node in root: value = node[1] get_tree_text(node,value,'',tree_data) # In[16]: sorted_tree_data = [] for value in tree_data: bits_saved = (len(value[0]) * value[1])*1 cost = len(value[0]) + 2*value[1] sorted_tree_data.append([value[0],bits_saved,cost,bits_saved/cost]) sorted_tree_data = sorted(sorted_tree_data, key=lambda x: x[3],reverse=True) # In[17]: for data in sorted_tree_data: print(data) # In[ ]: