123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
#!/usr/bin/env python
# coding: utf-8
# In[1]:
with open('input.txt','r',encoding='utf-8') as f:
text = f.read()
# In[2]:
#Uncomment this to generate the windows automaticaly
#window_length = [x for x in range(10,len(text),10)]
window_length = [10, 20, 40, 60]
# In[3]:
root = []
# In[4]:
def grow_tree(root,text,limit):
if len(text) == 0:
return
node = [node for node in root if node[0] == text[0]]
if not node:
root.append([text[0],1,False,[]])
node = root[-1][3]
elif node[0][2] == True:
return
else:
node = node[0]
if len(text) <= limit:
node[1] += 1
node = node[3]
grow_tree(node,text[1:],limit)
# In[5]:
def put_blocks(node):
if not node:
return
elif node[2] == True:
return
elif node[1] == 1:
node[2] = True
return
elif not node[3]:
return
else:
for subnode in node[3]:
put_blocks(subnode)
# In[6]:
def get_tree_text(node,value,text,ls):
if not node:
return
else:
if node[1] < value:
ls.append([text,value])
value = node[1]
text = text + node[0]
if not node[3]:
return
else:
for subnode in node[3]:
get_tree_text(subnode,value,text,ls)
# In[7]:
window_num = 0
for window in window_length:
#Grow tree, using different window lengths
for i in range(len(text) - (window-1)):
text_window = text[i:i+window]
if window_num == 0:
limit = window
else:
limit = window - window_length[window_num-1]
grow_tree(root,text_window,limit)
#Put blocks on nodes which dec
for node in root:
put_blocks(node)
window_num += 1
# In[8]:
def print_tree(node,value,text):
if not node:
return
else:
if node[1] < value:
print(f'{text} ({value})')
value = node[1]
text = text + node[0]
if not node[3]:
return
else:
for subnode in node[3]:
print_tree(subnode,value,text)
# In[9]:
print()
print('Proper printed version:')
for node in root:
value = node[1]
print_tree(node,value,'')
# In[10]:
tree_data = []
for node in root:
value = node[1]
get_tree_text(node,value,'',tree_data)
# In[11]:
tree_data_set = set([tuple(x) for x in tree_data])
# In[12]:
tree_data = list(tree_data_set)
# In[13]:
sorted_tree_data = []
for value in tree_data:
bits_saved = (len(value[0]) * value[1])*8
cost = len(value[0]) + 2*value[1]
sorted_tree_data.append([value[0],bits_saved,cost,bits_saved/cost])
sorted_tree_data = sorted(sorted_tree_data, key=lambda x: x[3],reverse=True)
# In[14]:
print('Sorted values..')
# In[15]:
for data in sorted_tree_data:
print(data)
# In[ ]:
# In[ ]: