#!/usr/bin/env python
# coding: utf-8
"""Decode a text that was compressed with a two-character-context interval coder.

The compressed form is a directory holding four pickled files:

* ``dictionary.pickle``    - nested dict ``{c1: {c2: {next_char: count}}}``
* ``integer_value.pickle`` - the encoded value (its decimal digits are the
  fractional digits of the code point)
* ``genesis_chars.pickle`` - the first two characters of the text
* ``len_of_text``          - number of characters to decode after those two

Running the script writes the decoded text to ``output.txt`` in the current
working directory.
"""

import pickle
import os
import sys
from datetime import datetime

# Number of decimal digits of the encoded value that are looked at per step.
_VALUE_WINDOW_DIGITS = 19


def update_ranges(range_low, range_high):
    """Shift out the leading decimal digits shared by both interval bounds.

    While the first fractional digit of ``range_low`` and ``range_high`` is
    the same, that digit is recorded and both bounds are multiplied by ten
    and reduced modulo one.

    Returns:
        ``(range_low, range_high, digits)`` where ``digits`` is the list of
        shifted-out digits as one-character strings (possibly empty).
    """
    digits = []
    # A collapsed interval (low == high) shares *every* digit, so without the
    # inequality guard this loop would never terminate.
    while (range_low != range_high
           and int(range_low * 10) == int(range_high * 10)):
        digits.append(str(int(range_low * 10)))
        range_low = (range_low * 10) % 1
        range_high = (range_high * 10) % 1
    return range_low, range_high, digits


def reset_ranges(counter_dict, range_low, range_high):
    """Assign each prediction character its sub-interval, in place.

    For every ``(first_char, second_char)`` context the prediction characters
    are visited in sorted order and given consecutive intervals starting at
    ``range_low``; each interval's width is the entry's probability
    (``entry[1]``) times ``range_high - range_low``.  The interval is stored
    as a ``(low, high)`` tuple in ``entry[2]``.
    """
    diff = range_high - range_low
    for second_level in counter_dict.values():
        for predictions in second_level.values():
            low = range_low
            for prediction_char in sorted(predictions):
                entry = predictions[prediction_char]
                high = low + entry[1] * diff
                entry[2] = (low, high)
                low = high


def decode_from_dict(counter_dict, sorted_keys, value, genesis_chars, low, high):
    """Binary-search the prediction whose scaled interval contains ``value``.

    Args:
        counter_dict: model table as prepared by ``reset_ranges``.
        sorted_keys: output of ``get_sorted_keys`` for the same table.
        value: the code point to locate.
        genesis_chars: the two context characters (indexable, length >= 2).
        low, high: bounds of the current interval.

    Returns:
        ``(range_low, range_high, char)`` for the matching prediction, or
        ``(-1, -1, None)`` after printing a message when nothing matches.
    """
    first_char, second_char = genesis_chars[0], genesis_chars[1]
    keys = sorted_keys[first_char][second_char]
    low_idx = 0
    high_idx = len(keys) - 1
    while low_idx <= high_idx:
        mid = (low_idx + high_idx) // 2
        range_low, range_high = get_range(
            counter_dict, first_char, second_char, keys[mid], low, high)
        if range_low <= value <= range_high:
            return range_low, range_high, keys[mid]
        if range_high <= value:
            low_idx = mid + 1
        else:
            high_idx = mid - 1
    print('Not found... value = ', value)
    return -1, -1, None


def get_sorted_keys(counter_dict):
    """Return ``{c1: {c2: [sorted prediction chars]}}`` for ``counter_dict``."""
    return {
        first_char: {
            second_char: sorted(counter_dict[first_char][second_char])
            for second_char in sorted(counter_dict[first_char])
        }
        for first_char in sorted(counter_dict)
    }


def get_range(counter_dict, first_char, second_char, prediction_char, low, high):
    """Scale a prediction's stored interval into the interval ``[low, high]``.

    The stored ``(static_low, static_high)`` pair is read from index 2 of the
    prediction's entry and mapped linearly onto ``[low, high]``.
    """
    static_low, static_high = counter_dict[first_char][second_char][prediction_char][2]
    diff = high - low
    new_low = (static_low * diff) + low
    new_high = (static_high * diff) + low
    return new_low, new_high


def _load_pickle(directory, filename):
    """Unpickle and return the object stored in ``directory/filename``."""
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at compression directories that come from a trusted source.
    with open(os.path.join(directory, filename), 'rb') as f:
        return pickle.load(f)


def uncompress(compression_files_dir):
    """Decode the text stored in ``compression_files_dir``.

    Loads the model and encoded value from the directory, decodes up to
    ``len_of_text`` characters (stopping early if a lookup fails) and writes
    the two genesis characters followed by the decoded characters to
    ``output.txt`` in the current working directory.  The loaded text length
    is printed before decoding starts.
    """
    abs_path = os.path.abspath(compression_files_dir)

    # Turn each raw count into [count, probability, placeholder interval].
    counter_dict = _load_pickle(abs_path, 'dictionary.pickle')
    for second_level in counter_dict.values():
        for predictions in second_level.values():
            total = sum(predictions.values())
            for prediction_char in list(predictions):
                occurrences = predictions[prediction_char]
                predictions[prediction_char] = [
                    occurrences, occurrences / total, (0, 0)]

    value_str = str(_load_pickle(abs_path, 'integer_value.pickle'))
    genesis_chars = _load_pickle(abs_path, 'genesis_chars.pickle')
    len_of_text = _load_pickle(abs_path, 'len_of_text')

    value = float('0.' + value_str[:_VALUE_WINDOW_DIGITS])

    reset_ranges(counter_dict, 0.0, 1.0)
    sorted_keys = get_sorted_keys(counter_dict)

    count = 0  # digits of value_str consumed so far
    output = [genesis_chars[0], genesis_chars[1]]
    range_low = 0.0
    range_high = 1.0
    print(len_of_text)
    for _ in range(len_of_text):
        range_low, range_high, key = decode_from_dict(
            counter_dict, sorted_keys, value, genesis_chars, range_low, range_high)
        if range_low == range_high == -1:
            break
        output.append(key)
        # Slide the two-character context forward by one.
        genesis_chars = genesis_chars[1] + key
        range_low, range_high, ls = update_ranges(range_low, range_high)
        if ls:
            # The bounds dropped len(ls) leading digits; advance the window
            # into the encoded digit string by the same amount.
            count += len(ls)
            value = float(
                '0.' + value_str[count:count + _VALUE_WINDOW_DIGITS])

    # Join once, after decoding, instead of on every iteration.
    output_str = ''.join(output)
    with open(os.path.join(os.getcwd(), 'output.txt'), 'w', encoding='utf-8') as f:
        f.write(output_str)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Please give path to directory containing compression files!')
        # sys.exit is always available; the bare exit() helper comes from the
        # site module and may be missing (e.g. python -S).
        sys.exit(1)
    filepath = os.path.abspath(sys.argv[1])
    uncompress(filepath)