#!/usr/bin/env python
# coding: utf-8

# In[3]:


import pickle
import os
import sys
from datetime import datetime


# In[ ]:

def update_ranges(range_low, range_high):
    """Shift out the leading decimal digits shared by both interval bounds.

    While the first decimal digit of ``range_low`` and ``range_high`` is the
    same, that digit is recorded and both bounds are rescaled by multiplying
    them by 10 and keeping only the fractional part.

    Args:
        range_low: Lower bound of the current interval.
        range_high: Upper bound of the current interval.

    Returns:
        A tuple ``(range_low, range_high, digits)`` holding the rescaled
        bounds and the emitted digits (one-character strings, in order).
        ``digits`` is empty when the bounds share no leading digit or when
        the bounds are equal.
    """
    digits = []
    # Identical bounds share every digit forever; without this check the loop
    # would never terminate and ``digits`` would grow without bound.
    while range_low != range_high:
        leading = int(range_low * 10)
        if leading != int(range_high * 10):
            break
        digits.append(str(leading))
        # Drop the emitted digit and keep the remaining fraction.
        range_low = range_low * 10 % 1
        range_high = range_high * 10 % 1
    return range_low, range_high, digits

def reset_ranges(counter_dict, range_low, range_high):
    """Assign each prediction character a sub-interval of the given range.

    ``counter_dict`` is a three-level mapping
    (first char -> second char -> prediction char) whose leaves are lists of
    the form ``[count, fraction, (low, high)]``. For every two-character
    context the prediction characters are visited in sorted order and given
    consecutive, non-overlapping slices of ``[range_low, range_high]`` whose
    widths are ``fraction * (range_high - range_low)``. The slice is written
    into index 2 of each leaf list; the mapping is modified in place and
    nothing is returned.
    """
    span = range_high - range_low
    for by_second_char in counter_dict.values():
        for predictions in by_second_char.values():
            # Each context starts handing out slices from the bottom again.
            cursor = range_low
            for symbol in sorted(predictions):
                entry = predictions[symbol]
                upper = cursor + entry[1] * span
                entry[2] = (cursor, upper)
                cursor = upper

# In[4]:
def get_range(counter_dict, first_char, second_char, prediction_char, low, high):
    """Map a prediction character's stored slice onto the interval [low, high].

    The slice ``(static_low, static_high)`` is read from index 2 of
    ``counter_dict[first_char][second_char][prediction_char]`` and scaled by
    the width of the current interval, then offset by ``low``.

    Returns:
        The narrowed ``(new_low, new_high)`` pair.
    """
    entry = counter_dict[first_char][second_char][prediction_char]
    rel_low, rel_high = entry[2]
    width = high - low
    return rel_low * width + low, rel_high * width + low
    

def _digits_to_int(digits):
    """Convert a string of decimal digits to an int, chunk by chunk.

    Recent CPython versions reject ``int(s)`` for very long decimal strings
    (4300 digits by default), so the number is assembled from short pieces.

    Raises:
        ValueError: If ``digits`` is empty or contains a non-digit character.
    """
    if not digits.isdigit():
        raise ValueError('invalid digit string for integer conversion: %r' % digits[:20])
    chunk_size = 600  # below the smallest conversion limit CPython lets users configure
    number = 0
    for start in range(0, len(digits), chunk_size):
        chunk = digits[start:start + chunk_size]
        # Scale by the chunk's length so leading zeros inside a chunk keep their place.
        number = number * 10 ** len(chunk) + int(chunk)
    return number


def compress(filepath):
    """Compress a UTF-8 text file into pickle files under ``compression_files``.

    The directory ``compression_files`` is created in the current working
    directory (if missing) and four files are written into it:

    * ``dictionary.pickle``    - nested dict of raw counts,
      ``counts[first_char][second_char][prediction_char]``, for every three
      consecutive characters of the text.
    * ``integer_value.pickle`` - integer built from the digits emitted while
      narrowing the interval, followed by up to two digits of the final
      interval's midpoint.
    * ``genesis_chars.pickle`` - the first two characters of the text.
    * ``len_of_text``          - ``len(text) - 2`` (number of encoded characters).

    Args:
        filepath: Path of the text file to compress.

    Returns:
        None. A message is printed on success, when the file cannot be read,
        and when the file holds fewer than two characters; in the two failure
        cases nothing is written.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            text = f.read()
    except (OSError, ValueError):
        # ValueError covers undecodable bytes (UnicodeDecodeError) and
        # malformed paths such as ones containing a NUL character.
        print('Could not read the file!')
        return

    # The two leading characters seed the context; without them there is
    # nothing to encode and indexing text[1] would fail.
    if len(text) < 2:
        print('File must contain at least two characters to compress!')
        return

    out_dir = os.path.join(os.getcwd(), 'compression_files')
    os.makedirs(out_dir, exist_ok=True)

    # Count how often each character follows each two-character context.
    counter_dict = {}
    for first_char, second_char, prediction_char in zip(text, text[1:], text[2:]):
        followers = counter_dict.setdefault(first_char, {}).setdefault(second_char, {})
        followers[prediction_char] = followers.get(prediction_char, 0) + 1

    # Save the dictionary while it still holds plain counts.
    with open(os.path.join(out_dir, 'dictionary.pickle'), 'wb') as file:
        pickle.dump(counter_dict, file, protocol=pickle.HIGHEST_PROTOCOL)

    # Replace each count with [count, fraction within its context, placeholder range].
    for by_second_char in counter_dict.values():
        for followers in by_second_char.values():
            context_total = sum(followers.values())
            for prediction_char, count in list(followers.items()):
                followers[prediction_char] = [count, count / context_total, (0, 0)]

    reset_ranges(counter_dict, range_low=0.0, range_high=1.0)

    # NOTE(review): the interval is tracked with Python floats; how much
    # precision survives for long inputs has not been verified here.
    range_low = 0.0
    range_high = 1.0
    emitted_digits = []
    for first_char, second_char, prediction_char in zip(text, text[1:], text[2:]):
        range_low, range_high = get_range(
            counter_dict, first_char, second_char, prediction_char, range_low, range_high
        )
        # Shift out digits both bounds already agree on.
        range_low, range_high, shared = update_ranges(range_low, range_high)
        emitted_digits.extend(shared)

    # Finish with (up to) two digits of a value inside the final interval.
    midpoint = (range_low + range_high) / 2
    digit_string = ''.join(emitted_digits) + str(midpoint)[2:4]
    # NOTE(review): storing the digits as an int drops any leading zeros;
    # confirm the decompressor accounts for that.
    tostore = _digits_to_int(digit_string)

    with open(os.path.join(out_dir, 'integer_value.pickle'), 'wb') as f:
        pickle.dump(tostore, f)
    with open(os.path.join(out_dir, 'genesis_chars.pickle'), 'wb') as f:
        pickle.dump(text[:2], f)
    with open(os.path.join(out_dir, 'len_of_text'), 'wb') as f:
        pickle.dump(len(text) - 2, f)

    print('Compression successful!')


# In[5]:


if __name__ == '__main__':
    # Expect exactly one command-line argument: the file to compress.
    if len(sys.argv) != 2:
        print('Please give input file path! (or just name, if file in current directory)')
        # sys.exit is always available, unlike the site-provided exit() helper,
        # and the non-zero status reports the usage error to the caller.
        sys.exit(1)
    filepath = os.path.abspath(sys.argv[1])
    compress(filepath)

# In[ ]: