Help to deal with complex dictionary and list on python

cloudytechi147 · December 1, 2021, 12:02pm

I’ve been working on python to make a program that needs to handle the complex problem from a list of dict. The thing is I need to transform this data into the dictionary and sort it. The input for this function comes from trees. The code I share here is working but takes a long time to run. In here I wanna ask is there any idea to make this function run faster in python? I use python 3.7.3 if you ask. The reason I wanna improve this code is that when I tried to make input data for this function need around 3-4 hours, but to run this function need time around 21-22 hours (this really shock me).

here is the structure of data that I input on below:

[ # list of trees
    [ # list of leaf nodes
        { # dict of spg that contain in leaf
            (tau_1,tau_2):[(ent1,ent2),(ent1,ent3)]
        }
    ]
]

this is the sample data that I work on:

[
    [
        {(7, 8): [(1, 28), (1, 29)]},
        {(7, 8): [(1, 30)]},
        {(7, 8): [(1, 32)]},
        {(7, 8): [(1, 21), (1, 22)]},
        {(7, 8): [(1, 18)]},
        {(7, 8): [(1, 19), (1, 31)]},
        {(7, 8): [(1, 13), (1, 14)]},
        {(7, 8): [(1, 16)]},
        {(7, 8): [(1, 15)]},
        {(7, 8): [(1, 11), (1, 12)]},
        {(7, 8): [(1, 17)]},
        {(7, 8): [(1, 23), (1, 26)]},
        {(7, 8): [(1, 20), (1, 27)]},
        {(7, 8): [(1, 7)]},
        {(7, 8): [(1, 9), (1, 10)]},
        {(7, 8): [(1, 8)]},
        {(7, 8): [(1, 5), (1, 6)]},
        {(7, 8): [(1, 24), (1, 25)]},
        {
            (0, 1): [(1, 2)],
            (1, 2): [(1, 2)],
            (2, 3): [(1, 2)],
            (3, 4): [(1, 2)],
            (4, 5): [(1, 2)],
            (5, 6): [(1, 2)],
            (6, 7): [(1, 2)],
            (7, 8): [(1, 2)],
        },
        {
            (0, 1): [(1, 3), (1, 4)],
            (1, 2): [(1, 3), (1, 4)],
            (2, 3): [(1, 3), (1, 4)],
            (3, 4): [(1, 3), (1, 4)],
            (4, 5): [(1, 3), (1, 4)],
            (5, 6): [(1, 3), (1, 4)],
            (6, 7): [(1, 3), (1, 4)],
            (7, 8): [(1, 3), (1, 4)],
        },
    ],
    [
        {(7, 8): [(2, 28), (2, 29)]},
        {(7, 8): [(2, 30)]},
        {(7, 8): [(2, 32)]},
        {(7, 8): [(2, 21), (2, 22)]},
        {(7, 8): [(2, 18)]},
        {(7, 8): [(2, 19), (2, 31)]},
        {(7, 8): [(2, 13), (2, 14)]},
        {(7, 8): [(2, 16)]},
        {(7, 8): [(2, 24), (2, 25)]},
        {(7, 8): [(2, 23)]},
        {(7, 8): [(2, 26), (2, 27)]},
        {(7, 8): [(2, 20)]},
        {(7, 8): [(2, 15)]},
        {(7, 8): [(2, 11), (2, 12)]},
        {(7, 8): [(2, 17)]},
        {(7, 8): [(2, 9), (2, 10)]},
        {(7, 8): [(2, 1)]},
        {(7, 8): [(2, 3), (2, 4)]},
        {(7, 8): [(2, 7), (2, 8)]},
        {(7, 8): [(2, 5), (2, 6)]},
    ]
]

this is the value of the parameter in case you need to know and tree vs graph:

tau_lenght: 50
ent: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
gamma: 1
tau_start: [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99]
tau_end: [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100]
time_range: 2
json_output: 'output_json'

Here is the code:

import json
def prepare_output(data,tau_lenght,ent,gamma,tau_start,tau_end,time_range,json_output):
    # step 1:
    # change data to dict where pair of tau became key
    # list of group of entity into sorted based on o_r
    spgs = {} # collect all spg per 2 tau based on entity
    for tau in range(tau_lenght-1): # for every tau
        temp_tau = [] # containing result per 2 tau
        for o_r in ent: # for every o_r
            temp = [] # contain all SPG whic contain o_r on tau
            for lst in data: # lst is the result on every decision tree
                for leaf in lst: # leaf from decision tree
                    if (tau,tau+1) in leaf.keys(): # looking for pair tau existence
                        # if leaf contain entity o_r
                        if list_contain(leaf[(tau,tau+1)],o_r):
                            # adding new crew candidate
                            # helper.list_of_ent is working for change [(1,2),(2,3)] into [1,2,3]
                            temp.append(helper.list_of_ent(leaf[(tau,tau+1)]))
            if temp: # if temp not empty
                temp_tau.append(temp) # adding temp into temp_tau
        if temp_tau: # if temp_tau not empty
            # connect all candidate into one spg
            cur_tau = []
            for lst in temp_tau:
                for pairs in lst:
                    cur_tau.append(pairs)
            spgs[(tau,tau+1)] = set(tuple(i) for i in cur_tau) # menambahkan 
            # temp_tau into res
    
    # step 2:
    # change into dict which include group of ent as key,
    # and list of (tau1,tau2) as value
    result_per_group = {}
    for tau in spgs.keys():
        for group in spgs[tau]:
            if group in result_per_group.keys():
                result_per_group[group].append(tau)
            else:
                result_per_group[group] = [tau]
    
    # step 3:
    # remove all candidate > gamma
    for group in result_per_group.keys():
        lst = result_per_group[group]
        for tau_idx in range(len(lst)-1):
            tau1_end = tau_end[tau_idx]
            tau2_start = tau_start[tau_idx+1]
            if (tau2_start - tau1_end) > (time_range * gamma):
                result_per_group.pop(group)
                break
    
    global output
    # helper.dict_str_key is fuction I made to convert key of dict into string
    output['output'] = helper.dict_str_key(result_per_group)
    # to convert into json file
    helper.to_json(json_output,output)

# this some function from helper that I use in here
def list_of_ent(lst):
    res = set()
    for e1,e2 in lst:
        res.add(e1)
        res.add(e2)
    return sorted(list(res))

def dict_str_key(data):
    data_str_key = {}
    for key in data.keys():
        data_str_key[str(key)] = data[key]
    return data_str_key

def to_json(name,data):
    name = 'output/'+name+'.json'
    with open(name, 'w') as file:
        json.dump(data, file)

I’ve also asked the same question on StackOverflow - Link