diff --git a/main.py b/main.py
index c42ec3899fb1cb5766cb3d8defb169d28b3acd28..c65e6d3b16f3d2a5ae38e318fb52b98f41e1ee4a 100644
--- a/main.py
+++ b/main.py
@@ -16,6 +16,7 @@ import sys
 import random
 import numpy as np
 import seaborn as sns
+import pandas as pd
 import matplotlib.pyplot as plt
 import itertools
 import os
@@ -138,12 +139,63 @@ def maxent(world, terminal, trajectories):
 
     return reward
 
+def merge_user_log(folder_pathname, user_id, with_feedback, column_to_remove):
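+    """Merge the per-session bn_variables.csv logs of one user into a single
+    summary CSV and return the merged dataframe together with the cumulative
+    episode lengths (row index at which each new session starts)."""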
+    absolute_path = folder_pathname+"/"+str(user_id)+"/"+str(with_feedback)
+    df = pd.DataFrame()
+    if not os.path.exists(absolute_path):
+        print("Error: no folder at path ", absolute_path)
+        return df, []
+
+    # keep only the per-session sub-folders, in a deterministic order
+    sessions_directory = sorted(d for d in os.listdir(absolute_path)
+                                if os.path.isdir(absolute_path+"/"+d))
+    # cumulative row counts: episode_length[i] is the index of the first row of session i+1
+    episode_length = [0]*(len(sessions_directory)+1)
+
+    for i in range(len(sessions_directory)):
+        file_folder = absolute_path+"/"+sessions_directory[i]
+        print("File folder: ", file_folder)
+        files = os.listdir(file_folder)
+
+        for file_name in files:
+            if file_name == "bn_variables.csv":
+                df_ = pd.read_csv(file_folder+"/"+file_name)
+                df = pd.concat([df, df_], ignore_index=True)
+                episode_length[i+1] = episode_length[i]+df_.shape[0]
+
+    if column_to_remove is not None:
+        df = df.drop(column_to_remove, axis=1)
+    df.to_csv(absolute_path + "/summary_bn_variables.csv", index=False)
+    return df, episode_length
+
+def compute_agent_policy(folder_pathname, user_id, with_feedback, state_space, action_space, episode_length):
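+    """Estimate the agent policy from the merged session log by counting, for each
+    (game_state, attempt, previous user_action) state, how often every
+    (agent_feedback, agent_assistance) action was selected."""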
+    # read the columns of interest (game_state, attempt, previous user_action)
+    ep = Episode()
+    df = pd.read_csv(folder_pathname+"/"+str(user_id)+"/"+str(with_feedback)+"/summary_bn_variables.csv")
+    agent_policy_counter = [[0 for a in action_space] for s in state_space]
+    agent_policy_prob = [[0 for a in action_space] for s in state_space]
+    row_t_0 = 0
+    for index, row in df.iterrows():
+        # at the first row of each session the previous user action is reset to 0
+        prev_user_action = 0 if (index == 0 or index in episode_length) else row_t_0
+        state_point = (row['game_state'], row['attempt'], prev_user_action)
+        state_index = ep.state_from_point_to_index(state_space, state_point)
+        action_point = (row['agent_feedback'], row['agent_assistance'])
+        action_index = ep.state_from_point_to_index(action_space, action_point)
+        agent_policy_counter[state_index][action_index] += 1
+        row_t_0 = row['user_action']
+    # normalise the counts into a probability distribution over actions for each state
+    for s in range(len(state_space)):
+        total = sum(agent_policy_counter[s])+0.001
+        agent_policy_prob[s] = [count/total for count in agent_policy_counter[s]]
+
+    return agent_policy_prob
 
 def main():
-    # common style arguments for plotting
-    style = {
-        'border': {'color': 'red', 'linewidth': 0.5},
-    }
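+    # merge the logged sessions of user 1 (recorded with feedback) into a single dataframe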
+    df, episode_length = merge_user_log(folder_pathname="/home/pal/Documents/Framework/GenerativeMutualShapingRL/data",
+                                        user_id=1, with_feedback=True, column_to_remove=None)
 
     #################GENERATE SIMULATION################################
     # SIMULATION PARAMS
@@ -157,7 +209,6 @@ def main():
     user_pref_assistance = 2
     agent_behaviour = "challenge"
 
-
     # define state space struct for the irl algorithm
     episode_instance = Episode()
     # DEFINITION OF THE MDP
@@ -165,7 +216,7 @@ def main():
     attempt = [i for i in range(1, Attempt.counter.value + 1)]
     # +1 (3,_,_) absorbing state
     game_state = [i for i in range(0, Game_State.counter.value + 1)]
-    user_action = [i for i in range(-1, User_Action.counter.value - 1)]
+    user_action = [i for i in range(0, User_Action.counter.value)]
     state_space = (game_state, attempt, user_action)
     states_space_list = list(itertools.product(*state_space))
     state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
@@ -179,6 +230,11 @@ def main():
     initial_state = (1, 1, 0)
     agent_policy = [0 for s in state_space]
 
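+    # estimate the agent policy (action distribution per state) from the logged sessions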
+    agent_policy = compute_agent_policy(folder_pathname="/home/pal/Documents/Framework/GenerativeMutualShapingRL/data/",
+                                        user_id=1, with_feedback=True, state_space=states_space_list,
+                                        action_space=action_space_list, episode_length=episode_length)
+
     #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
     game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
     Sim.simulation(bn_model_user_action=bn_model_user_action,