From 59c6de46d37e67e3d1c7ba8cc4c771eabc1b34f5 Mon Sep 17 00:00:00 2001
From: Antonio Andriella <aandriella@iri.upc.edu>
Date: Wed, 14 Oct 2020 21:11:44 +0200
Subject: [PATCH] Working simulation of BN with update
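
Replace the per-(game_state, attempt) policy grid with a state-indexed
assistance generator, add build_model_from_data() to fit a shared
caregiver/real-user model from logged CSV data, and drop the react-time
bookkeeping so the end-of-episode BN update only covers user_action,
attempt and game_state.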

---
 simulation.py | 421 +++++++++++++++++++++-----------------------------
 1 file changed, 174 insertions(+), 247 deletions(-)

diff --git a/simulation.py b/simulation.py
index 33aa674..421eb26 100644
--- a/simulation.py
+++ b/simulation.py
@@ -8,95 +8,96 @@ from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_Rea
 import bn_functions
 import utils
 from episode import Episode
+import pandas as pd
+
+def build_model_from_data(csv_filename, dag_filename, dag_model=None):
+    print("/************************************************************/")
+    print("Init model")
+    DAG = bnlearn.import_DAG(dag_filename)
+    df_caregiver = bnlearn.sampling(DAG, n=10000)
+
+    print("/************************************************************/")
+    print("real_user Model")
+    DAG_real_user_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
+    df_real_user = pd.read_csv(csv_filename)
+    DAG_real_user = bnlearn.parameter_learning.fit(DAG_real_user_no_cpd, df_real_user, methodtype='bayes')
+    df_real_user = bnlearn.sampling(DAG_real_user, n=10000)
+    print("/************************************************************/")
+    print("Shared knowledge")
+    DAG_shared_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
+    shared_knowledge = [df_real_user_samples, df_caregiver]
+    conc_shared_knowledge = pd.concat(shared_knowledge)
+    DAG_shared = bnlearn.parameter_learning.fit(DAG_shared_no_cpd, conc_shared_knowledge)
+    #df_conc_shared_knowledge = bnlearn.sampling(DAG_shared, n=10000)
+    return DAG_shared
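+
+# Usage sketch (the csv path is a placeholder; the .bif is the persona model
+# referenced by the commented-out driver at the bottom of this file):
+#   DAG_shared = build_model_from_data(csv_filename="data/real_user_log.csv",
+#                                      dag_filename="bn_persona_model/persona_model_test.bif")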
+
+
+def generate_agent_assistance(preferred_assistance, agent_behaviour, current_state, state_space, action_space):
+    episode = Episode()
+    game_state, attempt, prev_user_outcome = episode.state_from_index_to_point(state_space, current_state)
+    robot_action = 0
+    # an agent action is a point (feedback in {0, 1}, assistance level in [0, 5]);
+    # agent_behaviour is either "challenge" or "help"
+    print(game_state, attempt, prev_user_outcome)
+    # deviation from the user's preferred assistance level, keyed by
+    # (previous user outcome, agent behaviour) and then by attempt number
+    assistance_offsets = {
+        (0, "challenge"): {2: -1, 3: -2, 4: -3},
+        (0, "help"): {2: 0, 3: 1, 4: 2},
+        (-1, "challenge"): {2: 1, 3: 2, 4: 2},
+        (-1, "help"): {2: 0, 3: -1, 4: -3},
+    }
+    if attempt == 1:
+        robot_action = episode.state_from_point_to_index(action_space,
+                                                         (random.randint(0, 1), 0))
+    elif (prev_user_outcome, agent_behaviour) in assistance_offsets and \
+            attempt in assistance_offsets[(prev_user_outcome, agent_behaviour)]:
+        offset = assistance_offsets[(prev_user_outcome, agent_behaviour)][attempt]
+        assistance = min(max(0, preferred_assistance + offset), 5)
+        print("behaviour {}, attempt {}, assistance {}".format(agent_behaviour, attempt, assistance))
+        robot_action = episode.state_from_point_to_index(action_space,
+                                                         (random.randint(0, 1), assistance))
+
+    agent_feedback, agent_assistance = episode.state_from_index_to_point(action_space, robot_action)
+
+    return agent_feedback, agent_assistance
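+
+# Usage sketch (hypothetical state index; states_space_list/action_space_list
+# are built as in the commented-out driver at the bottom of this file):
+#   feedback, assistance = generate_agent_assistance(
+#       preferred_assistance=2, agent_behaviour="help", current_state=10,
+#       state_space=states_space_list, action_space=action_space_list)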
 
-#
-# def choose_next_states(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object,
-#                        selected_agent_assistance_action,
-#                        bn_model_user_action, var_user_action_target_action):
-#
-#     def get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object):
-#
-#         next_state = []
-#
-#         #correct move on the last state of the bin
-#         if (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0<max_attempt_per_object:
-#             next_state.append((game_state_t0+1, n_attempt_t0+1))
-#         #correct state bu still in the bin
-#         elif task_progress == 0 or task_progress == 2 and n_attempt_t0<max_attempt_per_object:
-#             next_state.append((game_state_t0, n_attempt_t0+1))
-#         elif (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0>=max_attempt_per_object:
-#            assert "you reach the maximum number of attempt the agent will move it for you"
-#         elif task_progress == 0 or task_progress == 2 and n_attempt_t0>=max_attempt_per_object:
-#             assert "you reach the maximum number of attempt the agent will move it for you"
-#
-#         return next_state
-#
-#     next_state = get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object)
-#     query_answer_probs = []
-#     for t in next_state:
-#         vars_user_evidence = {"game_state_t0": game_state_t0,
-#                           "attempt_t0": n_attempt_t0 - 1,
-#                           "robot_assistance": selected_agent_assistance_action,
-#                           "game_state_t1": t[0],
-#                           "attempt_t1": t[1],
-#                           }
-#
-#         query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
-#                                                                 infer_variable=var_user_action_target_action,
-#                                                                 evidence_variables=vars_user_evidence)
-#         query_answer_probs.append(query_user_action_prob)
-#
-#
-#     #do the inference here
-#     #1. check given the current_state which are the possible states
-#     #2. for each of the possible states get the probability of user_action
-#     #3. select the state with the most higher action and execute it
-#     #4. return user_action
-#
 
-def generate_agent_assistance(preferred_assistance, agent_behaviour, n_game_state, n_attempt, alpha_action=0.1):
-    agent_policy = [[0 for j in range(n_attempt)] for i in range(n_game_state)]
-    previous_assistance = -1
-    def get_alternative_action(agent_assistance, previous_assistance, agent_behaviour, alpha_action):
-        agent_assistance_res = agent_assistance
-        if previous_assistance == agent_assistance:
-            if agent_behaviour == "challenge":
-                if random.random() > alpha_action:
-                    agent_assistance_res = min(max(0, agent_assistance-1), 5)
-                else:
-                    agent_assistance_res = min(max(0, agent_assistance), 5)
-            else:
-                if random.random() > alpha_action:
-                    agent_assistance_res = min(max(0, agent_assistance + 1), 5)
-                else:
-                    agent_assistance_res = min(max(0, agent_assistance), 5)
-        return agent_assistance_res
-
-
-    for gs in range(n_game_state):
-        for att in range(n_attempt):
-            if att == 0:
-                if random.random()>alpha_action:
-                    agent_policy[gs][att] = preferred_assistance
-                    previous_assistance = agent_policy[gs][att]
-                else:
-                    if random.random()>0.5:
-                        agent_policy[gs][att] = min(max(0, preferred_assistance-1),5)
-                        previous_assistance = agent_policy[gs][att]
-                    else:
-                        agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5)
-                        previous_assistance = agent_policy[gs][att]
-            else:
-                if agent_behaviour == "challenge":
-                    agent_policy[gs][att] = min(max(0, preferred_assistance-1), 5)
-                    agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action)
-                    previous_assistance = agent_policy[gs][att]
-                else:
-                    agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5)
-                    agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action)
-                    previous_assistance = agent_policy[gs][att]
-
-    return agent_policy
 
 
 def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter,
@@ -167,11 +168,12 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
 
 
 
-def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action,
-               user_memory_name, user_memory_value, user_attention_name, user_attention_value,
-               user_reactivity_name, user_reactivity_value,
-               task_progress_t0_name, task_progress_t1_name, game_attempt_t0_name, game_attempt_t1_name,
-               agent_assistance_name, agent_policy,
+def simulation(bn_model_user_action, var_user_action_target_action,
+               game_state_bn_name, attempt_bn_name,
+               agent_assistance_bn_name, agent_feedback_bn_name,
+               user_pref_assistance,
+               agent_behaviour,
+               agent_policy,
                state_space, action_space,
                epochs=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
     '''
@@ -183,33 +185,15 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
         n_timeout_per_episode:
 
     '''
-    #TODO: remove agent_assistance_vect and agent_feedback_vect
-
-    #metrics we need, in order to compute afterwords the belief
-
-    agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)]
-    agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)]
-
-    attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)]
-    game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)]
-    agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in  range(User_React_time.counter.value)]
-    agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in   range(User_React_time.counter.value)]
-
-    game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in   range(Agent_Feedback.counter.value)]
-    attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in   range(Agent_Feedback.counter.value)]
-    game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in
-                                             range(Agent_Assistance.counter.value)]
-    attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in
-                                          range(Agent_Assistance.counter.value)]
-
 
-    user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] for l in range(Game_State.counter.value)] for j in
-                                               range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
-    user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in
-                                            range(Agent_Assistance.counter.value)]
-    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
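+    # per-episode evidence counters, indexed [feedback][assistance][user_action],
+    # [user_action][attempt] and [user_action][game_state]; they are folded into
+    # the BN CPDs at the end of each episode via bn_functions.update_cpds_tables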
+    user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
+                                                           for j in range(Agent_Assistance.counter.value)]
+                                                           for l in range(Agent_Feedback.counter.value)
+                                                        ]
+    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
+                                       range(User_Action.counter.value)]
     game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
-                                     range(User_Action.counter.value)]
+                                          range(User_Action.counter.value)]
 
     #output variables:
     n_correct_per_episode = [0]*epochs
@@ -243,22 +227,11 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
 
         #The following variables are used to update the BN at the end of the episode
         user_action_dynamic_variables = {
-                                        'attempt_t1': attempt_counter_per_user_action,
-                                        'game_state_t1': game_state_counter_per_user_action,
-                                        'user_action': user_action_per_game_state_attempt_counter_agent_assistance
+                                        'attempt': attempt_counter_per_user_action,
+                                        'game_state': game_state_counter_per_user_action,
+                                        'user_action': user_action_per_robot_feedback_robot_assistance
                                         }
 
-        user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time,
-                             'game_state': game_state_counter_per_react_time,
-                             'agent_assistance': agent_assistance_per_react_time,
-                             'agent_feedback': agent_feedback_per_react_time}
-
-        agent_assistance_dynamic_variables = {'attempt': attempt_counter_per_agent_assistance,
-                                  'game_state': game_state_counter_per_agent_assistance}
-
-        agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback,
-                                  'game_state': game_state_counter_per_agent_feedback}
-
         #data structure to memorise the sequence of states  (state, action, next_state)
         episode = []
         selected_user_action = 0
@@ -267,60 +240,37 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
         while(task_progress_counter<=task_complexity):
 
             current_state = (game_state_counter, attempt_counter, selected_user_action)
+            current_state_index = ep.state_from_point_to_index(state_space, current_state)
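+            # with no precomputed policy, pick the agent action on the fly from
+            # the user's preferred assistance level and the agent behaviour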
+            if not agent_policy:
+                selected_agent_feedback_action, selected_agent_assistance_action = \
+                    generate_agent_assistance(preferred_assistance=user_pref_assistance,
+                                              agent_behaviour=agent_behaviour,
+                                              current_state=current_state_index,
+                                              state_space=state_space,
+                                              action_space=action_space
+                                              )
+            else:
+                selected_agent_feedback_action, selected_agent_assistance_action = \
+                    ep.state_from_index_to_point(action_space, agent_policy[current_state_index])
 
-            selected_agent_assistance_action = agent_policy[game_state_counter][attempt_counter-1]#random.randint(0,5)
-            selected_agent_feedback_action = 0#random.randint(0,1)
 
             #counters for plots
             n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1
             current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action)
-
-            print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action))
-
+            print("agent_assistance {}, agent_feedback {},  attempt {}, game {}".format(selected_agent_assistance_action, selected_agent_feedback_action, attempt_counter, game_state_counter))
 
             ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
-            #compare the real user with the estimated Persona and returns a user action (0, 1a, 2)
-
             #return the user action in this state based on the Persona profile
-            vars_user_evidence = {    task_progress_t0_name: game_state_counter,
-                                      game_attempt_t0_name: attempt_counter - 1,
-                                      task_progress_t1_name: game_state_counter,
-                                      game_attempt_t1_name: attempt_counter - 1,
-                                      agent_assistance_name: selected_agent_assistance_action,
+            vars_user_evidence = {    game_state_bn_name: game_state_counter,
+                                      attempt_bn_name: attempt_counter - 1,
+                                      agent_assistance_bn_name: selected_agent_assistance_action,
+                                      agent_feedback_bn_name: selected_agent_feedback_action,
                                       }
 
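+            # infer P(user_action | game_state, attempt, assistance, feedback)
+            # from the BN, then sample the simulated user's move from it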
-            query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
+            query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
                                                                         infer_variable=var_user_action_target_action,
                                                                         evidence_variables=vars_user_evidence)
-            # query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time,
-            #                                                                 infer_variable=var_user_react_time_target_action,
-            #                                                                 evidence_variables=vars_user_evidence)
-            #
-            #
 
             selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
-            # selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values)
-            # counters for plots
-            # n_react_time_per_episode[e][selected_user_react_time] += 1
-
-            #updates counters for user action
-
-            user_action_per_game_state_attempt_counter_agent_assistance[selected_agent_assistance_action][attempt_counter-1][game_state_counter][selected_user_action] += 1
-            attempt_counter_per_user_action[selected_user_action][attempt_counter-1] += 1
-            game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
-            user_action_per_agent_assistance[selected_agent_assistance_action][selected_user_action] += 1
-
-            #update counter for user react time
-            # agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1
-            # attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1
-            # game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1
-            # agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1
-            #update counter for agent feedback
-            game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1
-            attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter-1] += 1
-            #update counter for agent assistance
-            game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1
-            attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter-1] += 1
 
             # updates counters for simulation
             # remap user_action index
@@ -357,35 +307,17 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
 
         #update user models
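+        # fold the episode counters (user_action_dynamic_variables) into the
+        # user-action CPDs via bn_functions.update_cpds_tables (alpha_learning
+        # presumably scales how strongly the new counts shift the tables)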
         bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
-        bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables)
-        #update agent models
-
-        print("user_given_game_attempt:", bn_model_user_action['model'].cpds[0].values)
-        print("user_given_robot:", bn_model_user_action['model'].cpds[5].values)
-        print("game_user:", bn_model_user_action['model'].cpds[3].values)
-        print("attempt_user:", bn_model_user_action['model'].cpds[2].values)
 
         #reset counter
-        user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)]
-                                                                         for l in range(Game_State.counter.value)] for j in
-                                                                        range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
-        user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in
-                                            range(Agent_Assistance.counter.value)]
+        user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
+                                                            for j in range(Agent_Assistance.counter.value)]
+                                                           for l in range(Agent_Feedback.counter.value)
+                                                           ]
         attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
                                            range(User_Action.counter.value)]
         game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
                                               range(User_Action.counter.value)]
 
-        attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in
-                                          range(User_React_time.counter.value)]
-        game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in
-                                             range(User_React_time.counter.value)]
-        agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in
-                                         range(User_React_time.counter.value)]
-        agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in
-                                           range(User_React_time.counter.value)]
-
-
         #for plots
         n_correct_per_episode[e] = correct_move_counter
         n_wrong_per_episode[e] = wrong_move_counter
@@ -408,57 +340,52 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
 #############################################################################
 
 
-agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.5)
-
-# SIMULATION PARAMS
-epochs = 20
-scaling_factor = 1
-# initialise the agent
-bn_model_caregiver_assistance = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_assistive_model.bif')
-bn_model_caregiver_feedback = None#bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_feedback_model.bif')
-bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
-bn_model_user_react_time = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/user_react_time_model.bif')
-
-# initialise memory, attention and reactivity variables
-persona_memory = 0;
-persona_attention = 0;
-persona_reactivity = 1;
-
-# define state space struct for the irl algorithm
-episode_instance = Episode()
-# DEFINITION OF THE MDP
-# define state space struct for the irl algorithm
-attempt = [i for i in range(1, Attempt.counter.value + 1)]
-# +1 (3,_,_) absorbing state
-game_state = [i for i in range(0, Game_State.counter.value + 1)]
-user_action = [i for i in range(-1, User_Action.counter.value - 1)]
-state_space = (game_state, attempt, user_action)
-states_space_list = list(itertools.product(*state_space))
-state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
-agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
-agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
-action_space = (agent_feedback_action, agent_assistance_action)
-action_space_list = list(itertools.product(*action_space))
-action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
-terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in
-                  range(len(user_action))]
-initial_state = (1, 1, 0)
-
-#1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
-
-
-game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
-    simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
-                   bn_model_user_react_time=bn_model_user_react_time,
-                   var_user_react_time_target_action=['user_react_time'],
-                   user_memory_name="memory", user_memory_value=persona_memory,
-                   user_attention_name="attention", user_attention_value=persona_attention,
-                   user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
-                   task_progress_t0_name="game_state_t0", task_progress_t1_name="game_state_t1",
-                   game_attempt_t0_name="attempt_t0", game_attempt_t1_name="attempt_t1",
-                   agent_assistance_name="agent_assistance", agent_policy=agent_policy,
-                   state_space=states_space_list, action_space=action_space_list,
-                   epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)
-
-utils.plot2D_game_performance("/home/pal/Documents/Framework/bn_generative_model/results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
-utils.plot2D_assistance("/home/pal/Documents/Framework/bn_generative_model/results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)
+# agent_policy = []  # empty list: simulation() generates agent actions on the fly
+#
+# # SIMULATION PARAMS
+# epochs = 20
+# scaling_factor = 1
+# # initialise the agent
+# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
+#
+# # initialise memory, attention and reactivity variables
+# persona_memory = 0
+# persona_attention = 0
+# persona_reactivity = 1
+#
+# # define state space struct for the irl algorithm
+# episode_instance = Episode()
+# # DEFINITION OF THE MDP
+# # define state space struct for the irl algorithm
+# attempt = [i for i in range(1, Attempt.counter.value + 1)]
+# # +1 (3,_,_) absorbing state
+# game_state = [i for i in range(0, Game_State.counter.value + 1)]
+# user_action = [i for i in range(-1, User_Action.counter.value - 1)]
+# state_space = (game_state, attempt, user_action)
+# states_space_list = list(itertools.product(*state_space))
+# state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
+# agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
+# agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
+# action_space = (agent_feedback_action, agent_assistance_action)
+# action_space_list = list(itertools.product(*action_space))
+# action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
+# terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in
+#                   range(len(user_action))]
+# initial_state = (1, 1, 0)
+#
+# #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
+#
+#
+# game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
+#     simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
+#                    game_state_bn_name="game_state",
+#                    attempt_bn_name="attempt",
+#                    agent_assistance_bn_name="agent_assistance",
+#                    agent_feedback_bn_name="agent_feedback",
+#                    user_pref_assistance=2,
+#                    agent_behaviour="help",
+#                    agent_policy=agent_policy,
+#                    state_space=states_space_list, action_space=action_space_list,
+#                    epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)
+#
+# utils.plot2D_game_performance("results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
+# utils.plot2D_assistance("results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)
-- 
GitLab