diff --git a/simulation.py b/simulation.py
index 421eb26d2ce84ff52959e154e6189bc6ad4027e3..27e1eb769e35115b27dd5633667f9eae267444bc 100644
--- a/simulation.py
+++ b/simulation.py
@@ -34,14 +34,13 @@ def build_model_from_data(csv_filename, dag_filename, dag_model=None):
 
 def generate_agent_assistance(preferred_assistance, agent_behaviour, current_state, state_space, action_space):
     episode = Episode()
-    game_state, attempt, prev_user_outcome = episode.state_from_index_to_point(state_space, current_state)
+    game_state, attempt, prev_user_action = episode.state_from_index_to_point(state_space, current_state)
     robot_action = 0
     #agent_behaviour is a tuple first item is the feedback, second item is the robot assistance
-    print(game_state,attempt, prev_user_outcome)
+    print(game_state, attempt, prev_user_action)
     if attempt == 1:
-        robot_action = episode.state_from_point_to_index(action_space,
-                                                                        (random.randint(0, 1), 0))
-    elif attempt!=1 and prev_user_outcome == 0:
+        robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), 0))
+    elif attempt != 1 and prev_user_action == 0:
         if attempt == 2 and agent_behaviour == "challenge":
             robot_action = episode.state_from_point_to_index(action_space,
             (random.randint(0,1),min(max(0, preferred_assistance-1), 5)))
@@ -67,7 +66,7 @@ def generate_agent_assistance(preferred_assistance, agent_behaviour, current_sta
             (random.randint(0, 1), min(max(0, preferred_assistance+2), 5)))
             print("hatt4")
 
-    elif attempt!=1 and prev_user_outcome == -1:
+    elif attempt != 1 and prev_user_action == -1:
         if attempt == 2 and agent_behaviour == "challenge":
             robot_action = episode.state_from_point_to_index(action_space,
             (random.randint(0, 1), min(max(0, preferred_assistance+1), 5)))
@@ -93,9 +92,9 @@ def generate_agent_assistance(preferred_assistance, agent_behaviour, current_sta
             (random.randint(0, 1), min(max(0, preferred_assistance-3), 5)))
             print("hatt4")
 
-    agent_feedback, agent_assistance = episode.state_from_index_to_point(action_space, robot_action)
+    agent_assistance = episode.state_from_index_to_point(action_space, robot_action)
 
-    return agent_feedback, agent_assistance
+    return agent_assistance
 
 
 
@@ -134,16 +133,16 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
 
     # if then else are necessary to classify the task game state into beg, mid, end
 
-    if user_action == 1 and game_state_counter<3:
+    # user_action encoding: 0 = correct move, 1 = wrong move, 2 = timeout
+    if user_action == 0 and game_state_counter<3:
         attempt_counter = 1
         correct_move_counter += 1
         task_progress_counter += 1
     # if the user made a wrong move and still did not reach the maximum number of attempts
-    elif user_action == -1 and attempt_counter < max_attempt_per_object and game_state_counter<3:
+    elif user_action == 1 and attempt_counter < max_attempt_per_object and game_state_counter<3:
         attempt_counter += 1
         wrong_move_counter += 1
     # if the user did not move any token and still did not reach the maximum number of attempts
-    elif user_action == 0 and attempt_counter < max_attempt_per_object and game_state_counter<3:
+    elif user_action == 2 and attempt_counter < max_attempt_per_object and game_state_counter<3:
         attempt_counter += 1
         timeout_counter += 1
     # the agent or therapist makes the correct move on the patient's behalf
@@ -167,8 +166,24 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
     return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
 
 
+def select_agent_action(agent_action, epsilon):
+    '''
+    Args:
+        agent_action: list of the agent's actions with their probabilities
+        epsilon: probability of discarding the most likely action
+    Return: the index of one of the agent's actions
+    '''
 
-def simulation(bn_model_user_action, var_user_action_target_action,
+    if random.random() > epsilon:
+        return np.argmax(agent_action)
+    else:
+        agent_action[np.argmax(agent_action)] = 0  # exploration: zero out the most likely action (in place)
+        return np.argmax(agent_action)             # and return the next most likely one
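+# e.g. (illustrative values): select_agent_action([0.1, 0.7, 0.2], epsilon=0.1) returns 1 most of
+# the time and, roughly 10% of the time, the next most likely action (index 2) instead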
+
+def simulation(bn_model_user_action,
+               bn_model_agent_behaviour,
+               var_user_action_target_action,
+               var_agent_behaviour_target_action,
                game_state_bn_name, attempt_bn_name,
                agent_assistance_bn_name, agent_feedback_bn_name,
                user_pref_assistance,
@@ -186,14 +201,14 @@ def simulation(bn_model_user_action, var_user_action_target_action,
 
     '''
 
-    user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
-                                                           for j in range(Agent_Assistance.counter.value)]
-                                                           for l in range(Agent_Feedback.counter.value)
-                                                        ]
-    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
-                                       range(User_Action.counter.value)]
-    game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
-                                          range(User_Action.counter.value)]
+    # user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
+    #                                                        for j in range(Agent_Assistance.counter.value)]
+    #                                                        for l in range(Agent_Feedback.counter.value)
+    #                                                     ]
+    # attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
+    #                                    range(User_Action.counter.value)]
+    # game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
+    #                                       range(User_Action.counter.value)]
 
     #output variables:
     n_correct_per_episode = [0]*epochs
@@ -226,11 +241,11 @@ def simulation(bn_model_user_action, var_user_action_target_action,
         max_attempt_counter = 0
 
         #The following variables are used to update the BN at the end of the episode
-        user_action_dynamic_variables = {
-                                        'attempt': attempt_counter_per_user_action,
-                                        'game_state': game_state_counter_per_user_action,
-                                        'user_action': user_action_per_robot_feedback_robot_assistance
-                                        }
+        # user_action_dynamic_variables = {
+        #                                 'attempt': attempt_counter_per_user_action,
+        #                                 'game_state': game_state_counter_per_user_action,
+        #                                 'user_action': user_action_per_robot_feedback_robot_assistance
+        #                                 }
 
         #data structure to memorise the sequence of states  (state, action, next_state)
         episode = []
@@ -241,29 +256,40 @@ def simulation(bn_model_user_action, var_user_action_target_action,
 
             current_state = (game_state_counter, attempt_counter, selected_user_action)
             current_state_index = ep.state_from_point_to_index(state_space, current_state)
-            if agent_policy==[]:
-                selected_agent_feedback_action, selected_agent_assistance_action = \
-                    generate_agent_assistance(preferred_assistance=user_pref_assistance,
-                                              agent_behaviour=agent_behaviour,
-                                              current_state=current_state_index,
-                                              state_space=state_space,
-                                              action_space=action_space
-                                              )
-            else:
-                selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, agent_policy[current_state_index])
-
+            # if agent_policy==[]:
+            #     selected_agent_feedback_action, selected_agent_assistance_action = \
+            #         generate_agent_assistance(preferred_assistance=user_pref_assistance,
+            #                                   agent_behaviour=agent_behaviour,
+            #                                   current_state=current_state_index,
+            #                                   state_space=state_space,
+            #                                   action_space=action_space
+            #                                   )
+            # else:
+            #     #TODO agent_policy is a list of 12 items
+            #     # select the one with the highest probability 1-epsilon of the times and one of the others epsilon times
+            #
+            #     selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, select_agent_action(agent_policy[current_state_index], epsilon=0.1))
+
+            vars_agent_evidence = {game_state_bn_name: game_state_counter,
+                                  attempt_bn_name: attempt_counter - 1,
+                                  }
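+            # attempt_counter is 1-indexed while the BN's attempt node is presumably 0-indexed,
+            # hence the "attempt_counter - 1" above (the user-action query below uses the same convention)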
+
+            query_agent_behaviour_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_agent_behaviour,
+                                                                        infer_variable=var_agent_behaviour_target_action,
+                                                                        evidence_variables=vars_agent_evidence)
+
+            selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
+            #selected_agent_behaviour_action = np.argmax(query_agent_behaviour_prob.values)
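+            # get_stochastic_action presumably samples an action index from the posterior, keeping the
+            # simulated agent stochastic; the commented-out argmax line above is the greedy alternative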
 
             #counters for plots
-            n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1
-            current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action)
-            print("agent_assistance {}, agent_feedback {},  attempt {}, game {}".format(selected_agent_assistance_action, selected_agent_feedback_action, attempt_counter, game_state_counter))
+            n_assistance_lev_per_episode[e][selected_agent_behaviour_action] += 1
+            print("agent_assistance {},  attempt {}, game {}".format(selected_agent_behaviour_action, attempt_counter, game_state_counter))
 
             ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
             #return the user action in this state based on the Persona profile
             vars_user_evidence = {    game_state_bn_name: game_state_counter,
                                       attempt_bn_name: attempt_counter - 1,
-                                      agent_assistance_bn_name: selected_agent_assistance_action,
-                                      agent_feedback_bn_name: selected_agent_feedback_action,
+                                      agent_assistance_bn_name: selected_agent_behaviour_action,
                                       }
 
             query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
@@ -272,14 +298,14 @@ def simulation(bn_model_user_action, var_user_action_target_action,
 
             selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
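+            # the sampled index already matches the encoding used by compute_next_state
+            # (0 = correct, 1 = wrong, 2 = timeout), so the remapping below is no longer needed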
 
-            # updates counters for simulation
-            # remap user_action index
-            if selected_user_action == 0:
-              selected_user_action = 1
-            elif selected_user_action == 1:
-              selected_user_action = -1
-            else:
-              selected_user_action = 0
+            # # updates counters for simulation
+            # # remap user_action index
+            # if selected_user_action == 0:
+            #   selected_user_action = 1
+            # elif selected_user_action == 1:
+            #   selected_user_action = -1
+            # else:
+            #   selected_user_action = 0
 
             #updates counters for simulation
             iter_counter += 1
@@ -291,9 +317,15 @@ def simulation(bn_model_user_action, var_user_action_target_action,
                                                                         timeout_counter, max_attempt_counter,
                                                                         max_attempt_per_object)
 
+            # update counters
+            # if game_state_counter <= 2:
+            #     user_action_per_robot_feedback_robot_assistance[selected_agent_feedback_action][selected_agent_assistance_action][selected_user_action] += 1
+            #     attempt_counter_per_user_action[selected_user_action][attempt_counter - 1] += 1
+            #     game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
+
             # store the (state, action, next_state)
             episode.append((ep.state_from_point_to_index(state_space, current_state),
-                            ep.state_from_point_to_index(action_space, current_agent_action),
+                            selected_agent_behaviour_action,  # raw behaviour index, no longer mapped through the action space
                             ep.state_from_point_to_index(state_space, next_state)))
 
             print("current_state ", current_state, " next_state ", next_state)
@@ -305,8 +337,11 @@ def simulation(bn_model_user_action, var_user_action_target_action,
         #save episode
         episodes.append(Episode(episode))
 
+
+
         #update user models
-        bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
+        # bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
+        #
 
         #reset counter
         user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
@@ -340,9 +375,7 @@ def simulation(bn_model_user_action, var_user_action_target_action,
 #############################################################################
 
 
-# agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.1)
-# print(agent_policy)
-#
+
 # # SIMULATION PARAMS
 # epochs = 20
 # scaling_factor = 1
@@ -375,6 +408,9 @@ def simulation(bn_model_user_action, var_user_action_target_action,
 # initial_state = (1, 1, 0)
 #
 # #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
+# agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", current_state=5, state_space=states_space_list, action_space=action_space_list)
+# print(agent_policy)
+#
 #
 #
 # game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \