Commit 6e9d0ce6 authored by Antonio Andriella

Working code with persona, real_user and robot in the simulation

parent 32365577
network persona_model {
}
%VARIABLES DEFINITION
......
import random
import bn_functions


def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
                       wrong_move_counter, timeout_counter
                       ):
    '''
    Given the current state and the user's action, compute the next state.
    Args:
        user_action: 0, 1 or 2
        task_evolution: beg, mid, end
        attempt_counter:
        correct_move_counter:
        wrong_move_counter:
        timeout_counter:
    Return:
        the counters updated according to the user_action
    '''
    # the user made the correct move
    if user_action == 0:
        attempt_counter = 0
        task_evolution += 1
        correct_move_counter += 1
    # the user made a wrong move and has not yet reached the maximum number of attempts
    elif user_action == 1 and attempt_counter < 3:
        attempt_counter += 1
        wrong_move_counter += 1
    # the user did not move any token and has not yet reached the maximum number of attempts
    elif user_action == 2 and attempt_counter < 3:
        attempt_counter += 1
        timeout_counter += 1
    # the robot or therapist makes the correct move on the patient's behalf
    else:
        attempt_counter = 0
        task_evolution += 1
        correct_move_counter += 1
    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter
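
# For illustration, with hypothetical counter values: a correct move (user_action = 0)
# advances the task, resets the attempt counter and increments the correct-move counter, e.g.
# compute_next_state(0, task_evolution=2, attempt_counter=1, correct_move_counter=3,
#                    wrong_move_counter=1, timeout_counter=0) returns (3, 0, 4, 1, 0).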
def get_stochastic_action(actions_prob):
    '''
    Select one of the actions according to actions_prob.
    Args:
        actions_prob: the probabilities, based on the BN, that the Persona makes
                      a correct move, a wrong move or a timeout
    Return:
        the id of the selected action
    '''
    correct_action_from_BN = actions_prob[0]
    wrong_action_from_BN = actions_prob[1]
    timeout_action_from_BN = actions_prob[2]

    rnd_val = random.uniform(0, 1)
    # if rnd_val is lower than or equal to the correct-action prob, the correct action is selected
    if rnd_val <= correct_action_from_BN:
        action_id = 0
    # if rnd_val falls between the correct-action prob and correct+wrong, the wrong action is selected
    elif rnd_val <= (correct_action_from_BN + wrong_action_from_BN):
        action_id = 1
    # otherwise it is a timeout
    else:
        action_id = 2
    return action_id
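
# Example with a hypothetical BN output: given actions_prob = [0.7, 0.2, 0.1],
# a draw of rnd_val = 0.75 falls in the interval (0.7, 0.9], so the wrong move (id 1)
# is returned; over many calls, actions 0/1/2 are selected roughly 70/20/10% of the time.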
network persona_model_3 {
}
%VARIABLES DEFINITION
variable reactivity {
    type discrete [3] {slow, medium, fast};
}
...@@ -11,29 +11,23 @@ variable memory {
variable attention {
    type discrete[3] {low, medium, high};
}
variable robot_assistance {
    type discrete [ 5 ] { lev_0, lev_1, lev_2, lev_3, lev_4 };
}
variable attempt {
    type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
    type discrete [ 3 ] { beg, mid, end };
}
variable robot_feedback {
    type discrete [ 2 ] { yes, no };
}
variable user_action {
    type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( robot_assistance ) {
    table 0.2, 0.2, 0.2, 0.2, 0.2;
}
...@@ -46,26 +40,23 @@ probability ( attempt ) {
probability ( user_action ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 4
probability ( reactivity ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 3
probability ( memory ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 1
probability ( attention ) {
    table 0.33, 0.33, 0.34;
}
probability ( robot_feedback ) {
    table 0.5, 0.5;
}
#CPDS 7
probability (user_action | reactivity, memory, attention) {
    (slow, low, low) 0.1, 0.4, 0.5;
    (slow, low, medium) 0.3, 0.5, 0.2;
    (slow, low, high) 0.4, 0.5, 0.1;
...@@ -75,7 +66,7 @@ probability (user_action | reactivity, memory, attention) {
    (slow, high, low) 0.3, 0.4, 0.3;
    (slow, high, medium) 0.6, 0.3, 0.1;
    (slow, high, high) 0.7, 0.2, 0.1;
    %%%
    (medium, low, low) 0.3, 0.4, 0.3;
    (medium, low, medium) 0.3, 0.5, 0.2;
    (medium, low, high) 0.4, 0.3, 0.3;
...@@ -85,7 +76,7 @@ probability (user_action | reactivity, memory, attention) {
    (medium, high, low) 0.34, 0.33, 0.33;
    (medium, high, medium) 0.7, 0.2, 0.1;
    (medium, high, high) 0.75, 0.25, 0.0;
    %%%
    (fast, low, low) 0.5, 0.2, 0.3;
    (fast, low, medium) 0.6, 0.2, 0.2;
    (fast, low, high) 0.7, 0.3, 0.0;
...@@ -95,37 +86,32 @@ probability (user_action | reactivity, memory, attention) {
    (fast, high, low) 0.5, 0.2, 0.3;
    (fast, high, medium) 0.6, 0.2, 0.2;
    (fast, high, high) 0.9, 0.1, 0.0;
}
#CPDS 5
probability (robot_feedback | user_action) {
    (correct) 0.8, 0.2;
    (wrong) 0.5, 0.5;
    (timeout) 0.2, 0.8;
}
#CPDS 6
probability (robot_assistance | user_action) {
    (correct) 0.05 0.1 0.15 0.3 0.4;
    (wrong) 0.1 0.2 0.4 0.2 0.1;
    (timeout) 0.2 0.4 0.2 0.1 0.1;
}
#CPDS 2
probability (game_state | user_action) {
    (correct) 0.2, 0.4, 0.4;
    (wrong) 0.4, 0.4, 0.2;
    (timeout) 0.6, 0.3, 0.1;
}
#CPDS 0
probability (attempt | user_action) {
    (correct) 0.1, 0.2, 0.3, 0.4;
    (wrong) 0.5, 0.3, 0.15, 0.05;
    (timeout) 0.4, 0.3, 0.2, 0.1;
}
#CPDS 5
probability (robot_assistance | robot_feedback) {
    (yes) 0.5 0.3 0.1 0.1 0.0;
    (no) 0.0 0.1 0.1 0.3 0.5;
......
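
For context, a minimal sketch of how a .bif model like the one above is consumed downstream with bnlearn (the evidence values here are illustrative; import_DAG and inference.fit are the calls the scripts below rely on):

import bnlearn

# load the network defined in the .bif file
model = bnlearn.import_DAG('bn_persona_model/persona_model.bif')
# query P(user_action) given evidence on the persona's traits, passed as state indices
query = bnlearn.inference.fit(model, variables=['user_action'],
                              evidence={'reactivity': 0, 'memory': 0, 'attention': 0})
print(query.values)  # [P(correct), P(wrong), P(timeout)]
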
import bnlearn
import numpy as np
import enum
import random
import matplotlib.pyplot as plt

#import classes and modules
from bn_variables import Memory, Attention, Reactivity, Robot_Assistance, Robot_Feedback, Robot_Assistance_Feedback, User_Action, User_Capability, Game_State, Attempt
import bn_functions
import utils

#define constants
class User_Action(enum.Enum):
    correct = 0
    wrong = 1
    timeout = 2
    name = "user_action"
    counter = 3
class Reactivity(enum.Enum):
    slow = 0
    medium = 1
    fast = 2
    name = "reactivity"
    counter = 3

class Memory(enum.Enum):
    low = 0
    medium = 1
    high = 2
    name = "memory"
    counter = 3

class Robot_Assistance(enum.Enum):
    lev_0 = 0
    lev_1 = 1
    lev_2 = 2
    lev_3 = 3
    lev_4 = 4
    name = "robot_assistance"
    counter = 5

class Robot_Feedback(enum.Enum):
    yes = 0
    no = 1
    name = "robot_feedback"
    counter = 2

class Game_State(enum.Enum):
    beg = 0
    middle = 1
    end = 2
    name = "game_state"
    counter = 3

class Attempt(enum.Enum):
    at_1 = 0
    at_2 = 1
    at_3 = 2
    at_4 = 3
    name = "attempt"
    counter = 4
def plot2D(save_path, n_episodes, *y):
    # the width and position of the bars on the x-axis
    barWidth = 0.35
    r = np.arange(n_episodes)  # the x locations for the groups
    # get values from the group and categories
    x = [i for i in range(n_episodes)]
    correct = y[0][0]
    wrong = y[0][1]
    timeout = y[0][2]
    # plot bars
    plt.figure(figsize=(10, 7))
    plt.bar(r, correct, edgecolor='white', width=barWidth, label="correct")
    plt.bar(r, wrong, bottom=np.array(correct), edgecolor='white', width=barWidth, label='wrong')
    plt.bar(r, timeout, bottom=np.array(correct) + np.array(wrong), edgecolor='white',
            width=barWidth, label='timeout')
    plt.legend()
    # custom x axis
    plt.xticks(r, x, fontweight='bold')
    plt.ylabel("performance")
    plt.savefig(save_path)
    plt.show()
def compute_prob(cpds_table):
    '''
    Given the counters, generate the probability distributions
    Args:
        cpds_table: the table of counters
    Return:
        the probs for the cpds table
    '''
    for val in range(len(cpds_table)):
        cpds_table[val] = list(map(lambda x: x / (sum(cpds_table[val]) + 0.00001), cpds_table[val]))
    return cpds_table

def average_prob(ref_cpds_table, current_cpds_table):
    '''
    Args:
        ref_cpds_table: table from bnlearn
        current_cpds_table: table from the interaction
    Return:
        the elementwise average of both tables
    '''
    res_cpds_table = ref_cpds_table.copy()
    for elem1 in range(len(ref_cpds_table)):
        for elem2 in range(len(ref_cpds_table[0])):
            res_cpds_table[elem1][elem2] = (ref_cpds_table[elem1][elem2] + current_cpds_table[elem1][elem2]) / 2
    return res_cpds_table
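
# Worked example with hypothetical counters: compute_prob([[2, 1, 1]]) yields roughly
# [[0.5, 0.25, 0.25]], and average_prob([[0.6, 0.2, 0.2]], [[0.5, 0.25, 0.25]]) yields
# [[0.55, 0.225, 0.225]]: the running estimate moves halfway towards the newly observed
# frequencies at each update.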
def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
                       wrong_move_counter, timeout_counter, max_attempt_counter
                       ):
    '''
    Given the current state and the user's action, compute the next state
    Args:
        user_action: 0,1,2
        task_evolution: beg, mid, end
...@@ -111,8 +18,14 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
        attempt_counter:
        wrong_move_counter:
        timeout_counter:
        max_attempt_counter:
    Return:
        task_evolution
        attempt_counter
        correct_move_counter
        wrong_move_counter
        timeout_counter
        max_attempt_counter
    '''
    if user_action == 0:
        attempt_counter = 0
...@@ -130,123 +43,30 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
    else:
        attempt_counter = 0
        task_evolution += 1
        #correct_move_counter += 1
        max_attempt_counter += 1
    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
def update_cpds_tables(game_state_counter, attempt_counter,
                       robot_assistance, robot_feedback,
                       persona_bn_model
                       ):
    '''
    Args:
        game_state_counter: from 0 to 2 (beg, mid, end)
        attempt_counter: from 1 to 4
        robot_assistance: from 0 to 4
        robot_feedback: 0 or 1 depending on whether a feedback has been provided
        persona_bn_model: the model whose cpds tables are updated
    Return:
        the cpds tables updated with the new counters
    '''
    # transform counters into probabilities
    prob_over_attempt_per_action = compute_prob(attempt_counter)
    prob_over_game_per_action = compute_prob(game_state_counter)
    prob_over_feedback_per_action = compute_prob(robot_feedback)
    prob_over_assistance_per_feedback = compute_prob(robot_assistance)

    # average the probabilities obtained with the cpds tables
    updated_prob_over_attempt_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[0].values),
        prob_over_attempt_per_action)
    updated_prob_over_game_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[2].values),
        prob_over_game_per_action)
    updated_prob_over_feedback_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[6].values),
        prob_over_feedback_per_action)
    updated_prob_over_assistance_per_feedback = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[5].values),
        prob_over_assistance_per_feedback)

    # dirty solution: indices hardcoded based on the .bif; check it to know the corresponding cpds
    persona_bn_model['cpds']['model'].cpds[0].values = np.transpose(updated_prob_over_attempt_per_action)
    persona_bn_model['cpds']['model'].cpds[2].values = np.transpose(updated_prob_over_game_per_action)
    persona_bn_model['cpds']['model'].cpds[6].values = np.transpose(updated_prob_over_feedback_per_action)
    persona_bn_model['cpds']['model'].cpds[5].values = np.transpose(updated_prob_over_assistance_per_feedback)
    return persona_bn_model
def get_user_actions_prob_from_state(user_initial_cpds, user_memory, user_attention, user_reactivity,
                                     game_state_counter, attempt_counter,
                                     robot_assistance_action, robot_feedback_action
                                     ):
    '''
    Args:
        user_initial_cpds: cpds for the given user
        user_memory: from 1 to 3
        user_attention: from 1 to 3
        user_reactivity: from 1 to 3
        game_state_counter: beg, mid, end
        attempt_counter: from 1 to 4
        robot_assistance_action: between 0 and 4
        robot_feedback_action: between 0 and 1
    Return:
        the probability of the user performing: i) a correct move, ii) a wrong move, iii) a timeout
    '''
    query = bnlearn.inference.fit(user_initial_cpds, variables=['user_action'],
                                  evidence={'robot_assistance': robot_assistance_action,
                                            'attempt': attempt_counter,
                                            'game_state': game_state_counter,
                                            'robot_feedback': robot_feedback_action,
                                            'memory': user_memory,
                                            'attention': user_attention,
                                            'reactivity': user_reactivity
                                            })
    user_actions_prob_from_state = query.values
    return user_actions_prob_from_state

def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_value, user_attention_name, user_attention_value,
               user_reactivity_name, user_reactivity_value,
               task_progress_name, game_attempt_name, robot_assistance_name, robot_feedback_name,
               robot_bn_model, robot_var_target,
               other_user_bn_model, other_user_var_target, other_user_memory_name, other_user_memory_value,
               other_user_attention_name, other_user_attention_value,
               other_user_reactivity_name, other_user_reactivity_value,
               epochs=50, task_complexity=5):
    '''
    Args:
    Return:
    '''
    #metrics we need, in order to compute afterwards the belief
    '''
...@@ -267,10 +87,26 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
    robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(Robot_Feedback.counter.value)]

    #these are the variables of the persona bn that are dynamic and will be affected by the game evolution
    #TODO: it might be worth integrating them as a param of the simulation function, only the name?

    #output variables:
    n_correct_per_episode = [0]*epochs
    n_wrong_per_episode = [0]*epochs
    n_timeout_per_episode = [0]*epochs
    n_max_attempt_per_episode = [0]*epochs
    game_performance_episode = [0]*epochs
    n_lev_0_no_feed_per_episode = [0]*epochs
    n_lev_1_no_feed_per_episode = [0]*epochs
    n_lev_2_no_feed_per_episode = [0]*epochs
    n_lev_3_no_feed_per_episode = [0]*epochs
    n_lev_4_no_feed_per_episode = [0]*epochs
    n_lev_0_with_feed_per_episode = [0]*epochs
    n_lev_1_with_feed_per_episode = [0]*epochs
    n_lev_2_with_feed_per_episode = [0]*epochs
    n_lev_3_with_feed_per_episode = [0]*epochs
    n_lev_4_with_feed_per_episode = [0]*epochs
    robot_assistance_per_episode = [0]*epochs

    for e in range(epochs):
        '''Simulation framework'''
...@@ -281,36 +117,94 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
        correct_move_counter = 0
        wrong_move_counter = 0
        timeout_counter = 0
        max_attempt_counter = 0
        robot_assistance_action = 0
        robot_feedback_action = 0

        dynamic_variables = {'attempt': attempt_counter_per_action,
                             'game_state': game_state_counter_per_action,
                             'robot_assistance': robot_assistance_per_feedback,
                             'robot_feedback': robot_feedback_per_action}
        while(task_evolution < task_complexity):
            #if-then-else needed to classify the task game state into beg, mid, end
            if task_evolution >= 0 and task_evolution <= 1:
                game_state_counter = 0
            elif task_evolution >= 2 and task_evolution <= 3:
                game_state_counter = 1
            else:
                game_state_counter = 2
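            # e.g. with task_complexity = 5, progress 0-1 counts as beg, 2-3 as mid and 4 as end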
            robot_vars_evidence = {user_reactivity_name: user_reactivity_value,
                                   user_memory_name: user_memory_value,
                                   task_progress_name: game_state_counter,
                                   game_attempt_name: attempt_counter,
                                   }
            robot_actions_prob = bn_functions.infer_prob_from_state(robot_bn_model,
                                                                    infer_variable=robot_var_target,
                                                                    evidence_variables=robot_vars_evidence)
            robot_action = bn_functions.get_stochastic_action(robot_actions_prob.values)
            n_robot_assistance_feedback = Robot_Assistance_Feedback.counter.value
            if robot_action >= n_robot_assistance_feedback/2:
                robot_feedback_action = 1
                robot_assistance_action = n_robot_assistance_feedback-robot_action-1
                if robot_assistance_action == 0:
                    n_lev_0_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 1:
                    n_lev_1_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 2:
                    n_lev_2_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 3:
                    n_lev_3_no_feed_per_episode[e] += 1
                else:
                    n_lev_4_no_feed_per_episode[e] += 1
            else:
                robot_feedback_action = 0
                robot_assistance_action = robot_action
                if robot_assistance_action == 0:
                    n_lev_0_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 1:
                    n_lev_1_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 2:
                    n_lev_2_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 3:
                    n_lev_3_with_feed_per_episode[e] += 1
                else:
                    n_lev_4_with_feed_per_episode[e] += 1
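            # e.g. assuming Robot_Assistance_Feedback.counter.value == 10 (5 assistance levels x 2
            # feedback options), robot_action = 7 maps to robot_feedback_action = 1 (Robot_Feedback.no)
            # and robot_assistance_action = 10 - 7 - 1 = 2, whereas robot_action = 3 maps to
            # robot_feedback_action = 0 (Robot_Feedback.yes) and robot_assistance_action = 3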
print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action)) print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
#compare the real user with the estimated Persona and returns a user action (0, 1, 2) #compare the real user with the estimated Persona and returns a user action (0, 1, 2)
if real_user_model!=None: if other_user_bn_model!=None:
#return the user action in this state based on the user profile #return the user action in this state based on the user profile
user_actions_prob = get_user_actions_prob_from_state(user_bn_model['cpds'],user_bn_model['memory'], other_user_vars_evidence = {other_user_attention_name:other_user_attention_value,
user_bn_model['attention'], user_bn_model['reactivity'], other_user_reactivity_name:other_user_reactivity_value,
game_state_counter, attempt_counter, robot_assistance_action, other_user_memory_name:other_user_memory_value,
robot_feedback_action) task_progress_name:game_state_counter,
game_attempt_name:attempt_counter,
robot_assistance_name:robot_assistance_action,
robot_feedback_name:robot_feedback_action
}
user_actions_prob = bn_functions.infer_prob_from_state(other_user_bn_model,
infer_variable=other_user_var_target,
evidence_variables=other_user_vars_evidence)
else: else:
#return the user action in this state based on the Persona profile #return the user action in this state based on the Persona profile
user_actions_prob = get_user_actions_prob_from_state(persona_bn_model['cpds'],persona_bn_model['memory'],
persona_bn_model['attention'], persona_bn_model['reactivity'],
game_state_counter, attempt_counter, robot_assistance_action,
robot_feedback_action)
user_action = get_user_action(user_actions_prob) user_vars_evidence = {other_user_attention_name: user_attention_value,
user_reactivity_name: user_reactivity_value,
user_memory_name: user_memory_value,
task_progress_name: game_state_counter,
game_attempt_name: attempt_counter,
robot_assistance_name: robot_assistance_action,
robot_feedback_name: robot_feedback_action
}
user_actions_prob = bn_functions.infer_prob_from_state(user_bn_model,
infer_variable=user_var_target,
evidence_variables=user_vars_evidence)
user_action = bn_functions.get_stochastic_action(user_actions_prob.values)
            #update counters for plots
            robot_assistance_per_feedback[robot_feedback_action][robot_assistance_action] += 1
            attempt_counter_per_action[user_action][attempt_counter] += 1
...@@ -319,11 +213,11 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
            #update counters for the simulation
            iter_counter += 1
            task_evolution, attempt_counter, correct_move_counter, \
            wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(user_action,
                                                                                          task_evolution, attempt_counter,
                                                                                          correct_move_counter, wrong_move_counter,
                                                                                          timeout_counter, max_attempt_counter)

        print("task_evolution {}, attempt_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter))
        print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback))
...@@ -333,45 +227,84 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
        print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter))
        print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter))

        user_bn_model = bn_functions.update_cpds_tables(user_bn_model, dynamic_variables)
        #reset counters?
        robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in
                                         range(Robot_Feedback.counter.value)]
        attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in
                                      range(User_Action.counter.value)]
        game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in
                                         range(User_Action.counter.value)]
        robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in
                                     range(User_Action.counter.value)]
        #for plots
        n_correct_per_episode[e] = correct_move_counter
        n_wrong_per_episode[e] = wrong_move_counter
        n_timeout_per_episode[e] = timeout_counter
        n_max_attempt_per_episode[e] = max_attempt_counter
        game_performance_episode[e] = [n_correct_per_episode[e],
                                       n_wrong_per_episode[e],
                                       n_timeout_per_episode[e],
                                       n_max_attempt_per_episode[e]]
        robot_assistance_per_episode[e] = [n_lev_0_no_feed_per_episode[e],
                                           n_lev_1_no_feed_per_episode[e], n_lev_2_no_feed_per_episode[e],
                                           n_lev_3_no_feed_per_episode[e], n_lev_4_no_feed_per_episode[e],
                                           n_lev_0_with_feed_per_episode[e], n_lev_1_with_feed_per_episode[e],
                                           n_lev_2_with_feed_per_episode[e], n_lev_3_with_feed_per_episode[e],
                                           n_lev_4_with_feed_per_episode[e]
                                           ]

    return game_performance_episode, robot_assistance_per_episode
#############################################################################
#############################################################################
####################### RUN THE SIMULATION ##################################
#############################################################################
#############################################################################
#SIMULATION PARAMS
robot_assistance = [i for i in range(Robot_Assistance.counter.value)]
robot_feedback = [i for i in range(Robot_Feedback.counter.value)]
epochs = 40

#initialise the robot
robot_cpds = bnlearn.import_DAG('bn_robot_model/robot_model.bif')
#initialise memory, attention and reactivity variables
persona_memory = 0; persona_attention = 0; persona_reactivity = 1;
persona_cpds = bnlearn.import_DAG('bn_persona_model/persona_model.bif')
#initialise memory, attention and reactivity variables
real_user_memory = 2; real_user_attention = 2; real_user_reactivity = 2;
real_user_cpds = None#bnlearn.import_DAG('bn_other_user_model/user_model.bif')

game_performance_per_episode, robot_assistance_per_episode = simulation(user_bn_model=persona_cpds, user_var_target=['user_action'], user_memory_name="memory", user_memory_value=persona_memory,
                                                                         user_attention_name="attention", user_attention_value=persona_attention,
                                                                         user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
                                                                         task_progress_name="game_state", game_attempt_name="attempt",
                                                                         robot_assistance_name="robot_assistance", robot_feedback_name="robot_feedback",
                                                                         robot_bn_model=robot_cpds, robot_var_target=["robot_assistance_feedback"],
                                                                         other_user_bn_model=real_user_cpds, other_user_var_target=['user_action'],
                                                                         other_user_memory_name="memory", other_user_memory_value=real_user_memory,
                                                                         other_user_attention_name="attention", other_user_attention_value=real_user_attention,
                                                                         other_user_reactivity_name="reactivity", other_user_reactivity_value=real_user_reactivity,
                                                                         epochs=epochs, task_complexity=5)

if real_user_cpds != None:
    plot_game_performance_path = "game_performance_"+"_epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
else:
    plot_game_performance_path = "game_performance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg"
    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg"

utils.plot2D_game_performance(plot_game_performance_path, epochs, game_performance_per_episode)
utils.plot2D_assistance("test.jpg", epochs, robot_assistance_per_episode)
#TODO
'''
- define a function that takes the state as input and returns the user action and its reaction time
- evaluate, if the persona is wrong, how long it takes the simulator to detect that
- check percentages
'''