version with simulation and plots

f11e8a4e · Antonio Andriella · 611495d0 · f11e8a4e
Commit f11e8a4e authored 5 years ago by Antonio Andriella
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@ import bnlearn
 import numpy as np
 import enum
 import random
+import matplotlib.pyplot as plt

 #define constants
 class User_Action(enum.Enum):
@@ -55,12 +56,46 @@ print("user_action -> game_state ", model['model'].cpds[2].values)
 print("robot_feedback -> robot_assistance ", model['model'].cpds[5].values)
 print("user_action -> reactivity, memory ", model['model'].cpds[6].values)

+
+def plot2D(save_path, n_episodes, *y):
+    '''
+    Draw a stacked bar chart with the number of correct, wrong and timeout
+    moves of each episode, save it and show it
+    Args:
+        save_path: where the figure is written (passed to plt.savefig)
+        n_episodes: number of bars on the x-axis, one per episode
+        *y: only y[0] is read; it has to be indexable as
+            (correct, wrong, timeout), each with one value per episode
+    '''
+    bar_width = 0.35
+    # the x locations of the bars
+    positions = np.arange(n_episodes)
+
+    # arrays, so that the stacking offsets below are summed element-wise
+    correct = np.asarray(y[0][0])
+    wrong = np.asarray(y[0][1])
+    timeout = np.asarray(y[0][2])
+
+    # keep a handle on the figure so that it can be released at the end
+    fig = plt.figure(figsize=(10, 7))
+    try:
+        # each series is stacked on top of the previous ones
+        plt.bar(positions, correct, edgecolor='white', width=bar_width, label="correct")
+        plt.bar(positions, wrong, bottom=correct, edgecolor='white', width=bar_width, label='wrong')
+        plt.bar(positions, timeout, bottom=correct + wrong, edgecolor='white',
+                width=bar_width, label='timeout')
+        plt.legend()
+        # Custom X axis: one tick per episode, labelled with the episode index
+        plt.xticks(positions, list(range(n_episodes)), fontweight='bold')
+        plt.ylabel("performance")
+        plt.savefig(save_path)
+        plt.show()
+    finally:
+        # pyplot keeps every figure registered until it is closed explicitly,
+        # so repeated calls would otherwise pile up open figures
+        plt.close(fig)
+
 def compute_prob(cpds_table):
+    '''
+    Given the counters generate the probability distributions
+    Every value is divided by the sum of its row plus 0.00001, so a row of
+    zeros gives zeros instead of a division by zero (and a row sums to
+    slightly less than 1)
+    The table received is left untouched, a new table is returned
+    Args:
+        cpds_table: with counters, one row per distribution
+    Return:
+         the probs for the cpds table
+    '''
-    for val in range(len(cpds_table)):
-            cpds_table[val] = list(map(lambda x: x / (sum(cpds_table[val])+0.00001), cpds_table[val]))
-    return cpds_table
+    prob_table = []
+    for row in cpds_table:
+        # the total is the same for every cell of the row: compute it once
+        row_total = sum(row) + 0.00001
+        prob_table.append([count / row_total for count in row])
+    return prob_table

-def avg_prob(ref_cpds_table, current_cpds_table):
+def average_prob(ref_cpds_table, current_cpds_table):
    '''
    Args:
        ref_cpds_table: table from bnlearn
@@ -74,129 +109,157 @@ def avg_prob(ref_cpds_table, current_cpds_table):
            res_cpds_table[elem1][elem2] = (ref_cpds_table[elem1][elem2]+current_cpds_table[elem1][elem2])/2
    return res_cpds_table

+def generate_user_action(actions_prob):
+    '''
+    Select one of the actions according to the actions_prob
+    A number is drawn in [0, 1) and compared with the cumulative
+    probabilities, so that action i is selected with probability
+    actions_prob[i]; any number of actions is accepted
+    Args:
+        actions_prob: the result of the query to the BN, one probability
+            per action
+    Return:
+        the id of the selected action
+    Raises:
+        ValueError: if actions_prob is empty
+    '''
+    n_actions = len(actions_prob)
+    if n_actions == 0:
+        raise ValueError("actions_prob must contain at least one probability")
+    rnd_val = random.random()
+    cumulative_prob = 0.0
+    for action_id, action_prob in enumerate(actions_prob):
+        cumulative_prob += action_prob
+        # strict comparison: an action with probability 0 can never be
+        # selected, not even when the number drawn is exactly 0.0
+        if rnd_val < cumulative_prob:
+            return action_id
+    # the probabilities add up to less than the number drawn (rounding):
+    # fall back on the last action
+    return n_actions - 1

-def simulation(robot_assistance_vect, robot_feedback_vect):
+
+def simulation(robot_assistance_vect, robot_feedback_vect, memory, attention, reactivity, epochs=50, non_stochastic=False):
+    '''
+    Run epochs episodes of the simulated game and update the CPDs of the
+    module-level model after each of them
+    At every step a robot assistance and a robot feedback are picked at random,
+    the BN is queried for the distribution of user_action given the current
+    evidence and one user action is selected from it. At the end of an episode
+    the counters are turned into probabilities, averaged with
+    model['model'].cpds[0], [2], [6] and [5] and written back to the model
+    Args:
+        robot_assistance_vect: the robot assistance values to pick from
+        robot_feedback_vect: the robot feedback values to pick from
+        memory: value of the 'memory' evidence of every query
+        attention: value of the 'attention' evidence of every query
+        reactivity: value of the 'reactivity' evidence of every query
+        epochs: number of episodes to run
+        non_stochastic: if True the most likely user action is always taken,
+            otherwise the action is sampled with generate_user_action
+    Return:
+        three lists with one entry per episode: the number of correct moves,
+        of wrong moves and of timeouts
+    '''
     #metrics we need in order to compute the afterwords the belief
     '''
     CPD 0: for each attempt 1 to 4 store the number of correct, wrong and timeout
     '''
-    attempt_counter_per_action = [[0 for j in range(User_Action.counter.value)] for i in range(Attempt.counter.value)]
+    attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)]  for j in range(User_Action.counter.value)]
     '''
     CPD 2: for each game_state 0 to 2 store the number of correct, wrong and timeout
     '''
-    game_state_counter_per_action = [[0 for j in range(User_Action.counter.value)] for i in range(Game_State.counter.value)]
+    game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)]  for j in range(User_Action.counter.value)]
     '''
     CPD 5: for each robot feedback store the number of correct, wrong and timeout
     '''
-    robot_feedback_per_action = [[0 for j in range(User_Action.counter.value)] for i in range(Robot_Feedback.counter.value)]
+    robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in range(User_Action.counter.value)]
     '''
     CPD 6: for each robot assistance store the number of pos and neg feedback
     '''
-    robot_assistance_per_feedback = [[0 for j in range(Robot_Feedback.counter.value)] for i in range(Robot_Assistance.counter.value)]
-
-    task_complexity = 5
-    task_evolution = 0
-    attempt_counter = 0
-    game_state_counter = 0
-
-    iter_counter = 0
-    correct_move_counter = 0
-    wrong_move_counter = 0
-    timeout_counter = 0
-
-    '''Simulation framework'''
-    while(task_evolution<=task_complexity):
-        if task_evolution>=0 and task_evolution<=2:
-            game_state_counter = 0
-        elif task_evolution>=3 and task_evolution<=4:
-            game_state_counter = 1
-        else:
-            game_state_counter = 2
-        #select robot assistance
-        robot_assistance_action = random.randint(min(robot_assistance_vect), max(robot_assistance_vect))
-        #select robot feedback
-        robot_feedback_action = random.randint(min(robot_feedback_vect), max(robot_feedback_vect))
-
-        print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
-        query = bnlearn.inference.fit(model, variables=['user_action'], evidence={'robot_assistance': robot_assistance_action,
-                                                                                  'attempt': attempt_counter,
-                                                                                  'game_state': game_state_counter,
-                                                                                  'robot_feedback': robot_feedback_action,
-                                                                                  'memory': 0,
-                                                                                  'attention': 0,
-                                                                                  'reactivity': 0
-                                                                                  })
-        user_move_action = np.argmax(query.values, axis=0)
-
-        robot_assistance_per_feedback[robot_assistance_action][robot_feedback_action] += 1
-        attempt_counter_per_action[attempt_counter][user_move_action] += 1
-        game_state_counter_per_action[game_state_counter][user_move_action] += 1
-        robot_feedback_per_action[robot_feedback_action][user_move_action] += 1
-
-        iter_counter += 1
-        if user_move_action == 0:
-            attempt_counter += 0
-            task_evolution += 1
-            correct_move_counter += 1
-        elif user_move_action == 1 and attempt_counter<3:
-            attempt_counter += 1
-            wrong_move_counter += 1
-        elif user_move_action == 2 and attempt_counter<3:
-            attempt_counter += 1
-            wrong_move_counter += 1
-        else:
-            attempt_counter += 0
-            task_evolution += 1
-            timeout_counter += 1
-
-        print("correct {}, wrong {}, timeout {}".format(query.values[0],
-                                                    query.values[1],
-                                                    query.values[2]))
-
-
-    print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback))
-    print("attempt_counter_per_action {}".format(attempt_counter_per_action))
-    print("game_state_counter_per_action {}".format(game_state_counter_per_action))
-    print("robot_feedback_per_action {}".format(robot_feedback_per_action))
-    print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter))
-
-    return attempt_counter_per_action, game_state_counter_per_action, robot_assistance_per_feedback, robot_feedback_per_action
-
-
-robot_assistance_vect = [0, 1, 2, 3, 4]
-robot_feedback_vect = [0, 1]
-attempt_counter_per_action, game_state_counter_per_action, \
-robot_assistance_per_feedback, robot_feedback_per_action = simulation(robot_assistance_vect, robot_feedback_vect)
-
-print("************BEFORE*************")
-print(model['model'].cpds[0].values)
-print(model['model'].cpds[2].values)
-print(model['model'].cpds[5].values)
-print(model['model'].cpds[6].values)
-
-prob_over_attempt_per_action = compute_prob(attempt_counter_per_action)
-prob_over_game_per_action  = compute_prob(game_state_counter_per_action)
-prob_over_feedback_per_action = compute_prob(robot_feedback_per_action)
-prob_over_assistance_per_feedback = compute_prob(robot_assistance_per_feedback)
-
-print("************DURING*************")
-print(prob_over_attempt_per_action)
-print(prob_over_game_per_action)
-print(prob_over_feedback_per_action)
-print(prob_over_assistance_per_feedback)
-
-res_prob_over_attempt_per_action = avg_prob(model['model'].cpds[0].values,
-                                            prob_over_attempt_per_action)
-res_prob_over_game_per_action = avg_prob(model['model'].cpds[2].values,
-                                         prob_over_game_per_action)
-res_prob_over_feedback_per_action = avg_prob(model['model'].cpds[6].values,
-                                         prob_over_feedback_per_action)
-res_prob_over_assistance_per_feedback = avg_prob(model['model'].cpds[5].values,
-                                                 prob_over_assistance_per_feedback)
-
-
-print("************AFTER*************")
-print(res_prob_over_attempt_per_action)
-print(res_prob_over_game_per_action)
-print(res_prob_over_feedback_per_action)
-print(res_prob_over_assistance_per_feedback)
+    robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(Robot_Feedback.counter.value)]
+
+
+    #output variables:
+    n_correct_per_episode = [0]*epochs
+    n_wrong_per_episode = [0]*epochs
+    n_timeout_per_episode = [0]*epochs
+
+
+    for e in range(epochs):
+        '''Simulation framework'''
+        task_complexity = 5
+        task_evolution = 0
+
+        attempt_counter = 0
+        game_state_counter = 0
+        iter_counter = 0
+        correct_move_counter = 0
+        wrong_move_counter = 0
+        timeout_counter = 0
+        while(task_evolution<=task_complexity):
+            if task_evolution>=0 and task_evolution<=2:
+                game_state_counter = 0
+            elif task_evolution>=3 and task_evolution<=4:
+                game_state_counter = 1
+            else:
+                game_state_counter = 2
+            #select robot assistance
+            #random.choice picks among the values listed, also when they are not contiguous
+            robot_assistance_action = random.choice(robot_assistance_vect)
+            #select robot feedback
+            robot_feedback_action = random.choice(robot_feedback_vect)
+
+            print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
+            query = bnlearn.inference.fit(model, variables=['user_action'], evidence={'robot_assistance': robot_assistance_action,
+                                                                                      'attempt': attempt_counter,
+                                                                                      'game_state': game_state_counter,
+                                                                                      'robot_feedback': robot_feedback_action,
+                                                                                      'memory': memory,
+                                                                                      'attention': attention,
+                                                                                      'reactivity': reactivity
+                                                                                      })
+            if non_stochastic:
+                #deterministic user: always the most likely action
+                user_action = int(np.argmax(query.values, axis=0))
+            else:
+                #generate a random number and trigger one of the three possible action
+                user_action = generate_user_action(query.values)
+
+            robot_assistance_per_feedback[robot_feedback_action][robot_assistance_action] += 1
+            attempt_counter_per_action[user_action][attempt_counter] += 1
+            game_state_counter_per_action[user_action][game_state_counter] += 1
+            robot_feedback_per_action[user_action][robot_feedback_action] += 1
+
+            iter_counter += 1
+            #NOTE(review): a timeout with attempts left is counted in wrong_move_counter and
+            #any wrong move or timeout on the last attempt is counted in timeout_counter - confirm this is intended
+            if user_action == 0:
+                attempt_counter = 0
+                task_evolution += 1
+                correct_move_counter += 1
+            elif user_action == 1 and attempt_counter<3:
+                attempt_counter += 1
+                wrong_move_counter += 1
+            elif user_action == 2 and attempt_counter<3:
+                attempt_counter += 1
+                wrong_move_counter += 1
+            else:
+                attempt_counter = 0
+                task_evolution += 1
+                timeout_counter += 1
+
+        #the value printed is iter_counter (attempt_counter is always 0 once the episode is over)
+        print("task_evolution {}, iter_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter))
+
+        print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback))
+        print("attempt_counter_per_action {}".format(attempt_counter_per_action))
+        print("game_state_counter_per_action {}".format(game_state_counter_per_action))
+        print("robot_feedback_per_action {}".format(robot_feedback_per_action))
+        print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter))
+
+        print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter))
+        #transform counters into probabilities
+        #work on copies: the counters have to stay counts, they keep accumulating in the next epoch
+        prob_over_attempt_per_action = compute_prob([list(row) for row in attempt_counter_per_action])
+        prob_over_game_per_action = compute_prob([list(row) for row in game_state_counter_per_action])
+        prob_over_feedback_per_action = compute_prob([list(row) for row in robot_feedback_per_action])
+        prob_over_assistance_per_feedback = compute_prob([list(row) for row in robot_assistance_per_feedback])
+        #average the probabilities obtained with the cpdf tables
+        updated_prob_over_attempt_per_action = average_prob(np.transpose(model['model'].cpds[0].values),
+                                                        prob_over_attempt_per_action)
+        updated_prob_over_game_per_action = average_prob(np.transpose(model['model'].cpds[2].values),
+                                                     prob_over_game_per_action)
+        updated_prob_over_feedback_per_action = average_prob(np.transpose(model['model'].cpds[6].values),
+                                                         prob_over_feedback_per_action)
+        updated_prob_over_assistance_per_feedback = average_prob(np.transpose(model['model'].cpds[5].values),
+                                                             prob_over_assistance_per_feedback)
+
+        #write the averaged tables back, in the orientation the model stores them
+        model['model'].cpds[0].values = np.transpose(updated_prob_over_attempt_per_action)
+        model['model'].cpds[2].values = np.transpose(updated_prob_over_game_per_action)
+        model['model'].cpds[6].values = np.transpose(updated_prob_over_feedback_per_action)
+        model['model'].cpds[5].values = np.transpose(updated_prob_over_assistance_per_feedback)
+
+        n_correct_per_episode[e] = correct_move_counter
+        n_wrong_per_episode[e] = wrong_move_counter
+        n_timeout_per_episode[e] = timeout_counter
+
+    return n_correct_per_episode, n_wrong_per_episode, n_timeout_per_episode
+
+#settings of the run: every robot assistance/feedback value can be selected
+robot_assistance = list(range(Robot_Assistance.counter.value))
+robot_feedback = list(range(Robot_Feedback.counter.value))
+epochs = 10
+memory = 0
+attention = 0
+reactivity = 1
+#pass epochs itself (not a literal): plot2D draws one bar per epoch and needs as many results
+results = simulation(robot_assistance, robot_feedback, memory, attention, reactivity, epochs)
+plot_path = "epoch_"+str(epochs)+"_memory_"+str(memory)+"_attention_"+str(attention)+"_reactivity_"+str(reactivity)+".jpg"
+plot2D(plot_path, epochs, results)
+
+#TODO
+'''
+- define a function that takes the state as input and returns the user action and its reaction time
+- if the persona is wrong, evaluate how long it takes for the simulator to detect that
+- check percentages
+'''
\ No newline at end of file