From 074c5dd4f3b2351468ff51aedf733369abc141b2 Mon Sep 17 00:00:00 2001
From: Antonio Andriella <aandriella@iri.upc.edu>
Date: Wed, 4 Aug 2021 14:38:56 +0200
Subject: [PATCH] commit last working version

---
 bn_models/agent_model_id_1_True.bif  | 40 ---------------------
 bn_models/agent_model_template.bif   | 26 --------------
 bn_models/persona_model_template.bif | 33 ------------------
 bn_models/user_model_id_1_True.bif   | 51 ---------------------------
 simulation.py                        | 42 ++++++++++++++++------
 test.py                              |  4 +--
 utils.py                             | 52 +++++++++++++++-------------
 7 files changed, 61 insertions(+), 187 deletions(-)
 delete mode 100644 bn_models/agent_model_id_1_True.bif
 delete mode 100644 bn_models/agent_model_template.bif
 delete mode 100644 bn_models/persona_model_template.bif
 delete mode 100644 bn_models/user_model_id_1_True.bif

diff --git a/bn_models/agent_model_id_1_True.bif b/bn_models/agent_model_id_1_True.bif
deleted file mode 100644
index ebc8cee..0000000
--- a/bn_models/agent_model_id_1_True.bif
+++ /dev/null
@@ -1,40 +0,0 @@
-network agent_assistive_model {
-}
-
-%VARIABLES DEFINITION
-
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5};
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( agent_assistance ) {
-  table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability (agent_assistance | game_state, attempt) { 
-(beg, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
-(beg, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
-(beg, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
-(beg, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
-(mid, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
-(mid, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
-(mid, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
-(mid, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
-(end, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
-(end, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
-(end, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
-(end, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
-}
\ No newline at end of file
diff --git a/bn_models/agent_model_template.bif b/bn_models/agent_model_template.bif
deleted file mode 100644
index 1ca11f0..0000000
--- a/bn_models/agent_model_template.bif
+++ /dev/null
@@ -1,26 +0,0 @@
-network agent_assistive_model {
-}
-
-%VARIABLES DEFINITION
-
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5};
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( agent_assistance ) {
-  table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
diff --git a/bn_models/persona_model_template.bif b/bn_models/persona_model_template.bif
deleted file mode 100644
index d8227ea..0000000
--- a/bn_models/persona_model_template.bif
+++ /dev/null
@@ -1,33 +0,0 @@
-network persona_model {
-}
-
-%VARIABLES DEFINITION
-
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-variable user_action {
-  type discrete [ 3 ] { correct, wrong, timeout };
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-
-probability ( agent_assistance ) {
-  table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
-}
-probability ( game_state) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_action ) {
-  table 0.33, 0.33, 0.34;
-}
diff --git a/bn_models/user_model_id_1_True.bif b/bn_models/user_model_id_1_True.bif
deleted file mode 100644
index 3f1ae72..0000000
--- a/bn_models/user_model_id_1_True.bif
+++ /dev/null
@@ -1,51 +0,0 @@
-network persona_model {
-}
-
-%VARIABLES DEFINITION
-
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-variable user_action {
-  type discrete [ 3 ] { correct, wrong, timeout };
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-
-probability ( agent_assistance ) {
-  table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
-}
-probability ( game_state) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_action ) {
-  table 0.33, 0.33, 0.34;
-}
-probability (game_state | user_action)  { 
-(correct)	0.2222222222222222,0.3333333333333333,0.4444444444444444; 
-(wrong)	0.5,0.3333333333333333,0.16666666666666666; 
-(timeout)	0.5,0.3333333333333333,0.16666666666666666; 
-}
-probability (attempt | user_action)  { 
-(correct)	0.15384615384615385,0.23076923076923078,0.3076923076923077,0.3076923076923077; 
-(wrong)	0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; 
-(timeout)	0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; 
-}
-probability (user_action | agent_assistance) { 
-(lev_0)	0.4,0.3,0.3; 
-(lev_1)	0.6,0.2,0.2; 
-(lev_2)	0.6,0.2,0.2; 
-(lev_3)	0.8,0.1,0.1; 
-(lev_4)	1.0,0.0,0.0; 
-(lev_5)	1.0,0.0,0.0; 
-}
\ No newline at end of file
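
Note on the deleted models: the four .bif files above encoded the agent and user Bayesian networks directly in the repository. For reference, a minimal sketch of how such a BIF description can be loaded and queried with the bnlearn package already used in test.py (the file path is illustrative, since this patch deletes the files):

    import bnlearn as bn

    # Load the Bayesian network and its CPDs from the BIF description.
    DAG = bn.import_DAG('bn_models/user_model_id_1_True.bif')

    # Query P(user_action | game_state=beg, attempt=att_1, agent_assistance=lev_2);
    # evidence values are state indices in the order declared in the BIF file.
    q = bn.inference.fit(DAG, variables=['user_action'],
                         evidence={'game_state': 0, 'attempt': 0, 'agent_assistance': 2})
    print(q.values)
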
diff --git a/simulation.py b/simulation.py
index 15bd225..ab3029f 100644
--- a/simulation.py
+++ b/simulation.py
@@ -163,20 +163,37 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
     return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
 
 
-def select_agent_action(agent_action, epsilon):
+def select_agent_action(agent_action, agent_objective, epsilon):
     '''
     Args:
         agent_action: list of possible actions with their probabilities
+        agent_objective: None, "challenge", or "help"
+        epsilon: with no objective, the probability of selecting a non-optimal action; with "help" or "challenge", the probability of keeping the optimal action unchanged
     Return:
         one of the agent's actions
     '''
 
-    if random.random()>epsilon:
-        return np.argmax(agent_action)
+    if agent_objective == "help":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_help_action = min(5, agent_best_action+1)
+            return agent_help_action
+    elif agent_objective == "challenge":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_challenge_action = max(0, agent_best_action-1)
+            return agent_challenge_action
     else:
-        agent_action_rm_best = agent_action[:]
-        agent_action_rm_best[np.argmax(agent_action)] = 0
-        return np.argmax(agent_action_rm_best)
+        if random.random() > epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_action_rm_best = agent_action[:]
+            agent_action_rm_best[np.argmax(agent_action)] = 0
+            return np.argmax(agent_action_rm_best)
 
 def simulation(bn_model_user_action,
                bn_model_agent_behaviour,
@@ -185,8 +202,10 @@ def simulation(bn_model_user_action,
                game_state_bn_name, attempt_bn_name,
                agent_assistance_bn_name,
                agent_policy,
-               state_space, action_space,
-               epoch=50,  run = 50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
+               agent_objective,
+               epsilon,
+               state_space,
+               epoch=50, run=50, task_complexity=5, max_attempt_per_object=4):
     '''
     Args:
 
@@ -229,7 +248,7 @@ def simulation(bn_model_user_action,
         n_timeout_per_episode_run = [0] * run
         n_max_attempt_per_episode_run = [0] * run
         game_performance_episode_run = [0] * run
-        n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)]
+        n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value+1)] for j in range(run)]
 
         for r in range(run):
 
@@ -271,9 +290,9 @@ def simulation(bn_model_user_action,
                                                                                 evidence_variables=vars_agent_evidence)
 
                     #selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
-                    selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, epsilon=0.2)
+                    selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, agent_objective=agent_objective, epsilon=epsilon)
                 else:
-                    selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], epsilon=0.2)
+                    selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], agent_objective=agent_objective, epsilon=epsilon)
                     #selected_agent_behaviour_action = bn_functions.get_stochastic_action(agent_policy[current_state_index])
                     #selected_agent_behaviour_action =np.argmax(agent_policy[current_state_index])
 
@@ -321,6 +340,7 @@ def simulation(bn_model_user_action,
             print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
                   "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
                                                               wrong_move_counter, timeout_counter, max_attempt_counter))
+            print("Assisttance level for  episode:", n_assistance_lev_per_episode_run)
 
             #save episode
             episodes.append(Episode(episode))
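
For reference, a minimal sketch of what the new agent_objective argument does to select_agent_action (toy probability values and a hypothetical import; the real caller passes the inference posterior or a policy row):

    import random
    import numpy as np
    from simulation import select_agent_action  # assumes this module is importable

    # Toy distribution over the six assistance levels lev_0 .. lev_5.
    agent_action = [0.05, 0.10, 0.40, 0.25, 0.15, 0.05]

    random.seed(0)
    # No objective: returns the argmax (here lev_2) with probability 1 - epsilon,
    # otherwise the second-best action.
    print(select_agent_action(agent_action, agent_objective=None, epsilon=0.2))
    # "help": keeps the argmax with probability epsilon, otherwise nudges one
    # level up, capped at lev_5 (here usually lev_3).
    print(select_agent_action(agent_action, agent_objective="help", epsilon=0.2))
    # "challenge": nudges one level down, floored at lev_0 (here usually lev_1).
    print(select_agent_action(agent_action, agent_objective="challenge", epsilon=0.2))
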
diff --git a/test.py b/test.py
index b5fd6ab..45ffe12 100644
--- a/test.py
+++ b/test.py
@@ -82,7 +82,7 @@ print( " Q shared ", q_shared.values)
 # })
 # print("BEFORE")
 # print(q1.values)
-# df = bn.sampling(DAG_update, n=1000)
+# df = bn.sampling(DAG_update, n=1)
 # DAG_update = bn.parameter_learning.fit(DAG_update, df)
 # q1 = bn.inference.fit(DAG_update, variables=['user_action'], evidence={
 #                                                                 'game_state': 0,
@@ -92,7 +92,7 @@ print( " Q shared ", q_shared.values)
 # print("AFTER")
 # print(q1.values)
 
-#df = bn.sampling(DAG, n=1000, verbose=2)
+#df = bn.sampling(DAG, n=1, verbose=2)
 #model = bn.structure_learning.fit(df)
 #G = bn.plot(model)
 #DAGnew = bn.parameter_learning.fit(model, df, methodtype="bayes")
diff --git a/utils.py b/utils.py
index 41d788b..9366e0f 100644
--- a/utils.py
+++ b/utils.py
@@ -5,13 +5,13 @@ import pickle
 def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
-    correct = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    wrong = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    timeout = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    max_attempt = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
+    x = [i for i in range(1, n_episodes+1)]
+    correct = list(map(lambda x:x[0], y[0]))
+    wrong = list(map(lambda x:x[1], y[0]))
+    timeout = list(map(lambda x:x[2], y[0]))
+    max_attempt = list(map(lambda x:x[3], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -22,28 +22,29 @@ def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     plt.bar(r, max_attempt, bottom=np.array(correct) + np.array(wrong) + np.array(timeout), edgecolor='white',
             width=barWidth, label='max_attempt')
 
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("performance")
+    plt.ylabel("sim patient performance")
+    plt.xlabel("epoch")
     plt.savefig(save_path)
-    plt.show()
 
 
 def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]
+    r = np.arange(n_episodes+1)
     # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(1, n_episodes+2)]
 
-    lev_0 = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    lev_1 = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    lev_2 = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    lev_3 = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
-    lev_4 = list(map(lambda x:x[4], y[0]))[1::scaling_factor]
-    lev_5 = list(map(lambda x:x[5], y[0]))[1::scaling_factor]
+    lev_0 = list(map(lambda x:x[0], y[0]))
+    lev_1 = list(map(lambda x:x[1], y[0]))
+    lev_2 = list(map(lambda x:x[2], y[0]))
+    lev_3 = list(map(lambda x:x[3], y[0]))
+    lev_4 = list(map(lambda x:x[4], y[0]))
+    lev_5 = list(map(lambda x:x[5], y[0]))
+    lev_6 = list(map(lambda x:x[6], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -57,23 +58,26 @@ def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
             width=barWidth, label='lev_4')
     plt.bar(r, lev_5, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3)+ np.array(lev_4), edgecolor='white',
             width=barWidth, label='lev_5')
+    plt.bar(r, lev_6, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3) + np.array(lev_4)+np.array(lev_5),
+            edgecolor='white',
+            width=barWidth, label='lev_6')
 
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("assistance")
+    plt.ylabel("Levels of assistance")
+    plt.xlabel("Epoch")
     plt.savefig(save_path)
-    plt.show()
 
 def plot2D_feedback(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)[1::scaling_factor+1]  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(n_episodes)][1::scaling_factor+1]
 
-    feedback_no = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    feedback_yes = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
+    feedback_no = list(map(lambda x:x[0], y[0]))[0::scaling_factor]
+    feedback_yes = list(map(lambda x:x[1], y[0]))[0::scaling_factor]
 
     # plot bars
     plt.figure(figsize=(10, 7))
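
With the lev_6 bar added above, plot2D_assistance now expects seven per-level counts per episode, matching the widened n_assistance_lev_per_episode_run (counter.value + 1) in simulation.py. A minimal, hypothetical usage sketch (counts and filename fabricated for illustration; note the patched function draws n_episodes + 1 bars, so it is called with one less than the number of rows):

    from utils import plot2D_assistance  # assumes the repo root is on sys.path

    # One row per episode: usage counts for lev_0 .. lev_6.
    assistance_per_episode = [
        [5, 3, 2, 1, 0, 0, 0],
        [4, 4, 2, 1, 1, 0, 0],
        [3, 3, 3, 2, 1, 0, 0],
    ]
    plot2D_assistance('assistance.png', len(assistance_per_episode) - 1,
                      1, assistance_per_episode)
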
-- 
GitLab