Commit 074c5dd4 authored by Antonio Andriella

commit last working version

parent f63ad9b7
Branch: new_bn_model
network agent_assistive_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability (agent_assistance | game_state, attempt) {
(beg, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(beg, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(beg, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(beg, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
(mid, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(mid, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(mid, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(mid, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
(end, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(end, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(end, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(end, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
}
\ No newline at end of file
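For reference, a minimal sketch of how a network in this format could be loaded and queried with pgmpy. The file name is an assumption; the '%' comment lines would have to be stripped first, since standard BIF uses '//' comments, and a strict BIF parser expects a single probability block per variable, whereas agent_assistance is given both a marginal and a conditional table above.

from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination

# Hypothetical file name for the network defined above.
reader = BIFReader("agent_assistive_model.bif")
model = reader.get_model()

# P(agent_assistance | game_state = beg, attempt = att_1):
# should reproduce the (beg, att_1) row of the conditional table.
infer = VariableElimination(model)
query = infer.query(variables=["agent_assistance"],
                    evidence={"game_state": "beg", "attempt": "att_1"})
print(query.values)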
network agent_assistive_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
network persona_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
variable user_action {
type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability ( user_action ) {
table 0.33, 0.33, 0.34;
}
network persona_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
variable user_action {
type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability ( user_action ) {
table 0.33, 0.33, 0.34;
}
probability (game_state | user_action) {
(correct) 0.2222222222222222,0.3333333333333333,0.4444444444444444;
(wrong) 0.5,0.3333333333333333,0.16666666666666666;
(timeout) 0.5,0.3333333333333333,0.16666666666666666;
}
probability (attempt | user_action) {
(correct) 0.15384615384615385,0.23076923076923078,0.3076923076923077,0.3076923076923077;
(wrong) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285;
(timeout) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285;
}
probability (user_action | agent_assistance) {
(lev_0) 0.4,0.3,0.3;
(lev_1) 0.6,0.2,0.2;
(lev_2) 0.6,0.2,0.2;
(lev_3) 0.8,0.1,0.1;
(lev_4) 1.0,0.0,0.0;
(lev_5) 1.0,0.0,0.0;
}
\ No newline at end of file
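In the persona_model above the arcs run from user_action to game_state and attempt, and from agent_assistance to user_action, so the posterior over the user's action given full evidence factorises as P(user_action | assistance) * P(game_state | user_action) * P(attempt | user_action), up to normalisation. A small self-contained sketch of that computation (the helper and its integer state indices are illustrative, not part of the repository):

import numpy as np

# CPTs transcribed (rounded) from the persona_model definition above.
# P(user_action | agent_assistance): rows lev_0..lev_5, cols correct/wrong/timeout
p_ua_given_assist = np.array([[0.4, 0.3, 0.3],
                              [0.6, 0.2, 0.2],
                              [0.6, 0.2, 0.2],
                              [0.8, 0.1, 0.1],
                              [1.0, 0.0, 0.0],
                              [1.0, 0.0, 0.0]])
# P(game_state | user_action): rows correct/wrong/timeout, cols beg/mid/end
p_gs_given_ua = np.array([[0.2222, 0.3333, 0.4444],
                          [0.5000, 0.3333, 0.1667],
                          [0.5000, 0.3333, 0.1667]])
# P(attempt | user_action): rows correct/wrong/timeout, cols att_1..att_4
p_att_given_ua = np.array([[0.1538, 0.2308, 0.3077, 0.3077],
                           [0.4286, 0.2857, 0.1429, 0.1429],
                           [0.4286, 0.2857, 0.1429, 0.1429]])

def posterior_user_action(assistance, game_state, attempt):
    # Bayes' rule on the network structure:
    # P(ua | a, gs, att) is proportional to P(ua | a) * P(gs | ua) * P(att | ua)
    unnorm = (p_ua_given_assist[assistance]
              * p_gs_given_ua[:, game_state]
              * p_att_given_ua[:, attempt])
    return unnorm / unnorm.sum()

# e.g. lev_1 assistance, beginning of the game, first attempt
print(posterior_user_action(1, 0, 0))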
@@ -163,20 +163,37 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
     return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
 
-def select_agent_action(agent_action, epsilon):
+def select_agent_action(agent_action, agent_objective, epsilon):
     '''
     Args:
         agent_action: list of possible actions with their probabilities
         epsilon: probability at which the agent selects a non-optimal action
+        agent_objective: one of None, "challenge" or "help"
     Return:
         one of the agent's actions
     '''
-    if random.random()>epsilon:
-        return np.argmax(agent_action)
+    if agent_objective == "help":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_help_action = min(5, agent_best_action+1)
+            return agent_help_action
+    elif agent_objective == "challenge":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_challenge_action = max(0, agent_best_action-1)
+            return agent_challenge_action
     else:
-        agent_action_rm_best = agent_action[:]
-        agent_action_rm_best[np.argmax(agent_action)] = 0
-        return np.argmax(agent_action_rm_best)
+        if random.random() > epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_action_rm_best = agent_action[:]
+            agent_action_rm_best[np.argmax(agent_action)] = 0
+            return np.argmax(agent_action_rm_best)
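Illustrative use of the rewritten function (values invented). One caveat worth flagging: agent_action[:] copies a plain Python list, but on a NumPy array it returns a view, so the final branch would zero the caller's distribution in place when it is passed e.g. query_agent_behaviour_prob.values; converting with list(...) or np.copy(...) avoids that.

# Hypothetical distribution over the six assistance levels; argmax is index 2.
probs = [0.10, 0.20, 0.40, 0.15, 0.10, 0.05]

select_agent_action(probs, agent_objective="help", epsilon=0.2)       # mostly 3: one level above the argmax
select_agent_action(probs, agent_objective="challenge", epsilon=0.2)  # mostly 1: one level below the argmax
select_agent_action(probs, agent_objective=None, epsilon=0.2)         # mostly 2: the argmax itself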
 
 def simulation(bn_model_user_action,
                bn_model_agent_behaviour,
@@ -185,8 +202,10 @@ def simulation(bn_model_user_action,
                game_state_bn_name, attempt_bn_name,
                agent_assistance_bn_name,
                agent_policy,
-               state_space, action_space,
-               epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
+               agent_objective,
+               epsilon,
+               state_space,
+               epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4):
     '''
     Args:
@@ -229,7 +248,7 @@
     n_timeout_per_episode_run = [0] * run
     n_max_attempt_per_episode_run = [0] * run
     game_performance_episode_run = [0] * run
-    n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)]
+    n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value+1)] for j in range(run)]
 
     for r in range(run):
@@ -271,9 +290,9 @@
                                                                        evidence_variables=vars_agent_evidence)
                 #selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
-                selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, epsilon=0.2)
+                selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, agent_objective=agent_objective, epsilon=epsilon)
             else:
-                selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], epsilon=0.2)
+                selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], agent_objective=agent_objective, epsilon=epsilon)
                 #selected_agent_behaviour_action = bn_functions.get_stochastic_action(agent_policy[current_state_index])
                 #selected_agent_behaviour_action =np.argmax(agent_policy[current_state_index])
@@ -321,6 +340,7 @@
         print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
               "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
                                                           wrong_move_counter, timeout_counter, max_attempt_counter))
+        print("Assistance level for episode:", n_assistance_lev_per_episode_run)
         #save episode
         episodes.append(Episode(episode))
@@ -82,7 +82,7 @@ print( " Q shared ", q_shared.values)
 # })
 # print("BEFORE")
 # print(q1.values)
-# df = bn.sampling(DAG_update, n=1000)
+# df = bn.sampling(DAG_update, n=1)
 # DAG_update = bn.parameter_learning.fit(DAG_update, df)
 # q1 = bn.inference.fit(DAG_update, variables=['user_action'], evidence={
 #     'game_state': 0,
@@ -92,7 +92,7 @@ print( " Q shared ", q_shared.values)
 # print("AFTER")
 # print(q1.values)
-#df = bn.sampling(DAG, n=1000, verbose=2)
+#df = bn.sampling(DAG, n=1, verbose=2)
 #model = bn.structure_learning.fit(df)
 #G = bn.plot(model)
 #DAGnew = bn.parameter_learning.fit(model, df, methodtype="bayes")
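Uncommented and condensed, the update loop sketched in the comments above would look roughly as follows. DAG_update is assumed to be a bnlearn model, and the evidence keys beyond 'game_state' (truncated in the diff) are guesses based on the networks defined earlier; note that with n=1 each refit sees a single sampled row, so the learned parameters will drift heavily between steps.

import bnlearn as bn

# One update step: sample from the current model, refit its parameters,
# then query the posterior over user_action under fixed evidence.
df = bn.sampling(DAG_update, n=1)
DAG_update = bn.parameter_learning.fit(DAG_update, df)
q1 = bn.inference.fit(DAG_update, variables=['user_action'],
                      evidence={'game_state': 0, 'attempt': 0, 'agent_assistance': 0})
print(q1.values)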
@@ -5,13 +5,13 @@ import pickle
 def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
-    correct = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    wrong = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    timeout = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    max_attempt = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
+    x = [i for i in range(1, n_episodes+1)]
+    correct = list(map(lambda x:x[0], y[0]))
+    wrong = list(map(lambda x:x[1], y[0]))
+    timeout = list(map(lambda x:x[2], y[0]))
+    max_attempt = list(map(lambda x:x[3], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -22,28 +22,29 @@ def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     plt.bar(r, max_attempt, bottom=np.array(correct) + np.array(wrong) + np.array(timeout), edgecolor='white',
             width=barWidth, label='max_attempt')
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("performance")
+    plt.ylabel("sim patient performance")
     plt.xlabel("epoch")
     plt.savefig(save_path)
     plt.show()
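A hypothetical call, to make the expected input shape concrete: y[0] is a per-episode list of (correct, wrong, timeout, max_attempt) counts, one entry per episode.

# Three simulated episodes of a 10-move task (invented numbers).
game_perf = [(6, 2, 1, 1), (7, 2, 1, 0), (9, 1, 0, 0)]
plot2D_game_performance("game_performance.png", 3, 1, game_perf)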
 def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]
+    r = np.arange(n_episodes+1)
     # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(1, n_episodes+2)]
-    lev_0 = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    lev_1 = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    lev_2 = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    lev_3 = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
-    lev_4 = list(map(lambda x:x[4], y[0]))[1::scaling_factor]
-    lev_5 = list(map(lambda x:x[5], y[0]))[1::scaling_factor]
+    lev_0 = list(map(lambda x:x[0], y[0]))
+    lev_1 = list(map(lambda x:x[1], y[0]))
+    lev_2 = list(map(lambda x:x[2], y[0]))
+    lev_3 = list(map(lambda x:x[3], y[0]))
+    lev_4 = list(map(lambda x:x[4], y[0]))
+    lev_5 = list(map(lambda x:x[5], y[0]))
+    lev_6 = list(map(lambda x:x[6], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -57,23 +58,26 @@ def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
             width=barWidth, label='lev_4')
     plt.bar(r, lev_5, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3)+ np.array(lev_4), edgecolor='white',
             width=barWidth, label='lev_5')
+    plt.bar(r, lev_6, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3) + np.array(lev_4)+np.array(lev_5),
+            edgecolor='white',
+            width=barWidth, label='lev_6')
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("assistance")
+    plt.ylabel("Levels of assistance")
     plt.xlabel("Epoch")
     plt.savefig(save_path)
     plt.show()
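An analogous hypothetical call for the assistance plot. The new code indexes seven levels (lev_0..lev_6, matching the counter list of size Agent_Assistance.counter.value + 1) and draws n_episodes + 1 bar groups, so y[0] needs one 7-tuple per group.

# n_episodes = 3, hence 4 groups of 7 per-level counts (invented numbers).
assistance = [(2, 3, 1, 0, 0, 0, 0),
              (1, 2, 3, 1, 0, 0, 0),
              (0, 1, 2, 3, 1, 0, 0),
              (0, 0, 1, 2, 3, 1, 0)]
plot2D_assistance("assistance.png", 3, 1, assistance)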
 def plot2D_feedback(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)[1::scaling_factor+1]  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(n_episodes)][1::scaling_factor+1]
-    feedback_no = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    feedback_yes = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
+    feedback_no = list(map(lambda x:x[0], y[0]))[0::scaling_factor]
+    feedback_yes = list(map(lambda x:x[1], y[0]))[0::scaling_factor]
 
     # plot bars
     plt.figure(figsize=(10, 7))