diff --git a/bn_models/agent_model_id_1_True.bif b/bn_models/agent_model_id_1_True.bif deleted file mode 100644 index ebc8cee75c6472cfe11647188fe3c0aea77d50f2..0000000000000000000000000000000000000000 --- a/bn_models/agent_model_id_1_True.bif +++ /dev/null @@ -1,40 +0,0 @@ -network agent_assistive_model { -} - -%VARIABLES DEFINITION - -variable agent_assistance { - type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5}; -} -variable attempt { - type discrete [ 4 ] { att_1, att_2, att_3, att_4 }; -} -variable game_state { - type discrete [ 3 ] { beg, mid, end }; -} - - -%INDIVIDUAL PROBABILITIES DEFINITION -probability ( agent_assistance ) { - table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16; -} -probability ( game_state ) { - table 0.34, 0.33, 0.33; -} -probability ( attempt ) { - table 0.25, 0.25, 0.25, 0.25; -} -probability (agent_assistance | game_state, attempt) { -(beg, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; -(beg, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; -(beg, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; -(beg, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; -(mid, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; -(mid, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; -(mid, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; -(mid, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; -(end, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; -(end, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; -(end, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; -(end, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; -} \ No newline at end of file diff --git a/bn_models/agent_model_template.bif b/bn_models/agent_model_template.bif deleted file mode 100644 index 1ca11f0133460feef8789a72c56f5764d688d8fa..0000000000000000000000000000000000000000 --- a/bn_models/agent_model_template.bif +++ /dev/null @@ -1,26 +0,0 @@ -network agent_assistive_model { -} - -%VARIABLES DEFINITION - -variable agent_assistance { - type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5}; -} -variable attempt { - type discrete [ 4 ] { att_1, att_2, att_3, att_4 }; -} -variable game_state { - type discrete [ 3 ] { beg, mid, end }; -} - - -%INDIVIDUAL PROBABILITIES DEFINITION -probability ( agent_assistance ) { - table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16; -} -probability ( game_state ) { - table 0.34, 0.33, 0.33; -} -probability ( attempt ) { - table 0.25, 0.25, 0.25, 0.25; -} diff --git a/bn_models/persona_model_template.bif b/bn_models/persona_model_template.bif deleted file mode 100644 index 
d8227eaa35963ecfe91423854c19ea8ac6fdcafb..0000000000000000000000000000000000000000 --- a/bn_models/persona_model_template.bif +++ /dev/null @@ -1,33 +0,0 @@ -network persona_model { -} - -%VARIABLES DEFINITION - -variable agent_assistance { - type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 }; -} -variable attempt { - type discrete [ 4 ] { att_1, att_2, att_3, att_4 }; -} -variable game_state { - type discrete [ 3 ] { beg, mid, end }; -} - -variable user_action { - type discrete [ 3 ] { correct, wrong, timeout }; -} - -%INDIVIDUAL PROBABILITIES DEFINITION - -probability ( agent_assistance ) { - table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17; -} -probability ( game_state) { - table 0.34, 0.33, 0.33; -} -probability ( attempt ) { - table 0.25, 0.25, 0.25, 0.25; -} -probability ( user_action ) { - table 0.33, 0.33, 0.34; -} diff --git a/bn_models/user_model_id_1_True.bif b/bn_models/user_model_id_1_True.bif deleted file mode 100644 index 3f1ae72adce9568df8bf01f4df5d1f28b9b1571d..0000000000000000000000000000000000000000 --- a/bn_models/user_model_id_1_True.bif +++ /dev/null @@ -1,51 +0,0 @@ -network persona_model { -} - -%VARIABLES DEFINITION - -variable agent_assistance { - type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 }; -} -variable attempt { - type discrete [ 4 ] { att_1, att_2, att_3, att_4 }; -} -variable game_state { - type discrete [ 3 ] { beg, mid, end }; -} - -variable user_action { - type discrete [ 3 ] { correct, wrong, timeout }; -} - -%INDIVIDUAL PROBABILITIES DEFINITION - -probability ( agent_assistance ) { - table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17; -} -probability ( game_state) { - table 0.34, 0.33, 0.33; -} -probability ( attempt ) { - table 0.25, 0.25, 0.25, 0.25; -} -probability ( user_action ) { - table 0.33, 0.33, 0.34; -} -probability (game_state | user_action) { -(correct) 0.2222222222222222,0.3333333333333333,0.4444444444444444; -(wrong) 0.5,0.3333333333333333,0.16666666666666666; -(timeout) 0.5,0.3333333333333333,0.16666666666666666; -} -probability (attempt | user_action) { -(correct) 0.15384615384615385,0.23076923076923078,0.3076923076923077,0.3076923076923077; -(wrong) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; -(timeout) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; -} -probability (user_action | agent_assistance) { -(lev_0) 0.4,0.3,0.3; -(lev_1) 0.6,0.2,0.2; -(lev_2) 0.6,0.2,0.2; -(lev_3) 0.8,0.1,0.1; -(lev_4) 1.0,0.0,0.0; -(lev_5) 1.0,0.0,0.0; -} \ No newline at end of file diff --git a/simulation.py b/simulation.py index 15bd225bb57ed75937340104501069a88f760768..ab3029f995a6c9942e0928788b6562a8a4167366 100644 --- a/simulation.py +++ b/simulation.py @@ -163,20 +163,37 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter -def select_agent_action(agent_action, epsilon): +def select_agent_action(agent_action, agent_objective, epsilon): ''' Args: agent_action: list of possible actions with their probabilities + epsilon: with agent_objective None, the probability of selecting a non-optimal action; with "help" or "challenge", the probability of falling back to the optimal action + agent_objective: one of None, "challenge", or "help" Return: one of the agent's actions ''' - if random.random()>epsilon: - return np.argmax(agent_action) + if agent_objective == "help": + if random.random() < epsilon: + return np.argmax(agent_action) + else: + agent_best_action = np.argmax(agent_action) +
agent_help_action = min(5, agent_best_action+1) + return agent_help_action + elif agent_objective == "challenge": + if random.random() < epsilon: + return np.argmax(agent_action) + else: + agent_best_action = np.argmax(agent_action) + agent_challenge_action = max(0, agent_best_action-1) + return agent_challenge_action else: - agent_action_rm_best = agent_action[:] - agent_action_rm_best[np.argmax(agent_action)] = 0 - return np.argmax(agent_action_rm_best) + if random.random() > epsilon: + return np.argmax(agent_action) + else: + agent_action_rm_best = np.copy(agent_action) # explicit copy: slicing a numpy array returns a view, and zeroing it would mutate the original probabilities + agent_action_rm_best[np.argmax(agent_action)] = 0 + return np.argmax(agent_action_rm_best) def simulation(bn_model_user_action, bn_model_agent_behaviour, @@ -185,8 +202,10 @@ def simulation(bn_model_user_action, game_state_bn_name, attempt_bn_name, agent_assistance_bn_name, agent_policy, - state_space, action_space, - epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0): + agent_objective, + epsilon, + state_space, + epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4): ''' Args: @@ -229,7 +248,7 @@ def simulation(bn_model_user_action, n_timeout_per_episode_run = [0] * run n_max_attempt_per_episode_run = [0] * run game_performance_episode_run = [0] * run - n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)] + n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value+1)] for j in range(run)] for r in range(run): @@ -271,9 +290,9 @@ def simulation(bn_model_user_action, evidence_variables=vars_agent_evidence) #selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values) - selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, epsilon=0.2) + selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, agent_objective=agent_objective, epsilon=epsilon) else: - selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], epsilon=0.2) + selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], agent_objective=agent_objective, epsilon=epsilon) #selected_agent_behaviour_action = bn_functions.get_stochastic_action(agent_policy[current_state_index]) #selected_agent_behaviour_action =np.argmax(agent_policy[current_state_index]) @@ -321,6 +340,7 @@ def simulation(bn_model_user_action, print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, " "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter)) + print("Assistance level for episode:", n_assistance_lev_per_episode_run) #save episode episodes.append(Episode(episode)) diff --git a/test.py b/test.py index b5fd6abccd6355314018406ca22782d58d7840f2..45ffe12564556582cc80883d41d14a207d93610b 100644 --- a/test.py +++ b/test.py @@ -82,7 +82,7 @@ print( " Q shared ", q_shared.values) # }) # print("BEFORE") # print(q1.values) -# df = bn.sampling(DAG_update, n=1000) +# df = bn.sampling(DAG_update, n=1) # DAG_update = bn.parameter_learning.fit(DAG_update, df) # q1 = bn.inference.fit(DAG_update, variables=['user_action'], evidence={ # 'game_state': 0, @@ -92,7 +92,7 @@ print( " Q shared ", q_shared.values) # print("AFTER") # print(q1.values) -#df = bn.sampling(DAG, n=1000, verbose=2) +#df = bn.sampling(DAG, n=1, verbose=2) #model = bn.structure_learning.fit(df) #G = bn.plot(model) #DAGnew
= bn.parameter_learning.fit(model, df, methodtype="bayes") diff --git a/utils.py b/utils.py index 41d788bacb6523191606f13cbaea7ba8515ecca6..9366e0f38dfdb0d0a030c390f89af96fb36cc81b 100644 --- a/utils.py +++ b/utils.py @@ -5,13 +5,13 @@ import pickle def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y): # The position of the bars on the x-axis barWidth = 0.35 - r = np.arange(n_episodes)[1::scaling_factor] # the x locations for the groups + r = np.arange(n_episodes) # the x locations for the groups # Get values from the group and categories - x = [i for i in range(n_episodes)][1::scaling_factor] - correct = list(map(lambda x:x[0], y[0]))[1::scaling_factor] - wrong = list(map(lambda x:x[1], y[0]))[1::scaling_factor] - timeout = list(map(lambda x:x[2], y[0]))[1::scaling_factor] - max_attempt = list(map(lambda x:x[3], y[0]))[1::scaling_factor] + x = [i for i in range(1, n_episodes+1)] + correct = list(map(lambda x:x[0], y[0])) + wrong = list(map(lambda x:x[1], y[0])) + timeout = list(map(lambda x:x[2], y[0])) + max_attempt = list(map(lambda x:x[3], y[0])) # plot bars plt.figure(figsize=(10, 7)) @@ -22,28 +22,29 @@ def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y): plt.bar(r, max_attempt, bottom=np.array(correct) + np.array(wrong) + np.array(timeout), edgecolor='white', width=barWidth, label='max_attempt') - plt.legend() + plt.legend(loc="upper right") # Custom X axis plt.xticks(r, x, fontweight='bold') - plt.ylabel("performance") + plt.ylabel("sim patient performance") + plt.xlabel("epoch") plt.savefig(save_path) - plt.show() def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y): # The position of the bars on the x-axis barWidth = 0.35 - r = np.arange(n_episodes)[1::scaling_factor] + r = np.arange(n_episodes+1) # the x locations for the groups # Get values from the group and categories - x = [i for i in range(n_episodes)][1::scaling_factor] + x = [i for i in range(1, n_episodes+2)] - lev_0 = list(map(lambda x:x[0], y[0]))[1::scaling_factor] - lev_1 = list(map(lambda x:x[1], y[0]))[1::scaling_factor] - lev_2 = list(map(lambda x:x[2], y[0]))[1::scaling_factor] - lev_3 = list(map(lambda x:x[3], y[0]))[1::scaling_factor] - lev_4 = list(map(lambda x:x[4], y[0]))[1::scaling_factor] - lev_5 = list(map(lambda x:x[5], y[0]))[1::scaling_factor] + lev_0 = list(map(lambda x:x[0], y[0])) + lev_1 = list(map(lambda x:x[1], y[0])) + lev_2 = list(map(lambda x:x[2], y[0])) + lev_3 = list(map(lambda x:x[3], y[0])) + lev_4 = list(map(lambda x:x[4], y[0])) + lev_5 = list(map(lambda x:x[5], y[0])) + lev_6 = list(map(lambda x:x[6], y[0])) # plot bars plt.figure(figsize=(10, 7)) @@ -57,23 +58,26 @@ def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y): width=barWidth, label='lev_4') plt.bar(r, lev_5, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3)+ np.array(lev_4), edgecolor='white', width=barWidth, label='lev_5') + plt.bar(r, lev_6, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3) + np.array(lev_4)+np.array(lev_5), + edgecolor='white', + width=barWidth, label='lev_6') - plt.legend() + plt.legend(loc="upper right") # Custom X axis plt.xticks(r, x, fontweight='bold') - plt.ylabel("assistance") + plt.ylabel("Levels of assistance") + plt.xlabel("Epoch") plt.savefig(save_path) - plt.show() def plot2D_feedback(save_path, n_episodes, scaling_factor=1, *y): # The position of the bars on the x-axis barWidth = 0.35 - r = np.arange(n_episodes)[1::scaling_factor] # the x locations for the groups + r = 
np.arange(n_episodes)[1::scaling_factor+1] # the x locations for the groups # Get values from the group and categories - x = [i for i in range(n_episodes)][1::scaling_factor] + x = [i for i in range(n_episodes)][1::scaling_factor+1] - feedback_no = list(map(lambda x:x[0], y[0]))[1::scaling_factor] - feedback_yes = list(map(lambda x:x[1], y[0]))[1::scaling_factor] + feedback_no = list(map(lambda x:x[0], y[0]))[1::scaling_factor+1] + feedback_yes = list(map(lambda x:x[1], y[0]))[1::scaling_factor+1] # plot bars plt.figure(figsize=(10, 7))
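
Reviewer note (not part of the patch): the new select_agent_action contract is easiest to see in isolation. Below is a minimal, self-contained Python sketch of the same selector; the probability vector probs and its values are illustrative placeholders, not data from the repository.

import random
import numpy as np

def select_agent_action(agent_action, agent_objective, epsilon):
    # agent_action: probability vector over the six assistance levels (lev_0..lev_5)
    best = int(np.argmax(agent_action))
    if agent_objective == "help":
        # keep the greedy action with probability epsilon,
        # otherwise offer one level more assistance (capped at lev_5)
        return best if random.random() < epsilon else min(5, best + 1)
    elif agent_objective == "challenge":
        # keep the greedy action with probability epsilon,
        # otherwise offer one level less assistance (floored at lev_0)
        return best if random.random() < epsilon else max(0, best - 1)
    else:
        # plain epsilon-greedy: with probability epsilon, zero out the
        # greedy entry on a copy and return the runner-up
        if random.random() > epsilon:
            return best
        masked = np.copy(agent_action)  # copy, not a view, so probs stays intact
        masked[best] = 0
        return int(np.argmax(masked))

probs = np.array([0.05, 0.10, 0.40, 0.25, 0.15, 0.05])  # placeholder distribution, argmax is lev_2
print(select_agent_action(probs, "help", epsilon=0.2))       # mostly 3 (one level above greedy)
print(select_agent_action(probs, "challenge", epsilon=0.2))  # mostly 1 (one level below greedy)
print(select_agent_action(probs, None, epsilon=0.2))         # mostly 2 (greedy), sometimes the runner-up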