Commit 074c5dd4 authored by Antonio Andriella

commit last working version

parent f63ad9b7
Branch: new_bn_model
network agent_assistive_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability (agent_assistance | game_state, attempt) {
(beg, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(beg, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(beg, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(beg, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
(mid, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(mid, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(mid, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(mid, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
(end, att_1) 0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142;
(end, att_2) 0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142;
(end, att_3) 0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667;
(end, att_4) 0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882;
}
\ No newline at end of file
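For reference, a minimal sketch of how a network in this format could be loaded and queried with pgmpy. The file name is an assumption; the '%' comment lines would have to be stripped first, since standard BIF uses '//' comments, and a strict BIF parser expects a single probability block per variable, whereas agent_assistance is given both a marginal and a conditional table above.

from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination

# Hypothetical file name for the network defined above.
reader = BIFReader("agent_assistive_model.bif")
model = reader.get_model()

# P(agent_assistance | game_state = beg, attempt = att_1):
# should reproduce the (beg, att_1) row of the conditional table.
infer = VariableElimination(model)
query = infer.query(variables=["agent_assistance"],
                    evidence={"game_state": "beg", "attempt": "att_1"})
print(query.values)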
network agent_assistive_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
network persona_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
variable user_action {
type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability ( user_action ) {
table 0.33, 0.33, 0.34;
}
network persona_model {
}
%VARIABLES DEFINITION
variable agent_assistance {
type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
}
variable attempt {
type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
type discrete [ 3 ] { beg, mid, end };
}
variable user_action {
type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( agent_assistance ) {
table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
}
probability ( game_state ) {
table 0.34, 0.33, 0.33;
}
probability ( attempt ) {
table 0.25, 0.25, 0.25, 0.25;
}
probability ( user_action ) {
table 0.33, 0.33, 0.34;
}
probability (game_state | user_action) {
(correct) 0.2222222222222222,0.3333333333333333,0.4444444444444444;
(wrong) 0.5,0.3333333333333333,0.16666666666666666;
(timeout) 0.5,0.3333333333333333,0.16666666666666666;
}
probability (attempt | user_action) {
(correct) 0.15384615384615385,0.23076923076923078,0.3076923076923077,0.3076923076923077;
(wrong) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285;
(timeout) 0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285;
}
probability (user_action | agent_assistance) {
(lev_0) 0.4,0.3,0.3;
(lev_1) 0.6,0.2,0.2;
(lev_2) 0.6,0.2,0.2;
(lev_3) 0.8,0.1,0.1;
(lev_4) 1.0,0.0,0.0;
(lev_5) 1.0,0.0,0.0;
}
\ No newline at end of file
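In the persona_model above the arcs run from user_action to game_state and attempt, and from agent_assistance to user_action, so the posterior over the user's action given full evidence factorises as P(user_action | assistance) * P(game_state | user_action) * P(attempt | user_action), up to normalisation. A small self-contained sketch of that computation (the helper and its integer state indices are illustrative, not part of the repository):

import numpy as np

# CPTs transcribed (rounded) from the persona_model definition above.
# P(user_action | agent_assistance): rows lev_0..lev_5, cols correct/wrong/timeout
p_ua_given_assist = np.array([[0.4, 0.3, 0.3],
                              [0.6, 0.2, 0.2],
                              [0.6, 0.2, 0.2],
                              [0.8, 0.1, 0.1],
                              [1.0, 0.0, 0.0],
                              [1.0, 0.0, 0.0]])
# P(game_state | user_action): rows correct/wrong/timeout, cols beg/mid/end
p_gs_given_ua = np.array([[0.2222, 0.3333, 0.4444],
                          [0.5000, 0.3333, 0.1667],
                          [0.5000, 0.3333, 0.1667]])
# P(attempt | user_action): rows correct/wrong/timeout, cols att_1..att_4
p_att_given_ua = np.array([[0.1538, 0.2308, 0.3077, 0.3077],
                           [0.4286, 0.2857, 0.1429, 0.1429],
                           [0.4286, 0.2857, 0.1429, 0.1429]])

def posterior_user_action(assistance, game_state, attempt):
    # Bayes' rule on the network structure:
    # P(ua | a, gs, att) is proportional to P(ua | a) * P(gs | ua) * P(att | ua)
    unnorm = (p_ua_given_assist[assistance]
              * p_gs_given_ua[:, game_state]
              * p_att_given_ua[:, attempt])
    return unnorm / unnorm.sum()

# e.g. lev_1 assistance, beginning of the game, first attempt
print(posterior_user_action(1, 0, 0))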
@@ -163,20 +163,37 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
     return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
 
-def select_agent_action(agent_action, epsilon):
+def select_agent_action(agent_action, agent_objective, epsilon):
     '''
     Args:
         agent_action: list of possible actions with their probabilities
         epsilon: probability at which the agent selects a non-optimal action
+        agent_objective: one of None, "challenge" or "help"
     Return:
         one of the agent's actions
     '''
-    if random.random()>epsilon:
-        return np.argmax(agent_action)
+    if agent_objective == "help":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_help_action = min(5, agent_best_action+1)
+            return agent_help_action
+    elif agent_objective == "challenge":
+        if random.random() < epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_best_action = np.argmax(agent_action)
+            agent_challenge_action = max(0, agent_best_action-1)
+            return agent_challenge_action
     else:
-        agent_action_rm_best = agent_action[:]
-        agent_action_rm_best[np.argmax(agent_action)] = 0
-        return np.argmax(agent_action_rm_best)
+        if random.random() > epsilon:
+            return np.argmax(agent_action)
+        else:
+            agent_action_rm_best = agent_action[:]
+            agent_action_rm_best[np.argmax(agent_action)] = 0
+            return np.argmax(agent_action_rm_best)
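Illustrative use of the rewritten function (values invented). One caveat worth flagging: agent_action[:] copies a plain Python list, but on a NumPy array it returns a view, so the final branch would zero the caller's distribution in place when it is passed e.g. query_agent_behaviour_prob.values; converting with list(...) or np.copy(...) avoids that.

# Hypothetical distribution over the six assistance levels; argmax is index 2.
probs = [0.10, 0.20, 0.40, 0.15, 0.10, 0.05]

select_agent_action(probs, agent_objective="help", epsilon=0.2)       # mostly 3: one level above the argmax
select_agent_action(probs, agent_objective="challenge", epsilon=0.2)  # mostly 1: one level below the argmax
select_agent_action(probs, agent_objective=None, epsilon=0.2)         # mostly 2: the argmax itself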
 
 def simulation(bn_model_user_action,
                bn_model_agent_behaviour,
@@ -185,8 +202,10 @@ def simulation(bn_model_user_action,
                game_state_bn_name, attempt_bn_name,
                agent_assistance_bn_name,
                agent_policy,
-               state_space, action_space,
-               epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
+               agent_objective,
+               epsilon,
+               state_space,
+               epoch=50, run = 50, task_complexity=5, max_attempt_per_object=4):
     '''
     Args:
@@ -229,7 +248,7 @@
     n_timeout_per_episode_run = [0] * run
     n_max_attempt_per_episode_run = [0] * run
     game_performance_episode_run = [0] * run
-    n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)]
+    n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value+1)] for j in range(run)]
 
     for r in range(run):
@@ -271,9 +290,9 @@
                                                                        evidence_variables=vars_agent_evidence)
                 #selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
-                selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, epsilon=0.2)
+                selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, agent_objective=agent_objective, epsilon=epsilon)
             else:
-                selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], epsilon=0.2)
+                selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], agent_objective=agent_objective, epsilon=epsilon)
                 #selected_agent_behaviour_action = bn_functions.get_stochastic_action(agent_policy[current_state_index])
                 #selected_agent_behaviour_action =np.argmax(agent_policy[current_state_index])
@@ -321,6 +340,7 @@
         print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
               "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
                                                           wrong_move_counter, timeout_counter, max_attempt_counter))
+        print("Assistance level for episode:", n_assistance_lev_per_episode_run)
         #save episode
         episodes.append(Episode(episode))
@@ -82,7 +82,7 @@ print( " Q shared ", q_shared.values)
 # })
 # print("BEFORE")
 # print(q1.values)
-# df = bn.sampling(DAG_update, n=1000)
+# df = bn.sampling(DAG_update, n=1)
 # DAG_update = bn.parameter_learning.fit(DAG_update, df)
 # q1 = bn.inference.fit(DAG_update, variables=['user_action'], evidence={
 #     'game_state': 0,
@@ -92,7 +92,7 @@ print( " Q shared ", q_shared.values)
 # print("AFTER")
 # print(q1.values)
-#df = bn.sampling(DAG, n=1000, verbose=2)
+#df = bn.sampling(DAG, n=1, verbose=2)
 #model = bn.structure_learning.fit(df)
 #G = bn.plot(model)
 #DAGnew = bn.parameter_learning.fit(model, df, methodtype="bayes")
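Uncommented and condensed, the update loop sketched in the comments above would look roughly as follows. DAG_update is assumed to be a bnlearn model, and the evidence keys beyond 'game_state' (truncated in the diff) are guesses based on the networks defined earlier; note that with n=1 each refit sees a single sampled row, so the learned parameters will drift heavily between steps.

import bnlearn as bn

# One update step: sample from the current model, refit its parameters,
# then query the posterior over user_action under fixed evidence.
df = bn.sampling(DAG_update, n=1)
DAG_update = bn.parameter_learning.fit(DAG_update, df)
q1 = bn.inference.fit(DAG_update, variables=['user_action'],
                      evidence={'game_state': 0, 'attempt': 0, 'agent_assistance': 0})
print(q1.values)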
@@ -5,13 +5,13 @@ import pickle
 def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
-    correct = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    wrong = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    timeout = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    max_attempt = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
+    x = [i for i in range(1, n_episodes+1)]
+    correct = list(map(lambda x:x[0], y[0]))
+    wrong = list(map(lambda x:x[1], y[0]))
+    timeout = list(map(lambda x:x[2], y[0]))
+    max_attempt = list(map(lambda x:x[3], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -22,28 +22,29 @@ def plot2D_game_performance(save_path, n_episodes, scaling_factor=1, *y):
     plt.bar(r, max_attempt, bottom=np.array(correct) + np.array(wrong) + np.array(timeout), edgecolor='white',
             width=barWidth, label='max_attempt')
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("performance")
+    plt.ylabel("sim patient performance")
     plt.xlabel("epoch")
     plt.savefig(save_path)
     plt.show()
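A hypothetical call, to make the expected input shape concrete: y[0] is a per-episode list of (correct, wrong, timeout, max_attempt) counts, one entry per episode.

# Three simulated episodes of a 10-move task (invented numbers).
game_perf = [(6, 2, 1, 1), (7, 2, 1, 0), (9, 1, 0, 0)]
plot2D_game_performance("game_performance.png", 3, 1, game_perf)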
 def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]
+    r = np.arange(n_episodes+1)
     # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(1, n_episodes+2)]
-    lev_0 = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    lev_1 = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
-    lev_2 = list(map(lambda x:x[2], y[0]))[1::scaling_factor]
-    lev_3 = list(map(lambda x:x[3], y[0]))[1::scaling_factor]
-    lev_4 = list(map(lambda x:x[4], y[0]))[1::scaling_factor]
-    lev_5 = list(map(lambda x:x[5], y[0]))[1::scaling_factor]
+    lev_0 = list(map(lambda x:x[0], y[0]))
+    lev_1 = list(map(lambda x:x[1], y[0]))
+    lev_2 = list(map(lambda x:x[2], y[0]))
+    lev_3 = list(map(lambda x:x[3], y[0]))
+    lev_4 = list(map(lambda x:x[4], y[0]))
+    lev_5 = list(map(lambda x:x[5], y[0]))
+    lev_6 = list(map(lambda x:x[6], y[0]))
 
     # plot bars
     plt.figure(figsize=(10, 7))
@@ -57,23 +58,26 @@ def plot2D_assistance(save_path, n_episodes, scaling_factor=1, *y):
             width=barWidth, label='lev_4')
     plt.bar(r, lev_5, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3)+ np.array(lev_4), edgecolor='white',
             width=barWidth, label='lev_5')
+    plt.bar(r, lev_6, bottom=np.array(lev_0) + np.array(lev_1) + np.array(lev_2) + np.array(lev_3) + np.array(lev_4)+np.array(lev_5),
+            edgecolor='white',
+            width=barWidth, label='lev_6')
-    plt.legend()
+    plt.legend(loc="upper right")
     # Custom X axis
     plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("assistance")
+    plt.ylabel("Levels of assistance")
     plt.xlabel("Epoch")
     plt.savefig(save_path)
     plt.show()
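An analogous hypothetical call for the assistance plot. The new code indexes seven levels (lev_0..lev_6, matching the counter list of size Agent_Assistance.counter.value + 1) and draws n_episodes + 1 bar groups, so y[0] needs one 7-tuple per group.

# n_episodes = 3, hence 4 groups of 7 per-level counts (invented numbers).
assistance = [(2, 3, 1, 0, 0, 0, 0),
              (1, 2, 3, 1, 0, 0, 0),
              (0, 1, 2, 3, 1, 0, 0),
              (0, 0, 1, 2, 3, 1, 0)]
plot2D_assistance("assistance.png", 3, 1, assistance)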
 def plot2D_feedback(save_path, n_episodes, scaling_factor=1, *y):
     # The position of the bars on the x-axis
     barWidth = 0.35
-    r = np.arange(n_episodes)[1::scaling_factor]  # the x locations for the groups
+    r = np.arange(n_episodes)[1::scaling_factor+1]  # the x locations for the groups
     # Get values from the group and categories
-    x = [i for i in range(n_episodes)][1::scaling_factor]
+    x = [i for i in range(n_episodes)][1::scaling_factor+1]
-    feedback_no = list(map(lambda x:x[0], y[0]))[1::scaling_factor]
-    feedback_yes = list(map(lambda x:x[1], y[0]))[1::scaling_factor]
+    feedback_no = list(map(lambda x:x[0], y[0]))[0::scaling_factor]
+    feedback_yes = list(map(lambda x:x[1], y[0]))[0::scaling_factor]
 
     # plot bars
     plt.figure(figsize=(10, 7))