Commit 6e9d0ce6 authored by Antonio Andriella

Working code with persona, real_user and robot in the simulation

parent 32365577
network persona_model {
}
%VARIABLES DEFINITION
......
import random
import bn_functions


def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
                       wrong_move_counter, timeout_counter
                       ):
    '''
    Given the current state and the user's action, compute the next state.
    Args:
        user_action: 0, 1 or 2
        task_evolution: beg, mid, end
        attempt_counter:
        correct_move_counter:
        wrong_move_counter:
        timeout_counter:
    Return:
        the counters updated according to the user_action
    '''
    # the user made the correct move
    if user_action == 0:
        attempt_counter = 0
        task_evolution += 1
        correct_move_counter += 1
    # the user made a wrong move and has not yet reached the maximum number of attempts
    elif user_action == 1 and attempt_counter < 3:
        attempt_counter += 1
        wrong_move_counter += 1
    # the user did not move any token and has not yet reached the maximum number of attempts
    elif user_action == 2 and attempt_counter < 3:
        attempt_counter += 1
        timeout_counter += 1
    # the robot or therapist makes the correct move on the patient's behalf
    else:
        attempt_counter = 0
        task_evolution += 1
        correct_move_counter += 1
    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter
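
# For illustration, with hypothetical counter values: a correct move (user_action = 0)
# advances the task, resets the attempt counter and increments the correct-move counter, e.g.
# compute_next_state(0, task_evolution=2, attempt_counter=1, correct_move_counter=3,
#                    wrong_move_counter=1, timeout_counter=0) returns (3, 0, 4, 1, 0).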
def get_stochastic_action(actions_prob):
    '''
    Select one of the actions according to actions_prob.
    Args:
        actions_prob: the probabilities, based on the BN, that the Persona makes
                      a correct move, a wrong move or a timeout
    Return:
        the id of the selected action
    '''
    correct_action_from_BN = actions_prob[0]
    wrong_action_from_BN = actions_prob[1]
    timeout_action_from_BN = actions_prob[2]

    rnd_val = random.uniform(0, 1)
    # if rnd_val is lower than or equal to the correct-action prob, the correct action is selected
    if rnd_val <= correct_action_from_BN:
        action_id = 0
    # if rnd_val falls between the correct-action prob and correct+wrong, the wrong action is selected
    elif rnd_val <= (correct_action_from_BN + wrong_action_from_BN):
        action_id = 1
    # otherwise it is a timeout
    else:
        action_id = 2
    return action_id
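
# Example with a hypothetical BN output: given actions_prob = [0.7, 0.2, 0.1],
# a draw of rnd_val = 0.75 falls in the interval (0.7, 0.9], so the wrong move (id 1)
# is returned; over many calls, actions 0/1/2 are selected roughly 70/20/10% of the time.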
network persona_model_3 {
}
%VARIABLES DEFINITION
variable reactivity {
    type discrete [3] {slow, medium, fast};
}
...@@ -11,29 +11,23 @@ variable memory {
variable attention {
    type discrete[3] {low, medium, high};
}
variable robot_assistance {
    type discrete [ 5 ] { lev_0, lev_1, lev_2, lev_3, lev_4 };
}
variable attempt {
    type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
}
variable game_state {
    type discrete [ 3 ] { beg, mid, end };
}
variable robot_feedback {
    type discrete [ 2 ] { yes, no };
}
variable user_action {
    type discrete [ 3 ] { correct, wrong, timeout };
}
%INDIVIDUAL PROBABILITIES DEFINITION
probability ( robot_assistance ) {
    table 0.2, 0.2, 0.2, 0.2, 0.2;
}
...@@ -46,26 +40,23 @@ probability ( attempt ) {
probability ( user_action ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 4
probability ( reactivity ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 3
probability ( memory ) {
    table 0.33, 0.33, 0.34;
}
#CPDS 1
probability ( attention ) {
    table 0.33, 0.33, 0.34;
}
probability ( robot_feedback ) {
    table 0.5, 0.5;
}
#CPDS 7
probability (user_action | reactivity, memory, attention) {
    (slow, low, low) 0.1, 0.4, 0.5;
    (slow, low, medium) 0.3, 0.5, 0.2;
    (slow, low, high) 0.4, 0.5, 0.1;
...@@ -75,7 +66,7 @@ probability (user_action | reactivity, memory, attention) {
    (slow, high, low) 0.3, 0.4, 0.3;
    (slow, high, medium) 0.6, 0.3, 0.1;
    (slow, high, high) 0.7, 0.2, 0.1;
    %%%
    (medium, low, low) 0.3, 0.4, 0.3;
    (medium, low, medium) 0.3, 0.5, 0.2;
    (medium, low, high) 0.4, 0.3, 0.3;
...@@ -85,7 +76,7 @@ probability (user_action | reactivity, memory, attention) {
    (medium, high, low) 0.34, 0.33, 0.33;
    (medium, high, medium) 0.7, 0.2, 0.1;
    (medium, high, high) 0.75, 0.25, 0.0;
    %%%
    (fast, low, low) 0.5, 0.2, 0.3;
    (fast, low, medium) 0.6, 0.2, 0.2;
    (fast, low, high) 0.7, 0.3, 0.0;
...@@ -95,37 +86,32 @@ probability (user_action | reactivity, memory, attention) {
    (fast, high, low) 0.5, 0.2, 0.3;
    (fast, high, medium) 0.6, 0.2, 0.2;
    (fast, high, high) 0.9, 0.1, 0.0;
}
#CPDS 5
probability (robot_feedback | user_action) {
    (correct) 0.8, 0.2;
    (wrong) 0.5, 0.5;
    (timeout) 0.2, 0.8;
}
#CPDS 6
probability (robot_assistance | user_action) {
    (correct) 0.05 0.1 0.15 0.3 0.4;
    (wrong) 0.1 0.2 0.4 0.2 0.1;
    (timeout) 0.2 0.4 0.2 0.1 0.1;
}
#CPDS 2
probability (game_state | user_action) {
    (correct) 0.2, 0.4, 0.4;
    (wrong) 0.4, 0.4, 0.2;
    (timeout) 0.6, 0.3, 0.1;
}
#CPDS 0
probability (attempt | user_action) {
    (correct) 0.1, 0.2, 0.3, 0.4;
    (wrong) 0.5, 0.3, 0.15, 0.05;
    (timeout) 0.4, 0.3, 0.2, 0.1;
}
#CPDS 5
probability (robot_assistance | robot_feedback) {
    (yes) 0.5 0.3 0.1 0.1 0.0;
    (no) 0.0 0.1 0.1 0.3 0.5;
......
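
For context, a minimal sketch of how a .bif model like the one above is consumed downstream with bnlearn (the evidence values here are illustrative; import_DAG and inference.fit are the calls the scripts below rely on):

import bnlearn

# load the network defined in the .bif file
model = bnlearn.import_DAG('bn_persona_model/persona_model.bif')
# query P(user_action) given evidence on the persona's traits, passed as state indices
query = bnlearn.inference.fit(model, variables=['user_action'],
                              evidence={'reactivity': 0, 'memory': 0, 'attention': 0})
print(query.values)  # [P(correct), P(wrong), P(timeout)]
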
import bnlearn
import numpy as np
import enum
import random
import matplotlib.pyplot as plt

#import classes and modules
from bn_variables import Memory, Attention, Reactivity, Robot_Assistance, Robot_Feedback, Robot_Assistance_Feedback, User_Action, User_Capability, Game_State, Attempt
import bn_functions
import utils

#define constants
class User_Action(enum.Enum):
    correct = 0
    wrong = 1
    timeout = 2
    name = "user_action"
    counter = 3
class Reactivity(enum.Enum):
    slow = 0
    medium = 1
    fast = 2
    name = "reactivity"
    counter = 3

class Memory(enum.Enum):
    low = 0
    medium = 1
    high = 2
    name = "memory"
    counter = 3

class Robot_Assistance(enum.Enum):
    lev_0 = 0
    lev_1 = 1
    lev_2 = 2
    lev_3 = 3
    lev_4 = 4
    name = "robot_assistance"
    counter = 5

class Robot_Feedback(enum.Enum):
    yes = 0
    no = 1
    name = "robot_feedback"
    counter = 2

class Game_State(enum.Enum):
    beg = 0
    middle = 1
    end = 2
    name = "game_state"
    counter = 3

class Attempt(enum.Enum):
    at_1 = 0
    at_2 = 1
    at_3 = 2
    at_4 = 3
    name = "attempt"
    counter = 4
def plot2D(save_path, n_episodes, *y):
    # the width and position of the bars on the x-axis
    barWidth = 0.35
    r = np.arange(n_episodes)  # the x locations for the groups
    # get values from the group and categories
    x = [i for i in range(n_episodes)]
    correct = y[0][0]
    wrong = y[0][1]
    timeout = y[0][2]
    # plot bars
    plt.figure(figsize=(10, 7))
    plt.bar(r, correct, edgecolor='white', width=barWidth, label="correct")
    plt.bar(r, wrong, bottom=np.array(correct), edgecolor='white', width=barWidth, label='wrong')
    plt.bar(r, timeout, bottom=np.array(correct) + np.array(wrong), edgecolor='white',
            width=barWidth, label='timeout')
    plt.legend()
    # custom x axis
    plt.xticks(r, x, fontweight='bold')
    plt.ylabel("performance")
    plt.savefig(save_path)
    plt.show()
def compute_prob(cpds_table):
    '''
    Given the counters, generate the probability distributions
    Args:
        cpds_table: the table of counters
    Return:
        the probs for the cpds table
    '''
    for val in range(len(cpds_table)):
        cpds_table[val] = list(map(lambda x: x / (sum(cpds_table[val]) + 0.00001), cpds_table[val]))
    return cpds_table

def average_prob(ref_cpds_table, current_cpds_table):
    '''
    Args:
        ref_cpds_table: table from bnlearn
        current_cpds_table: table from the interaction
    Return:
        the elementwise average of both tables
    '''
    res_cpds_table = ref_cpds_table.copy()
    for elem1 in range(len(ref_cpds_table)):
        for elem2 in range(len(ref_cpds_table[0])):
            res_cpds_table[elem1][elem2] = (ref_cpds_table[elem1][elem2] + current_cpds_table[elem1][elem2]) / 2
    return res_cpds_table
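
# Worked example with hypothetical counters: compute_prob([[2, 1, 1]]) yields roughly
# [[0.5, 0.25, 0.25]], and average_prob([[0.6, 0.2, 0.2]], [[0.5, 0.25, 0.25]]) yields
# [[0.55, 0.225, 0.225]]: the running estimate moves halfway towards the newly observed
# frequencies at each update.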
def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
                       wrong_move_counter, timeout_counter, max_attempt_counter
                       ):
    '''
    Given the current state and the user's action, compute the next state
    Args:
        user_action: 0,1,2
        task_evolution: beg, mid, end
...@@ -111,8 +18,14 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
        attempt_counter:
        wrong_move_counter:
        timeout_counter:
        max_attempt_counter:
    Return:
        task_evolution
        attempt_counter
        correct_move_counter
        wrong_move_counter
        timeout_counter
        max_attempt_counter
    '''
    if user_action == 0:
        attempt_counter = 0
...@@ -130,123 +43,30 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
    else:
        attempt_counter = 0
        task_evolution += 1
        #correct_move_counter += 1
        max_attempt_counter += 1
    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
def update_cpds_tables(game_state_counter, attempt_counter,
                       robot_assistance, robot_feedback,
                       persona_bn_model
                       ):
    '''
    Args:
        game_state_counter: from 0 to 2 (beg, mid, end)
        attempt_counter: from 1 to 4
        robot_assistance: from 0 to 4
        robot_feedback: 0 or 1 depending on whether a feedback has been provided
        persona_bn_model: the model whose cpds tables are updated
    Return:
        the cpds tables updated with the new counters
    '''
    # transform counters into probabilities
    prob_over_attempt_per_action = compute_prob(attempt_counter)
    prob_over_game_per_action = compute_prob(game_state_counter)
    prob_over_feedback_per_action = compute_prob(robot_feedback)
    prob_over_assistance_per_feedback = compute_prob(robot_assistance)

    # average the probabilities obtained with the cpds tables
    updated_prob_over_attempt_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[0].values),
        prob_over_attempt_per_action)
    updated_prob_over_game_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[2].values),
        prob_over_game_per_action)
    updated_prob_over_feedback_per_action = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[6].values),
        prob_over_feedback_per_action)
    updated_prob_over_assistance_per_feedback = average_prob(
        np.transpose(persona_bn_model['cpds']['model'].cpds[5].values),
        prob_over_assistance_per_feedback)

    # dirty solution: indices hardcoded based on the .bif; check it to know the corresponding cpds
    persona_bn_model['cpds']['model'].cpds[0].values = np.transpose(updated_prob_over_attempt_per_action)
    persona_bn_model['cpds']['model'].cpds[2].values = np.transpose(updated_prob_over_game_per_action)
    persona_bn_model['cpds']['model'].cpds[6].values = np.transpose(updated_prob_over_feedback_per_action)
    persona_bn_model['cpds']['model'].cpds[5].values = np.transpose(updated_prob_over_assistance_per_feedback)
    return persona_bn_model
def get_user_actions_prob_from_state(user_initial_cpds, user_memory, user_attention, user_reactivity,
                                     game_state_counter, attempt_counter,
                                     robot_assistance_action, robot_feedback_action
                                     ):
    '''
    Args:
        user_initial_cpds: cpds for the given user
        user_memory: from 1 to 3
        user_attention: from 1 to 3
        user_reactivity: from 1 to 3
        game_state_counter: beg, mid, end
        attempt_counter: from 1 to 4
        robot_assistance_action: between 0 and 4
        robot_feedback_action: between 0 and 1
    Return:
        the probability of the user performing: i) a correct move, ii) a wrong move, iii) a timeout
    '''
    query = bnlearn.inference.fit(user_initial_cpds, variables=['user_action'],
                                  evidence={'robot_assistance': robot_assistance_action,
                                            'attempt': attempt_counter,
                                            'game_state': game_state_counter,
                                            'robot_feedback': robot_feedback_action,
                                            'memory': user_memory,
                                            'attention': user_attention,
                                            'reactivity': user_reactivity
                                            })
    user_actions_prob_from_state = query.values
    return user_actions_prob_from_state

def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_value, user_attention_name, user_attention_value,
               user_reactivity_name, user_reactivity_value,
               task_progress_name, game_attempt_name, robot_assistance_name, robot_feedback_name,
               robot_bn_model, robot_var_target,
               other_user_bn_model, other_user_var_target, other_user_memory_name, other_user_memory_value,
               other_user_attention_name, other_user_attention_value,
               other_user_reactivity_name, other_user_reactivity_value,
               epochs=50, task_complexity=5):
    '''
    Args:
    Return:
    '''
    #metrics we need, in order to compute afterwards the belief
    '''
...@@ -267,10 +87,26 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
    robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(Robot_Feedback.counter.value)]

    #these are the variables of the persona bn that are dynamic and will be affected by the game evolution
    #TODO: it might be worth integrating them as a param of the simulation function, only the name?

    #output variables:
    n_correct_per_episode = [0]*epochs
    n_wrong_per_episode = [0]*epochs
    n_timeout_per_episode = [0]*epochs
    n_max_attempt_per_episode = [0]*epochs
    game_performance_episode = [0]*epochs
    n_lev_0_no_feed_per_episode = [0]*epochs
    n_lev_1_no_feed_per_episode = [0]*epochs
    n_lev_2_no_feed_per_episode = [0]*epochs
    n_lev_3_no_feed_per_episode = [0]*epochs
    n_lev_4_no_feed_per_episode = [0]*epochs
    n_lev_0_with_feed_per_episode = [0]*epochs
    n_lev_1_with_feed_per_episode = [0]*epochs
    n_lev_2_with_feed_per_episode = [0]*epochs
    n_lev_3_with_feed_per_episode = [0]*epochs
    n_lev_4_with_feed_per_episode = [0]*epochs
    robot_assistance_per_episode = [0]*epochs

    for e in range(epochs):
        '''Simulation framework'''
...@@ -281,36 +117,94 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
        correct_move_counter = 0
        wrong_move_counter = 0
        timeout_counter = 0
        max_attempt_counter = 0
        robot_assistance_action = 0
        robot_feedback_action = 0

        dynamic_variables = {'attempt': attempt_counter_per_action,
                             'game_state': game_state_counter_per_action,
                             'robot_assistance': robot_assistance_per_feedback,
                             'robot_feedback': robot_feedback_per_action}
        while(task_evolution < task_complexity):
            #if-then-else needed to classify the task game state into beg, mid, end
            if task_evolution >= 0 and task_evolution <= 1:
                game_state_counter = 0
            elif task_evolution >= 2 and task_evolution <= 3:
                game_state_counter = 1
            else:
                game_state_counter = 2
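            # e.g. with task_complexity = 5, progress 0-1 counts as beg, 2-3 as mid and 4 as end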
            robot_vars_evidence = {user_reactivity_name: user_reactivity_value,
                                   user_memory_name: user_memory_value,
                                   task_progress_name: game_state_counter,
                                   game_attempt_name: attempt_counter,
                                   }
            robot_actions_prob = bn_functions.infer_prob_from_state(robot_bn_model,
                                                                    infer_variable=robot_var_target,
                                                                    evidence_variables=robot_vars_evidence)
            robot_action = bn_functions.get_stochastic_action(robot_actions_prob.values)
            n_robot_assistance_feedback = Robot_Assistance_Feedback.counter.value
            if robot_action >= n_robot_assistance_feedback/2:
                robot_feedback_action = 1
                robot_assistance_action = n_robot_assistance_feedback-robot_action-1
                if robot_assistance_action == 0:
                    n_lev_0_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 1:
                    n_lev_1_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 2:
                    n_lev_2_no_feed_per_episode[e] += 1
                elif robot_assistance_action == 3:
                    n_lev_3_no_feed_per_episode[e] += 1
                else:
                    n_lev_4_no_feed_per_episode[e] += 1
            else:
                robot_feedback_action = 0
                robot_assistance_action = robot_action
                if robot_assistance_action == 0:
                    n_lev_0_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 1:
                    n_lev_1_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 2:
                    n_lev_2_with_feed_per_episode[e] += 1
                elif robot_assistance_action == 3:
                    n_lev_3_with_feed_per_episode[e] += 1
                else:
                    n_lev_4_with_feed_per_episode[e] += 1
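            # e.g. assuming Robot_Assistance_Feedback.counter.value == 10 (5 assistance levels x 2
            # feedback options), robot_action = 7 maps to robot_feedback_action = 1 (Robot_Feedback.no)
            # and robot_assistance_action = 10 - 7 - 1 = 2, whereas robot_action = 3 maps to
            # robot_feedback_action = 0 (Robot_Feedback.yes) and robot_assistance_action = 3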
print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action)) print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
#compare the real user with the estimated Persona and returns a user action (0, 1, 2) #compare the real user with the estimated Persona and returns a user action (0, 1, 2)
if real_user_model!=None: if other_user_bn_model!=None:
#return the user action in this state based on the user profile #return the user action in this state based on the user profile
user_actions_prob = get_user_actions_prob_from_state(user_bn_model['cpds'],user_bn_model['memory'], other_user_vars_evidence = {other_user_attention_name:other_user_attention_value,
user_bn_model['attention'], user_bn_model['reactivity'], other_user_reactivity_name:other_user_reactivity_value,
game_state_counter, attempt_counter, robot_assistance_action, other_user_memory_name:other_user_memory_value,
robot_feedback_action) task_progress_name:game_state_counter,
game_attempt_name:attempt_counter,
robot_assistance_name:robot_assistance_action,
robot_feedback_name:robot_feedback_action
}
user_actions_prob = bn_functions.infer_prob_from_state(other_user_bn_model,
infer_variable=other_user_var_target,
evidence_variables=other_user_vars_evidence)
else: else:
#return the user action in this state based on the Persona profile #return the user action in this state based on the Persona profile
user_actions_prob = get_user_actions_prob_from_state(persona_bn_model['cpds'],persona_bn_model['memory'],
persona_bn_model['attention'], persona_bn_model['reactivity'],
game_state_counter, attempt_counter, robot_assistance_action,
robot_feedback_action)
user_action = get_user_action(user_actions_prob) user_vars_evidence = {other_user_attention_name: user_attention_value,
user_reactivity_name: user_reactivity_value,
user_memory_name: user_memory_value,
task_progress_name: game_state_counter,
game_attempt_name: attempt_counter,
robot_assistance_name: robot_assistance_action,
robot_feedback_name: robot_feedback_action
}
user_actions_prob = bn_functions.infer_prob_from_state(user_bn_model,
infer_variable=user_var_target,
evidence_variables=user_vars_evidence)
user_action = bn_functions.get_stochastic_action(user_actions_prob.values)
            #update counters for plots
            robot_assistance_per_feedback[robot_feedback_action][robot_assistance_action] += 1
            attempt_counter_per_action[user_action][attempt_counter] += 1
...@@ -319,11 +213,11 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
            #update counters for the simulation
            iter_counter += 1
            task_evolution, attempt_counter, correct_move_counter, \
            wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(user_action,
                                                                                          task_evolution, attempt_counter,
                                                                                          correct_move_counter, wrong_move_counter,
                                                                                          timeout_counter, max_attempt_counter)

        print("task_evolution {}, attempt_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter))
        print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback))
...@@ -333,45 +227,84 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
        print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter))
        print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter))

        user_bn_model = bn_functions.update_cpds_tables(user_bn_model, dynamic_variables)
        #reset counters?
        robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in
                                         range(Robot_Feedback.counter.value)]
        attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in
                                      range(User_Action.counter.value)]
        game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in
                                         range(User_Action.counter.value)]
        robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in
                                     range(User_Action.counter.value)]
        #for plots
        n_correct_per_episode[e] = correct_move_counter
        n_wrong_per_episode[e] = wrong_move_counter
        n_timeout_per_episode[e] = timeout_counter
        n_max_attempt_per_episode[e] = max_attempt_counter
        game_performance_episode[e] = [n_correct_per_episode[e],
                                       n_wrong_per_episode[e],
                                       n_timeout_per_episode[e],
                                       n_max_attempt_per_episode[e]]
        robot_assistance_per_episode[e] = [n_lev_0_no_feed_per_episode[e],
                                           n_lev_1_no_feed_per_episode[e], n_lev_2_no_feed_per_episode[e],
                                           n_lev_3_no_feed_per_episode[e], n_lev_4_no_feed_per_episode[e],
                                           n_lev_0_with_feed_per_episode[e], n_lev_1_with_feed_per_episode[e],
                                           n_lev_2_with_feed_per_episode[e], n_lev_3_with_feed_per_episode[e],
                                           n_lev_4_with_feed_per_episode[e]
                                           ]

    return game_performance_episode, robot_assistance_per_episode
#############################################################################
#############################################################################
####################### RUN THE SIMULATION ##################################
#############################################################################
#############################################################################
#SIMULATION PARAMS
robot_assistance = [i for i in range(Robot_Assistance.counter.value)]
robot_feedback = [i for i in range(Robot_Feedback.counter.value)]
epochs = 40

#initialise the robot
robot_cpds = bnlearn.import_DAG('bn_robot_model/robot_model.bif')
#initialise memory, attention and reactivity variables
persona_memory = 0; persona_attention = 0; persona_reactivity = 1;
persona_cpds = bnlearn.import_DAG('bn_persona_model/persona_model.bif')
#initialise memory, attention and reactivity variables
real_user_memory = 2; real_user_attention = 2; real_user_reactivity = 2;
real_user_cpds = None#bnlearn.import_DAG('bn_other_user_model/user_model.bif')

game_performance_per_episode, robot_assistance_per_episode = simulation(user_bn_model=persona_cpds, user_var_target=['user_action'], user_memory_name="memory", user_memory_value=persona_memory,
                                                                         user_attention_name="attention", user_attention_value=persona_attention,
                                                                         user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
                                                                         task_progress_name="game_state", game_attempt_name="attempt",
                                                                         robot_assistance_name="robot_assistance", robot_feedback_name="robot_feedback",
                                                                         robot_bn_model=robot_cpds, robot_var_target=["robot_assistance_feedback"],
                                                                         other_user_bn_model=real_user_cpds, other_user_var_target=['user_action'],
                                                                         other_user_memory_name="memory", other_user_memory_value=real_user_memory,
                                                                         other_user_attention_name="attention", other_user_attention_value=real_user_attention,
                                                                         other_user_reactivity_name="reactivity", other_user_reactivity_value=real_user_reactivity,
                                                                         epochs=epochs, task_complexity=5)

if real_user_cpds != None:
    plot_game_performance_path = "game_performance_"+"_epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
else:
    plot_game_performance_path = "game_performance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg"
    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg"

utils.plot2D_game_performance(plot_game_performance_path, epochs, game_performance_per_episode)
utils.plot2D_assistance("test.jpg", epochs, robot_assistance_per_episode)
#TODO
'''
- define a function that takes the state as input and returns the user action and its reaction time
- evaluate, if the persona is wrong, how long it takes the simulator to detect that
- check percentages
'''