diff --git a/persona_model.bif b/bn_persona/persona_model.bif
similarity index 98%
rename from persona_model.bif
rename to bn_persona/persona_model.bif
index 1b5193a223346ab1d0048859b05fb5d1a50c6ee7..ef8062593b35916b8470d1079aefc607ab6c6d08 100644
--- a/persona_model.bif
+++ b/bn_persona/persona_model.bif
@@ -1,4 +1,4 @@
-network persona_model_4 {
+network persona_model {
 }
 %VARIABLES DEFINITION
diff --git a/bn_persona/utilities.py b/bn_persona/utilities.py
new file mode 100644
index 0000000000000000000000000000000000000000..adaf78b9191df6e5a2a3a44cd5f2ccbdfe6d6d71
--- /dev/null
+++ b/bn_persona/utilities.py
@@ -0,0 +1,70 @@
+import random
+import bn_functions
+
+def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
+                       wrong_move_counter, timeout_counter
+                       ):
+    '''
+    The function computes, given the current state and the user's action, the next state
+    Args:
+        user_action: 0 (correct), 1 (wrong) or 2 (timeout)
+        task_evolution: the task progress, classified into beg, mid, end
+        correct_move_counter: number of correct moves so far
+        attempt_counter: attempts made on the current token
+        wrong_move_counter: number of wrong moves so far
+        timeout_counter: number of timeouts so far
+    Return:
+        the counters updated according to the user_action
+    '''
+    if user_action == 0:
+        attempt_counter = 0
+        task_evolution += 1
+        correct_move_counter += 1
+    # if the user made a wrong move and still did not reach the maximum number of attempts
+    elif user_action == 1 and attempt_counter < 3:
+        attempt_counter += 1
+        wrong_move_counter += 1
+    # if the user did not move any token and still did not reach the maximum number of attempts
+    elif user_action == 2 and attempt_counter < 3:
+        attempt_counter += 1
+        timeout_counter += 1
+    # the robot or therapist makes the correct move on the patient's behalf
+    else:
+        attempt_counter = 0
+        task_evolution += 1
+        correct_move_counter += 1
+
+    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter
+
+def get_user_action_prob():
+    # TODO: not implemented yet; it should query the BN for the user's action probabilities
+    raise NotImplementedError
+
+def get_stochastic_action(actions_prob):
+    '''
+    Select one of the actions according to actions_prob
+    Args:
+        actions_prob: the probabilities of the Persona, based on the BN, to make a correct move, a wrong move or a timeout
+    Return:
+        the id of the selected action
+    '''
+    action_id = None
+    correct_action_from_BN = actions_prob[0]
+    wrong_action_from_BN = actions_prob[1]
+    timeout_action_from_BN = actions_prob[2]
+
+    rnd_val = random.uniform(0, 1)
+    # if rnd_val is lower than the correct action prob then it is the correct one
+    if rnd_val <= correct_action_from_BN:
+        action_id = 0
+    # if rnd_val is larger than the correct action prob and lower than the
+    # cumulative correct+wrong prob then it is the wrong one
+    elif rnd_val > correct_action_from_BN \
+            and rnd_val < (correct_action_from_BN + wrong_action_from_BN):
+        action_id = 1
+    # timeout
+    else:
+        action_id = 2
+    return action_id
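Note: `get_stochastic_action` draws one action id by inverse-CDF sampling over the three action probabilities. A minimal standalone sketch of the same technique (the probability vector below is illustrative, not taken from the model):

    import random

    def sample_action(probs):
        # walk the cumulative distribution and return the first bin containing rnd
        rnd, cumulative = random.uniform(0, 1), 0.0
        for action_id, p in enumerate(probs):
            cumulative += p
            if rnd <= cumulative:
                return action_id
        return len(probs) - 1  # guard against floating-point round-off

    print(sample_action([0.7, 0.2, 0.1]))  # 0=correct, 1=wrong, 2=timeout
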
diff --git a/persona_model_3.bif b/examples/persona_model_3.bif
similarity index 94%
rename from persona_model_3.bif
rename to examples/persona_model_3.bif
index 06c85dfec93ce7152cb3a17c51112d3682aecab0..c958ca1aa9fce669336419a4e60a5b4c5566ef0b 100644
--- a/persona_model_3.bif
+++ b/examples/persona_model_3.bif
@@ -1,7 +1,7 @@
 network persona_model_3 {
 }
-%definition of the variables
+%VARIABLES DEFINITION
 variable reactivity {
     type discrete [3] {slow, medium, fast};
 }
@@ -11,29 +11,23 @@
 variable memory {
     type discrete[3] {low, medium, high};
 }
 variable attention {
     type discrete[3] {low, medium, high};
 }
-
 variable robot_assistance {
     type discrete [ 5 ] { lev_0, lev_1, lev_2, lev_3, lev_4 };
 }
-
 variable attempt {
     type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
 }
-
 variable game_state {
     type discrete [ 3 ] { beg, mid, end };
 }
-
 variable robot_feedback {
     type discrete [ 2 ] { yes, no };
 }
-
 variable user_action {
     type discrete [ 3 ] { correct, wrong, timeout };
 }
-%definition of individual probabilities
-
+%INDIVIDUAL PROBABILITIES DEFINITION
 probability ( robot_assistance ) {
     table 0.2, 0.2, 0.2, 0.2, 0.2;
 }
@@ -46,26 +40,23 @@ probability ( attempt ) {
 probability ( user_action ) {
     table 0.33, 0.33, 0.34;
 }
-#cpds 4
+#CPDS 4
 probability ( reactivity ) {
     table 0.33, 0.33, 0.34;
 }
-#cpds 3
+#CPDS 3
 probability ( memory ) {
     table 0.33, 0.33, 0.34;
 }
-#cpds 1
+#CPDS 1
 probability ( attention ) {
     table 0.33, 0.33, 0.34;
 }
 probability ( robot_feedback ) {
     table 0.5, 0.5;
 }
-
-
-#cpds 7
+#CPDS 7
 probability (user_action | reactivity, memory, attention) {
-
     (slow, low, low) 0.1, 0.4, 0.5;
     (slow, low, medium) 0.3, 0.5, 0.2;
     (slow, low, high) 0.4, 0.5, 0.1;
@@ -75,7 +66,7 @@ probability (user_action | reactivity, memory, attention) {
     (slow, high, low) 0.3, 0.4, 0.3;
     (slow, high, medium) 0.6, 0.3, 0.1;
     (slow, high, high) 0.7, 0.2, 0.1;
-
+%%%
     (medium, low, low) 0.3, 0.4, 0.3;
     (medium, low, medium) 0.3, 0.5, 0.2;
     (medium, low, high) 0.4, 0.3, 0.3;
@@ -85,7 +76,7 @@ probability (user_action | reactivity, memory, attention) {
     (medium, high, low) 0.34, 0.33, 0.33;
     (medium, high, medium) 0.7, 0.2, 0.1;
     (medium, high, high) 0.75, 0.25, 0.0;
-
+%%%
     (fast, low, low) 0.5, 0.2, 0.3;
     (fast, low, medium) 0.6, 0.2, 0.2;
     (fast, low, high) 0.7, 0.3, 0.0;
@@ -95,37 +86,32 @@ probability (user_action | reactivity, memory, attention) {
     (fast, high, low) 0.5, 0.2, 0.3;
     (fast, high, medium) 0.6, 0.2, 0.2;
     (fast, high, high) 0.9, 0.1, 0.0;
-
 }
-
-#cpds 5
+#CPDS 6
 probability (robot_feedback | user_action) {
     (correct) 0.8, 0.2;
     (wrong) 0.5, 0.5;
     (timeout) 0.2, 0.8;
 }
-
-#cpds 6
+#CPDS 5
 probability (robot_assistance | user_action) {
     (correct) 0.05 0.1 0.15 0.3 0.4;
     (wrong) 0.1 0.2 0.4 0.2 0.1;
     (timeout) 0.2 0.4 0.2 0.1 0.1;
 }
-
-#cpds 2
+#CPDS 2
 probability (game_state | user_action) {
     (correct) 0.2, 0.4, 0.4;
     (wrong) 0.4, 0.4, 0.2;
     (timeout) 0.6, 0.3, 0.1;
 }
-
-#cpds 0
+#CPDS 0
 probability (attempt | user_action) {
     (correct) 0.1, 0.2, 0.3, 0.4;
     (wrong) 0.5, 0.3, 0.15, 0.05;
     (timeout) 0.4, 0.3, 0.2, 0.1;
 }
-
+#CPDS 5
 probability (robot_assistance | robot_feedback) {
     (yes) 0.5 0.3 0.1 0.1 0.0;
     (no) 0.0 0.1 0.1 0.3 0.5;
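Note: the #CPDS n comments record the position of each node's CPD inside the model object that bnlearn.import_DAG returns; main.py hardcodes these indices (cpds[0], cpds[2], cpds[5], cpds[6]), and they follow the alphabetical node ordering produced on import. A quick way to verify them, assuming (as main.py already does) that the imported dict exposes a pgmpy model under the 'model' key:

    import bnlearn

    # print each CPD's index and the variables it involves, to check them
    # against the #CPDS n comments in the .bif
    model = bnlearn.import_DAG('examples/persona_model_3.bif')
    for i, cpd in enumerate(model['model'].cpds):
        print(i, cpd.variable, cpd.variables)
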
diff --git a/main.py b/main.py
index 6250b8e90284fd9efa9312ad80598554f331d061..2ee35687df662b6dd043cb87bf02991b652c9b3b 100644
--- a/main.py
+++ b/main.py
@@ -1,109 +1,16 @@
 import bnlearn
 import numpy as np
-import enum
 import random
-import matplotlib.pyplot as plt
-
-#define constants
-class User_Action(enum.Enum):
-    correct = 0
-    wrong = 1
-    timeout = 2
-    name = "user_action"
-    counter = 3
-class Reactivity(enum.Enum):
-    slow = 0
-    medium = 1
-    fast = 2
-    name = "reactivity"
-    counter = 3
-class Memory(enum.Enum):
-    low = 0
-    medium = 1
-    high = 2
-    name = "memory"
-    counter = 3
-class Robot_Assistance(enum.Enum):
-    lev_0 = 0
-    lev_1 = 1
-    lev_2 = 2
-    lev_3 = 3
-    lev_4 = 4
-    name = "robot_assistance"
-    counter = 5
-class Robot_Feedback(enum.Enum):
-    yes = 0
-    no = 1
-    name = "robot_feedback"
-    counter = 2
-class Game_State(enum.Enum):
-    beg = 0
-    middle = 1
-    end = 2
-    name = "game_state"
-    counter = 3
-class Attempt(enum.Enum):
-    at_1 = 0
-    at_2 = 1
-    at_3 = 2
-    at_4 = 3
-    name = "attempt"
-    counter = 4
-
-
-
-def plot2D(save_path, n_episodes, *y):
-    # The position of the bars on the x-axis
-    barWidth = 0.35
-    r = np.arange(n_episodes) # the x locations for the groups
-    # Get values from the group and categories
-    x = [i for i in range(n_episodes)]
-    correct = y[0][0]
-    wrong = y[0][1]
-    timeout = y[0][2]
-    # plot bars
-    plt.figure(figsize=(10, 7))
-    plt.bar(r, correct, edgecolor='white', width=barWidth, label="correct")
-    plt.bar(r, wrong, bottom=np.array(correct), edgecolor='white', width=barWidth, label='wrong')
-    plt.bar(r, timeout, bottom=np.array(correct) + np.array(wrong), edgecolor='white',
-            width=barWidth, label='timeout')
-    plt.legend()
-    # Custom X axis
-    plt.xticks(r, x, fontweight='bold')
-    plt.ylabel("performance")
-    plt.savefig(save_path)
-    plt.show()
-
-def compute_prob(cpds_table):
-    '''
-    Given the counters generate the probability distributions
-    Args:
-        cpds_table: with counters
-    Return:
-        the probs for the cpds table
-    '''
-    for val in range(len(cpds_table)):
-        cpds_table[val] = list(map(lambda x: x / (sum(cpds_table[val])+0.00001), cpds_table[val]))
-    return cpds_table
-
-def average_prob(ref_cpds_table, current_cpds_table):
-    '''
-    Args:
-        ref_cpds_table: table from bnlearn
-        current_cpds_table: table from interaction
-    Return:
-        avg from both tables
-    '''
-    res_cpds_table = ref_cpds_table.copy()
-    for elem1 in range(len(ref_cpds_table)):
-        for elem2 in range(len(ref_cpds_table[0])):
-            res_cpds_table[elem1][elem2] = (ref_cpds_table[elem1][elem2]+current_cpds_table[elem1][elem2])/2
-    return res_cpds_table
+#import classes and modules
+from bn_variables import Memory, Attention, Reactivity, Robot_Assistance, Robot_Feedback, Robot_Assistance_Feedback, User_Action, User_Capability, Game_State, Attempt
+import bn_functions
+import utils
 
 def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
-                       wrong_move_counter, timeout_counter
+                       wrong_move_counter, timeout_counter, max_attempt_counter
                        ):
     '''
+    The function computes, given the current state and the user's action, the next state
     Args:
         user_action: 0,1,2
         task_evolution: beg, mid, end
@@ -111,8 +18,14 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
         attempt_counter:
         wrong_move_counter:
         timeout_counter:
+        max_attempt_counter:
     Return:
-        the counters updated according to the user_action
+        task_evolution
+        attempt_counter
+        correct_move_counter
+        wrong_move_counter
+        timeout_counter
+        max_attempt_counter
     '''
     if user_action == 0:
         attempt_counter = 0
@@ -130,123 +43,30 @@ def compute_next_state(user_action, task_evolution, attempt_counter, correct_mov
     else:
         attempt_counter = 0
         task_evolution += 1
-        correct_move_counter += 1
+        #correct_move_counter += 1
+        max_attempt_counter += 1
 
-    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter
+    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
 
-def update_cpds_tables(game_state_counter, attempt_counter,
-                       robot_assistance, robot_feedback,
-                       persona_bn_model
-                       ):
-    '''
-    Args:
-        game_state_counter: from 0 to 2 beg, mid, end
-        attempt_counter: from 1 to 4
-        robot_assistance: from 0 to 4
-        robot_feedback: 0 or 1 depending if a feedback has been provided
-        persona_bn_model: the cpds tables of the model to update
-    Return:
-        the cpds tables updated with the new counters
-    '''
-    # transform counters into probabilities
-    prob_over_attempt_per_action = compute_prob(attempt_counter)
-    prob_over_game_per_action = compute_prob(game_state_counter)
-    prob_over_feedback_per_action = compute_prob(robot_feedback)
-    prob_over_assistance_per_feedback = compute_prob(robot_assistance)
-
-    # average the probabilities obtained with the cpdf tables
-
-    updated_prob_over_attempt_per_action = average_prob(
-        np.transpose(persona_bn_model['cpds']['model'].cpds[0].values),
-        prob_over_attempt_per_action)
-    updated_prob_over_game_per_action = average_prob(np.transpose(persona_bn_model['cpds']['model'].cpds[2].values),
-                                                     prob_over_game_per_action)
-    updated_prob_over_feedback_per_action = average_prob(
-        np.transpose(persona_bn_model['cpds']['model'].cpds[6].values),
-        prob_over_feedback_per_action)
-    updated_prob_over_assistance_per_feedback = average_prob(
-        np.transpose(persona_bn_model['cpds']['model'].cpds[5].values),
-        prob_over_assistance_per_feedback)
-
-    # dirty solution, hardcoded based on the .bif look at it to know the corresponding cpds
-    persona_bn_model['cpds']['model'].cpds[0].values = np.transpose(updated_prob_over_attempt_per_action)
-    persona_bn_model['cpds']['model'].cpds[2].values = np.transpose(updated_prob_over_game_per_action)
-    persona_bn_model['cpds']['model'].cpds[6].values = np.transpose(updated_prob_over_feedback_per_action)
-    persona_bn_model['cpds']['model'].cpds[5].values = np.transpose(updated_prob_over_assistance_per_feedback)
-
-    return persona_bn_model
-
-def get_user_actions_prob_from_state(user_initial_cpds, user_memory, user_attention, user_reactivity,
-                                     game_state_counter, attempt_counter,
-                                     robot_assistance_action, robot_feedback_action
-                                     ):
-    '''
-    Args:
-        user_initial_cpds: cpds for the given user
-        user_memory: from 1 to 3
-        user_attention: from 1 to 3
-        user_reactivity: from 1 to 3
-        :param game_state_counter: beg, mid, end
-        :param attempt_counter: from 1 to 4
-        :param robot_assistance_action: between 0 and 4
-        :param robot_feedback_action: between 0 and 1
-    Return:
-        the probability of the user to perform: i) correct_move, ii) wrong_move, iii) timeout
-    '''
-    query = bnlearn.inference.fit(user_initial_cpds, variables=['user_action'],
-                                  evidence={'robot_assistance': robot_assistance_action,
-                                            'attempt': attempt_counter,
-                                            'game_state': game_state_counter,
-                                            'robot_feedback': robot_feedback_action,
-                                            'memory': user_memory,
-                                            'attention': user_attention,
-                                            'reactivity': user_reactivity
-                                            })
-    user_actions_prob_from_state = query.values
-    return user_actions_prob_from_state
-
-def get_user_action(actions_prob):
+def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_value, user_attention_name, user_attention_value,
+               user_reactivity_name, user_reactivity_value,
+               task_progress_name, game_attempt_name, robot_assistance_name, robot_feedback_name,
+               robot_bn_model, robot_var_target,
+               other_user_bn_model, other_user_var_target, other_user_memory_name, other_user_memory_value,
+               other_user_attention_name, other_user_attention_value,
+               other_user_reactivity_name, other_user_reactivity_value,
+               epochs=50, task_complexity=5):
     '''
-    Select one of the actions according to the actions_prob
     Args:
-        actions_prob: the probability of the Persona based on the BN to make a correct move, wrong move, timeout
-    Return:
-        the id of the selected action
-    N.B:
-    '''
-    action_id = None
-    correct_action_from_BN = actions_prob[0]
-    wrong_action_from_BN = actions_prob[1]
-    timeout_action_from_BN = actions_prob[2]
-
-    rnd_val = random.uniform(0,1)
-    #if user_prob is lower than the correct action prob then is the correct one
-    if rnd_val<=correct_action_from_BN:
-        action_id = 0
-    #if rnd is larger than the correct action prob and lower than wrong
-    # action prob then is the wrong one
-    elif rnd_val>correct_action_from_BN \
-        and rnd_val<(correct_action_from_BN+wrong_action_from_BN):
-        action_id = 1
-    #timeout
-    else:
-        action_id = 2
-    return action_id
-
-def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, persona_bn_model, epochs=50, task_complexity=5):
-    '''
-    This function computes the entire simulation for #epochs
-    Args:
-        robot_assistance_vect: the robot's levels of assistance (might be included directly in this function)
-        robot_feedback_vect: the robot's feedback (might be included directly in this function)
-        user_bn_model: it is a model created from BN that given the state of the game and initial cpds returns probabilities of the user's actions
-        persona_bn_model: it is a model created from BN that given the state of the game and initial cpds returns probabilities of the user's actions
-        epochs: the number of simulations
-        task_complexity: the number of tokens to sort
     Return:
+        game_performance_episode: one [n_correct, n_wrong, n_timeout, n_max_attempt] entry per episode
+        robot_assistance_per_episode: counters of the assistance level / feedback combinations used per episode
+    '''
+    #TODO: remove robot_assistance_vect and robot_feedback_vect
 
     #metrics we need, in order to compute afterwards the belief
     '''
@@ -267,10 +87,26 @@
     robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in
                                      range(Robot_Feedback.counter.value)]
 
+    #these are the variables of the persona bn that are dynamic and will be affected by the game evolution
+    #TODO: it might be worth integrating them as params of the simulation function, name only?
+
+    #output variables:
     n_correct_per_episode = [0]*epochs
     n_wrong_per_episode = [0]*epochs
     n_timeout_per_episode = [0]*epochs
+    n_max_attempt_per_episode = [0]*epochs
+    game_performance_episode = [0]*epochs
+    n_lev_0_no_feed_per_episode = [0]*epochs
+    n_lev_1_no_feed_per_episode = [0]*epochs
+    n_lev_2_no_feed_per_episode = [0]*epochs
+    n_lev_3_no_feed_per_episode = [0]*epochs
+    n_lev_4_no_feed_per_episode = [0]*epochs
+    n_lev_0_with_feed_per_episode = [0]*epochs
+    n_lev_1_with_feed_per_episode = [0]*epochs
+    n_lev_2_with_feed_per_episode = [0]*epochs
+    n_lev_3_with_feed_per_episode = [0]*epochs
+    n_lev_4_with_feed_per_episode = [0]*epochs
+    robot_assistance_per_episode = [0]*epochs
 
     for e in range(epochs):
         '''Simulation framework'''
         #counters
         task_evolution = 0
         attempt_counter = 1
         game_state_counter = 0
         iter_counter = 0
         correct_move_counter = 0
         wrong_move_counter = 0
         timeout_counter = 0
+        max_attempt_counter = 0
+        robot_assistance_action = 0
+        robot_feedback_action = 0
+
+        dynamic_variables = {'attempt': attempt_counter_per_action,
+                             'game_state': game_state_counter_per_action,
+                             'robot_assistance': robot_assistance_per_feedback,
+                             'robot_feedback': robot_feedback_per_action}
 
         while(task_evolution<task_complexity):
             #if then else are necessary to classify the task game state into beg, mid, end
-            if task_evolution>=0 and task_evolution<=2:
+            if task_evolution>=0 and task_evolution<=1:
                 game_state_counter = 0
-            elif task_evolution>=3 and task_evolution<=4:
+            elif task_evolution>=2 and task_evolution<=3:
                 game_state_counter = 1
             else:
                 game_state_counter = 2
-            #select robot assistance (replace it with RL or IRL algorithm)
-            robot_assistance_action = 2#random.randint(min(robot_assistance_vect), max(robot_assistance_vect))
-            #select robot feedback (replace it with RL or IRL algorithm)
-            robot_feedback_action = random.randint(min(robot_feedback_vect), max(robot_feedback_vect))
+
+            robot_vars_evidence = {user_reactivity_name: user_reactivity_value,
+                                   user_memory_name: user_memory_value,
+                                   task_progress_name: game_state_counter,
+                                   game_attempt_name: attempt_counter,
+                                   }
+            robot_actions_prob = bn_functions.infer_prob_from_state(robot_bn_model,
+                                                                    infer_variable=robot_var_target,
+                                                                    evidence_variables=robot_vars_evidence)
+            robot_action = bn_functions.get_stochastic_action(robot_actions_prob.values)
+            n_robot_assistance_feedback = Robot_Assistance_Feedback.counter.value
+            if robot_action>=n_robot_assistance_feedback/2:
+                robot_feedback_action = 1
+                robot_assistance_action = n_robot_assistance_feedback-robot_action-1
+                if robot_assistance_action == 0:
+                    n_lev_0_no_feed_per_episode[e] += 1
+                elif robot_assistance_action == 1:
+                    n_lev_1_no_feed_per_episode[e] += 1
+                elif robot_assistance_action == 2:
+                    n_lev_2_no_feed_per_episode[e] += 1
+                elif robot_assistance_action == 3:
+                    n_lev_3_no_feed_per_episode[e] += 1
+                else:
+                    n_lev_4_no_feed_per_episode[e] += 1
+            else:
+                robot_feedback_action = 0
+                robot_assistance_action = robot_action
+                if robot_assistance_action == 0:
+                    n_lev_0_with_feed_per_episode[e] += 1
+                elif robot_assistance_action == 1:
+                    n_lev_1_with_feed_per_episode[e] += 1
+                elif robot_assistance_action == 2:
+                    n_lev_2_with_feed_per_episode[e] += 1
+                elif robot_assistance_action == 3:
+                    n_lev_3_with_feed_per_episode[e] += 1
+                else:
+                    n_lev_4_with_feed_per_episode[e] += 1
+
+            print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
 
             #compare the real user with the estimated Persona and returns a user action (0, 1, 2)
-            if real_user_model!=None:
+            if other_user_bn_model is not None:
                 #return the user action in this state based on the user profile
-                user_actions_prob = get_user_actions_prob_from_state(user_bn_model['cpds'],user_bn_model['memory'],
-                                                                     user_bn_model['attention'], user_bn_model['reactivity'],
-                                                                     game_state_counter, attempt_counter, robot_assistance_action,
-                                                                     robot_feedback_action)
+                other_user_vars_evidence = {other_user_attention_name:other_user_attention_value,
+                                            other_user_reactivity_name:other_user_reactivity_value,
+                                            other_user_memory_name:other_user_memory_value,
+                                            task_progress_name:game_state_counter,
+                                            game_attempt_name:attempt_counter,
+                                            robot_assistance_name:robot_assistance_action,
+                                            robot_feedback_name:robot_feedback_action
+                                            }
+                user_actions_prob = bn_functions.infer_prob_from_state(other_user_bn_model,
+                                                                       infer_variable=other_user_var_target,
+                                                                       evidence_variables=other_user_vars_evidence)
             else:
                 #return the user action in this state based on the Persona profile
-                user_actions_prob = get_user_actions_prob_from_state(persona_bn_model['cpds'],persona_bn_model['memory'],
-                                                                     persona_bn_model['attention'], persona_bn_model['reactivity'],
-                                                                     game_state_counter, attempt_counter, robot_assistance_action,
-                                                                     robot_feedback_action)
-            user_action = get_user_action(user_actions_prob)
+                user_vars_evidence = {user_attention_name: user_attention_value,
+                                      user_reactivity_name: user_reactivity_value,
+                                      user_memory_name: user_memory_value,
+                                      task_progress_name: game_state_counter,
+                                      game_attempt_name: attempt_counter,
+                                      robot_assistance_name: robot_assistance_action,
+                                      robot_feedback_name: robot_feedback_action
+                                      }
+                user_actions_prob = bn_functions.infer_prob_from_state(user_bn_model,
+                                                                       infer_variable=user_var_target,
+                                                                       evidence_variables=user_vars_evidence)
+
+            user_action = bn_functions.get_stochastic_action(user_actions_prob.values)
             #updates counters for plots
             robot_assistance_per_feedback[robot_feedback_action][robot_assistance_action] += 1
             attempt_counter_per_action[user_action][attempt_counter] += 1
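Note: the branch above decodes a single robot action id into an assistance level plus a feedback flag. A standalone sketch of the mapping, assuming 5 assistance levels x 2 feedback values = 10 ids and the .bif's feedback ordering (0 = yes, 1 = no):

    # ids 0..4 -> assistance lev_0..lev_4 with feedback "yes";
    # ids 5..9 -> assistance lev_4..lev_0 with feedback "no"
    n = 10
    for robot_action in range(n):
        if robot_action >= n / 2:
            feedback, assistance = 1, n - robot_action - 1
        else:
            feedback, assistance = 0, robot_action
        print(robot_action, "-> lev_%d, feedback %s"
              % (assistance, "no" if feedback else "yes"))
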
@@ -319,11 +213,11 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
             #updates counters for simulation
             iter_counter += 1
-            task_evolution, attempt_counter, \
-            correct_move_counter, wrong_move_counter, timeout_counter = compute_next_state(user_action,
+            task_evolution, attempt_counter, correct_move_counter, \
+            wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(user_action,
                                                                         task_evolution, attempt_counter,
                                                                         correct_move_counter, wrong_move_counter,
-                                                                        timeout_counter)
+                                                                        timeout_counter, max_attempt_counter)
 
             print("task_evolution {}, attempt_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter))
             print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback))
@@ -333,45 +227,84 @@ def simulation(robot_assistance_vect, robot_feedback_vect, user_bn_model, person
         print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter))
         print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter))
 
-        persona_bn_model = update_cpds_tables(game_state_counter_per_action, attempt_counter_per_action,
-                                              robot_assistance_per_feedback, robot_feedback_per_action, persona_bn_model)
+        user_bn_model = bn_functions.update_cpds_tables(user_bn_model, dynamic_variables)
+        #reset the counters (TODO: confirm they should be cleared every episode)
+        robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in
+                                         range(Robot_Feedback.counter.value)]
+        attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in
+                                      range(User_Action.counter.value)]
+        game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in
+                                         range(User_Action.counter.value)]
+        robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in
+                                     range(User_Action.counter.value)]
+
+        #for plots
         n_correct_per_episode[e] = correct_move_counter
         n_wrong_per_episode[e] = wrong_move_counter
         n_timeout_per_episode[e] = timeout_counter
+        n_max_attempt_per_episode[e] = max_attempt_counter
+        game_performance_episode[e] = [n_correct_per_episode[e],
+                                       n_wrong_per_episode[e],
+                                       n_timeout_per_episode[e],
+                                       n_max_attempt_per_episode[e]]
+        robot_assistance_per_episode[e] = [n_lev_0_no_feed_per_episode[e],
+                                           n_lev_1_no_feed_per_episode[e], n_lev_2_no_feed_per_episode[e],
+                                           n_lev_3_no_feed_per_episode[e], n_lev_4_no_feed_per_episode[e],
+                                           n_lev_0_with_feed_per_episode[e], n_lev_1_with_feed_per_episode[e],
+                                           n_lev_2_with_feed_per_episode[e], n_lev_3_with_feed_per_episode[e],
+                                           n_lev_4_with_feed_per_episode[e]
+                                           ]
+
+    return game_performance_episode, robot_assistance_per_episode
 
-    return n_correct_per_episode, n_wrong_per_episode, n_timeout_per_episode
+
 
+#############################################################################
+#############################################################################
+####################### RUN THE SIMULATION ##################################
+#############################################################################
+#############################################################################
 
 #SIMULATION PARAMS
 robot_assistance = [i for i in range(Robot_Assistance.counter.value)]
 robot_feedback = [i for i in range(Robot_Feedback.counter.value)]
 epochs = 40
 
+#initialise the robot
+robot_cpds = bnlearn.import_DAG('bn_robot_model/robot_model.bif')
+
 #initialise memory, attention and reactivity variables
 persona_memory = 0; persona_attention = 0; persona_reactivity = 1;
-persona_cpds = bnlearn.import_DAG('persona_model.bif')
-persona_user_model = {'cpds':persona_cpds, 'memory':persona_memory, 'attention':persona_attention, 'reactivity':persona_reactivity}
+persona_cpds = bnlearn.import_DAG('bn_persona/persona_model.bif')
 
 #initialise memory, attention and reactivity variables
 real_user_memory = 2; real_user_attention = 2; real_user_reactivity = 2;
-real_user_cpds = bnlearn.import_DAG('user_model.bif')
-real_user_model = {'cpds':real_user_cpds, 'memory':real_user_memory, 'attention':real_user_attention, 'reactivity':real_user_reactivity}
-
-print("user_action -> attempt ", persona_user_model['cpds']['model'].cpds[0].values)
-print("user_action -> game_state ",persona_user_model['cpds']['model'].cpds[2].values)
-print("robot_feedback -> robot_assistance ", persona_user_model['cpds']['model'].cpds[5].values)
-print("user_action -> reactivity, memory ", persona_user_model['cpds']['model'].cpds[6].values)
-
-results = simulation(robot_assistance, robot_feedback, real_user_model, persona_user_model, epochs=epochs, task_complexity=5)
-if real_user_model != None:
-    plot_path = "epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
+real_user_cpds = None  # bnlearn.import_DAG('bn_other_user_model/user_model.bif')
+
+game_performance_per_episode, robot_assistance_per_episode = simulation(user_bn_model=persona_cpds, user_var_target=['user_action'], user_memory_name="memory", user_memory_value=persona_memory,
+                                                                        user_attention_name="attention", user_attention_value=persona_attention,
+                                                                        user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
+                                                                        task_progress_name="game_state", game_attempt_name="attempt",
+                                                                        robot_assistance_name="robot_assistance", robot_feedback_name="robot_feedback",
+                                                                        robot_bn_model=robot_cpds, robot_var_target=["robot_assistance_feedback"],
+                                                                        other_user_bn_model=real_user_cpds, other_user_var_target=['user_action'],
+                                                                        other_user_memory_name="memory", other_user_memory_value=real_user_memory,
+                                                                        other_user_attention_name="attention", other_user_attention_value=real_user_attention,
+                                                                        other_user_reactivity_name="reactivity", other_user_reactivity_value=real_user_reactivity,
+                                                                        epochs=epochs, task_complexity=5)
+if real_user_cpds is not None:
+    plot_game_performance_path = "game_performance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
+    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg"
 else:
-    plot_path = "epoch_" + str(epochs) + "_persona_memory_" + str(persona_memory) + "_persona_attention_" + str(persona_attention) + "_persona_reactivity_" + str(persona_reactivity) + ".jpg"
+    plot_game_performance_path = "game_performance_"+"epoch_" + str(epochs) + "_persona_memory_" + str(persona_memory) + "_persona_attention_" + str(persona_attention) + "_persona_reactivity_" + str(persona_reactivity) + ".jpg"
+    plot_robot_assistance_path = "robot_assistance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg"
 
-plot2D(plot_path, epochs, results)
+utils.plot2D_game_performance(plot_game_performance_path, epochs, game_performance_per_episode)
+utils.plot2D_assistance(plot_robot_assistance_path, epochs, robot_assistance_per_episode)
 
 #TODO
 '''
 - define a function that takes the state as input and returns the user action and its reaction time
+- plot the robot's levels of assistance during the session
 - evaluate, if the persona is wrong, how long it takes the simulator to detect that
 - check percentages
 '''
\ No newline at end of file
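
Note: main.py and utilities.py now delegate inference to bn_functions.infer_prob_from_state, which is not part of this diff. Judging from the removed get_user_actions_prob_from_state helper it replaces, a plausible minimal implementation is a thin wrapper around bnlearn.inference.fit; this is a sketch under that assumption, not the actual bn_functions code (main.py reads .values off the returned query object):

    import bnlearn

    def infer_prob_from_state(bn_model, infer_variable, evidence_variables):
        # posterior over infer_variable given the evidence, as the removed
        # get_user_actions_prob_from_state computed via bnlearn.inference.fit
        query = bnlearn.inference.fit(bn_model, variables=infer_variable,
                                      evidence=evidence_variables)
        return query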