diff --git a/main.py b/main.py index dd9a4e5fbf12a7a067034aef95ff50cb75a9038d..aaa32f5ae20011d0a10392e5c026d09792fc9045 100644 --- a/main.py +++ b/main.py @@ -82,11 +82,11 @@ def interpret_user_output(action_id): return user_action, user_react_time -def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_value, user_attention_name, user_attention_value, +def simulation(user_bn_model, user_var_target_action, user_var_target_react_time, user_memory_name, user_memory_value, user_attention_name, user_attention_value, user_reactivity_name, user_reactivity_value, task_progress_name, game_attempt_name, robot_assistance_name, robot_feedback_name, - robot_bn_model, robot_var_target, - other_user_bn_model, other_user_var_target, other_user_memory_name, other_user_memory_value, + robot_bn_model, robot_var_ass_action, robot_var_feedback_action, + other_user_bn_model, other_user_var_target_action, other_user_var_target_react_time, other_user_memory_name, other_user_memory_value, other_user_attention_name, other_user_attention_value, other_user_reactivity_name, other_user_reactivity_value, epochs=50, task_complexity=5): @@ -129,17 +129,8 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val n_timeout_per_episode = [0]*epochs n_max_attempt_per_episode = [0]*epochs game_performance_episode = [0]*epochs - n_lev_0_no_feed_per_episode = [0]*epochs - n_lev_1_no_feed_per_episode = [0]*epochs - n_lev_2_no_feed_per_episode = [0]*epochs - n_lev_3_no_feed_per_episode = [0]*epochs - n_lev_4_no_feed_per_episode = [0]*epochs - n_lev_0_with_feed_per_episode = [0]*epochs - n_lev_1_with_feed_per_episode = [0]*epochs - n_lev_2_with_feed_per_episode = [0]*epochs - n_lev_3_with_feed_per_episode = [0]*epochs - n_lev_4_with_feed_per_episode = [0]*epochs - robot_assistance_per_episode = [0]*epochs + n_assistance_lev_per_episode = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(epochs)] + n_feedback_per_episode = [[0 for i in 
range(Robot_Feedback.counter.value)] for j in range(epochs)] for e in range(epochs): '''Simulation framework''' @@ -151,14 +142,17 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val wrong_move_counter = 0 timeout_counter = 0 max_attempt_counter = 0 - robot_assistance_action = 0 - robot_feedback_action = 0 + selected_robot_assistance_action = 0 + selected_robot_feedback_action = 0 - dynamic_variables = {'attempt': attempt_counter_per_action, + user_dynamic_variables = {'attempt': attempt_counter_per_action, 'game_state': game_state_counter_per_action, 'robot_assistance': robot_assistance_per_action, 'robot_feedback': robot_feedback_per_action} + robot_dynamic_variables = {'attempt': attempt_counter_per_action, + 'game_state': game_state_counter_per_action} + while(task_evolution<task_complexity): #if then else are necessary to classify the task game state into beg, mid, end if task_evolution>=0 and task_evolution<=1: @@ -168,46 +162,24 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val else: game_state_counter = 2 - robot_vars_evidence = { user_reactivity_name: user_reactivity_value, - user_memory_name: user_memory_value, - task_progress_name: game_state_counter, - game_attempt_name: attempt_counter, - } - robot_actions_prob = bn_functions.infer_prob_from_state(robot_bn_model, - infer_variable=robot_var_target, + robot_vars_evidence = {user_reactivity_name: user_reactivity_value, + user_memory_name: user_memory_value, + task_progress_name: game_state_counter, + game_attempt_name: attempt_counter, + } + query_robot_ass_prob = bn_functions.infer_prob_from_state(robot_bn_model, + infer_variable=robot_var_ass_action, evidence_variables=robot_vars_evidence) + query_robot_feedback_prob = bn_functions.infer_prob_from_state(robot_bn_model, + infer_variable=robot_var_feedback_action, + evidence_variables=robot_vars_evidence) - robot_action = bn_functions.get_stochastic_action(robot_actions_prob.values) - 
n_robot_assistance_feedback = Robot_Assistance_Feedback.counter.value - if robot_action>=n_robot_assistance_feedback/2: - robot_feedback_action = 1 - robot_assistance_action = n_robot_assistance_feedback-robot_action-1 - if robot_assistance_action == 0: - n_lev_0_no_feed_per_episode[e] += 1 - elif robot_assistance_action == 1: - n_lev_1_no_feed_per_episode[e] += 1 - elif robot_assistance_action == 2: - n_lev_2_no_feed_per_episode[e] += 1 - elif robot_assistance_action == 3: - n_lev_3_no_feed_per_episode[e] += 1 - else: - n_lev_4_no_feed_per_episode[e] += 1 - else: - robot_feedback_action = 0 - robot_assistance_action = robot_action - if robot_assistance_action == 0: - n_lev_0_with_feed_per_episode[e] += 1 - elif robot_assistance_action == 1: - n_lev_1_with_feed_per_episode[e] += 1 - elif robot_assistance_action == 2: - n_lev_2_with_feed_per_episode[e] += 1 - elif robot_assistance_action == 3: - n_lev_3_with_feed_per_episode[e] += 1 - else: - n_lev_4_with_feed_per_episode[e] += 1 - - - print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action)) + selected_robot_assistance_action = bn_functions.get_stochastic_action(query_robot_ass_prob) + selected_robot_feedback_action = bn_functions.get_stochastic_action(query_robot_feedback_prob) + + n_assistance_lev_per_episode[e][selected_robot_assistance_action] += 1 + n_feedback_per_episode[e][selected_robot_feedback_action] += 1 + print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(selected_robot_assistance_action, attempt_counter, game_state_counter, selected_robot_feedback_action)) #compare the real user with the estimated Persona and returns a user action (0, 1, 2) if other_user_bn_model!=None: @@ -217,12 +189,16 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val other_user_memory_name:other_user_memory_value, task_progress_name:game_state_counter, 
game_attempt_name:attempt_counter, - robot_assistance_name:robot_assistance_action, - robot_feedback_name:robot_feedback_action + robot_assistance_name:selected_robot_assistance_action, + robot_feedback_name:selected_robot_feedback_action } - user_actions_prob = bn_functions.infer_prob_from_state(other_user_bn_model, - infer_variable=other_user_var_target, - evidence_variables=other_user_vars_evidence) + query_user_action_prob = bn_functions.infer_prob_from_state(other_user_bn_model, + infer_variable=other_user_var_target_action, + evidence_variables=other_user_vars_evidence) + query_user_react_time_prob = bn_functions.infer_prob_from_state(other_user_bn_model, + infer_variable=other_user_var_target_react_time, + evidence_variables=other_user_vars_evidence) + else: #return the user action in this state based on the Persona profile @@ -232,31 +208,39 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val user_memory_name: user_memory_value, task_progress_name: game_state_counter, game_attempt_name: attempt_counter, - robot_assistance_name: robot_assistance_action, - robot_feedback_name: robot_feedback_action + robot_assistance_name: selected_robot_assistance_action, + robot_feedback_name: selected_robot_feedback_action } - query_res_prob = bn_functions.infer_prob_from_state(user_bn_model, - infer_variable=user_var_target, + query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model, + infer_variable=user_var_target_action, + evidence_variables=user_vars_evidence) + query_user_react_time_prob = bn_functions.infer_prob_from_state(user_bn_model, + infer_variable=user_var_target_react_time, evidence_variables=user_vars_evidence) + + #this is needed because we are querying the system with user_react_time and user_action output is 3x3 - query_res_prob_to_list = [query_res_prob.values[i][j] for i in range(query_res_prob.values.shape[0]) for j in range (query_res_prob.values.shape[1])] - query_action = bn_functions.get_stochastic_action(query_res_prob_to_list) - 
user_action, user_react_time = bn_functions.interpret_user_output(query_action) - #updates counters for plots - robot_assistance_per_action[user_action][robot_assistance_action] += 1 - attempt_counter_per_action[user_action][attempt_counter] += 1 - game_state_counter_per_action[user_action][game_state_counter] += 1 - robot_feedback_per_action[user_action][robot_feedback_action] += 1 + selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob) + selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob) + + #updates counters for user and robot model + robot_assistance_per_action[selected_user_action][selected_robot_assistance_action] += 1 + attempt_counter_per_action[selected_user_action][attempt_counter] += 1 + game_state_counter_per_action[selected_user_action][game_state_counter] += 1 + robot_feedback_per_action[selected_user_action][selected_robot_feedback_action] += 1 + + #TODO create the counters for the user_react_time variables #updates counters for simulation iter_counter += 1 task_evolution, attempt_counter, correct_move_counter, \ - wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(user_action, + wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(selected_user_action, task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter) + ####################################END of EPISODE####################################### print("task_evolution {}, attempt_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter)) print("robot_assistance_per_action {}".format(robot_assistance_per_action)) print("attempt_counter_per_action {}".format(attempt_counter_per_action)) @@ -265,17 +249,20 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, 
timeout_counter)) print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter)) + #update user model + user_bn_model = bn_functions.update_cpds_tables(user_bn_model, user_dynamic_variables) + #update robot model + robot_bn_model = bn_functions.update_cpds_tables(robot_bn_model, robot_dynamic_variables) - user_bn_model = bn_functions.update_cpds_tables(user_bn_model, dynamic_variables) - #reset counter?? + #reset counter robot_assistance_per_action = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(User_Action.counter.value)] - attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in - range(User_Action.counter.value)] - game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in - range(User_Action.counter.value)] robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in range(User_Action.counter.value)] + game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in + range(User_Action.counter.value)] + attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in + range(User_Action.counter.value)] #for plots n_correct_per_episode[e] = correct_move_counter @@ -286,15 +273,9 @@ def simulation(user_bn_model, user_var_target, user_memory_name, user_memory_val n_wrong_per_episode[e], n_timeout_per_episode[e], n_max_attempt_per_episode[e]] - robot_assistance_per_episode[e] = [n_lev_0_no_feed_per_episode[e], - n_lev_1_no_feed_per_episode[e], n_lev_2_no_feed_per_episode[e], - n_lev_3_no_feed_per_episode[e], n_lev_4_no_feed_per_episode[e], - n_lev_0_with_feed_per_episode[e], n_lev_1_with_feed_per_episode[e], - n_lev_2_with_feed_per_episode[e], n_lev_3_with_feed_per_episode[e], - n_lev_4_with_feed_per_episode[e] - ] - return game_performance_episode, robot_assistance_per_episode + + return game_performance_episode, n_assistance_lev_per_episode, n_feedback_per_episode @@ -320,14 
+301,19 @@ persona_cpds = bnlearn.import_DAG('bn_persona_model/new_persona_model.bif') real_user_memory = 2; real_user_attention = 2; real_user_reactivity = 2; real_user_cpds = None#bnlearn.import_DAG('bn_other_user_model/user_model.bif') -game_performance_per_episode, robot_assistance_per_episode = simulation(user_bn_model=persona_cpds, user_var_target=['user_action', 'user_react_time'], user_memory_name="memory", user_memory_value=persona_memory, - user_attention_name="attention", user_attention_value=persona_attention, +game_performance_per_episode, robot_assistance_per_episode, robot_feedback_per_episode = \ + simulation(user_bn_model=persona_cpds, user_var_target_action=['user_action'], + user_var_target_react_time=['user_react_time'], + user_memory_name="memory", user_memory_value=persona_memory, + user_attention_name="attention", user_attention_value=persona_attention, user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity, - task_progress_name="game_state", game_attempt_name="attempt", - robot_assistance_name="robot_assistance", robot_feedback_name="robot_feedback", + task_progress_name="game_state", game_attempt_name="attempt", + robot_assistance_name="robot_assistance", robot_feedback_name="robot_feedback", - robot_bn_model=robot_cpds, robot_var_target=["robot_assistance_feedback"], - other_user_bn_model=real_user_cpds, other_user_var_target=['user_action'], + robot_bn_model=robot_cpds, robot_var_ass_action=["robot_assistance"], + robot_var_feedback_action=["robot_feedback"], + other_user_bn_model=real_user_cpds, other_user_var_target_action=['user_action'], + other_user_var_target_react_time=["user_react_time"], other_user_memory_name="memory", other_user_memory_value=real_user_memory, other_user_attention_name="attention", other_user_attention_value=real_user_attention, other_user_reactivity_name="reactivity", other_user_reactivity_value=real_user_reactivity,