diff --git a/simulation.py b/simulation.py
new file mode 100644
index 0000000000000000000000000000000000000000..37036d3cf2537b8cb526ff7d6d7a4181c5b7dd0f
--- /dev/null
+++ b/simulation.py
@@ -0,0 +1,468 @@
+import itertools
+import os
+import bnlearn
+import numpy as np
+# import classes and modules
+from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_React_time, Game_State, Attempt
+import bn_functions
+import utils
+from episode import Episode
+
+
+def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter,
+                       wrong_move_counter, timeout_counter, max_attempt_counter, max_attempt_per_object
+                       ):
+    '''
+    Given the current state and the user's action, compute the next state of the game.
+    Args:
+        user_action: -1 wrong, 0 timeout, 1 correct
+        task_progress_counter: number of objects placed so far
+        attempt_counter: attempts made on the current object
+        correct_move_counter:
+        wrong_move_counter:
+        timeout_counter:
+        max_attempt_counter:
+        max_attempt_per_object:
+    Return:
+        next_state
+        task_progress_counter
+        game_state_counter
+        attempt_counter
+        correct_move_counter
+        wrong_move_counter
+        timeout_counter
+        max_attempt_counter
+    '''
+
+    # classify the task progress into beg (0), mid (1), end (2) and the absorbing state (3)
+    if task_progress_counter >= 0 and task_progress_counter < 2:
+        game_state_counter = 0
+    elif task_progress_counter >= 2 and task_progress_counter < 4:
+        game_state_counter = 1
+    elif task_progress_counter >= 4 and task_progress_counter < 5:
+        game_state_counter = 2
+    else:
+        game_state_counter = 3
+
+    # the user made the correct move
+    if user_action == 1 and game_state_counter < 3:
+        attempt_counter = 1
+        correct_move_counter += 1
+        task_progress_counter += 1
+    # the user made a wrong move and still did not reach the maximum number of attempts
+    elif user_action == -1 and attempt_counter < max_attempt_per_object and game_state_counter < 3:
+        attempt_counter += 1
+        wrong_move_counter += 1
+    # the user did not move any token and still did not reach the maximum number of attempts
+    elif user_action == 0 and attempt_counter < max_attempt_per_object and game_state_counter < 3:
+        attempt_counter += 1
+        timeout_counter += 1
+    # the agent or therapist makes the correct move on the patient's behalf
+    elif attempt_counter >= max_attempt_per_object and game_state_counter < 3:
+        attempt_counter = 1
+        max_attempt_counter += 1
+        task_progress_counter += 1
+
+    if game_state_counter == 3:
+        attempt_counter = 1
+        task_progress_counter += 1
+        print("Reached the end of the episode")
+
+    # TODO call the function to compute the state of the game (beg, mid, end)
+
+    next_state = (game_state_counter, attempt_counter, user_action)
+
+    return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter
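+
+
+# Example (hypothetical values, a sketch of the transition logic above): a correct move
+# (user_action=1) on the first object advances the task and keeps the game state at "beg":
+#   next_state, progress, game_state, attempt, correct, wrong, timeout, max_att = \
+#       compute_next_state(user_action=1, task_progress_counter=0, attempt_counter=1,
+#                          correct_move_counter=0, wrong_move_counter=0, timeout_counter=0,
+#                          max_attempt_counter=0, max_attempt_per_object=4)
+#   # -> next_state == (0, 1, 1), progress == 1, correct == 1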
+
+
+def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action,
+               user_memory_name, user_memory_value, user_attention_name, user_attention_value,
+               user_reactivity_name, user_reactivity_value,
+               task_progress_name, game_attempt_name, agent_assistance_name, agent_feedback_name,
+               bn_model_agent_assistance, var_agent_assistance_target_action, bn_model_agent_feedback,
+               var_agent_feedback_target_action, agent_policy,
+               state_space, action_space,
+               epochs=50, task_complexity=5, max_attempt_per_object=4):
+    '''
+    Runs the simulation for a given number of episodes (epochs).
+    Args:
+        bn_model_user_action, bn_model_user_react_time: BN models of the user's action and reaction time
+        bn_model_agent_assistance, bn_model_agent_feedback: BN models of the agent's assistance and feedback
+        var_*_target_action: names of the variables to infer in each model
+        user_*_name / user_*_value: evidence variables describing the user profile (memory, attention, reactivity)
+        task_progress_name, game_attempt_name, agent_assistance_name, agent_feedback_name: evidence variable names
+        agent_policy: pre-computed policy (np.ndarray indexed by state); if it is not an ndarray the agent BNs are queried instead
+        state_space, action_space: lists of the discretised states and agent actions
+        epochs, task_complexity, max_attempt_per_object: simulation parameters
+    Return:
+        game_performance_episode: [correct, wrong, timeout, max_attempt] counters per episode
+        n_react_time_per_episode: user reaction time counters per episode
+        n_assistance_lev_per_episode: agent assistance counters per episode
+        n_feedback_per_episode: agent feedback counters per episode
+        episodes: list of Episode objects with the generated (state, action, next_state) sequences
+    '''
+    # TODO: remove agent_assistance_vect and agent_feedback_vect
+
+    # counters we need in order to update the belief afterwards
+    attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
+    game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)]
+    agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)]
+    agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)]
+
+    attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)]
+    game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)]
+    agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)]
+    agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)]
+
+    game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Assistance.counter.value)]
+    attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Assistance.counter.value)]
+
+    game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Feedback.counter.value)]
+    attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Feedback.counter.value)]
+
+    # output variables:
+    n_correct_per_episode = [0]*epochs
+    n_wrong_per_episode = [0]*epochs
+    n_timeout_per_episode = [0]*epochs
+    n_max_attempt_per_episode = [0]*epochs
+    game_performance_episode = [0]*epochs
+    n_assistance_lev_per_episode = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(epochs)]
+    n_feedback_per_episode = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(epochs)]
+    n_react_time_per_episode = [[0 for i in range(User_React_time.counter.value)] for j in range(epochs)]
+
+    # data structure to memorise the sequence of episodes
+    episodes = []
+    ep = Episode()
+
+    for e in range(epochs):
+        '''Simulation framework'''
+        # counters
+        game_state_counter = 0
+        attempt_counter = 1
+        iter_counter = 0
+        correct_move_counter = 0
+        wrong_move_counter = 0
+        timeout_counter = 0
+        max_attempt_counter = 0
+
+        # The following variables are used to update the BN at the end of the episode
+        user_action_dynamic_variables = {'attempt': attempt_counter_per_action,
+                                         'game_state': game_state_counter_per_action,
+                                         'agent_assistance': agent_assistance_per_action,
+                                         'agent_feedback': agent_feedback_per_action}
+
+        user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time,
+                                             'game_state': game_state_counter_per_react_time,
+                                             'agent_assistance': agent_assistance_per_react_time,
+                                             'agent_feedback': agent_feedback_per_react_time}
+
+        agent_assistance_dynamic_variables = {'attempt': attempt_counter_per_agent_assistance,
+                                              'game_state': game_state_counter_per_agent_assistance}
+
+        agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback,
+                                            'game_state': game_state_counter_per_agent_feedback}
+
+        # data structure to memorise the sequence of (state, action, next_state) tuples
+        episode = []
+        selected_user_action = 0
+        task_progress_counter = 0
+        #####################SIMULATE ONE EPISODE#########################################
+        while task_progress_counter <= task_complexity:
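+
+            # current_state encodes (game_state, attempt, previous user action); e.g. (1, 2, -1)
+            # means mid-game, second attempt on the current object, and a wrong last user move
+            # (user actions are remapped below to 1 correct, -1 wrong, 0 timeout).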
+            current_state = (game_state_counter, attempt_counter, selected_user_action)
+
+            if not isinstance(agent_policy, np.ndarray):
+                ##################QUERY FOR THE ROBOT ASSISTANCE AND FEEDBACK##################
+                vars_agent_evidence = {
+                    user_reactivity_name: user_reactivity_value,
+                    user_memory_name: user_memory_value,
+                    task_progress_name: game_state_counter,
+                    game_attempt_name: attempt_counter-1,
+                }
+
+                query_agent_assistance_prob = bn_functions.infer_prob_from_state(bn_model_agent_assistance,
+                                                                                 infer_variable=var_agent_assistance_target_action,
+                                                                                 evidence_variables=vars_agent_evidence)
+                if bn_model_agent_feedback is not None:
+                    query_agent_feedback_prob = bn_functions.infer_prob_from_state(bn_model_agent_feedback,
+                                                                                   infer_variable=var_agent_feedback_target_action,
+                                                                                   evidence_variables=vars_agent_evidence)
+                    selected_agent_feedback_action = bn_functions.get_stochastic_action(query_agent_feedback_prob.values)
+                else:
+                    selected_agent_feedback_action = 0
+
+                selected_agent_assistance_action = bn_functions.get_stochastic_action(query_agent_assistance_prob.values)
+            else:
+                idx_state = ep.state_from_point_to_index(state_space, current_state)
+                if agent_policy[idx_state] >= Agent_Assistance.counter.value:
+                    selected_agent_assistance_action = agent_policy[idx_state]-Agent_Assistance.counter.value
+                    selected_agent_feedback_action = 1
+                else:
+                    selected_agent_assistance_action = agent_policy[idx_state]
+                    selected_agent_feedback_action = 0
+
+            n_feedback_per_episode[e][selected_agent_feedback_action] += 1
+
+            # counters for plots
+            n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1
+            current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action)
+
+            print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action))
+
+            ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
+            # compare the real user with the estimated Persona and return a user action (0, 1, 2)
+
+            # return the user action in this state based on the Persona profile
+            vars_user_evidence = {user_attention_name: user_attention_value,
+                                  user_reactivity_name: user_reactivity_value,
+                                  user_memory_name: user_memory_value,
+                                  task_progress_name: game_state_counter,
+                                  game_attempt_name: attempt_counter-1,
+                                  agent_assistance_name: selected_agent_assistance_action,
+                                  agent_feedback_name: selected_agent_feedback_action
+                                  }
+            query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
+                                                                        infer_variable=var_user_action_target_action,
+                                                                        evidence_variables=vars_user_evidence)
+            query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time,
+                                                                            infer_variable=var_user_react_time_target_action,
+                                                                            evidence_variables=vars_user_evidence)
+
+            selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
+            selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values)
+            # counters for plots
+            n_react_time_per_episode[e][selected_user_react_time] += 1
+
+            # update counters for user action
+            agent_assistance_per_action[selected_user_action][selected_agent_assistance_action] += 1
+            attempt_counter_per_action[selected_user_action][attempt_counter-1] += 1
+            game_state_counter_per_action[selected_user_action][game_state_counter] += 1
+            agent_feedback_per_action[selected_user_action][selected_agent_feedback_action] += 1
+            # update counters for user react time
+            agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1
+            attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1
+            game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1
+            agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1
+            # update counters for agent feedback
+            game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1
+            attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter-1] += 1
+            # update counters for agent assistance
+            game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1
+            attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter-1] += 1
+
+            # remap the user_action index returned by the BN (0 correct, 1 wrong, 2 timeout)
+            # to the simulation encoding (1 correct, -1 wrong, 0 timeout)
+            if selected_user_action == 0:
+                selected_user_action = 1
+            elif selected_user_action == 1:
+                selected_user_action = -1
+            else:
+                selected_user_action = 0
+
+            # update counters for the simulation
+            iter_counter += 1
+            next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, \
+            wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(selected_user_action,
+                                                                                          task_progress_counter,
+                                                                                          attempt_counter,
+                                                                                          correct_move_counter, wrong_move_counter,
+                                                                                          timeout_counter, max_attempt_counter,
+                                                                                          max_attempt_per_object)
+
+            # store the (state, action, next_state)
+            episode.append((ep.state_from_point_to_index(state_space, current_state),
+                            ep.state_from_point_to_index(action_space, current_agent_action),
+                            ep.state_from_point_to_index(state_space, next_state)))
+
+            print("current_state ", current_state, " next_state ", next_state)
+        ####################################END of EPISODE#######################################
+        print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
+              "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
+                                                          wrong_move_counter, timeout_counter, max_attempt_counter))
+
+        # save episode
+        episodes.append(Episode(episode))
+
+        # update user models
+        bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables)
+        bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables)
+        # update agent models
+        bn_model_agent_assistance = bn_functions.update_cpds_tables(bn_model_agent_assistance, agent_assistance_dynamic_variables)
+        if bn_model_agent_feedback is not None:
+            bn_model_agent_feedback = bn_functions.update_cpds_tables(bn_model_agent_feedback, agent_feedback_dynamic_variables)
+
+        # reset counters
+        agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in
+                                       range(User_Action.counter.value)]
+        agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in
+                                     range(User_Action.counter.value)]
+        game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in
+                                         range(User_Action.counter.value)]
+        attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in
+                                      range(User_Action.counter.value)]
+
+        attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in
+                                          range(User_React_time.counter.value)]
+        game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in
+                                             range(User_React_time.counter.value)]
+        agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in
+                                         range(User_React_time.counter.value)]
+        agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in
+                                           range(User_React_time.counter.value)]
+
+        game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in
+                                                   range(Agent_Assistance.counter.value)]
+        attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in
+                                                range(Agent_Assistance.counter.value)]
+
+        game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in
+                                                 range(Agent_Feedback.counter.value)]
+        attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in
+                                              range(Agent_Feedback.counter.value)]
+
+        # for plots
+        n_correct_per_episode[e] = correct_move_counter
+        n_wrong_per_episode[e] = wrong_move_counter
+        n_timeout_per_episode[e] = timeout_counter
+        n_max_attempt_per_episode[e] = max_attempt_counter
+        game_performance_episode[e] = [n_correct_per_episode[e],
+                                       n_wrong_per_episode[e],
+                                       n_timeout_per_episode[e],
+                                       n_max_attempt_per_episode[e]]
+
+    return game_performance_episode, n_react_time_per_episode, n_assistance_lev_per_episode, n_feedback_per_episode, episodes
+
+
+#############################################################################
+#############################################################################
+####################### RUN THE SIMULATION ##################################
+#############################################################################
+#############################################################################
+
+# #SIMULATION PARAMS
+# epochs = 100
+#
+# #initialise the agent
+# bn_model_caregiver_assistance = bnlearn.import_DAG('bn_agent_model/agent_assistive_model.bif')
+# bn_model_caregiver_feedback = bnlearn.import_DAG('bn_agent_model/agent_feedback_model.bif')
+# bn_model_user_action = bnlearn.import_DAG('bn_persona_model/user_action_model.bif')
+# bn_model_user_react_time = bnlearn.import_DAG('bn_persona_model/user_react_time_model.bif')
+# bn_model_other_user_action = None  # bnlearn.import_DAG('bn_persona_model/other_user_action_model.bif')
+# bn_model_other_user_react_time = None  # bnlearn.import_DAG('bn_persona_model/other_user_react_time_model.bif')
+#
+# #initialise memory, attention and reactivity variables of the Persona
+# persona_memory = 0; persona_attention = 0; persona_reactivity = 1;
+# #initialise memory, attention and reactivity variables of the other user
+# other_user_memory = 2; other_user_attention = 2; other_user_reactivity = 2;
+#
+# #define the state space struct for the irl algorithm
+# attempt = [i for i in range(1, Attempt.counter.value+1)]
+# # +1 for the (3, _, _) absorbing state
+# game_state = [i for i in range(0, Game_State.counter.value+1)]
+# user_action = [i for i in range(-1, User_Action.counter.value-1)]
+# state_space = (game_state, attempt, user_action)
+# states_space_list = list(itertools.product(*state_space))
+# agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
+# agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
+# action_space = (agent_assistance_action, agent_feedback_action)
+# action_space_list = list(itertools.product(*action_space))
+#
+# ##############BEFORE RUNNING THE SIMULATION UPDATE THE BELIEF IF YOU HAVE DATA####################
+# log_directory = "/home/pal/carf_ws/src/carf/caregiver_in_the_loop/log/1/0"
+# if os.path.exists(log_directory):
+#     bn_belief_user_action_file = log_directory+"/bn_belief_user_action.pkl"
+#     bn_belief_user_react_time_file = log_directory+"/bn_belief_user_react_time.pkl"
+#     bn_belief_caregiver_assistance_file = log_directory+"/bn_belief_caregiver_assistive_action.pkl"
+#     bn_belief_caregiver_feedback_file = log_directory+"/bn_belief_caregiver_feedback_action.pkl"
+#
+#     bn_belief_user_action = utils.read_user_statistics_from_pickle(bn_belief_user_action_file)
+#     bn_belief_user_react_time = utils.read_user_statistics_from_pickle(bn_belief_user_react_time_file)
+#     bn_belief_caregiver_assistance = utils.read_user_statistics_from_pickle(bn_belief_caregiver_assistance_file)
+#     bn_belief_caregiver_feedback = utils.read_user_statistics_from_pickle(bn_belief_caregiver_feedback_file)
+#     bn_model_user_action = bn_functions.update_cpds_tables(bn_model=bn_model_user_action, variables_tables=bn_belief_user_action)
+#     bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model=bn_model_user_react_time, variables_tables=bn_belief_user_react_time)
+#     bn_model_caregiver_assistance = bn_functions.update_cpds_tables(bn_model=bn_model_caregiver_assistance, variables_tables=bn_belief_caregiver_assistance)
+#     bn_model_caregiver_feedback = bn_functions.update_cpds_tables(bn_model=bn_model_caregiver_feedback, variables_tables=bn_belief_caregiver_feedback)
+#
+# else:
+#     print("You're not using the user information")
+#     question = input("Are you sure you don't want to load user's belief information?")
+#
+# game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, generated_episodes = \
+#     simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
+#                bn_model_user_react_time=bn_model_user_react_time,
+#                var_user_react_time_target_action=['user_react_time'],
+#                user_memory_name="memory", user_memory_value=persona_memory,
+#                user_attention_name="attention", user_attention_value=persona_attention,
+#                user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
+#                task_progress_name="game_state", game_attempt_name="attempt",
+#                agent_assistance_name="agent_assistance", agent_feedback_name="agent_feedback",
+#                bn_model_agent_assistance=bn_model_caregiver_assistance,
+#                var_agent_assistance_target_action=["agent_assistance"],
+#                bn_model_agent_feedback=bn_model_caregiver_feedback, var_agent_feedback_target_action=["agent_feedback"],
+#                agent_policy=None,
+#                state_space=states_space_list, action_space=action_space_list,
+#                epochs=epochs, task_complexity=5, max_attempt_per_object=4)
+#
+#
+#
+# plot_game_performance_path = ""
+# plot_agent_assistance_path = ""
+# episodes_path = "episodes.npy"
+#
+# if bn_model_other_user_action is not None:
+#     plot_game_performance_path = "game_performance_"+"epoch_"+str(epochs)+"_other_user_memory_"+str(other_user_memory)+"_other_user_attention_"+str(other_user_attention)+"_other_user_reactivity_"+str(other_user_reactivity)+".jpg"
"agent_assistance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg" +# plot_agent_feedback_path = "agent_feedback_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg" +# +# else: +# plot_game_performance_path = "game_performance_"+"epoch_" + str(epochs) + "_persona_memory_" + str(persona_memory) + "_persona_attention_" + str(persona_attention) + "_persona_reactivity_" + str(persona_reactivity) + ".jpg" +# plot_agent_assistance_path = "agent_assistance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg" +# plot_agent_feedback_path = "agent_feedback_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg" +# +# dir_name = input("Please insert the name of the directory:") +# full_path = os.getcwd()+"/results/"+dir_name+"/" +# if not os.path.exists(full_path): +# os.mkdir(full_path) +# print("Directory ", full_path, " created.") +# else: +# dir_name = input("The directory already exist please insert a new name:") +# print("Directory ", full_path, " created.") +# if os.path.exists(full_path): +# assert("Directory already exists ... start again") +# exit(0) +# +# with open(full_path+episodes_path, "ab") as f: +# np.save(full_path+episodes_path, generated_episodes) +# f.close() +# +# +# utils.plot2D_game_performance(full_path+plot_game_performance_path, epochs, game_performance_per_episode) +# utils.plot2D_assistance(full_path+plot_agent_assistance_path, epochs, agent_assistance_per_episode) +# utils.plot2D_feedback(full_path+plot_agent_feedback_path, epochs, agent_feedback_per_episode) + + + +''' +With the current simulator we can generate a list of episodes +the episodes will be used to generate the trans probabilities and as input to the IRL algo +''' +#TODO +# - include reaction time as output +# - average mistakes, average timeout, average assistance, average_react_time +# - include real time episodes into the simulation: +# - counters for agent_assistance, agent_feedback, attempt, game_state, attention and reactivity +# - using the function update probability to generate the new user model and use it as input to the simulator + + +