From f9c978459c4b380f6c6373c041319711e40fbd0b Mon Sep 17 00:00:00 2001 From: Antonio Andriella <aandriella@iri.upc.edu> Date: Thu, 24 Sep 2020 23:40:43 +0200 Subject: [PATCH] Remove main --- main.py | 476 -------------------------------------------------------- 1 file changed, 476 deletions(-) delete mode 100644 main.py diff --git a/main.py b/main.py deleted file mode 100644 index ab68aac..0000000 --- a/main.py +++ /dev/null @@ -1,476 +0,0 @@ -import itertools -import os -import bnlearn -import numpy as np -#import classes and modules -from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_React_time, Game_State, Attempt -import bn_functions -import utils -from episode import Episode - - -def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter, - wrong_move_counter, timeout_counter, max_attempt_counter, max_attempt_per_object - ): - ''' - The function computes given the current state and action of the user, the next state - Args: - user_action: -1a wrong, 0 timeout, 1a correct - game_state_counter: beg, mid, end - correct_move_counter: - attempt_counter: - wrong_move_counter: - timeout_counter: - max_attempt_counter: - max_attempt_per_object: - Return: - game_state_counter - attempt_counter - correct_move_counter - wrong_move_counter - timeout_counter - max_attempt_counter - ''' - - if task_progress_counter >= 0 and task_progress_counter < 2: - game_state_counter = 0 - elif task_progress_counter >= 2 and task_progress_counter < 4: - game_state_counter = 1 - elif task_progress_counter >= 4 and task_progress_counter < 5: - game_state_counter = 2 - else: - game_state_counter = 3 - - # if then else are necessary to classify the task game state into beg, mid, end - - if user_action == 1 and game_state_counter<3: - attempt_counter = 1 - correct_move_counter += 1 - task_progress_counter += 1 - # if the user made a wrong move and still did not reach the maximum number of attempts - elif user_action == -1 and attempt_counter < max_attempt_per_object and game_state_counter<3: - attempt_counter += 1 - wrong_move_counter += 1 - # if the user did not move any token and still did not reach the maximum number of attempts - elif user_action == 0 and attempt_counter < max_attempt_per_object and game_state_counter<3: - attempt_counter += 1 - timeout_counter += 1 - # the agent or therapist makes the correct move on the patient's behalf - elif attempt_counter>=max_attempt_per_object and game_state_counter<3: - attempt_counter = 1 - max_attempt_counter += 1 - task_progress_counter +=1 - - if game_state_counter==3: - attempt_counter = 1 - task_progress_counter +=1 - print("Reach the end of the episode") - - # TODO call the function to compute the state of the game (beg, mid, end) - - - - - next_state = (game_state_counter, attempt_counter, user_action) - - return next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter, max_attempt_counter - - - -def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action, - user_memory_name, user_memory_value, user_attention_name, user_attention_value, - user_reactivity_name, user_reactivity_value, - task_progress_name, game_attempt_name, agent_assistance_name, agent_feedback_name, - bn_model_agent_assistance, var_agent_assistance_target_action, bn_model_agent_feedback, - var_agent_feedback_target_action, - bn_model_other_user_action, var_other_user_action_target_action, - 
bn_model_other_user_react_time, var_other_user_target_react_time_action, - other_user_memory_name, other_user_memory_value, - other_user_attention_name, other_user_attention_value, other_user_reactivity_name, - other_user_reactivity_value, - state_space, action_space, - epochs=50, task_complexity=5, max_attempt_per_object=4): - ''' - Args: - - Return: - n_correct_per_episode: - n_wrong_per_episode: - n_timeout_per_episode: - - ''' - #TODO: remove agent_assistance_vect and agent_feedback_vect - - #metrics we need, in order to compute afterwords the belief - - attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)] - game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)] - agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)] - agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)] - - attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)] - game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)] - agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)] - agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)] - - game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Assistance.counter.value)] - attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Assistance.counter.value)] - - game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Feedback.counter.value)] - attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Feedback.counter.value)] - - - #output variables: - n_correct_per_episode = [0]*epochs - n_wrong_per_episode = [0]*epochs - n_timeout_per_episode = [0]*epochs - n_max_attempt_per_episode = [0]*epochs - game_performance_episode = [0]*epochs - n_assistance_lev_per_episode = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(epochs)] - n_feedback_per_episode = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(epochs)] - n_react_time_per_episode = [[0 for i in range(User_React_time.counter.value)] for j in range(epochs)] - - - #data structure to memorise a sequence of episode - episodes = [] - ep = Episode() - - for e in range(epochs): - '''Simulation framework''' - #counters - game_state_counter = 0 - attempt_counter = 1 - iter_counter = 0 - correct_move_counter = 0 - wrong_move_counter = 0 - timeout_counter = 0 - max_attempt_counter = 0 - - #The following variables are used to update the BN at the end of the episode - user_action_dynamic_variables = {'attempt': attempt_counter_per_action, - 'game_state': game_state_counter_per_action, - 'agent_assistance': agent_assistance_per_action, - 'agent_feedback': agent_feedback_per_action} - - user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time, - 'game_state': game_state_counter_per_react_time, - 'agent_assistance': agent_assistance_per_react_time, - 'agent_feedback': agent_feedback_per_react_time} - - agent_assistance_dynamic_variables = 
{'attempt': attempt_counter_per_agent_assistance, - 'game_state': game_state_counter_per_agent_assistance} - - agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback, - 'game_state': game_state_counter_per_agent_feedback} - - #data structure to memorise the sequence of states (state, action, next_state) - episode = [] - selected_user_action = 0 - task_progress_counter = 0 - #####################SIMULATE ONE EPISODE######################################### - while(task_progress_counter<=task_complexity): - - current_state = (game_state_counter, attempt_counter, selected_user_action) - - ##################QUERY FOR THE ROBOT ASSISTANCE AND FEEDBACK################## - vars_agent_evidence = { - user_reactivity_name: user_reactivity_value, - user_memory_name: user_memory_value, - task_progress_name: game_state_counter, - game_attempt_name: attempt_counter-1, - } - query_agent_assistance_prob = bn_functions.infer_prob_from_state(bn_model_agent_assistance, - infer_variable=var_agent_assistance_target_action, - evidence_variables=vars_agent_evidence) - if bn_model_agent_feedback != None: - query_agent_feedback_prob = bn_functions.infer_prob_from_state(bn_model_agent_feedback, - infer_variable=var_agent_feedback_target_action, - evidence_variables=vars_agent_evidence) - selected_agent_feedback_action = bn_functions.get_stochastic_action(query_agent_feedback_prob.values) - else: - selected_agent_feedback_action = 0 - - selected_agent_assistance_action = bn_functions.get_stochastic_action(query_agent_assistance_prob.values) - n_feedback_per_episode[e][selected_agent_feedback_action] += 1 - - #counters for plots - n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1 - current_agent_action = (selected_agent_assistance_action, selected_agent_feedback_action) - - print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action)) - - - ##########################QUERY FOR THE USER ACTION AND REACT TIME##################################### - #compare the real user with the estimated Persona and returns a user action (0, 1a, 2) - if bn_model_other_user_action!=None and bn_model_user_react_time!=None: - #return the user action in this state based on the user profile - vars_other_user_evidence = {other_user_attention_name:other_user_attention_value, - other_user_reactivity_name:other_user_reactivity_value, - other_user_memory_name:other_user_memory_value, - task_progress_name:game_state_counter, - game_attempt_name:attempt_counter-1, - agent_assistance_name:selected_agent_assistance_action, - agent_feedback_name:selected_agent_feedback_action - } - query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_other_user_action, - infer_variable=var_other_user_action_target_action, - evidence_variables=vars_other_user_evidence) - query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_other_user_react_time, - infer_variable=var_other_user_target_react_time_action, - evidence_variables=vars_other_user_evidence) - - - else: - #return the user action in this state based on the Persona profile - vars_user_evidence = {user_attention_name: user_attention_value, - user_reactivity_name: user_reactivity_value, - user_memory_name: user_memory_value, - task_progress_name: game_state_counter, - game_attempt_name: attempt_counter-1, - agent_assistance_name: selected_agent_assistance_action, - agent_feedback_name: selected_agent_feedback_action - 
} - query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action, - infer_variable=var_user_action_target_action, - evidence_variables=vars_user_evidence) - query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time, - infer_variable=var_user_react_time_target_action, - evidence_variables=vars_user_evidence) - - - - selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values) - selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values) - # counters for plots - n_react_time_per_episode[e][selected_user_react_time] += 1 - - #updates counters for user action - agent_assistance_per_action[selected_user_action][selected_agent_assistance_action] += 1 - attempt_counter_per_action[selected_user_action][attempt_counter-1] += 1 - game_state_counter_per_action[selected_user_action][game_state_counter] += 1 - agent_feedback_per_action[selected_user_action][selected_agent_feedback_action] += 1 - #update counter for user react time - agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1 - attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1 - game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1 - agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1 - #update counter for agent feedback - game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1 - attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter-1] += 1 - #update counter for agent assistance - game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1 - attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter-1] += 1 - - # updates counters for simulation - # remap user_action index - if selected_user_action == 0: - selected_user_action = 1 - elif selected_user_action == 1: - selected_user_action = -1 - else: - selected_user_action = 0 - - #updates counters for simulation - iter_counter += 1 - next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, \ - wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(selected_user_action, - task_progress_counter, - attempt_counter, - correct_move_counter, wrong_move_counter, - timeout_counter, max_attempt_counter, - max_attempt_per_object) - - # store the (state, action, next_state) - episode.append((ep.state_from_point_to_index(state_space, current_state,), - ep.state_from_point_to_index(action_space, current_agent_action), - ep.state_from_point_to_index(state_space, next_state))) - - print("current_state ", current_state, " next_state ", next_state) - ####################################END of EPISODE####################################### - print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, " - "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter, - wrong_move_counter, timeout_counter, max_attempt_counter)) - - #save episode - episodes.append(episode) - - #update user models - bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables) - bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables) - #update agent models - bn_model_agent_assistance = 
bn_functions.update_cpds_tables(bn_model_agent_assistance, agent_assistance_dynamic_variables) - bn_model_agent_feedback = bn_functions.update_cpds_tables(bn_model_agent_feedback, agent_feedback_dynamic_variables) - - #reset counter - agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in - range(User_Action.counter.value)] - agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in - range(User_Action.counter.value)] - game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in - range(User_Action.counter.value)] - attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in - range(User_Action.counter.value)] - - attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in - range(User_React_time.counter.value)] - game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in - range(User_React_time.counter.value)] - agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in - range(User_React_time.counter.value)] - agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in - range(User_React_time.counter.value)] - - game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in - range(Agent_Assistance.counter.value)] - attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in - range(Agent_Assistance.counter.value)] - - game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in - range(Agent_Feedback.counter.value)] - attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in - range(Agent_Feedback.counter.value)] - - #for plots - n_correct_per_episode[e] = correct_move_counter - n_wrong_per_episode[e] = wrong_move_counter - n_timeout_per_episode[e] = timeout_counter - n_max_attempt_per_episode[e] = max_attempt_counter - game_performance_episode[e] = [n_correct_per_episode[e], - n_wrong_per_episode[e], - n_timeout_per_episode[e], - n_max_attempt_per_episode[e]] - - - return game_performance_episode, n_react_time_per_episode, n_assistance_lev_per_episode, n_feedback_per_episode, episodes - - - -############################################################################# -############################################################################# -####################### RUN THE SIMULATION ################################## -############################################################################# -############################################################################# - -#SIMULATION PARAMS -epochs = 100 - -#initialise the agent -bn_model_caregiver_assistance = bnlearn.import_DAG('bn_agent_model/agent_assistive_model.bif') -bn_model_caregiver_feedback = bnlearn.import_DAG('bn_agent_model/agent_feedback_model.bif') -bn_model_user_action = bnlearn.import_DAG('bn_persona_model/user_action_model.bif') -bn_model_user_react_time = bnlearn.import_DAG('bn_persona_model/user_react_time_model.bif') -bn_model_other_user_action = None#bnlearn.import_DAG('bn_persona_model/other_user_action_model.bif') -bn_model_other_user_react_time = None#bnlearn.import_DAG('bn_persona_model/other_user_react_time_model.bif') - -#initialise memory, attention and reactivity varibles -persona_memory = 0; persona_attention = 0; persona_reactivity = 1; -#initialise memory, attention and reactivity varibles -other_user_memory = 2; other_user_attention 
= 2; other_user_reactivity = 2; - -#define state space struct for the irl algorithm -attempt = [i for i in range(1, Attempt.counter.value+1)] -#+1a (3,_,_) absorbing state -game_state = [i for i in range(0, Game_State.counter.value+1)] -user_action = [i for i in range(-1, User_Action.counter.value-1)] -state_space = (game_state, attempt, user_action) -states_space_list = list(itertools.product(*state_space)) -agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)] -agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)] -action_space = (agent_assistance_action, agent_feedback_action) -action_space_list = list(itertools.product(*action_space)) - -##############BEFORE RUNNING THE SIMULATION UPDATE THE BELIEF IF YOU HAVE DATA#################### -log_directory = "" -if os.path.exists(log_directory): - bn_belief_user_action_file = log_directory+"/bn_belief_user_action.pkl" - bn_belief_user_react_time_file = log_directory+"/bn_belief_user_react_time.pkl" - bn_belief_caregiver_assistance_file = log_directory+"/bn_belief_caregiver_assistive_action.pkl" - bn_belief_caregiver_feedback_file = log_directory+"/bn_belief_caregiver_feedback_action.pkl" - - bn_belief_user_action = utils.read_user_statistics_from_pickle(bn_belief_user_action_file) - bn_belief_user_react_time = utils.read_user_statistics_from_pickle(bn_belief_user_react_time_file) - bn_belief_caregiver_assistance = utils.read_user_statistics_from_pickle(bn_belief_caregiver_assistance_file) - bn_belief_caregiver_feedback = utils.read_user_statistics_from_pickle(bn_belief_caregiver_feedback_file) - bn_model_user_action = bn_functions.update_cpds_tables(bn_model=bn_model_user_action, variables_tables=bn_belief_user_action) - bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model=bn_model_user_react_time, variables_tables=bn_belief_user_react_time) - bn_model_caregiver_assistance = bn_functions.update_cpds_tables(bn_model=bn_model_caregiver_assistance, variables_tables=bn_belief_caregiver_assistance) - bn_model_caregiver_feedback = bn_functions.update_cpds_tables(bn_model=bn_model_caregiver_feedback, variables_tables=bn_belief_caregiver_feedback) - -else: - assert("You're not using the user information") - question = input("Are you sure you don't want to load user's belief information?") - -game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, generated_episodes = \ - simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'], - bn_model_user_react_time=bn_model_user_react_time, - var_user_react_time_target_action=['user_react_time'], - user_memory_name="memory", user_memory_value=persona_memory, - user_attention_name="attention", user_attention_value=persona_attention, - user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity, - task_progress_name="game_state", game_attempt_name="attempt", - agent_assistance_name="agent_assistance", agent_feedback_name="agent_feedback", - bn_model_agent_assistance=bn_model_caregiver_assistance, - var_agent_assistance_target_action=["agent_assistance"], - bn_model_agent_feedback=bn_model_caregiver_feedback, var_agent_feedback_target_action=["agent_feedback"], - bn_model_other_user_action=bn_model_other_user_action, - var_other_user_action_target_action=['user_action'], - bn_model_other_user_react_time=bn_model_other_user_react_time, - var_other_user_target_react_time_action=["user_react_time"], other_user_memory_name="memory", - 
other_user_memory_value=other_user_memory, other_user_attention_name="attention", - other_user_attention_value=other_user_attention, other_user_reactivity_name="reactivity", - other_user_reactivity_value=other_user_reactivity, - state_space=states_space_list, action_space=action_space_list, - epochs=epochs, task_complexity=5, max_attempt_per_object=4) - - - -plot_game_performance_path = "" -plot_agent_assistance_path = "" -episodes_path = "episodes.npy" - -if bn_model_other_user_action != None: - plot_game_performance_path = "game_performance_"+"_epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg" - plot_agent_assistance_path = "agent_assistance_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg" - plot_agent_feedback_path = "agent_feedback_"+"epoch_"+str(epochs)+"_real_user_memory_"+str(real_user_memory)+"_real_user_attention_"+str(real_user_attention)+"_real_user_reactivity_"+str(real_user_reactivity)+".jpg" - -else: - plot_game_performance_path = "game_performance_"+"epoch_" + str(epochs) + "_persona_memory_" + str(persona_memory) + "_persona_attention_" + str(persona_attention) + "_persona_reactivity_" + str(persona_reactivity) + ".jpg" - plot_agent_assistance_path = "agent_assistance_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg" - plot_agent_feedback_path = "agent_feedback_"+"epoch_"+str(epochs)+"_persona_memory_"+str(persona_memory)+"_persona_attention_"+str(persona_attention)+"_persona_reactivity_"+str(persona_reactivity)+".jpg" - -dir_name = input("Please insert the name of the directory:") -full_path = os.getcwd()+"/results/"+dir_name+"/" -if not os.path.exists(full_path): - os.mkdir(full_path) - print("Directory ", full_path, " created.") -else: - dir_name = input("The directory already exist please insert a new name:") - print("Directory ", full_path, " created.") - if os.path.exists(full_path): - assert("Directory already exists ... start again") - exit(0) - -with open(full_path+episodes_path, "ab") as f: - np.save(full_path+episodes_path, generated_episodes) - f.close() - - -utils.plot2D_game_performance(full_path+plot_game_performance_path, epochs, game_performance_per_episode) -utils.plot2D_assistance(full_path+plot_agent_assistance_path, epochs, agent_assistance_per_episode) -utils.plot2D_feedback(full_path+plot_agent_feedback_path, epochs, agent_feedback_per_episode) - - - -''' -With the current simulator we can generate a list of episodes -the episodes will be used to generate the trans probabilities and as input to the IRL algo -''' -#TODO -# - include reaction time as output -# - average mistakes, average timeout, average assistance, average_react_time -# - include real time episodes into the simulation: -# - counters for agent_assistance, agent_feedback, attempt, game_state, attention and reactivity -# - using the function update probability to generate the new user model and use it as input to the simulator - - - -- GitLab
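
Illustrative sketches of the main pieces of the removed simulator follow; helper implementations and enum sizes that are not visible in this diff are assumptions, not the repository's actual code.

The deleted compute_next_state classifies raw task progress into a game phase (0 = beginning, 1 = middle, 2 = end, 3 = finished) and then advances the attempt and outcome counters depending on whether the user's move was correct (1), wrong (-1) or a timeout (0). A minimal, self-contained sketch of that transition logic, returning a simplified tuple rather than the original eight values, is:

    def game_phase(task_progress):
        """Map raw task progress onto a phase: 0 = beginning, 1 = middle, 2 = end, 3 = done."""
        if 0 <= task_progress < 2:
            return 0
        if 2 <= task_progress < 4:
            return 1
        if 4 <= task_progress < 5:
            return 2
        return 3

    def next_state(user_action, task_progress, attempt, max_attempt_per_object=4):
        """Advance the simulation after one user move.

        user_action: 1 = correct, -1 = wrong, 0 = timeout.
        Returns the (phase, attempt, user_action) state tuple and the updated progress.
        """
        phase = game_phase(task_progress)
        if phase < 3:
            if user_action == 1:                    # correct: reset attempts, advance the task
                attempt = 1
                task_progress += 1
            elif attempt < max_attempt_per_object:  # wrong move or timeout, attempts left
                attempt += 1
            else:                                   # attempts exhausted: the agent/caregiver completes the move
                attempt = 1
                task_progress += 1
        return (phase, attempt, user_action), task_progress

    state, progress = next_state(user_action=1, task_progress=0, attempt=1)
    print(state, progress)  # (0, 1, 1) 1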
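
Inside the episode loop, the agent's assistance/feedback and the user's action/reaction time are each obtained by querying a Bayesian network for a conditional distribution given the current evidence (game state, attempt, user profile, agent actions) and then sampling an index from it with bn_functions.get_stochastic_action. The inference helper itself is internal to the repository (it presumably wraps bnlearn inference), so the sketch below only covers the sampling step, taking the queried distribution as a plain probability vector; the stand-in reuses the repo helper's name but its implementation is assumed.

    import numpy as np

    def get_stochastic_action(probabilities, rng=None):
        """Sample an action index from a (possibly unnormalised) probability vector.

        Illustrative stand-in for bn_functions.get_stochastic_action; the original
        implementation in the repository may differ.
        """
        rng = rng or np.random.default_rng()
        p = np.asarray(probabilities, dtype=float)
        p = p / p.sum()  # guard against rounding drift in the queried CPD values
        return int(rng.choice(len(p), p=p))

    # e.g. a hypothetical query over five assistance levels returned by the BN:
    query_values = [0.05, 0.10, 0.50, 0.25, 0.10]
    selected_agent_assistance_action = get_stochastic_action(query_values)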
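
At the end of each episode the per-action and per-react-time counters are folded back into the user and agent models through bn_functions.update_cpds_tables. Its actual behaviour is not visible in this diff; the snippet below only illustrates the underlying idea of turning an observed counts table into a normalised conditional probability table with additive smoothing, and should not be read as the repository's implementation.

    import numpy as np

    def counts_to_cpd(counts, smoothing=1.0):
        """Normalise a counts table of shape (n_outcomes, n_parent_states) into a CPD.

        Each column sums to one after Laplace smoothing. This is NOT the repo's
        update_cpds_tables, only an illustration of the count-to-probability step.
        """
        c = np.asarray(counts, dtype=float) + smoothing
        return c / c.sum(axis=0, keepdims=True)

    # e.g. how often each of three user actions was observed at each of four attempts:
    attempt_counter_per_action = [[5, 2, 1, 0],
                                  [1, 3, 2, 1],
                                  [0, 1, 2, 3]]
    print(counts_to_cpd(attempt_counter_per_action))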
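
The (state, action, next_state) triples stored in each episode are flat indices into the state and action spaces built with itertools.product. Assuming Episode.state_from_point_to_index is essentially a lookup of a tuple's position in that product (the dimension sizes below are guesses, since the bn_variables module is not part of this diff), the flattening works like this:

    import itertools

    # Assumed dimensions: game_state 0-3 (3 = absorbing end state), attempt 1-4,
    # user_action in {-1, 0, 1}; assistance levels and feedback values are guesses.
    game_state = list(range(0, 4))
    attempt = list(range(1, 5))
    user_action = [-1, 0, 1]
    states_space_list = list(itertools.product(game_state, attempt, user_action))

    agent_assistance = list(range(6))   # assumed number of assistance levels
    agent_feedback = list(range(2))     # assumed: feedback off/on
    action_space_list = list(itertools.product(agent_assistance, agent_feedback))

    def point_to_index(space, point):
        """Flat index of a state/action tuple, as the Episode helper is assumed to do."""
        return space.index(tuple(point))

    s = point_to_index(states_space_list, (0, 1, 0))  # current (game_state, attempt, user_action)
    a = point_to_index(action_space_list, (2, 1))     # (assistance level, feedback)
    print(s, a)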
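
Finally, the generated episodes are written to episodes.npy for the IRL step. The removed script opens the file with open(..., "ab") and then passes the path to np.save anyway, which opens the file itself, so the handle is never used. A minimal equivalent, including the allow_pickle flag needed when episodes have unequal lengths (the sample data here is hypothetical), would be:

    import numpy as np

    # Hypothetical episodes: lists of (state, action, next_state) index triples of varying length.
    generated_episodes = [[(0, 3, 5), (5, 2, 9)], [(0, 1, 4)]]

    # np.save opens and closes the file itself, so no explicit open()/close() is needed.
    np.save("episodes.npy", np.array(generated_episodes, dtype=object))

    # Object (ragged) arrays must be reloaded with allow_pickle=True:
    episodes = np.load("episodes.npy", allow_pickle=True)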