From 59c6de46d37e67e3d1c7ba8cc4c771eabc1b34f5 Mon Sep 17 00:00:00 2001 From: Antonio Andriella <aandriella@iri.upc.edu> Date: Wed, 14 Oct 2020 21:11:44 +0200 Subject: [PATCH] Working simulation of BN with update --- simulation.py | 421 +++++++++++++++++++++----------------------------- 1 file changed, 174 insertions(+), 247 deletions(-) diff --git a/simulation.py b/simulation.py index 33aa674..421eb26 100644 --- a/simulation.py +++ b/simulation.py @@ -8,95 +8,96 @@ from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_Rea import bn_functions import utils from episode import Episode +import pandas as pd + +def build_model_from_data(csv_filename, dag_filename, dag_model=None): + print("/************************************************************/") + print("Init model") + DAG = bnlearn.import_DAG(dag_filename) + df_caregiver = bnlearn.sampling(DAG, n=10000) + + print("/************************************************************/") + print("real_user Model") + DAG_real_user_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False) + df_real_user = pd.read_csv(csv_filename) + DAG_real_user = bnlearn.parameter_learning.fit(DAG_real_user_no_cpd, df_real_user, methodtype='bayes') + df_real_user = bnlearn.sampling(DAG_real_user, n=10000) + print("/************************************************************/") + print("Shared knowledge") + DAG_shared_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False) + shared_knowledge = [df_real_user, df_caregiver] + conc_shared_knowledge = pd.concat(shared_knowledge) + DAG_shared = bnlearn.parameter_learning.fit(DAG_shared_no_cpd, conc_shared_knowledge) + #df_conc_shared_knowledge = bn.sampling(DAG_shared, n=10000) + return DAG_shared + + +def generate_agent_assistance(preferred_assistance, agent_behaviour, current_state, state_space, action_space): + episode = Episode() + game_state, attempt, prev_user_outcome = episode.state_from_index_to_point(state_space, current_state) + robot_action = 0 + #agent_behaviour is a tuple first item is the feedback, second item is the robot assistance + print(game_state,attempt, prev_user_outcome) + if attempt == 1: + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), 0)) + elif attempt!=1 and prev_user_outcome == 0: + if attempt == 2 and agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0,1),min(max(0, preferred_assistance-1), 5))) + print("catt2") + elif attempt == 2 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance), 5))) + print("hatt2") + elif attempt == 3 and agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0,1),min(max(0, preferred_assistance-2), 5))) + print("catt3") + elif attempt == 3 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance+1), 5))) + print("hatt3") + elif attempt == 4 and agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0,1),min(max(0, preferred_assistance-3), 5))) + print("catt4") + elif attempt == 4 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance+2), 5))) + print("hatt4") + + elif attempt!=1 and prev_user_outcome == -1: + if attempt == 2 and 
agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance+1), 5))) + print("catt2") + elif attempt == 2 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance), 5))) + print("hatt2") + elif attempt == 3 and agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance+2), 5))) + print("catt3") + elif attempt == 3 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance-1), 5))) + print("hatt3") + elif attempt == 4 and agent_behaviour == "challenge": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance+2), 5))) + print("catt4") + elif attempt == 4 and agent_behaviour == "help": + robot_action = episode.state_from_point_to_index(action_space, + (random.randint(0, 1), min(max(0, preferred_assistance-3), 5))) + print("hatt4") + + agent_feedback, agent_assistance = episode.state_from_index_to_point(action_space, robot_action) + + return agent_feedback, agent_assistance -# -# def choose_next_states(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object, -# selected_agent_assistance_action, -# bn_model_user_action, var_user_action_target_action): -# -# def get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object): -# -# next_state = [] -# -# #correct move on the last state of the bin -# if (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0<max_attempt_per_object: -# next_state.append((game_state_t0+1, n_attempt_t0+1)) -# #correct state bu still in the bin -# elif task_progress == 0 or task_progress == 2 and n_attempt_t0<max_attempt_per_object: -# next_state.append((game_state_t0, n_attempt_t0+1)) -# elif (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0>=max_attempt_per_object: -# assert "you reach the maximum number of attempt the agent will move it for you" -# elif task_progress == 0 or task_progress == 2 and n_attempt_t0>=max_attempt_per_object: -# assert "you reach the maximum number of attempt the agent will move it for you" -# -# return next_state -# -# next_state = get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object) -# query_answer_probs = [] -# for t in next_state: -# vars_user_evidence = {"game_state_t0": game_state_t0, -# "attempt_t0": n_attempt_t0 - 1, -# "robot_assistance": selected_agent_assistance_action, -# "game_state_t1": t[0], -# "attempt_t1": t[1], -# } -# -# query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action, -# infer_variable=var_user_action_target_action, -# evidence_variables=vars_user_evidence) -# query_answer_probs.append(query_user_action_prob) -# -# -# #do the inference here -# #1. check given the current_state which are the possible states -# #2. for each of the possible states get the probability of user_action -# #3. select the state with the most higher action and execute it -# #4. 
return user_action -# -def generate_agent_assistance(preferred_assistance, agent_behaviour, n_game_state, n_attempt, alpha_action=0.1): - agent_policy = [[0 for j in range(n_attempt)] for i in range(n_game_state)] - previous_assistance = -1 - def get_alternative_action(agent_assistance, previous_assistance, agent_behaviour, alpha_action): - agent_assistance_res = agent_assistance - if previous_assistance == agent_assistance: - if agent_behaviour == "challenge": - if random.random() > alpha_action: - agent_assistance_res = min(max(0, agent_assistance-1), 5) - else: - agent_assistance_res = min(max(0, agent_assistance), 5) - else: - if random.random() > alpha_action: - agent_assistance_res = min(max(0, agent_assistance + 1), 5) - else: - agent_assistance_res = min(max(0, agent_assistance), 5) - return agent_assistance_res - - - for gs in range(n_game_state): - for att in range(n_attempt): - if att == 0: - if random.random()>alpha_action: - agent_policy[gs][att] = preferred_assistance - previous_assistance = agent_policy[gs][att] - else: - if random.random()>0.5: - agent_policy[gs][att] = min(max(0, preferred_assistance-1),5) - previous_assistance = agent_policy[gs][att] - else: - agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5) - previous_assistance = agent_policy[gs][att] - else: - if agent_behaviour == "challenge": - agent_policy[gs][att] = min(max(0, preferred_assistance-1), 5) - agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action) - previous_assistance = agent_policy[gs][att] - else: - agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5) - agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action) - previous_assistance = agent_policy[gs][att] - - return agent_policy def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter, @@ -167,11 +168,12 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr -def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action, - user_memory_name, user_memory_value, user_attention_name, user_attention_value, - user_reactivity_name, user_reactivity_value, - task_progress_t0_name, task_progress_t1_name, game_attempt_t0_name, game_attempt_t1_name, - agent_assistance_name, agent_policy, +def simulation(bn_model_user_action, var_user_action_target_action, + game_state_bn_name, attempt_bn_name, + agent_assistance_bn_name, agent_feedback_bn_name, + user_pref_assistance, + agent_behaviour, + agent_policy, state_space, action_space, epochs=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0): ''' @@ -183,33 +185,15 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use n_timeout_per_episode: ''' - #TODO: remove agent_assistance_vect and agent_feedback_vect - - #metrics we need, in order to compute afterwords the belief - - agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)] - agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)] - - attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)] - game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)] - 
agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)] - agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)] - - game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Feedback.counter.value)] - attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Feedback.counter.value)] - game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in - range(Agent_Assistance.counter.value)] - attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in - range(Agent_Assistance.counter.value)] - - user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] for l in range(Game_State.counter.value)] for j in - range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)] - user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in - range(Agent_Assistance.counter.value)] - attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)] + user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)] + for j in range(Agent_Assistance.counter.value)] + for l in range(Agent_Feedback.counter.value) + ] + attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in + range(User_Action.counter.value)] game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in - range(User_Action.counter.value)] + range(User_Action.counter.value)] #output variables: n_correct_per_episode = [0]*epochs @@ -243,22 +227,11 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use #The following variables are used to update the BN at the end of the episode user_action_dynamic_variables = { - 'attempt_t1': attempt_counter_per_user_action, - 'game_state_t1': game_state_counter_per_user_action, - 'user_action': user_action_per_game_state_attempt_counter_agent_assistance + 'attempt': attempt_counter_per_user_action, + 'game_state': game_state_counter_per_user_action, + 'user_action': user_action_per_robot_feedback_robot_assistance } - user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time, - 'game_state': game_state_counter_per_react_time, - 'agent_assistance': agent_assistance_per_react_time, - 'agent_feedback': agent_feedback_per_react_time} - - agent_assistance_dynamic_variables = {'attempt': attempt_counter_per_agent_assistance, - 'game_state': game_state_counter_per_agent_assistance} - - agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback, - 'game_state': game_state_counter_per_agent_feedback} - #data structure to memorise the sequence of states (state, action, next_state) episode = [] selected_user_action = 0 @@ -267,60 +240,37 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use while(task_progress_counter<=task_complexity): current_state = (game_state_counter, attempt_counter, selected_user_action) + current_state_index = ep.state_from_point_to_index(state_space, current_state) + if agent_policy==[]: + selected_agent_feedback_action, selected_agent_assistance_action = \ + generate_agent_assistance(preferred_assistance=user_pref_assistance, + 
agent_behaviour=agent_behaviour, + current_state=current_state_index, + state_space=state_space, + action_space=action_space + ) + else: + selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, agent_policy[current_state_index]) - selected_agent_assistance_action = agent_policy[game_state_counter][attempt_counter-1]#random.randint(0,5) - selected_agent_feedback_action = 0#random.randint(0,1) #counters for plots n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1 current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action) - - print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action)) - + print("agent_assistance {}, agent_feedback {}, attempt {}, game {}".format(selected_agent_assistance_action, selected_agent_feedback_action, attempt_counter, game_state_counter)) ##########################QUERY FOR THE USER ACTION AND REACT TIME##################################### - #compare the real user with the estimated Persona and returns a user action (0, 1a, 2) - #return the user action in this state based on the Persona profile - vars_user_evidence = { task_progress_t0_name: game_state_counter, - game_attempt_t0_name: attempt_counter - 1, - task_progress_t1_name: game_state_counter, - game_attempt_t1_name: attempt_counter - 1, - agent_assistance_name: selected_agent_assistance_action, + vars_user_evidence = { game_state_bn_name: game_state_counter, + attempt_bn_name: attempt_counter - 1, + agent_assistance_bn_name: selected_agent_assistance_action, + agent_feedback_bn_name: selected_agent_feedback_action, } - query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action, + query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action, infer_variable=var_user_action_target_action, evidence_variables=vars_user_evidence) - # query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time, - # infer_variable=var_user_react_time_target_action, - # evidence_variables=vars_user_evidence) - # - # selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values) - # selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values) - # counters for plots - # n_react_time_per_episode[e][selected_user_react_time] += 1 - - #updates counters for user action - - user_action_per_game_state_attempt_counter_agent_assistance[selected_agent_assistance_action][attempt_counter-1][game_state_counter][selected_user_action] += 1 - attempt_counter_per_user_action[selected_user_action][attempt_counter-1] += 1 - game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1 - user_action_per_agent_assistance[selected_agent_assistance_action][selected_user_action] += 1 - - #update counter for user react time - # agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1 - # attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1 - # game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1 - # agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1 - #update counter for agent feedback - game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1 - 
attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter-1] += 1 - #update counter for agent assistance - game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1 - attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter-1] += 1 # updates counters for simulation # remap user_action index @@ -357,35 +307,17 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use #update user models bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning) - bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables) - #update agent models - - print("user_given_game_attempt:", bn_model_user_action['model'].cpds[0].values) - print("user_given_robot:", bn_model_user_action['model'].cpds[5].values) - print("game_user:", bn_model_user_action['model'].cpds[3].values) - print("attempt_user:", bn_model_user_action['model'].cpds[2].values) #reset counter - user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] - for l in range(Game_State.counter.value)] for j in - range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)] - user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in - range(Agent_Assistance.counter.value)] + user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)] + for j in range(Agent_Assistance.counter.value)] + for l in range(Agent_Feedback.counter.value) + ] attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)] game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)] - attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in - range(User_React_time.counter.value)] - game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in - range(User_React_time.counter.value)] - agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in - range(User_React_time.counter.value)] - agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in - range(User_React_time.counter.value)] - - #for plots n_correct_per_episode[e] = correct_move_counter n_wrong_per_episode[e] = wrong_move_counter @@ -408,57 +340,52 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use ############################################################################# -agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.5) - -# SIMULATION PARAMS -epochs = 20 -scaling_factor = 1 -# initialise the agent -bn_model_caregiver_assistance = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_assistive_model.bif') -bn_model_caregiver_feedback = None#bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_feedback_model.bif') -bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif') -bn_model_user_react_time = 
bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/user_react_time_model.bif') - -# initialise memory, attention and reactivity variables -persona_memory = 0; -persona_attention = 0; -persona_reactivity = 1; - -# define state space struct for the irl algorithm -episode_instance = Episode() -# DEFINITION OF THE MDP -# define state space struct for the irl algorithm -attempt = [i for i in range(1, Attempt.counter.value + 1)] -# +1 (3,_,_) absorbing state -game_state = [i for i in range(0, Game_State.counter.value + 1)] -user_action = [i for i in range(-1, User_Action.counter.value - 1)] -state_space = (game_state, attempt, user_action) -states_space_list = list(itertools.product(*state_space)) -state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list] -agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)] -agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)] -action_space = (agent_feedback_action, agent_assistance_action) -action_space_list = list(itertools.product(*action_space)) -action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list] -terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in - range(len(user_action))] -initial_state = (1, 1, 0) - -#1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER - - -game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \ - simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'], - bn_model_user_react_time=bn_model_user_react_time, - var_user_react_time_target_action=['user_react_time'], - user_memory_name="memory", user_memory_value=persona_memory, - user_attention_name="attention", user_attention_value=persona_attention, - user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity, - task_progress_t0_name="game_state_t0", task_progress_t1_name="game_state_t1", - game_attempt_t0_name="attempt_t0", game_attempt_t1_name="attempt_t1", - agent_assistance_name="agent_assistance", agent_policy=agent_policy, - state_space=states_space_list, action_space=action_space_list, - epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1) - -utils.plot2D_game_performance("/home/pal/Documents/Framework/bn_generative_model/results/user_performance.png", epochs, scaling_factor, game_performance_per_episode) -utils.plot2D_assistance("/home/pal/Documents/Framework/bn_generative_model/results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode) +# agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.1) +# print(agent_policy) +# +# # SIMULATION PARAMS +# epochs = 20 +# scaling_factor = 1 +# # initialise the agent +# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif') +# +# # initialise memory, attention and reactivity variables +# persona_memory = 0; +# persona_attention = 0; +# persona_reactivity = 1; +# +# # define state space struct for the irl algorithm +# episode_instance = Episode() +# # DEFINITION OF THE MDP +# # define state space struct for the irl algorithm +# attempt = [i for i in range(1, Attempt.counter.value + 1)] +# # +1 
(3,_,_) absorbing state +# game_state = [i for i in range(0, Game_State.counter.value + 1)] +# user_action = [i for i in range(-1, User_Action.counter.value - 1)] +# state_space = (game_state, attempt, user_action) +# states_space_list = list(itertools.product(*state_space)) +# state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list] +# agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)] +# agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)] +# action_space = (agent_feedback_action, agent_assistance_action) +# action_space_list = list(itertools.product(*action_space)) +# action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list] +# terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in +# range(len(user_action))] +# initial_state = (1, 1, 0) +# +# #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER +# +# +# game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \ +# simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'], +# game_state_bn_name="game_state", +# attempt_bn_name="attempt", +# agent_assistance_bn_name="agent_assistance", +# agent_feedback_bn_name="agent_feedback", +# agent_policy=agent_policy, +# state_space=states_space_list, action_space=action_space_list, +# epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1) +# +# utils.plot2D_game_performance("results/user_performance.png", epochs, scaling_factor, game_performance_per_episode) +# utils.plot2D_assistance("results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode) -- GitLab
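
Worked example of the new generate_agent_assistance() heuristic introduced in this patch: with preferred_assistance=2, attempt 1 always yields assistance level 0 plus a random feedback bit. If the previous user outcome was 0 (presumably a wrong move), "help" escalates to levels 2, 3 and 4 on attempts 2-4, while "challenge" drops to 1, 0 and 0. If the previous outcome was -1 (presumably a timeout), the pattern is inverted: "challenge" raises assistance to 3, 4 and 4, while "help" lowers it to 2, 1 and 0. Every level is clamped to the valid range 0-5 by min(max(0, x), 5).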
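
The driver at the bottom of simulation.py is left commented out in this patch, and as written it does not pass the new user_pref_assistance and agent_behaviour parameters that the refactored simulation() signature requires. Below is a minimal, hypothetical sketch of how the pieces could be wired together once the driver is reinstated. It assumes simulation() still returns the five per-episode lists unpacked in the old driver; the file paths, CSV name and numeric settings are placeholders, not part of this patch.

# Hypothetical driver (sketch only): wires build_model_from_data() and the
# refactored simulation() signature together. Paths and settings are placeholders.
import itertools

import utils
from bn_variables import Agent_Assistance, Agent_Feedback, Attempt, Game_State, User_Action
from simulation import build_model_from_data, simulation

# 1. Shared user model: caregiver priors sampled from the .bif DAG, concatenated
#    with the real user's logged episodes (CSV), then parameters are re-fitted.
bn_model_user_action = build_model_from_data(
    csv_filename="bn_persona_model/user_log.csv",            # placeholder
    dag_filename="bn_persona_model/persona_model_test.bif")  # placeholder

# 2. State and action spaces, as in the commented-out driver at the end of the file.
attempt = list(range(1, Attempt.counter.value + 1))
game_state = list(range(0, Game_State.counter.value + 1))     # +1 absorbing state
user_action = list(range(-1, User_Action.counter.value - 1))
states_space_list = list(itertools.product(game_state, attempt, user_action))
action_space_list = list(itertools.product(range(Agent_Feedback.counter.value),
                                            range(Agent_Assistance.counter.value)))

# 3. Run the simulation; an empty agent_policy makes the episode loop fall back to
#    generate_agent_assistance() with the given preference and behaviour.
epochs = 20
game_performance, react_time, assistance, feedback, episodes_list = simulation(
    bn_model_user_action=bn_model_user_action,
    var_user_action_target_action=['user_action'],
    game_state_bn_name="game_state",
    attempt_bn_name="attempt",
    agent_assistance_bn_name="agent_assistance",
    agent_feedback_bn_name="agent_feedback",
    user_pref_assistance=2,
    agent_behaviour="help",
    agent_policy=[],
    state_space=states_space_list,
    action_space=action_space_list,
    epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)

utils.plot2D_game_performance("results/user_performance.png", epochs, 1, game_performance)
utils.plot2D_assistance("results/agent_assistance.png", epochs, 1, assistance)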