Commit 59c6de46 authored by Antonio Andriella's avatar Antonio Andriella

Working simulation of BN with update

parent 65b57306
@@ -8,95 +8,96 @@ from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_React_time, Game_State, Attempt
import bn_functions
import utils
from episode import Episode
import pandas as pd
def build_model_from_data(csv_filename, dag_filename, dag_model=None):
print("/************************************************************/")
print("Init model")
DAG = bnlearn.import_DAG(dag_filename)
df_caregiver = bnlearn.sampling(DAG, n=10000)
print("/************************************************************/")
print("real_user Model")
DAG_real_user_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
df_real_user = pd.read_csv(csv_filename)
DAG_real_user = bnlearn.parameter_learning.fit(DAG_real_user_no_cpd, df_real_user, methodtype='bayes')
df_real_user = bnlearn.sampling(DAG_real_user, n=10000)
print("/************************************************************/")
print("Shared knowledge")
DAG_shared_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
shared_knowledge = [df_real_user, df_caregiver]
conc_shared_knowledge = pd.concat(shared_knowledge)
DAG_shared = bnlearn.parameter_learning.fit(DAG_shared_no_cpd, conc_shared_knowledge)
#df_conc_shared_knowledge = bnlearn.sampling(DAG_shared, n=10000)
return DAG_shared
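# A minimal usage sketch (editor's illustration; the file names below are
# placeholders, assuming a BIF network spec and a CSV of logged user sessions):
#
#   DAG_shared = build_model_from_data(csv_filename='data/real_user_log.csv',
#                                      dag_filename='bn_persona_model/persona_model_test.bif')
#   df_shared = bnlearn.sampling(DAG_shared, n=10000)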
def generate_agent_assistance(preferred_assistance, agent_behaviour, current_state, state_space, action_space):
episode = Episode()
game_state, attempt, prev_user_outcome = episode.state_from_index_to_point(state_space, current_state)
robot_action = 0
#the agent action is a tuple: the first item is the feedback, the second is the robot assistance level
print(game_state, attempt, prev_user_outcome)
if attempt == 1:
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), 0))
elif attempt!=1 and prev_user_outcome == 0:
if attempt == 2 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0,1),min(max(0, preferred_assistance-1), 5)))
print("catt2")
elif attempt == 2 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance), 5)))
print("hatt2")
elif attempt == 3 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0,1),min(max(0, preferred_assistance-2), 5)))
print("catt3")
elif attempt == 3 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance+1), 5)))
print("hatt3")
elif attempt == 4 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0,1),min(max(0, preferred_assistance-3), 5)))
print("catt4")
elif attempt == 4 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance+2), 5)))
print("hatt4")
elif attempt!=1 and prev_user_outcome == -1:
if attempt == 2 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance+1), 5)))
print("catt2")
elif attempt == 2 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance), 5)))
print("hatt2")
elif attempt == 3 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance+2), 5)))
print("catt3")
elif attempt == 3 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance-1), 5)))
print("hatt3")
elif attempt == 4 and agent_behaviour == "challenge":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance+2), 5)))
print("catt4")
elif attempt == 4 and agent_behaviour == "help":
robot_action = episode.state_from_point_to_index(action_space,
(random.randint(0, 1), min(max(0, preferred_assistance-3), 5)))
print("hatt4")
agent_feedback, agent_assistance = episode.state_from_index_to_point(action_space, robot_action)
return agent_feedback, agent_assistance
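# The repeated min(max(0, x), 5) pattern above clamps the assistance level to
# the valid range [0, 5]; a small helper (editor's sketch, not part of the
# original code) would make that intent explicit:
def clamp_assistance(level, low=0, high=5):
    # clamp an assistance level into the inclusive range [low, high]
    return min(max(low, level), high)
# e.g. clamp_assistance(preferred_assistance - 2) is equivalent to
#      min(max(0, preferred_assistance - 2), 5)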
#
# def choose_next_states(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object,
# selected_agent_assistance_action,
# bn_model_user_action, var_user_action_target_action):
#
# def get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object):
#
# next_state = []
#
# #correct move on the last state of the bin
# if (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0<max_attempt_per_object:
# next_state.append((game_state_t0+1, n_attempt_t0+1))
# #correct move but still in the same bin
# elif (task_progress == 0 or task_progress == 2) and n_attempt_t0<max_attempt_per_object:
# next_state.append((game_state_t0, n_attempt_t0+1))
# elif (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0>=max_attempt_per_object:
# raise AssertionError("you reached the maximum number of attempts; the agent will move it for you")
# elif (task_progress == 0 or task_progress == 2) and n_attempt_t0>=max_attempt_per_object:
# raise AssertionError("you reached the maximum number of attempts; the agent will move it for you")
#
# return next_state
#
# next_state = get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object)
# query_answer_probs = []
# for t in next_state:
# vars_user_evidence = {"game_state_t0": game_state_t0,
# "attempt_t0": n_attempt_t0 - 1,
# "robot_assistance": selected_agent_assistance_action,
# "game_state_t1": t[0],
# "attempt_t1": t[1],
# }
#
# query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
# infer_variable=var_user_action_target_action,
# evidence_variables=vars_user_evidence)
# query_answer_probs.append(query_user_action_prob)
#
#
# #do the inference here
# #1. check, given the current_state, which are the possible next states
# #2. for each of the possible states, get the probability of user_action
# #3. select the state with the highest user_action probability and execute it (see the sketch below)
# #4. return user_action
#
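# A minimal sketch of the four steps above (editor's illustration; it assumes
# bn_functions.infer_prob_from_state returns an object whose .values attribute
# is a probability vector, as it is used inside simulation() below):
def choose_most_likely_next_state(candidate_states, bn_model_user_action,
                                  var_user_action_target_action, base_evidence):
    import numpy as np
    best_state, best_prob = None, -1.0
    for game_state_t1, attempt_t1 in candidate_states:
        # extend the shared evidence with one candidate next state
        evidence = dict(base_evidence, game_state_t1=game_state_t1, attempt_t1=attempt_t1)
        probs = bn_functions.infer_prob_from_state(bn_model_user_action,
                                                   infer_variable=var_user_action_target_action,
                                                   evidence_variables=evidence).values
        # keep the candidate whose most likely user_action has the highest probability
        if np.max(probs) > best_prob:
            best_state, best_prob = (game_state_t1, attempt_t1), float(np.max(probs))
    return best_state, best_prob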
# NOTE: this definition shadows the generate_agent_assistance defined above;
# only this table-based version is visible to code that runs after this point.
def generate_agent_assistance(preferred_assistance, agent_behaviour, n_game_state, n_attempt, alpha_action=0.1):
agent_policy = [[0 for j in range(n_attempt)] for i in range(n_game_state)]
previous_assistance = -1
def get_alternative_action(agent_assistance, previous_assistance, agent_behaviour, alpha_action):
agent_assistance_res = agent_assistance
if previous_assistance == agent_assistance:
if agent_behaviour == "challenge":
if random.random() > alpha_action:
agent_assistance_res = min(max(0, agent_assistance-1), 5)
else:
agent_assistance_res = min(max(0, agent_assistance), 5)
else:
if random.random() > alpha_action:
agent_assistance_res = min(max(0, agent_assistance + 1), 5)
else:
agent_assistance_res = min(max(0, agent_assistance), 5)
return agent_assistance_res
    for gs in range(n_game_state):
        for att in range(n_attempt):
            if att == 0:
                if random.random() > alpha_action:
                    agent_policy[gs][att] = preferred_assistance
                elif random.random() > 0.5:
                    agent_policy[gs][att] = min(max(0, preferred_assistance-1), 5)
                else:
                    agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5)
            else:
                if agent_behaviour == "challenge":
                    agent_policy[gs][att] = min(max(0, preferred_assistance-1), 5)
                else:
                    agent_policy[gs][att] = min(max(0, preferred_assistance+1), 5)
                agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance,
                                                               agent_behaviour, alpha_action)
            previous_assistance = agent_policy[gs][att]
    return agent_policy
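# Example (editor's sketch): build a "help" policy over Game_State x Attempt and
# index it as agent_policy[game_state][attempt - 1], as simulation() does below:
#
#   policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help",
#                                      n_game_state=Game_State.counter.value,
#                                      n_attempt=Attempt.counter.value, alpha_action=0.1)
#   first_assistance_level = policy[0][0]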
def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter,
@@ -167,11 +168,12 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
-def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action,
-               user_memory_name, user_memory_value, user_attention_name, user_attention_value,
-               user_reactivity_name, user_reactivity_value,
-               task_progress_t0_name, task_progress_t1_name, game_attempt_t0_name, game_attempt_t1_name,
-               agent_assistance_name, agent_policy,
+def simulation(bn_model_user_action, var_user_action_target_action,
+               game_state_bn_name, attempt_bn_name,
+               agent_assistance_bn_name, agent_feedback_bn_name,
+               user_pref_assistance,
+               agent_behaviour,
+               agent_policy,
state_space, action_space,
epochs=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
'''
@@ -183,33 +185,15 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
n_timeout_per_episode:
'''
#TODO: remove agent_assistance_vect and agent_feedback_vect
#metrics we need in order to compute the belief afterwards
agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)]
agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)]
attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)]
game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)]
agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)]
agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)]
game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Feedback.counter.value)]
attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Feedback.counter.value)]
game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in
range(Agent_Assistance.counter.value)]
attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in
range(Agent_Assistance.counter.value)]
user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] for l in range(Game_State.counter.value)] for j in
range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in
range(Agent_Assistance.counter.value)]
attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
                                   range(User_Action.counter.value)]
game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
                                      range(User_Action.counter.value)]
user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
                                                    for j in range(Agent_Assistance.counter.value)]
                                                   for l in range(Agent_Feedback.counter.value)
                                                   ]
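# The nested list comprehensions above are zero-initialised contingency tables;
# an equivalent, more compact form (editor's sketch, assuming numpy is an
# acceptable dependency) would be:
#
#   import numpy as np
#   attempt_counter_per_user_action = np.zeros((User_Action.counter.value,
#                                               Attempt.counter.value), dtype=int)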
#output variables:
n_correct_per_episode = [0]*epochs
@@ -243,22 +227,11 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
#The following variables are used to update the BN at the end of the episode
user_action_dynamic_variables = {
-                                    'attempt_t1': attempt_counter_per_user_action,
-                                    'game_state_t1': game_state_counter_per_user_action,
-                                    'user_action': user_action_per_game_state_attempt_counter_agent_assistance
+                                    'attempt': attempt_counter_per_user_action,
+                                    'game_state': game_state_counter_per_user_action,
+                                    'user_action': user_action_per_robot_feedback_robot_assistance
}
user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time,
'game_state': game_state_counter_per_react_time,
'agent_assistance': agent_assistance_per_react_time,
'agent_feedback': agent_feedback_per_react_time}
agent_assistance_dynamic_variables = {'attempt': attempt_counter_per_agent_assistance,
'game_state': game_state_counter_per_agent_assistance}
agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback,
'game_state': game_state_counter_per_agent_feedback}
#data structure to memorise the sequence of states (state, action, next_state)
episode = []
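# e.g. each entry would be a (state, action, next_state) index triple
# (editor's note on the intended shape; next_state_index is hypothetical here):
#   episode.append((current_state_index, current_agent_action, next_state_index))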
selected_user_action = 0
@@ -267,60 +240,37 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
while(task_progress_counter<=task_complexity):
current_state = (game_state_counter, attempt_counter, selected_user_action)
current_state_index = ep.state_from_point_to_index(state_space, current_state)
-        if agent_policy == []:
-            selected_agent_feedback_action, selected_agent_assistance_action = \
-                generate_agent_assistance(preferred_assistance=user_pref_assistance,
-                                          agent_behaviour=agent_behaviour,
-                                          current_state=current_state_index,
-                                          state_space=state_space,
-                                          action_space=action_space
-                                          )
-        else:
-            selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, agent_policy[current_state_index])
+        selected_agent_assistance_action = agent_policy[game_state_counter][attempt_counter-1]  # random.randint(0,5)
+        selected_agent_feedback_action = 0  # random.randint(0,1)
#counters for plots
n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1
current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action)
print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action))
print("agent_assistance {}, agent_feedback {}, attempt {}, game {}".format(selected_agent_assistance_action, selected_agent_feedback_action, attempt_counter, game_state_counter))
##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
#compare the real user with the estimated Persona and return a user action (0, 1, 2)
#return the user action in this state based on the Persona profile
-        vars_user_evidence = { task_progress_t0_name: game_state_counter,
-                               game_attempt_t0_name: attempt_counter - 1,
-                               task_progress_t1_name: game_state_counter,
-                               game_attempt_t1_name: attempt_counter - 1,
-                               agent_assistance_name: selected_agent_assistance_action,
+        vars_user_evidence = { game_state_bn_name: game_state_counter,
+                               attempt_bn_name: attempt_counter - 1,
+                               agent_assistance_bn_name: selected_agent_assistance_action,
+                               agent_feedback_bn_name: selected_agent_feedback_action,
                               }
-        query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
+        query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
infer_variable=var_user_action_target_action,
evidence_variables=vars_user_evidence)
# query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time,
# infer_variable=var_user_react_time_target_action,
# evidence_variables=vars_user_evidence)
#
#
selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
# selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values)
# counters for plots
# n_react_time_per_episode[e][selected_user_react_time] += 1
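# get_stochastic_action draws an action index from the inferred distribution;
# a minimal equivalent (editor's sketch, not necessarily bn_functions' actual
# implementation) would be:
#
#   import numpy as np
#   def get_stochastic_action(probs):
#       probs = np.asarray(probs, dtype=float)
#       return int(np.random.choice(len(probs), p=probs / probs.sum()))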
#updates counters for user action
user_action_per_game_state_attempt_counter_agent_assistance[selected_agent_assistance_action][attempt_counter-1][game_state_counter][selected_user_action] += 1
attempt_counter_per_user_action[selected_user_action][attempt_counter-1] += 1
game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
user_action_per_agent_assistance[selected_agent_assistance_action][selected_user_action] += 1
#update counter for user react time
# agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1
# attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1
# game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1
# agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1
#update counter for agent feedback
game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1
attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter-1] += 1
#update counter for agent assistance
game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1
attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter-1] += 1
# updates counters for simulation
# remap user_action index
@@ -357,35 +307,17 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
#update user models
bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables)
#update agent models
print("user_given_game_attempt:", bn_model_user_action['model'].cpds[0].values)
print("user_given_robot:", bn_model_user_action['model'].cpds[5].values)
print("game_user:", bn_model_user_action['model'].cpds[3].values)
print("attempt_user:", bn_model_user_action['model'].cpds[2].values)
#reset counter
user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)]
for l in range(Game_State.counter.value)] for j in
range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in
range(Agent_Assistance.counter.value)]
user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
for j in range(Agent_Assistance.counter.value)]
for l in range(Agent_Feedback.counter.value)
]
attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
range(User_Action.counter.value)]
game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
range(User_Action.counter.value)]
attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in
range(User_React_time.counter.value)]
game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in
range(User_React_time.counter.value)]
agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in
range(User_React_time.counter.value)]
agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in
range(User_React_time.counter.value)]
#for plots
n_correct_per_episode[e] = correct_move_counter
n_wrong_per_episode[e] = wrong_move_counter
@@ -408,57 +340,52 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
#############################################################################
agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.5)
# SIMULATION PARAMS
epochs = 20
scaling_factor = 1
# initialise the agent
bn_model_caregiver_assistance = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_assistive_model.bif')
bn_model_caregiver_feedback = None#bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_feedback_model.bif')
bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
bn_model_user_react_time = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/user_react_time_model.bif')
# initialise memory, attention and reactivity variables
persona_memory = 0
persona_attention = 0
persona_reactivity = 1
# define state space struct for the irl algorithm
episode_instance = Episode()
# DEFINITION OF THE MDP
# define state space struct for the irl algorithm
attempt = [i for i in range(1, Attempt.counter.value + 1)]
# +1 (3,_,_) absorbing state
game_state = [i for i in range(0, Game_State.counter.value + 1)]
user_action = [i for i in range(-1, User_Action.counter.value - 1)]
state_space = (game_state, attempt, user_action)
states_space_list = list(itertools.product(*state_space))
state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
action_space = (agent_feedback_action, agent_assistance_action)
action_space_list = list(itertools.product(*action_space))
action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in
range(len(user_action))]
initial_state = (1, 1, 0)
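# Quick sanity check (editor's sketch): state_from_point_to_index should be the
# inverse of indexing into the flattened product space built above:
#
#   idx = episode_instance.state_from_point_to_index(states_space_list, initial_state)
#   assert states_space_list[idx] == initial_state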
#1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
bn_model_user_react_time=bn_model_user_react_time,
var_user_react_time_target_action=['user_react_time'],
user_memory_name="memory", user_memory_value=persona_memory,
user_attention_name="attention", user_attention_value=persona_attention,
user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
task_progress_t0_name="game_state_t0", task_progress_t1_name="game_state_t1",
game_attempt_t0_name="attempt_t0", game_attempt_t1_name="attempt_t1",
agent_assistance_name="agent_assistance", agent_policy=agent_policy,
state_space=states_space_list, action_space=action_space_list,
epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)
utils.plot2D_game_performance("/home/pal/Documents/Framework/bn_generative_model/results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
utils.plot2D_assistance("/home/pal/Documents/Framework/bn_generative_model/results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)
# agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.1)
# print(agent_policy)
#
# # SIMULATION PARAMS
# epochs = 20
# scaling_factor = 1
# # initialise the agent
# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
#
# # initialise memory, attention and reactivity variables
# persona_memory = 0
# persona_attention = 0
# persona_reactivity = 1
#
# # define state space struct for the irl algorithm
# episode_instance = Episode()
# # DEFINITION OF THE MDP
# # define state space struct for the irl algorithm
# attempt = [i for i in range(1, Attempt.counter.value + 1)]
# # +1 (3,_,_) absorbing state
# game_state = [i for i in range(0, Game_State.counter.value + 1)]
# user_action = [i for i in range(-1, User_Action.counter.value - 1)]
# state_space = (game_state, attempt, user_action)
# states_space_list = list(itertools.product(*state_space))
# state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
# agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
# agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
# action_space = (agent_feedback_action, agent_assistance_action)
# action_space_list = list(itertools.product(*action_space))
# action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
# terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in
# range(len(user_action))]
# initial_state = (1, 1, 0)
#
# #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
#
#
# game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
# simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
# game_state_bn_name="game_state",
# attempt_bn_name="attempt",
# agent_assistance_bn_name="agent_assistance",
# agent_feedback_bn_name="agent_feedback",
# agent_policy=agent_policy,
# state_space=states_space_list, action_space=action_space_list,
# epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)
#
# utils.plot2D_game_performance("results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
# utils.plot2D_assistance("results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)