diff --git a/main.py b/main.py
index f81f9f0f21b7c8ce85c5e8ecea76f8c0d5d046a0..764f0d13ae2d5d96eb3b355d2f8efcdd096424ea 100644
--- a/main.py
+++ b/main.py
@@ -29,12 +29,12 @@ import argparse
 from episode import Episode
 from cognitive_game_env import CognitiveGame
 from environment import Environment
-import maxent as M
-import plot as P
-import solver as S
-import optimizer as O
-import img_utils as I
-import value_iteration as vi
+import src.maxent as M
+import src.plot as P
+import src.solver as S
+import src.optimizer as O
+import src.img_utils as I
+import src.value_iteration as vi
 import simulation as Sim
 import bn_functions as bn_functions
 
@@ -237,8 +237,9 @@ def compute_agent_policy(training_set_filename, state_space, action_space, episo
             action_index = action_point
             agent_policy_counter[state_index][action_index] += 1
             row_t_0 = row['user_action']
 
+    min_val = np.finfo(float).eps
     for s in range(len(state_space)):
-        agent_policy_prob[s] = list(map(lambda x:x/(sum(agent_policy_counter[s])+0.001), agent_policy_counter[s]))
+        agent_policy_prob[s] = list(map(lambda x:x/(sum(agent_policy_counter[s])+min_val), agent_policy_counter[s]))
 
     return agent_policy_prob
 
@@ -248,8 +249,8 @@ def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--bn_model_folder', '--bn_model_folder', type=str,help="folder in which all the user and the agent models are stored ",
                         default="/home/pal/Documents/Framework/GenerativeMutualShapingRL/BN_Models")
-    parser.add_argument('--bn_agent_model_filename', '--bn_agent_model', type=str,help="file path of the agent bn model",
-                        default="/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_test.bif")
+    # parser.add_argument('--bn_agent_model_filename', '--bn_agent_model', type=str,help="file path of the agent bn model",
+    #                     default="/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_test.bif")
     parser.add_argument('--epoch', '--epoch', type=int,help="number of epochs in the simulation", default=200)
     parser.add_argument('--run', '--run', type=int, help="number of runs in the simulation", default=50)
     parser.add_argument('--output_policy_filename', '--p', type=str,help="output policy from the simulation",
@@ -259,9 +260,9 @@ def main():
     parser.add_argument('--output_value_filename', '--v', type=str, help="output value function from the simulation",
                         default="value_function.pkl")
     parser.add_argument('--therapist_patient_interaction_folder', '--tpi_path', type=str,help="therapist-patient interaction folder",
-                        default="/home/pal/carf_ws/src/carf/caregiver_in_the_loop/log")
+                        default="/home/pal/Documents/Framework/GenerativeMutualShapingRL/therapist-patient-interaction")
     parser.add_argument('--agent_patient_interaction_folder', '--api_path', type=str,help="agent-patient interaction folder",
-                        default="/home/pal/carf_ws/src/carf/robot_in_the_loop/log")
+                        default="/home/pal/carf_ws/src/carf/robot-patient-interaction")
     parser.add_argument('--user_id', '--id', type=int,help="user id", required=True)
     parser.add_argument('--with_feedback', '--f', type=eval, choices=[True, False], help="offering sociable", required=True)
     parser.add_argument('--session', '--s', type=int, help="session of the agent-human interaction", required=True)
@@ -278,9 +279,9 @@ def main():
     # initialise the agent
     bn_user_model_filename = args.bn_model_folder +"/"+str(user_id)+"/"+str(with_feedback)+"/user_model.bif"
     bn_agent_model_filename = args.bn_model_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/agent_model.bif"
-    learned_policy_filename = args.output_policy_filename
-    learned_reward_filename = args.output_reward_filename
-    learned_value_f_filename = args.output_value_filename
+    learned_policy_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_policy_filename
+    learned_reward_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_reward_filename
+    learned_value_f_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_value_filename
     therapist_patient_interaction_folder = args.therapist_patient_interaction_folder
     agent_patient_interaction_folder = args.agent_patient_interaction_folder
     scaling_factor = 1
@@ -312,6 +313,7 @@ def main():
 
     #output folders
     output_folder_data_path = os.getcwd() + "/results/" + str(user_id) +"/"+str(with_feedback)+"/"+str(session)
+
     if not os.path.exists(os.getcwd() + "/results"+"/"+str(user_id)):
         os.mkdir(os.getcwd() + "/results"+"/"+str(user_id))
     if not os.path.exists(os.getcwd() + "/results"+"/"+str(user_id) +"/"+str(with_feedback)):
@@ -319,13 +321,19 @@ def main():
     if not os.path.exists(output_folder_data_path):
         os.mkdir(output_folder_data_path)
 
+    if not os.path.exists(args.agent_patient_interaction_folder+"/"+str(user_id)):
+        os.mkdir(args.agent_patient_interaction_folder+"/"+str(user_id))
+    if not os.path.exists(args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)):
+        os.mkdir(args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback))
+    if not os.path.exists(args.agent_patient_interaction_folder + "/" + str(user_id) + "/" + str(with_feedback) + "/" + str(session + 1)):
+        os.mkdir(args.agent_patient_interaction_folder + "/" + str(user_id) + "/" + str(with_feedback) + "/" + str(session + 1))
 
-#1. CREATE INITIAL USER COGNITIVE MODEL FROM DATA
+    #1. CREATE INITIAL USER COGNITIVE MODEL FROM DATA
     df_from_data, episode_length = merge_user_log(tpi_folder_pathname=therapist_patient_interaction_folder,
                                                   file_output=output_folder_data_path+"/summary_bn_variables_from_data.csv",
                                                   user_id=user_id,
                                                   with_feedback=with_feedback,
-                                                  rpi_folder_pathname=None,#agent_patient_interaction_folder,
+                                                  rpi_folder_pathname=agent_patient_interaction_folder,
                                                   column_to_remove=None)
 
     #2. CREATE POLICY FROM DATA
@@ -397,11 +405,11 @@ def main():
 
     maxent_V, maxent_P = vi.value_iteration(cognitive_game_world.p_transition, maxent_R, gamma=0.99, error=1e-2, deterministic=False)
     print(maxent_P)
-    with open(output_folder_data_path+"/"+learned_policy_filename, 'wb') as f:
+    with open(learned_policy_filename, 'wb') as f:
         pickle.dump(maxent_P, f, protocol=2)
-    with open(output_folder_data_path+"/"+learned_reward_filename, 'wb') as f:
+    with open(learned_reward_filename, 'wb') as f:
         pickle.dump(maxent_R, f, protocol=2)
-    with open(output_folder_data_path+"/"+learned_value_f_filename, 'wb') as f:
+    with open(learned_value_f_filename, 'wb') as f:
         pickle.dump(maxent_V, f, protocol=2)
 
     # if n>0:
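
Note on the np.finfo(float).eps change in compute_agent_policy: replacing the 0.001 smoothing constant with machine epsilon keeps the action probabilities of a visited state summing to (almost exactly) 1, but a state with zero counts still yields an all-zero row rather than a proper distribution. A minimal sketch of that behaviour; the arrays below are illustrative, not taken from the code:

    import numpy as np

    eps = np.finfo(float).eps                # ~2.22e-16, the value the diff introduces

    visited = np.array([3.0, 1.0, 0.0])      # action counts for a state that was visited
    unvisited = np.zeros(3)                  # action counts for a state never visited

    print(visited / (visited.sum() + eps))   # ~[0.75, 0.25, 0.0], sums to ~1
    print(unvisited / (unvisited.sum() + eps))  # [0.0, 0.0, 0.0], not a distribution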
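
Note on the three new exists/mkdir pairs for the agent-patient interaction folder: they could be collapsed if desired. A minimal sketch, assuming the same user_id / with_feedback / session+1 layout used in the diff (the helper name is illustrative, not part of the change): os.makedirs with exist_ok=True creates any missing intermediate directories in one call and avoids the check-then-create race.

    import os

    def ensure_session_folder(base_folder, user_id, with_feedback, session):
        # Build <base>/<user_id>/<with_feedback>/<session + 1>, creating any
        # missing parents; no error is raised if the folder already exists.
        session_folder = os.path.join(base_folder, str(user_id), str(with_feedback), str(session + 1))
        os.makedirs(session_folder, exist_ok=True)
        return session_folder

    # e.g. learned_policy_filename = os.path.join(
    #          ensure_session_folder(args.agent_patient_interaction_folder, user_id, with_feedback, session),
    #          args.output_policy_filename)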