diff --git a/main.py b/main.py
index 764f0d13ae2d5d96eb3b355d2f8efcdd096424ea..f440304fa57080249c1cb062fba46c8a374642f2 100644
--- a/main.py
+++ b/main.py
@@ -282,6 +282,7 @@ def main():
     learned_policy_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_policy_filename
     learned_reward_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_reward_filename
     learned_value_f_filename = args.agent_patient_interaction_folder+"/"+str(user_id)+"/"+str(with_feedback)+"/"+str(session+1)+"/"+args.output_value_filename
+    therapist_patient_interaction_folder = args.therapist_patient_interaction_folder
     agent_patient_interaction_folder = args.agent_patient_interaction_folder

     scaling_factor = 1
@@ -308,7 +309,7 @@ def main():
     action_space_list = action_space
     terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in range(len(user_action))]
-    initial_state = (1, 1, 0)
+    initial_state = (1, 1, 1)

     #output folders
@@ -350,7 +351,6 @@ def main():

     bn_model_agent_behaviour_from_data_and_therapist = None

-
     if os.path.exists(output_folder_data_path):
         bn_model_user_action_from_data_and_therapist = Sim.build_model_from_data(
             csv_filename=output_folder_data_path + "/summary_bn_variables_from_data.csv", dag_filename=bn_user_model_filename,
@@ -385,6 +385,14 @@ def main():
     utils.plot2D_game_performance(plot_game_performance_path, epochs, scaling_factor, game_performance_per_episode)
     utils.plot2D_assistance(plot_agent_assistance_path, epochs, scaling_factor, agent_assistance_per_episode)

+    sim_patient_performance_filename = "sim_patient_performance.pkl"
+    sim_agent_assistance_filename = "sim_agent_assistance.pkl"
+
+    with open(output_folder_data_path+"/"+sim_agent_assistance_filename, 'wb') as f:
+        pickle.dump(agent_assistance_per_episode, f, protocol=2)
+    with open(output_folder_data_path + "/" + sim_patient_performance_filename, 'wb') as f:
+        pickle.dump(game_performance_per_episode, f, protocol=2)
+
     # add episodes from different policies
     # for e in range(len(episodes)):
     #     episodes_from_different_policies.append(Episode(episodes[e]._t))
@@ -402,7 +410,7 @@ def main():

     # R(s) and pi(s) generated from the first sim
     maxent_R = maxent(world=cognitive_game_world, terminal=terminals, trajectories=episodes)
-    maxent_V, maxent_P = vi.value_iteration(cognitive_game_world.p_transition, maxent_R, gamma=0.99, error=1e-2,
+    maxent_V, maxent_P = vi.value_iteration(cognitive_game_world.p_transition, maxent_R, gamma=0.9, error=1e-4,
                                             deterministic=False)
     print(maxent_P)
     with open(learned_policy_filename, 'wb') as f:
@@ -425,18 +433,22 @@ def main():
     #         else:
     #             maxent_P_real_sim[state_index][action_index] = 0.02
     #     maxent_P_real_sim[state_index] = list(map(lambda x:x/sum(maxent_P_real_sim[state_index]), maxent_P_real_sim[state_index]))
-
+    plt.clf()
     sns.heatmap(np.reshape(maxent_R, (4, 12)), cmap="Spectral", annot=True, cbar=False)
     plt.savefig(output_folder_data_path + "/maxent_R.jpg")
-    plt.show()
+    plt.clf()
     sns.heatmap(np.reshape(maxent_V, (4, 12)), cmap="Spectral", annot=True, cbar=False)
     plt.savefig(output_folder_data_path + "/maxent_V.jpg")
-    plt.show()
+    plt.clf()
     maxent_P_det = list(map(lambda x: np.argmax(x), maxent_P))
     sns.heatmap(np.reshape(maxent_P_det, (4, 12)), cmap="Spectral", annot=True, cbar=False)
     plt.savefig(output_folder_data_path + "/maxent_P.jpg")
-    plt.show()
+    plt.clf()
+    f = open(output_folder_data_path+"/"+sim_agent_assistance_filename,'rb')
+    mydic = pickle.load(f)
+    f.close()
+    print(mydic)


 if __name__ == '__main__':