diff --git a/main.py b/main.py
index cd9fb4b53a8db71c9aa9c7263dd365da93ebd628..a8063c5c0dbf5a9301c320cf8dc71bb2dda48cd0 100644
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@ import bnlearn
 import numpy as np
 import enum
 import random
+import matplotlib.pyplot as plt
 
 #define constants
 class User_Action(enum.Enum):
@@ -55,12 +56,46 @@ print("user_action -> game_state ", model['model'].cpds[2].values)
 print("robot_feedback -> robot_assistance ", model['model'].cpds[5].values)
 print("user_action -> reactivity, memory ", model['model'].cpds[6].values)
 
+
+def plot2D(save_path, n_episodes, *y):
+    '''
+    Plot the per-episode counts of correct, wrong and timeout actions
+    as a stacked bar chart and save the figure
+    Args:
+        save_path: where the figure is saved
+        n_episodes: number of episodes on the x-axis
+        y: the (correct, wrong, timeout) per-episode lists returned by simulation
+    '''
+    # bar width and bar positions on the x-axis
+    barWidth = 0.35
+    r = np.arange(n_episodes)
+
+    # unpack the per-episode counts
+    x = [i for i in range(n_episodes)]
+    correct = y[0][0]
+    wrong = y[0][1]
+    timeout = y[0][2]
+
+    # plot the stacked bars
+    plt.figure(figsize=(10, 7))
+    plt.bar(r, correct, edgecolor='white', width=barWidth, label="correct")
+    plt.bar(r, wrong, bottom=np.array(correct), edgecolor='white', width=barWidth, label='wrong')
+    plt.bar(r, timeout, bottom=np.array(correct) + np.array(wrong), edgecolor='white',
+            width=barWidth, label='timeout')
+    plt.legend()
+    # custom x-axis
+    plt.xticks(r, x, fontweight='bold')
+    plt.xlabel("episode")
+    plt.ylabel("performance")
+    plt.savefig(save_path)
+    plt.show()
+
 def compute_prob(cpds_table):
+    '''
+    Given a table of counters, generate the corresponding probability distributions
+    Args:
+        cpds_table: the CPD table holding the raw counters
+    Return:
+        the normalised probabilities for the CPD table (the small epsilon
+        avoids a division by zero when a row holds no observations)
+    '''
     for val in range(len(cpds_table)):
         cpds_table[val] = list(map(lambda x: x / (sum(cpds_table[val])+0.00001), cpds_table[val]))
     return cpds_table
 
-def avg_prob(ref_cpds_table, current_cpds_table):
+def average_prob(ref_cpds_table, current_cpds_table):
     '''
     Args:
         ref_cpds_table: table from bnlearn
@@ -74,129 +109,157 @@
         res_cpds_table[elem1][elem2] = (ref_cpds_table[elem1][elem2]+current_cpds_table[elem1][elem2])/2
     return res_cpds_table
 
+def generate_user_action(actions_prob):
+    '''
+    Select one of the actions by sampling according to actions_prob
+    Args:
+        actions_prob: the result of the query to the BN
+    Return:
+        the id of the selected action
+    '''
+    correct_action = actions_prob[0]
+    wrong_action = actions_prob[1]
+    rnd_val = random.random()
+    if rnd_val <= correct_action:
+        action_id = 0
+    elif rnd_val <= correct_action + wrong_action:
+        action_id = 1
+    else:
+        action_id = 2
+    return action_id
 
-def simulation(robot_assistance_vect, robot_feedback_vect):
+
+def simulation(robot_assistance_vect, robot_feedback_vect, memory, attention, reactivity, epochs=50, non_stochastic=False):
     #metrics we need in order to compute the afterwords the belief
     '''
    CPD 0: for each attempt 1 to 4 store the number of correct, wrong and timeout
    '''
-    attempt_counter_per_action = [[0 for j in range(User_Action.counter.value)] for i in range(Attempt.counter.value)]
+    attempt_counter_per_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
    '''
    CPD 2: for each game_state 0 to 2 store the number of correct, wrong and timeout
    '''
-    game_state_counter_per_action = [[0 for i in range(User_Action.counter.value)] for j in range(Game_State.counter.value)]
+    game_state_counter_per_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)]
    '''
    CPD 5: for each robot feedback
store the number of correct, wrong and timeout ''' - robot_feedback_per_action = [[0 for j in range(User_Action.counter.value)] for i in range(Robot_Feedback.counter.value)] + robot_feedback_per_action = [[0 for i in range(Robot_Feedback.counter.value)] for j in range(User_Action.counter.value)] ''' CPD 6: for each robot assistance store the number of pos and neg feedback ''' - robot_assistance_per_feedback = [[0 for j in range(Robot_Feedback.counter.value)] for i in range(Robot_Assistance.counter.value)] - - task_complexity = 5 - task_evolution = 0 - attempt_counter = 0 - game_state_counter = 0 - - iter_counter = 0 - correct_move_counter = 0 - wrong_move_counter = 0 - timeout_counter = 0 - - '''Simulation framework''' - while(task_evolution<=task_complexity): - if task_evolution>=0 and task_evolution<=2: - game_state_counter = 0 - elif task_evolution>=3 and task_evolution<=4: - game_state_counter = 1 - else: - game_state_counter = 2 - #select robot assistance - robot_assistance_action = random.randint(min(robot_assistance_vect), max(robot_assistance_vect)) - #select robot feedback - robot_feedback_action = random.randint(min(robot_feedback_vect), max(robot_feedback_vect)) - - print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action)) - query = bnlearn.inference.fit(model, variables=['user_action'], evidence={'robot_assistance': robot_assistance_action, - 'attempt': attempt_counter, - 'game_state': game_state_counter, - 'robot_feedback': robot_feedback_action, - 'memory': 0, - 'attention': 0, - 'reactivity': 0 - }) - user_move_action = np.argmax(query.values, axis=0) - - robot_assistance_per_feedback[robot_assistance_action][robot_feedback_action] += 1 - attempt_counter_per_action[attempt_counter][user_move_action] += 1 - game_state_counter_per_action[game_state_counter][user_move_action] += 1 - robot_feedback_per_action[robot_feedback_action][user_move_action] += 1 - - iter_counter += 1 - if user_move_action == 0: - attempt_counter += 0 - task_evolution += 1 - correct_move_counter += 1 - elif user_move_action == 1 and attempt_counter<3: - attempt_counter += 1 - wrong_move_counter += 1 - elif user_move_action == 2 and attempt_counter<3: - attempt_counter += 1 - wrong_move_counter += 1 - else: - attempt_counter += 0 - task_evolution += 1 - timeout_counter += 1 - - print("correct {}, wrong {}, timeout {}".format(query.values[0], - query.values[1], - query.values[2])) - - - print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback)) - print("attempt_counter_per_action {}".format(attempt_counter_per_action)) - print("game_state_counter_per_action {}".format(game_state_counter_per_action)) - print("robot_feedback_per_action {}".format(robot_feedback_per_action)) - print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter)) - - return attempt_counter_per_action, game_state_counter_per_action, robot_assistance_per_feedback, robot_feedback_per_action - - -robot_assistance_vect = [0, 1, 2, 3, 4] -robot_feedback_vect = [0, 1] -attempt_counter_per_action, game_state_counter_per_action, \ -robot_assistance_per_feedback, robot_feedback_per_action = simulation(robot_assistance_vect, robot_feedback_vect) - -print("************BEFORE*************") -print(model['model'].cpds[0].values) -print(model['model'].cpds[2].values) -print(model['model'].cpds[5].values) -print(model['model'].cpds[6].values) - 
-prob_over_attempt_per_action = compute_prob(attempt_counter_per_action)
-prob_over_game_per_action = compute_prob(game_state_counter_per_action)
-prob_over_feedback_per_action = compute_prob(robot_feedback_per_action)
-prob_over_assistance_per_feedback = compute_prob(robot_assistance_per_feedback)
-
-print("************DURING*************")
-print(prob_over_attempt_per_action)
-print(prob_over_game_per_action)
-print(prob_over_feedback_per_action)
-print(prob_over_assistance_per_feedback)
-
-res_prob_over_attempt_per_action = avg_prob(model['model'].cpds[0].values,
-                                            prob_over_attempt_per_action)
-res_prob_over_game_per_action = avg_prob(model['model'].cpds[2].values,
-                                         prob_over_game_per_action)
-res_prob_over_feedback_per_action = avg_prob(model['model'].cpds[6].values,
-                                             prob_over_feedback_per_action)
-res_prob_over_assistance_per_feedback = avg_prob(model['model'].cpds[5].values,
-                                                 prob_over_assistance_per_feedback)
-
-
-print("************AFTER*************")
-print(res_prob_over_attempt_per_action)
-print(res_prob_over_game_per_action)
-print(res_prob_over_feedback_per_action)
-print(res_prob_over_assistance_per_feedback)
+    robot_assistance_per_feedback = [[0 for i in range(Robot_Assistance.counter.value)] for j in range(Robot_Feedback.counter.value)]
+
+
+    #output variables:
+    n_correct_per_episode = [0]*epochs
+    n_wrong_per_episode = [0]*epochs
+    n_timeout_per_episode = [0]*epochs
+
+
+    for e in range(epochs):
+        #simulation framework
+        task_complexity = 5
+        task_evolution = 0
+
+        attempt_counter = 0
+        game_state_counter = 0
+        iter_counter = 0
+        correct_move_counter = 0
+        wrong_move_counter = 0
+        timeout_counter = 0
+        while task_evolution <= task_complexity:
+            if 0 <= task_evolution <= 2:
+                game_state_counter = 0
+            elif 3 <= task_evolution <= 4:
+                game_state_counter = 1
+            else:
+                game_state_counter = 2
+            #select robot assistance
+            robot_assistance_action = random.randint(min(robot_assistance_vect), max(robot_assistance_vect))
+            #select robot feedback
+            robot_feedback_action = random.randint(min(robot_feedback_vect), max(robot_feedback_vect))
+
+            print("robot_assistance {}, attempt {}, game {}, robot_feedback {}".format(robot_assistance_action, attempt_counter, game_state_counter, robot_feedback_action))
+            query = bnlearn.inference.fit(model, variables=['user_action'], evidence={'robot_assistance': robot_assistance_action,
+                                                                                      'attempt': attempt_counter,
+                                                                                      'game_state': game_state_counter,
+                                                                                      'robot_feedback': robot_feedback_action,
+                                                                                      'memory': memory,
+                                                                                      'attention': attention,
+                                                                                      'reactivity': reactivity
+                                                                                      })
+            #sample one of the three possible actions from the BN posterior
+            #(or pick the most likely one when non_stochastic is set)
+            if non_stochastic:
+                user_action = np.argmax(query.values)
+            else:
+                user_action = generate_user_action(query.values)
+
+            robot_assistance_per_feedback[robot_feedback_action][robot_assistance_action] += 1
+            attempt_counter_per_action[user_action][attempt_counter] += 1
+            game_state_counter_per_action[user_action][game_state_counter] += 1
+            robot_feedback_per_action[user_action][robot_feedback_action] += 1
+
+            iter_counter += 1
+            if user_action == 0:
+                attempt_counter = 0
+                task_evolution += 1
+                correct_move_counter += 1
+            elif user_action == 1 and attempt_counter < 3:
+                attempt_counter += 1
+                wrong_move_counter += 1
+            #a timeout before the last attempt also triggers a retry
+            elif user_action == 2 and attempt_counter < 3:
+                attempt_counter += 1
+                timeout_counter += 1
+            else:
+                #no attempts left: the task moves on and the move is logged as a timeout
+                attempt_counter = 0
+                task_evolution += 1
+                timeout_counter += 1
+
+            print("task_evolution {}, iter_counter {}, timeout_counter {}".format(task_evolution, iter_counter, timeout_counter))
+
+
print("robot_assistance_per_feedback {}".format(robot_assistance_per_feedback)) + print("attempt_counter_per_action {}".format(attempt_counter_per_action)) + print("game_state_counter_per_action {}".format(game_state_counter_per_action)) + print("robot_feedback_per_action {}".format(robot_feedback_per_action)) + print("iter {}, correct {}, wrong {}, timeout {}".format(iter_counter, correct_move_counter, wrong_move_counter, timeout_counter)) + + print("correct_move {}, wrong_move {}, timeout {}".format(correct_move_counter, wrong_move_counter, timeout_counter)) + #transform counters into probabilities + prob_over_attempt_per_action = compute_prob(attempt_counter_per_action) + prob_over_game_per_action = compute_prob(game_state_counter_per_action) + prob_over_feedback_per_action = compute_prob(robot_feedback_per_action) + prob_over_assistance_per_feedback = compute_prob(robot_assistance_per_feedback) + #average the probabilities obtained with the cpdf tables + updated_prob_over_attempt_per_action = average_prob(np.transpose(model['model'].cpds[0].values), + prob_over_attempt_per_action) + updated_prob_over_game_per_action = average_prob(np.transpose(model['model'].cpds[2].values), + prob_over_game_per_action) + updated_prob_over_feedback_per_action = average_prob(np.transpose(model['model'].cpds[6].values), + prob_over_feedback_per_action) + updated_prob_over_assistance_per_feedback = average_prob(np.transpose(model['model'].cpds[5].values), + prob_over_assistance_per_feedback) + + model['model'].cpds[0].values = np.transpose(updated_prob_over_attempt_per_action) + model['model'].cpds[2].values = np.transpose(updated_prob_over_game_per_action) + model['model'].cpds[6].values = np.transpose(updated_prob_over_feedback_per_action) + model['model'].cpds[5].values = np.transpose(updated_prob_over_assistance_per_feedback) + + n_correct_per_episode[e] = correct_move_counter + n_wrong_per_episode[e] = wrong_move_counter + n_timeout_per_episode[e] = timeout_counter + + return n_correct_per_episode, n_wrong_per_episode, n_timeout_per_episode + +robot_assistance = [i for i in range(Robot_Assistance.counter.value)] +robot_feedback = [i for i in range(Robot_Feedback.counter.value)] +epochs = 10 +memory = 0; attention = 0; reactivity = 1; +results = simulation(robot_assistance, robot_feedback, memory, attention, reactivity, 10) +plot_path = "epoch_"+str(epochs)+"_memory_"+str(memory)+"_attention_"+str(attention)+"_reactivity_"+str(reactivity)+".jpg" +plot2D(plot_path, epochs, results) + +#TODO +''' +- define a function that takes the state as input and return the user action and its reaction time +- evalute if the persona is wrong how long does it take for the simulator to detect that +- check percentages +''' \ No newline at end of file