diff --git a/.gitignore b/.gitignore
index af56f610571e02b53c6a162316a0555ecf63a845..601677888b8104461ab7944b11f85ec5cdd02848 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
 .DS_Store
 .idea/
+__pycache__/
+old_models/
+results/
+
diff --git a/bn_agent_model/agent_assistive_model.bif b/bn_agent_model/agent_assistive_model.bif
deleted file mode 100644
index e3b6291715edb2c0e4c65f13c9e88a310e37d9a7..0000000000000000000000000000000000000000
--- a/bn_agent_model/agent_assistive_model.bif
+++ /dev/null
@@ -1,49 +0,0 @@
-network agent_assistive_model {
-}
-
-%VARIABLES DEFINITION
-
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5};
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( agent_assistance ) {
-  table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-
-
-#Conditional Probabilities
-
-
-probability (agent_assistance | game_state, attempt) {
-
-(beg, att_1) 0.5, 0.3, 0.2, 0.0, 0.0, 0.0;
-(beg, att_2) 0.2, 0.3, 0.2, 0.1, 0.1, 0.1;
-(beg, att_3) 0.3, 0.2, 0.2, 0.1, 0.1, 0.1;
-(beg, att_4) 0.3, 0.3, 0.1, 0.1, 0.1, 0.1;
-
-(mid, att_1) 0.5, 0.3, 0.2, 0.0, 0.0, 0.0;
-(mid, att_2) 0.2, 0.3, 0.2, 0.1, 0.1, 0.1;
-(mid, att_3) 0.3, 0.2, 0.2, 0.1, 0.1, 0.1;
-(mid, att_4) 0.3, 0.3, 0.1, 0.1, 0.1, 0.1;
-
-(end, att_1) 0.5, 0.3, 0.2, 0.0, 0.0, 0.0;
-(end, att_2) 0.2, 0.3, 0.2, 0.1, 0.1, 0.1;
-(end, att_3) 0.3, 0.2, 0.2, 0.1, 0.1, 0.1;
-(end, att_4) 0.3, 0.3, 0.1, 0.1, 0.1, 0.1;
-
-}
diff --git a/bn_agent_model/agent_feedback_model.bif b/bn_agent_model/agent_feedback_model.bif
deleted file mode 100644
index 73213773831f55f5f0f8868e40ac8d137806d0e7..0000000000000000000000000000000000000000
--- a/bn_agent_model/agent_feedback_model.bif
+++ /dev/null
@@ -1,77 +0,0 @@
-network agent_feedback_model {
-}
-
-%VARIABLES DEFINITION
-variable reactivity {
-  type discrete [3] {slow, medium, fast};
-}
-variable memory {
-  type discrete[3] {low, medium, high};
-}
-variable agent_feedback {
-  type discrete [ 2 ] { no, yes };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-
-variable user_capability {
-  type discrete [ 3 ] { very_mild, mild, severe };
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( agent_feedback ) {
-  table 0.5, 0.5;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_capability ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 4 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( reactivity ) {
-  table 0.34, 0.33, 0.33;
-}
-#CPDS 3 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( memory ) {
-  table 0.33, 0.33, 0.34;
-}
-
-#Conditional Probabilities
-
-#CPDS X (very_mild, mild, severe)
-probability (user_capability | memory, reactivity) {
-(low, slow)  0.1, 0.2, 0.7;
-(medium, slow)  0.2, 0.6, 0.2;
-(high, slow)  0.7, 0.2, 0.1;
-
-(low, medium)  0.1, 0.3, 0.6;
-(medium, medium)  0.3, 0.6, 0.1;
-(high, medium)  0.1, 0.4, 0.5;
-
-(low, fast)  0.3, 0.2, 0.5;
-(medium, fast)  0.7, 0.2, 0.1;
-(high, fast)  0.8, 0.1, 0.1;
-}
-
-probability (agent_feedback | user_capability) {
-(very_mild) 0.2, 0.8;
-(mild)  0.5, 0.5;
-(severe) 0.8, 0.2;
-}
-
-probability (game_state | agent_feedback) {
-(no) 0.2, 0.4, 0.4;
-(yes) 0.4, 0.4, 0.2;
-}
-probability (attempt | agent_feedback)  {
-(no) 0.1,0.15, 0.25, 0.5;
-(yes) 0.4, 0.3, 0.2, 0.1;
-}
diff --git a/bn_agent_model/utilities.py b/bn_agent_model/utilities.py
deleted file mode 100644
index 56fcb0520c10548941d0c8b3867d639f2d6f152f..0000000000000000000000000000000000000000
--- a/bn_agent_model/utilities.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import random
-import bn_functions
-
-
-def get_dynamic_variables(evidence_variables_name, evidence_variables_value):
-    '''
-    This func returns a dict of the form name:value and it defines the "evidences"
-     that will be used to query the BN
-    Args:
-        :evidence_variables_name: the name of the variable
-        :evidence_variables_value: the value of the given variable
-    Return:
-         a dict of the form name:value
-    '''
-    if len(evidence_variables_name)!=len(evidence_variables_value):
-        assert "The variables name numbers is different from the variables value"
-    else:
-        dynamic_variables = {evidence_variables_name[i]:evidence_variables_value[i] for i in range(len(evidence_variables_name))}
-        return dynamic_variables
-
-def infer_prob(user_bn_model, variable_to_infer, evidence_vars_name, evidence_vars_value):
-    '''
-    Given the model, the variable to infer, and the evidences returns the distribution prob for that variable
-    Args:
-        user_bn_model:
-        variable_to_infer:
-        evidence_vars_name:
-        evidence_vars_value:
-    Returns:
-        the probability distribution for varibale_to_infer
-    '''
-    evidence = get_dynamic_variables(evidence_vars_name, evidence_vars_value)
-    dist_prob = bn_functions.get_inference_from_state(user_bn_model,
-                                                              variables=variable_to_infer,
-                                                              evidence=evidence)
-    return dist_prob
-
-def get_stochastic_action(actions_distr_prob):
-    '''
-    Select one of the actions according to the actions_prob
-    Args:
-        actions_prob: the probability of the Persona based on the BN to make a correct move, wrong move, timeout
-    Return:
-        the id of the selected action
-    N.B:
-    '''
-    def compute_distance(values, target):
-        '''
-        Return the index of the most closest value in values to target
-        Args:
-            target: the target value
-            values: a list of values from 0 to 1
-        Return:
-             return the index of the value closer to target
-        '''
-        min_dist = 1
-        index = 0
-        for i in range(len(values)):
-            if abs(target-values[i])<min_dist:
-                min_dist = abs(target-values[i])
-                index = i
-        return index
-
-    actions_distr_prob_scaled = [0]*len(actions_distr_prob)
-    accum = 0
-    for i in range(len(actions_distr_prob)):
-        accum += actions_distr_prob[i]
-        actions_distr_prob_scaled[i] = accum
-
-    rnd_val = random.uniform(0, 1)
-    action_id = compute_distance(actions_distr_prob_scaled, rnd_val)
-
-    return action_id
-
-
-actions_prob_distr =  [0.32, 0.105, 0.035, 0.035, 0.005, 0.36,  0.065, 0.035, 0.035, 0.005]
-action_index = get_stochastic_action(actions_prob_distr)
-print(action_index)
\ No newline at end of file
diff --git a/bn_functions.py b/bn_functions.py
index a8502a0c8b41845fa424db5ca542c28660e88cea..c5288617ad658efdfb9d1396dfbe760fc3a8101b 100644
--- a/bn_functions.py
+++ b/bn_functions.py
@@ -178,7 +178,7 @@ def get_stochastic_action(actions_distr_prob):
         accum += actions_distr_prob[i]
         actions_distr_prob_scaled[i] = accum
 
-    rnd_val = random.uniform(0, 1)
+    rnd_val = random.random()
     action_id = compute_distance(actions_distr_prob_scaled, rnd_val)
 
     return action_id
diff --git a/bn_models/agent_model_id_1_True.bif b/bn_models/agent_model_id_1_True.bif
new file mode 100644
index 0000000000000000000000000000000000000000..ebc8cee75c6472cfe11647188fe3c0aea77d50f2
--- /dev/null
+++ b/bn_models/agent_model_id_1_True.bif
@@ -0,0 +1,40 @@
+network agent_assistive_model {
+}
+
+%VARIABLES DEFINITION
+
+variable agent_assistance {
+  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5};
+}
+variable attempt {
+  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
+}
+variable game_state {
+  type discrete [ 3 ] { beg, mid, end };
+}
+
+
+%INDIVIDUAL PROBABILITIES DEFINITION
+probability ( agent_assistance ) {
+  table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
+}
+probability ( game_state ) {
+  table 0.34, 0.33, 0.33;
+}
+probability ( attempt ) {
+  table 0.25, 0.25, 0.25, 0.25;
+}
+probability (agent_assistance | game_state, attempt) { 
+(beg, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
+(beg, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
+(beg, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
+(beg, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
+(mid, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
+(mid, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
+(mid, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
+(mid, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
+(end, att_1)	0.2857142857142857, 0.35714285714285715, 0.14285714285714285, 0.07142857142857142, 0.07142857142857142, 0.07142857142857142; 
+(end, att_2)	0.07142857142857142, 0.21428571428571427, 0.35714285714285715, 0.21428571428571427, 0.07142857142857142, 0.07142857142857142; 
+(end, att_3)	0.06666666666666667, 0.13333333333333333, 0.13333333333333333, 0.3333333333333333, 0.26666666666666666, 0.06666666666666667; 
+(end, att_4)	0.058823529411764705, 0.058823529411764705, 0.11764705882352941, 0.29411764705882354, 0.23529411764705882, 0.23529411764705882; 
+}
\ No newline at end of file
diff --git a/bn_models/agent_model_template.bif b/bn_models/agent_model_template.bif
new file mode 100644
index 0000000000000000000000000000000000000000..1ca11f0133460feef8789a72c56f5764d688d8fa
--- /dev/null
+++ b/bn_models/agent_model_template.bif
@@ -0,0 +1,26 @@
+network agent_assistive_model {
+}
+
+%VARIABLES DEFINITION
+
+variable agent_assistance {
+  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5};
+}
+variable attempt {
+  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
+}
+variable game_state {
+  type discrete [ 3 ] { beg, mid, end };
+}
+
+
+%INDIVIDUAL PROBABILITIES DEFINITION
+probability ( agent_assistance ) {
+  table 0.17, 0.17, 0.17, 0.17, 0.16, 0.16;
+}
+probability ( game_state ) {
+  table 0.34, 0.33, 0.33;
+}
+probability ( attempt ) {
+  table 0.25, 0.25, 0.25, 0.25;
+}
diff --git a/bn_persona_model/persona_model_test.bif b/bn_models/persona_model_template.bif
similarity index 58%
rename from bn_persona_model/persona_model_test.bif
rename to bn_models/persona_model_template.bif
index 7db034eb641388d111bfe38eae5ad90bdb4d2a6f..d8227eaa35963ecfe91423854c19ea8ac6fdcafb 100644
--- a/bn_persona_model/persona_model_test.bif
+++ b/bn_models/persona_model_template.bif
@@ -31,21 +31,3 @@ probability ( attempt ) {
 probability ( user_action ) {
   table 0.33, 0.33, 0.34;
 }
-probability (game_state | user_action)  {
-   (correct) 0.30, 0.30, 0.4;
-   (wrong) 0.35, 0.35, 0.3;
-   (timeout) 0.33, 0.33, 0.34;
-}
-probability (attempt | user_action)  {
-   (correct) 0.25, 0.25, 0.25, 0.25;
-   (wrong) 0.4, 0.3, 0.2, 0.1;
-   (timeout) 0.4, 0.3, 0.2, 0.1;
-}
-probability (user_action | agent_assistance) {
-(lev_0) 0.05    0.85	0.1;
-(lev_1)	0.1     0.8		0.1;
-(lev_2)	0.2		0.7		0.1;
-(lev_3)	0.33	0.57	0.1;
-(lev_4)	0.9		0.1		0.0;
-(lev_5)	1.0		0.0		0.0;
-}
diff --git a/bn_models/user_model_id_1_True.bif b/bn_models/user_model_id_1_True.bif
new file mode 100644
index 0000000000000000000000000000000000000000..3f1ae72adce9568df8bf01f4df5d1f28b9b1571d
--- /dev/null
+++ b/bn_models/user_model_id_1_True.bif
@@ -0,0 +1,51 @@
+network persona_model {
+}
+
+%VARIABLES DEFINITION
+
+variable agent_assistance {
+  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
+}
+variable attempt {
+  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
+}
+variable game_state {
+  type discrete [ 3 ] { beg, mid, end };
+}
+
+variable user_action {
+  type discrete [ 3 ] { correct, wrong, timeout };
+}
+
+%INDIVIDUAL PROBABILITIES DEFINITION
+
+probability ( agent_assistance ) {
+  table 0.17, 0.16, 0.16, 0.17, 0.17, 0.17;
+}
+probability ( game_state) {
+  table 0.34, 0.33, 0.33;
+}
+probability ( attempt ) {
+  table 0.25, 0.25, 0.25, 0.25;
+}
+probability ( user_action ) {
+  table 0.33, 0.33, 0.34;
+}
+probability (game_state | user_action)  { 
+(correct)	0.2222222222222222,0.3333333333333333,0.4444444444444444; 
+(wrong)	0.5,0.3333333333333333,0.16666666666666666; 
+(timeout)	0.5,0.3333333333333333,0.16666666666666666; 
+}
+probability (attempt | user_action)  { 
+(correct)	0.15384615384615385,0.23076923076923078,0.3076923076923077,0.3076923076923077; 
+(wrong)	0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; 
+(timeout)	0.42857142857142855,0.2857142857142857,0.14285714285714285,0.14285714285714285; 
+}
+probability (user_action | agent_assistance) { 
+(lev_0)	0.4,0.3,0.3; 
+(lev_1)	0.6,0.2,0.2; 
+(lev_2)	0.6,0.2,0.2; 
+(lev_3)	0.8,0.1,0.1; 
+(lev_4)	1.0,0.0,0.0; 
+(lev_5)	1.0,0.0,0.0; 
+}
\ No newline at end of file
diff --git a/bn_other_user_model/user_model.bif b/bn_other_user_model/user_model.bif
deleted file mode 100644
index 79daa7f3893c814a65ca7b186ade276a025ba4c1..0000000000000000000000000000000000000000
--- a/bn_other_user_model/user_model.bif
+++ /dev/null
@@ -1,103 +0,0 @@
-network persona_model_4 {
-}
-
-%VARIABLES DEFINITION
-variable reactivity {
-  type discrete [3] {slow, medium, fast};
-}
-variable memory {
-  type discrete[3] {low, medium, high};
-}
-variable attention {
-  type discrete[3] {low, medium, high};
-}
-variable robot_assistance {
-  type discrete [ 5 ] { lev_0, lev_1, lev_2, lev_3, lev_4 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-variable robot_feedback {
-  type discrete [ 2 ] { yes, no };
-}
-variable user_action {
-  type discrete [ 3 ] { correct, wrong, timeout };
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( robot_assistance ) {
-  table 0.2, 0.2, 0.2, 0.2, 0.2;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_action ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 4 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( reactivity ) {
-  table 0.34, 0.33, 0.33;
-}
-#CPDS 3 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( memory ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 1 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( attention ) {
-  table 0.33, 0.33, 0.34;
-}
-probability ( robot_feedback ) {
-  table 0.5, 0.5;
-}
-probability ( reactivity | attention ) {
-  (low) 0.5, 0.4, 0.1;
-  (medium)  0.3, 0.5, 0.2;
-  (high)  0.1, 0.2, 0.7;
-}
-#CPDS 7
-probability (user_action | memory, reactivity) {
-(low, slow)  0.2, 0.5, 0.3;
-(low, medium) 0.3, 0.5, 0.2;
-(low, fast) 0.4, 0.5, 0.1;
-(medium, slow) 0.5, 0.3, 0.2;
-(medium, medium) 0.55, 0.35, 0.1;
-(medium, fast) 0.6, 0.4, 0.0;
-(high, slow)  0.5, 0.4, 0.1;
-(high, medium) 0.6, 0.3, 0.1;
-(high, fast) 0.8, 0.2, 0.0;
-}
-#CPDS 5
-probability (robot_feedback | user_action) {
-  (correct) 0.5, 0.5;
-  (wrong) 0.5, 0.5;
-  (timeout) 0.5, 0.5;
-}
-#CPDS 6
-probability (robot_assistance | user_action) {
-  (correct) 0.05 0.1 0.15 0.3 0.4;
-  (wrong) 0.4 0.2 0.2 0.1 0.1;
-  (timeout) 0.4 0.2 0.2 0.1 0.1;
-}
-#CPDS 2
-probability (game_state | user_action)  {
-   (correct) 0.2, 0.4, 0.4;
-   (wrong) 0.4, 0.4, 0.2;
-   (timeout) 0.6, 0.3, 0.1;
-}
-#CPDS 0
-probability (attempt | user_action)  {
-   (correct) 0.1, 0.2, 0.3, 0.4;
-   (wrong) 0.7, 0.2, 0.1, 0.0;
-   (timeout) 0.6, 0.3, 0.1, 0.0;
-}
-#CPDS 5
-probability (robot_assistance | robot_feedback) {
-  (yes) 0.5 0.3 0.1 0.1 0.0;
-  (no) 0.0 0.1 0.1 0.3 0.5;
-}
\ No newline at end of file
diff --git a/bn_persona_model/__pycache__/utilities.cpython-36.pyc b/bn_persona_model/__pycache__/utilities.cpython-36.pyc
deleted file mode 100644
index 9ae85612e5c19ba5c07e4ca739b2c68190e8318e..0000000000000000000000000000000000000000
Binary files a/bn_persona_model/__pycache__/utilities.cpython-36.pyc and /dev/null differ
diff --git a/bn_persona_model/persona_model.bif b/bn_persona_model/persona_model.bif
deleted file mode 100644
index ef8062593b35916b8470d1079aefc607ab6c6d08..0000000000000000000000000000000000000000
--- a/bn_persona_model/persona_model.bif
+++ /dev/null
@@ -1,103 +0,0 @@
-network persona_model {
-}
-
-%VARIABLES DEFINITION
-variable reactivity {
-  type discrete [3] {slow, medium, fast};
-}
-variable memory {
-  type discrete[3] {low, medium, high};
-}
-variable attention {
-  type discrete[3] {low, medium, high};
-}
-variable robot_assistance {
-  type discrete [ 5 ] { lev_0, lev_1, lev_2, lev_3, lev_4 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-variable robot_feedback {
-  type discrete [ 2 ] { yes, no };
-}
-variable user_action {
-  type discrete [ 3 ] { correct, wrong, timeout };
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( robot_assistance ) {
-  table 0.2, 0.2, 0.2, 0.2, 0.2;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_action ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 4 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( reactivity ) {
-  table 0.34, 0.33, 0.33;
-}
-#CPDS 3 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( memory ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 1 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( attention ) {
-  table 0.33, 0.33, 0.34;
-}
-probability ( robot_feedback ) {
-  table 0.5, 0.5;
-}
-probability ( reactivity | attention ) {
-  (low) 0.7, 0.2, 0.1;
-  (medium)  0.2, 0.5, 0.3;
-  (high)  0.1, 0.2, 0.7;
-}
-#CPDS 7
-probability (user_action | memory, reactivity) {
-(low, slow)  0.2, 0.5, 0.3;
-(low, medium) 0.3, 0.5, 0.2;
-(low, fast) 0.4, 0.5, 0.1;
-(medium, slow) 0.5, 0.3, 0.2;
-(medium, medium) 0.55, 0.35, 0.1;
-(medium, fast) 0.6, 0.4, 0.0;
-(high, slow)  0.5, 0.4, 0.1;
-(high, medium) 0.6, 0.3, 0.1;
-(high, fast) 0.8, 0.2, 0.0;
-}
-#CPDS 5
-probability (robot_feedback | user_action) {
-  (correct) 0.8, 0.2;
-  (wrong) 0.5, 0.5;
-  (timeout) 0.2, 0.8;
-}
-#CPDS 6
-probability (robot_assistance | user_action) {
-  (correct) 0.05 0.1 0.15 0.3 0.4;
-  (wrong) 0.4 0.3 0.2 0.05 0.05;
-  (timeout) 0.4 0.4 0.1 0.05 0.05;
-}
-#CPDS 2
-probability (game_state | user_action)  {
-   (correct) 0.2, 0.4, 0.4;
-   (wrong) 0.4, 0.4, 0.2;
-   (timeout) 0.6, 0.3, 0.1;
-}
-#CPDS 0
-probability (attempt | user_action)  {
-   (correct) 0.1, 0.2, 0.3, 0.4;
-   (wrong) 0.5, 0.3, 0.15, 0.05;
-   (timeout) 0.4, 0.3, 0.2, 0.1;
-}
-#CPDS 5
-probability (robot_assistance | robot_feedback) {
-  (yes) 0.5 0.3 0.1 0.1 0.0;
-  (no) 0.0 0.1 0.1 0.3 0.5;
-}
\ No newline at end of file
diff --git a/bn_persona_model/user_action_model.bif b/bn_persona_model/user_action_model.bif
deleted file mode 100644
index 24c07ddccbba6dda3be711a028f48794d42c036c..0000000000000000000000000000000000000000
--- a/bn_persona_model/user_action_model.bif
+++ /dev/null
@@ -1,84 +0,0 @@
-network persona_model {
-}
-
-%VARIABLES DEFINITION
-variable user_reactivity {
-  type discrete [3] {slow, medium, fast};
-}
-variable user_memory {
-  type discrete[3] {low, medium, high};
-}
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-variable agent_feedback {
-  type discrete [ 2 ] { yes, no };
-}
-variable user_action {
-  type discrete [ 3 ] { correct, wrong, timeout };
-}
-
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-probability ( agent_assistance ) {
-  table 0.16, 0.16, 0.17, 0.16, 0.17, 0.16;
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-probability ( user_action ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 4 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( user_reactivity ) {
-  table 0.34, 0.33, 0.33;
-}
-#CPDS 3 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( user_memory ) {
-  table 0.33, 0.33, 0.34;
-}
-probability ( agent_feedback ) {
-  table 0.5, 0.5;
-}
-
-
-probability (user_action | user_memory, user_reactivity) {
-(low, slow)  0.2, 0.2, 0.6;
-(low, medium) 0.3, 0.2, 0.5;
-(low, fast) 0.4, 0.4, 0.2;
-(medium, slow) 0.3, 0.1, 0.6;
-(medium, medium) 0.4, 0.2, 0.4;
-(medium, fast) 0.6, 0.2, 0.2;
-(high, slow)  0.7, 0.1, 0.2;
-(high, medium) 0.8, 0.1, 0.1;
-(high, fast) 0.8, 0.2, 0.0;
-}
-probability (agent_feedback | user_action) {
-  (correct) 0.8, 0.2;
-  (wrong) 0.5, 0.5;
-  (timeout) 0.2, 0.8;
-}
-probability (agent_assistance | user_action) {
-  (correct) 0.05 0.05 0.1 0.1 0.35 0.35;
-  (wrong) 0.45 0.3 0.2 0.05 0.0 0.0;
-  (timeout) 0.4 0.4 0.2 0.0 0.0 0.0;
-}
-probability (game_state | user_action)  {
-   (correct) 0.2, 0.4, 0.4;
-   (wrong) 0.4, 0.4, 0.2;
-   (timeout) 0.6, 0.3, 0.1;
-}
-probability (attempt | user_action)  {
-   (correct) 0.1, 0.2, 0.3, 0.4;
-   (wrong) 0.5, 0.3, 0.15, 0.05;
-   (timeout) 0.4, 0.3, 0.2, 0.1;
-}
\ No newline at end of file
diff --git a/bn_persona_model/user_react_time_model.bif b/bn_persona_model/user_react_time_model.bif
deleted file mode 100644
index 1c2bc5d8435fb9009e842873154a8e15de533430..0000000000000000000000000000000000000000
--- a/bn_persona_model/user_react_time_model.bif
+++ /dev/null
@@ -1,95 +0,0 @@
-network persona_model {
-}
-
-%VARIABLES DEFINITION
-variable reactivity {
-  type discrete [3] {slow, medium, fast};
-}
-variable memory {
-  type discrete[3] {low, medium, high};
-}
-variable attention {
-  type discrete[3] {low, medium, high};
-}
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
-}
-variable attempt {
-  type discrete [ 4 ] { att_1, att_2, att_3, att_4 };
-}
-variable game_state {
-  type discrete [ 3 ] { beg, mid, end };
-}
-variable agent_feedback {
-  type discrete [ 2 ] { yes, no };
-}
-variable user_react_time {
-  type discrete [ 3 ] { slow, normal, fast};
-}
-
-%INDIVIDUAL PROBABILITIES DEFINITION
-variable agent_assistance {
-  type discrete [ 6 ] { lev_0, lev_1, lev_2, lev_3, lev_4, lev_5 };
-}
-probability ( game_state ) {
-  table 0.34, 0.33, 0.33;
-}
-probability ( attempt ) {
-  table 0.25, 0.25, 0.25, 0.25;
-}
-#CPDS 4 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( reactivity ) {
-  table 0.34, 0.33, 0.33;
-}
-#CPDS 3 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( memory ) {
-  table 0.33, 0.33, 0.34;
-}
-#CPDS 1 #SPECIFICALLY FOR THE GIVEN PATIENT
-probability ( attention ) {
-  table 0.33, 0.33, 0.34;
-}
-probability ( agent_feedback ) {
-  table 0.5, 0.5;
-}
-probability (user_react_time) {
-  table 0.33, 0.33, 0.34
-}
-probability ( reactivity | attention ) {
-  (low) 0.7, 0.2, 0.1;
-  (medium)  0.2, 0.5, 0.3;
-  (high)  0.1, 0.2, 0.7;
-}
-
-
-probability (user_react_time | memory, reactivity) {
-(low, slow)  0.2, 0.5, 0.3;
-(low, medium) 0.3, 0.5, 0.2;
-(low, fast) 0.4, 0.5, 0.1;
-(medium, slow) 0.5, 0.3, 0.2;
-(medium, medium) 0.55, 0.35, 0.1;
-(medium, fast) 0.6, 0.4, 0.0;
-(high, slow)  0.5, 0.4, 0.1;
-(high, medium) 0.6, 0.3, 0.1;
-(high, fast) 0.8, 0.2, 0.0;
-}
-probability (agent_feedback | user_react_time) {
-  (slow) 0.8, 0.2;
-  (normal) 0.5, 0.5;
-  (fast) 0.2, 0.8;
-}
-probability (agent_assistance | user_react_time) {
-  (slow) 0.05 0.05 0.1 0.1 0.35 0.35;
-  (normal) 0.45 0.3 0.2 0.05 0.0 0.0;
-  (fast) 0.4 0.4 0.2 0.0 0.0 0.0;
-}
-probability (game_state | user_react_time)  {
-   (slow) 0.2, 0.4, 0.4;
-   (normal) 0.4, 0.4, 0.2;
-   (fast) 0.6, 0.3, 0.1;
-}
-probability (attempt | user_react_time)  {
-   (slow) 0.1, 0.2, 0.3, 0.4;
-   (normal) 0.5, 0.3, 0.15, 0.05;
-   (fast) 0.4, 0.3, 0.2, 0.1;
-}
diff --git a/bn_persona_model/utilities.py b/bn_persona_model/utilities.py
deleted file mode 100644
index e44dbec0cb9b5afe5f7e6a0b6f10751aa021e71d..0000000000000000000000000000000000000000
--- a/bn_persona_model/utilities.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import random
-import bn_functions
-
-
-def get_dynamic_variables(variables_name, variables_value):
-    if len(variables_name)!=len(variables_value):
-        assert "The variables name numbers is different from the variables value"
-    else:
-        dynamic_variables = {variables_name[i]:variables_value[i] for i in range(len(variables_name))}
-        return dynamic_variables
-
-
-
-
-def compute_next_state(user_action, task_evolution, attempt_counter, correct_move_counter,
-                       wrong_move_counter, timeout_counter
-                       ):
-    '''
-    The function computes given the current state and action of the user, the next state
-    Args:
-        user_action: 0,1,2
-        task_evolution: beg, mid, end
-        correct_move_counter:
-        attempt_counter:
-        wrong_move_counter:
-        timeout_counter:
-    Return:
-        the counters updated according to the user_action
-    '''
-    if user_action == 0:
-        attempt_counter = 0
-        task_evolution += 1
-        correct_move_counter += 1
-    # if the user made a wrong move and still did not reach the maximum number of attempts
-    elif user_action == 1 and attempt_counter < 3:
-        attempt_counter += 1
-        wrong_move_counter += 1
-    # if the user did not move any token and still did not reach the maximum number of attempts
-    elif user_action == 2 and attempt_counter < 3:
-        attempt_counter += 1
-        timeout_counter += 1
-    # the robot or therapist makes the correct move on the patient's behalf
-    else:
-        attempt_counter = 0
-        task_evolution += 1
-        correct_move_counter += 1
-
-    return task_evolution, attempt_counter, correct_move_counter, wrong_move_counter, timeout_counter
-
-def get_user_action_prob(user_bn_model, robot_assistance_action, robot_feedback_action,
-                         attempt_counter, game_state_counter, user_memory, user_attention, user_reactivity):
-    user_actions_prob = bn_functions.get_inference_from_state(user_bn_model,
-                                                              variables=['user_action'],
-                                                              evidence={'robot_assistance': robot_assistance_action,
-                                                                        'attempt': attempt_counter,
-                                                                        'game_state': game_state_counter,
-                                                                        'robot_feedback': robot_feedback_action,
-                                                                        'memory': user_memory,
-                                                                        'attention': user_attention,
-                                                                        'reactivity': user_reactivity})
-    return user_actions_prob
-
-
-def get_stochatic_action(actions_prob):
-    '''
-    Select one of the actions according to the actions_prob
-    Args:
-        actions_prob: the probability of the Persona based on the BN to make a correct move, wrong move, timeout
-    Return:
-        the id of the selected action
-    N.B:
-    '''
-    action_id = None
-    correct_action_from_BN = actions_prob[0]
-    wrong_action_from_BN = actions_prob[1]
-    timeout_action_from_BN = actions_prob[2]
-
-    rnd_val = random.uniform(0,1)
-    #if user_prob is lower than the correct action prob then is the correct one
-    if rnd_val<=correct_action_from_BN:
-        action_id = 0
-    #if rnd is larger than the correct action prob and lower than wrong
-    #  action prob then is the wrong one
-    elif rnd_val>correct_action_from_BN \
-        and rnd_val<(correct_action_from_BN+wrong_action_from_BN):
-        action_id = 1
-    #timeout
-    else:
-        action_id = 2
-    return action_id
diff --git a/bn_robot_model/__pycache__/utilities.cpython-36.pyc b/bn_robot_model/__pycache__/utilities.cpython-36.pyc
deleted file mode 100644
index 28581fb5fa7c725dab25c9a4c7b5ca743e6f6c3a..0000000000000000000000000000000000000000
Binary files a/bn_robot_model/__pycache__/utilities.cpython-36.pyc and /dev/null differ
diff --git a/simulation.py b/simulation.py
index 27e1eb769e35115b27dd5633667f9eae267444bc..15bd225bb57ed75937340104501069a88f760768 100644
--- a/simulation.py
+++ b/simulation.py
@@ -3,6 +3,7 @@ import os
 import bnlearn
 import numpy as np
 import random
+import copy
 #import classes and modules
 from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_React_time, Game_State, Attempt
 import bn_functions
@@ -156,10 +157,6 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr
         task_progress_counter +=1
         print("Reach the end of the episode")
 
-    # TODO call the function to compute the state of the game (beg, mid, end)
-
-
-
 
     next_state = (game_state_counter, attempt_counter, user_action)
 
@@ -177,20 +174,19 @@ def select_agent_action(agent_action, epsilon):
     if random.random()>epsilon:
         return np.argmax(agent_action)
     else:
-        agent_action[np.argmax(agent_action)]=0
-        return  np.argmax(agent_action)
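+        # with probability epsilon: zero the greedy entry on a copy and return the runner-up action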
+        agent_action_rm_best = np.copy(agent_action)
+        agent_action_rm_best[np.argmax(agent_action)] = 0
+        return np.argmax(agent_action_rm_best)
 
 def simulation(bn_model_user_action,
                bn_model_agent_behaviour,
                var_user_action_target_action,
                var_agent_behaviour_target_action,
                game_state_bn_name, attempt_bn_name,
-               agent_assistance_bn_name, agent_feedback_bn_name,
-               user_pref_assistance,
-               agent_behaviour,
+               agent_assistance_bn_name,
                agent_policy,
                state_space, action_space,
-               epochs=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
+               epoch=50, run=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
     '''
     Args:
 
@@ -201,170 +197,174 @@ def simulation(bn_model_user_action,
 
     '''
 
-    # user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
-    #                                                        for j in range(Agent_Assistance.counter.value)]
-    #                                                        for l in range(Agent_Feedback.counter.value)
-    #                                                     ]
-    # attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
-    #                                    range(User_Action.counter.value)]
-    # game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
-    #                                       range(User_Action.counter.value)]
+    user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)]
+                                                           for j in range(Agent_Assistance.counter.value)]
+    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
+                                       range(User_Action.counter.value)]
+    game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
+                                          range(User_Action.counter.value)]
 
     #output variables:
-    n_correct_per_episode = [0]*epochs
-    n_wrong_per_episode = [0]*epochs
-    n_timeout_per_episode = [0]*epochs
-    n_max_attempt_per_episode = [0]*epochs
-    game_performance_episode = [0]*epochs
-    n_assistance_lev_per_episode = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(epochs)]
-    n_feedback_per_episode = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(epochs)]
-    n_react_time_per_episode = [[0 for i in range(User_React_time.counter.value)] for j in range(epochs)]
+    n_correct_per_episode_epoch = [0]*epoch
+    n_wrong_per_episode_epoch = [0]*epoch
+    n_timeout_per_episode_epoch = [0]*epoch
+    n_max_attempt_per_episode_epoch = [0]*epoch
+    game_performance_episode_epoch = [0]*epoch
+    n_assistance_lev_per_episode_epoch = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(epoch)]
 
 
     #data structure to memorise a sequence of episode
     episodes = []
     ep = Episode()
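+    # keep an untouched copy of the user model so every epoch restarts from the same CPDs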
+    bn_model_user_action_ref = copy.deepcopy(bn_model_user_action)
 
-    for e in range(epochs):
+    for e in range(epoch):
         print("##########################################################")
-        print("EPISODE ",e)
+        print("EPISODE ", e)
         print("##########################################################")
-
-        '''Simulation framework'''
-        #counters
-        game_state_counter = 0
-        attempt_counter = 1
-        iter_counter = 0
-        correct_move_counter = 0
-        wrong_move_counter = 0
-        timeout_counter = 0
-        max_attempt_counter = 0
-
-        #The following variables are used to update the BN at the end of the episode
-        # user_action_dynamic_variables = {
-        #                                 'attempt': attempt_counter_per_user_action,
-        #                                 'game_state': game_state_counter_per_user_action,
-        #                                 'user_action': user_action_per_robot_feedback_robot_assistance
-        #                                 }
-
-        #data structure to memorise the sequence of states  (state, action, next_state)
-        episode = []
-        selected_user_action = 0
-        task_progress_counter = 0
-        #####################SIMULATE ONE EPISODE#########################################
-        while(task_progress_counter<=task_complexity):
-
-            current_state = (game_state_counter, attempt_counter, selected_user_action)
-            current_state_index = ep.state_from_point_to_index(state_space, current_state)
-            # if agent_policy==[]:
-            #     selected_agent_feedback_action, selected_agent_assistance_action = \
-            #         generate_agent_assistance(preferred_assistance=user_pref_assistance,
-            #                                   agent_behaviour=agent_behaviour,
-            #                                   current_state=current_state_index,
-            #                                   state_space=state_space,
-            #                                   action_space=action_space
-            #                                   )
-            # else:
-            #     #TODO agent_policy is a list of 12 items
-            #     # select the one with the highest probability 1-epsilon of the times and one of the others epsilon times
-            #
-            #     selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, select_agent_action(agent_policy[current_state_index], epsilon=0.1))
-
-            vars_agent_evidence = {game_state_bn_name: game_state_counter,
-                                  attempt_bn_name: attempt_counter - 1,
-                                  }
-
-            query_agent_behaviour_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_agent_behaviour,
-                                                                        infer_variable=var_agent_behaviour_target_action,
-                                                                        evidence_variables=vars_agent_evidence)
-
-            selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
-            #selected_agent_behaviour_action = np.argmax(query_agent_behaviour_prob.values)
-
-            #counters for plots
-            n_assistance_lev_per_episode[e][selected_agent_behaviour_action] += 1
-            print("agent_assistance {},  attempt {}, game {}".format(selected_agent_behaviour_action, attempt_counter, game_state_counter))
-
-            ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
-            #return the user action in this state based on the Persona profile
-            vars_user_evidence = {    game_state_bn_name: game_state_counter,
-                                      attempt_bn_name: attempt_counter - 1,
-                                      agent_assistance_bn_name: selected_agent_behaviour_action,
-                                      }
-
-            query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
-                                                                        infer_variable=var_user_action_target_action,
-                                                                        evidence_variables=vars_user_evidence)
-
-            selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
-
-            # # updates counters for simulation
-            # # remap user_action index
-            # if selected_user_action == 0:
-            #   selected_user_action = 1
-            # elif selected_user_action == 1:
-            #   selected_user_action = -1
-            # else:
-            #   selected_user_action = 0
-
-            #updates counters for simulation
-            iter_counter += 1
-            next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, \
-            wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(selected_user_action,
-                                                                        task_progress_counter,
-                                                                        attempt_counter,
-                                                                        correct_move_counter, wrong_move_counter,
-                                                                        timeout_counter, max_attempt_counter,
-                                                                        max_attempt_per_object)
-
-            # update counters
-            # if game_state_counter <= 2:
-            #     user_action_per_robot_feedback_robot_assistance[selected_agent_feedback_action][selected_agent_assistance_action][selected_user_action] += 1
-            #     attempt_counter_per_user_action[selected_user_action][attempt_counter - 1] += 1
-            #     game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
-
-            # store the (state, action, next_state)
-            episode.append((ep.state_from_point_to_index(state_space, current_state),
-                            selected_agent_behaviour_action,
-                            ep.state_from_point_to_index(state_space, next_state)))
-
-            print("current_state ", current_state, " next_state ", next_state)
-        ####################################END of EPISODE#######################################
-        print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
-              "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
-                                                          wrong_move_counter, timeout_counter, max_attempt_counter))
-
-        #save episode
-        episodes.append(Episode(episode))
-
-
-
-        #update user models
-        # bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
-        #
-
-        #reset counter
-        user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)]
-                                                            for j in range(Agent_Assistance.counter.value)]
-                                                           for l in range(Agent_Feedback.counter.value)
-                                                           ]
-        attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
-                                           range(User_Action.counter.value)]
-        game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
-                                              range(User_Action.counter.value)]
-
-        #for plots
-        n_correct_per_episode[e] = correct_move_counter
-        n_wrong_per_episode[e] = wrong_move_counter
-        n_timeout_per_episode[e] = timeout_counter
-        n_max_attempt_per_episode[e] = max_attempt_counter
-        game_performance_episode[e] = [n_correct_per_episode[e],
-                                       n_wrong_per_episode[e],
-                                       n_timeout_per_episode[e],
-                                       n_max_attempt_per_episode[e]]
-
-
-    return game_performance_episode, n_react_time_per_episode, n_assistance_lev_per_episode, n_feedback_per_episode, episodes
+        bn_model_user_action = copy.deepcopy(bn_model_user_action_ref)
+
+        n_correct_per_episode_run = [0] * run
+        n_wrong_per_episode_run = [0] * run
+        n_timeout_per_episode_run = [0] * run
+        n_max_attempt_per_episode_run = [0] * run
+        game_performance_episode_run = [0] * run
+        n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)]
+
+        for r in range(run):
+
+            '''Simulation framework'''
+            #counters
+            game_state_counter = 0
+            attempt_counter = 1
+            iter_counter = 0
+            correct_move_counter = 0
+            wrong_move_counter = 0
+            timeout_counter = 0
+            max_attempt_counter = 0
+
+            #The following variables are used to update the BN at the end of the episode
+            user_action_dynamic_variables = {
+                                            'attempt': attempt_counter_per_user_action,
+                                            'game_state': game_state_counter_per_user_action,
+                                            'user_action': user_action_per_agent_assistance
+                                            }
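+            # these counters feed bn_functions.update_cpds_tables (call currently commented out below)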
+
+
+
+            #data structure to memorise the sequence of states  (state, action, next_state)
+            episode = []
+            selected_user_action = 0
+            task_progress_counter = 0
+            #####################SIMULATE ONE EPISODE#########################################
+            while(task_progress_counter<=task_complexity):
+
+                current_state = (game_state_counter, attempt_counter, selected_user_action)
+                current_state_index = ep.state_from_point_to_index(state_space, current_state)
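+                # no policy provided: query the agent-behaviour BN for the assistance distribution; otherwise follow the given policy (epsilon-greedy in both branches)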
+                if agent_policy==[]:
+                    vars_agent_evidence = {game_state_bn_name: game_state_counter,
+                                          attempt_bn_name: attempt_counter - 1,
+                                          }
+
+                    query_agent_behaviour_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_agent_behaviour,
+                                                                                infer_variable=var_agent_behaviour_target_action,
+                                                                                evidence_variables=vars_agent_evidence)
+
+                    #selected_agent_behaviour_action = bn_functions.get_stochastic_action(query_agent_behaviour_prob.values)
+                    selected_agent_behaviour_action = select_agent_action(query_agent_behaviour_prob.values, epsilon=0.2)
+                else:
+                    selected_agent_behaviour_action = select_agent_action(agent_policy[current_state_index], epsilon=0.2)
+                    #selected_agent_behaviour_action = bn_functions.get_stochastic_action(agent_policy[current_state_index])
+                    #selected_agent_behaviour_action =np.argmax(agent_policy[current_state_index])
+
+                #counters for plots
+                n_assistance_lev_per_episode_run[r][selected_agent_behaviour_action] += 1
+                print("agent_assistance {},  attempt {}, game {}".format(selected_agent_behaviour_action, attempt_counter, game_state_counter))
+
+                ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
+                #return the user action in this state based on the Persona profile
+                vars_user_evidence = {    game_state_bn_name: game_state_counter,
+                                          attempt_bn_name: attempt_counter - 1,
+                                          agent_assistance_bn_name: selected_agent_behaviour_action,
+                                          }
+
+                query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
+                                                                            infer_variable=var_user_action_target_action,
+                                                                            evidence_variables=vars_user_evidence)
+
+                selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
+                #selected_user_action = np.argmax(query_user_action_prob.values)
+
+                #updates counters for simulation
+                iter_counter += 1
+                next_state, task_progress_counter, game_state_counter, attempt_counter, correct_move_counter, \
+                wrong_move_counter, timeout_counter, max_attempt_counter = compute_next_state(selected_user_action,
+                                                                            task_progress_counter,
+                                                                            attempt_counter,
+                                                                            correct_move_counter, wrong_move_counter,
+                                                                            timeout_counter, max_attempt_counter,
+                                                                            max_attempt_per_object)
+
+                # update counters
+                if game_state_counter <= 2:
+                    user_action_per_agent_assistance[selected_agent_behaviour_action][selected_user_action] += 1
+                    attempt_counter_per_user_action[selected_user_action][attempt_counter - 1] += 1
+                    game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
+
+                # store the (state, action, next_state)
+                episode.append((ep.state_from_point_to_index(state_space, current_state),
+                                selected_agent_behaviour_action,
+                                ep.state_from_point_to_index(state_space, next_state)))
+
+                print("current_state ", current_state, " user_action:", selected_user_action, " next_state ", next_state)
+            ####################################END of EPISODE#######################################
+            print("game_state_counter {}, iter_counter {}, correct_counter {}, wrong_counter {}, "
+                  "timeout_counter {}, max_attempt {}".format(game_state_counter, iter_counter, correct_move_counter,
+                                                              wrong_move_counter, timeout_counter, max_attempt_counter))
+
+            #save episode
+            episodes.append(Episode(episode))
+
+            #update user models
+            # bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
+
+            #reset counter
+            user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)]
+                                                for j in range(Agent_Assistance.counter.value)]
+            attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in
+                                               range(User_Action.counter.value)]
+            game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in
+                                                  range(User_Action.counter.value)]
+
+            #for plots
+            n_correct_per_episode_run[r] = correct_move_counter
+            n_wrong_per_episode_run[r] = wrong_move_counter
+            n_timeout_per_episode_run[r] = timeout_counter
+            n_max_attempt_per_episode_run[r] = max_attempt_counter
+            game_performance_episode_run[r] = [n_correct_per_episode_run[r],
+                                           n_wrong_per_episode_run[r],
+                                           n_timeout_per_episode_run[r],
+                                           n_max_attempt_per_episode_run[r]]
+
+        #compute average of the values for one epoch and store it
+        n_correct_per_episode_epoch[e] = sum(n_correct_per_episode_run)/run
+        n_wrong_per_episode_epoch[e] = sum(n_wrong_per_episode_run)/run
+        n_timeout_per_episode_epoch[e] = sum(n_timeout_per_episode_run)/run
+        n_max_attempt_per_episode_epoch[e] = sum(n_max_attempt_per_episode_run)/run
+        game_performance_episode_epoch[e] = list(map(lambda x: sum(x)/run, zip(*game_performance_episode_run)))
+        n_assistance_lev_per_episode_epoch[e] = list(map(lambda x: sum(x)/run, zip(*n_assistance_lev_per_episode_run)))
+
+        #reset variables
+        n_correct_per_episode_run = [0] * run
+        n_wrong_per_episode_run = [0] * run
+        n_timeout_per_episode_run = [0] * run
+        n_max_attempt_per_episode_run = [0] * run
+        game_performance_episode_run = [0] * run
+        n_assistance_lev_per_episode_run = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(run)]
+
+
+
+    return game_performance_episode_epoch, n_assistance_lev_per_episode_epoch, episodes
 
 
 
@@ -380,7 +380,7 @@ def simulation(bn_model_user_action,
 # epochs = 20
 # scaling_factor = 1
 # # initialise the agent
-# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
+# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_template.bif')
 #
 # # initialise memory, attention and reactivity variables
 # persona_memory = 0;
diff --git a/test.py b/test.py
index a5ec2a8049cf2a8b6749b444c611b0257a82f908..b5fd6abccd6355314018406ca22782d58d7840f2 100644
--- a/test.py
+++ b/test.py
@@ -25,29 +25,28 @@ def import_data_from_csv(csv_filename, dag_filename):
 
 
 
-DAG_shared = import_data_from_csv(csv_filename='bn_persona_model/cognitive_game.csv', dag_filename='bn_persona_model/persona_model_test.bif')
+#DAG_shared = import_data_from_csv(csv_filename='/data/test.csv', dag_filename='bn_persona_model/persona_model_template.bif')
 
 
-# DAG = bn.import_DAG('bn_persona_model/persona_model_test.bif')
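+# load the agent and persona networks kept under old_models/ (directory is git-ignored, see .gitignore)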
+DAG_agent = bn.import_DAG('old_models/bn_agent_model/agent_test.bif')
+DAG_user = bn.import_DAG('old_models/bn_persona_model/persona_test.bif')
+
 # #G = bn.plot(DAG)
 #
-# q_origin = bn.inference.fit(DAG, variables=[ 'user_action'], evidence={
+# q_origin = bn.inference.fit(DAG_agent, variables=[ 'agent_assistance'], evidence={
 #                                                                 'game_state':0,
-#                                                                 'attempt':0,
-#                                                                 'agent_feedback':0,
-#                                                                 'agent_assistance':0,
+#                                                                 'attempt':3,
+#                                                                 #'agent_assistance':2,
 # })
 
-# q_shared = bn.inference.fit(DAG_shared, variables=[ 'user_action'], evidence={
-#                                                                 'game_state':0,
-#                                                                 'attempt':0,
-#                                                                 'agent_feedback':1,
-#                                                                 'user_memory': 2,
-#                                                                 'user_reactivity':2,
-#                                                                 'agent_assistance':0,
-# })
-#
-# print("Q origin: ", q_origin.values, " Q shared ", q_shared.values)
+q_shared = bn.inference.fit(DAG_user, variables=['user_action'], evidence={
+                                                                'game_state':0,
+                                                                'attempt':3,
+                                                                'agent_assistance':0,
+})
+
+
+print("Q shared ", q_shared.values)
 # df = pd.read_csv('bn_persona_model/cognitive_game.csv')
 # df = bn.sampling(DAG, n=10000)
 # #model_sl = bn.structure_learning.fit(df, methodtype='hc', scoretype='bic')
@@ -72,10 +71,10 @@ DAG_shared = import_data_from_csv(csv_filename='bn_persona_model/cognitive_game.
 #                 print("GS:", gs, " ATT:", att, " AA", aas, " AF", af)
 #
 # df.head()
-# DAG = bn.import_DAG('bn_persona_model/persona_model_test.bif', CPD=False)
+# DAG = bn.import_DAG('bn_persona_model/persona_model_template.bif', CPD=False)
 # bn.plot(DAG)
 # DAG_update = bn.parameter_learning.fit(DAG, df)
-# DAG_true = bn.import_DAG('bn_persona_model/persona_model_test.bif', CPD=True)
+# DAG_true = bn.import_DAG('bn_persona_model/persona_model_template.bif', CPD=True)
 # q1 = bn.inference.fit(DAG_update, variables=['user_action'], evidence={
 #                                                                 'game_state': 0,
 #                                                                 'attempt':2,