Antonio Andriella / BN_GenerativeModel / Commits / 59c6de46

Commit 59c6de46, authored 4 years ago by Antonio Andriella

    Working simulation of BN with update

parent 65b57306
No related branches, tags, or merge requests found.

Changes: 1 changed file, simulation.py (174 additions, 247 deletions)
...
@@ -8,95 +8,96 @@ from bn_variables import Agent_Assistance, Agent_Feedback, User_Action, User_Rea
import bn_functions
import utils
from episode import Episode
import pandas as pd


def build_model_from_data(csv_filename, dag_filename, dag_model=None):
    print("/************************************************************/")
    print("Init model")
    DAG = bnlearn.import_DAG(dag_filename)
    df_caregiver = bnlearn.sampling(DAG, n=10000)

    print("/************************************************************/")
    print("real_user Model")
    DAG_real_user_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
    df_real_user = pd.read_csv(csv_filename)
    DAG_real_user = bnlearn.parameter_learning.fit(DAG_real_user_no_cpd, df_real_user, methodtype='bayes')
    df_real_user = bnlearn.sampling(DAG_real_user, n=10000)

    print("/************************************************************/")
    print("Shared knowledge")
    DAG_shared_no_cpd = bnlearn.import_DAG(dag_filename, CPD=False)
    shared_knowledge = [df_real_user, df_caregiver]
    conc_shared_knowledge = pd.concat(shared_knowledge)
    DAG_shared = bnlearn.parameter_learning.fit(DAG_shared_no_cpd, conc_shared_knowledge)
    #df_conc_shared_knowledge = bn.sampling(DAG_shared, n=10000)

    return DAG_shared
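
For orientation: build_model_from_data combines two sources of knowledge, samples drawn from the caregiver DAG defined in the .bif file and the logged real-user data from the CSV, and fits one shared model on their concatenation. A minimal usage sketch (the file names below are placeholders, not paths from this repository):

# placeholder file names, for illustration only
shared_dag = build_model_from_data(csv_filename='real_user_log.csv',
                                   dag_filename='persona_model.bif')
# shared_dag is a bnlearn model dict whose CPDs reflect both caregiver priors and user data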

def generate_agent_assistance(preferred_assistance, agent_behaviour, current_state, state_space, action_space):
    episode = Episode()
    game_state, attempt, prev_user_outcome = episode.state_from_index_to_point(state_space, current_state)
    robot_action = 0
    #agent_behaviour is a tuple first item is the feedback, second item is the robot assistance
    print(game_state, attempt, prev_user_outcome)

    if attempt == 1:
        robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), 0))

    elif attempt != 1 and prev_user_outcome == 0:
        if attempt == 2 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance - 1), 5)))
            print("catt2")
        elif attempt == 2 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance), 5)))
            print("hatt2")
        elif attempt == 3 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance - 2), 5)))
            print("catt3")
        elif attempt == 3 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance + 1), 5)))
            print("hatt3")
        elif attempt == 4 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance - 3), 5)))
            print("catt4")
        elif attempt == 4 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance + 2), 5)))
            print("hatt4")

    elif attempt != 1 and prev_user_outcome == -1:
        if attempt == 2 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance + 1), 5)))
            print("catt2")
        elif attempt == 2 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance), 5)))
            print("hatt2")
        elif attempt == 3 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance + 2), 5)))
            print("catt3")
        elif attempt == 3 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance - 1), 5)))
            print("hatt3")
        elif attempt == 4 and agent_behaviour == "challenge":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance + 2), 5)))
            print("catt4")
        elif attempt == 4 and agent_behaviour == "help":
            robot_action = episode.state_from_point_to_index(action_space, (random.randint(0, 1), min(max(0, preferred_assistance - 3), 5)))
            print("hatt4")

    agent_feedback, agent_assistance = episode.state_from_index_to_point(action_space, robot_action)
    return agent_feedback, agent_assistance
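
The if/elif ladder above encodes a single pattern: pick a random feedback bit and offset the user's preferred assistance level by an amount that depends on the attempt number, the behaviour ("challenge" vs "help") and the previous outcome, clamped to the range [0, 5]. A table-driven sketch of the same rule, purely illustrative and not part of this commit (the offsets are copied from the branches above):

import random

# offsets[(prev_outcome, behaviour)][attempt] -> delta applied to preferred_assistance
# values copied from the if/elif branches above
OFFSETS = {
    (0,  "challenge"): {2: -1, 3: -2, 4: -3},
    (0,  "help"):      {2:  0, 3: +1, 4: +2},
    (-1, "challenge"): {2: +1, 3: +2, 4: +2},
    (-1, "help"):      {2:  0, 3: -1, 4: -3},
}

def assistance_for(attempt, prev_user_outcome, agent_behaviour, preferred_assistance):
    # attempt 1 always gets assistance level 0, as in the first branch above
    if attempt == 1:
        level = 0
    else:
        delta = OFFSETS[(prev_user_outcome, agent_behaviour)].get(attempt, 0)
        level = min(max(0, preferred_assistance + delta), 5)
    feedback = random.randint(0, 1)
    return feedback, level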
#
# def choose_next_states(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object,
#                        selected_agent_assistance_action,
#                        bn_model_user_action, var_user_action_target_action):
#
#     def get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object):
#
#         next_state = []
#
#         #correct move on the last state of the bin
#         if (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0 < max_attempt_per_object:
#             next_state.append((game_state_t0+1, n_attempt_t0+1))
#         #correct state but still in the bin
#         elif task_progress == 0 or task_progress == 2 and n_attempt_t0 < max_attempt_per_object:
#             next_state.append((game_state_t0, n_attempt_t0+1))
#         elif (task_progress == 1 or task_progress == 3 or task_progress == 4) and n_attempt_t0 >= max_attempt_per_object:
#             assert "you reached the maximum number of attempts; the agent will move it for you"
#         elif task_progress == 0 or task_progress == 2 and n_attempt_t0 >= max_attempt_per_object:
#             assert "you reached the maximum number of attempts; the agent will move it for you"
#
#         return next_state
#
#     next_state = get_next_state(task_progress, game_state_t0, n_attempt_t0, max_attempt_per_object)
#     query_answer_probs = []
#     for t in next_state:
#         vars_user_evidence = {"game_state_t0": game_state_t0,
#                               "attempt_t0": n_attempt_t0 - 1,
#                               "robot_assistance": selected_agent_assistance_action,
#                               "game_state_t1": t[0],
#                               "attempt_t1": t[1],
#                               }
#
#         query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
#                                                                     infer_variable=var_user_action_target_action,
#                                                                     evidence_variables=vars_user_evidence)
#         query_answer_probs.append(query_user_action_prob)
#
#
#     #do the inference here
#     #1. check given the current_state which are the possible states
#     #2. for each of the possible states get the probability of user_action
#     #3. select the state with the highest user_action probability and execute it
#     #4. return user_action
#
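
The commented-out outline above (enumerate candidate next states, query the user-action probability for each, pick the most likely) could be sketched roughly as follows. The evidence keys and the infer_prob_from_state call mirror the commented code; everything else is an illustrative reconstruction, not code from the commit:

def choose_next_state(candidate_next_states, game_state_t0, n_attempt_t0,
                      selected_agent_assistance_action,
                      bn_model_user_action, var_user_action_target_action):
    # Illustrative sketch of the four commented steps above (assumed helper behaviour).
    best_state, best_prob = None, -1.0
    for game_state_t1, attempt_t1 in candidate_next_states:
        evidence = {"game_state_t0": game_state_t0,
                    "attempt_t0": n_attempt_t0 - 1,
                    "robot_assistance": selected_agent_assistance_action,
                    "game_state_t1": game_state_t1,
                    "attempt_t1": attempt_t1}
        prob = bn_functions.infer_prob_from_state(bn_model_user_action,
                                                  infer_variable=var_user_action_target_action,
                                                  evidence_variables=evidence)
        # prob.values is assumed to be the probability vector, as used later in this file
        if max(prob.values) > best_prob:
            best_state, best_prob = (game_state_t1, attempt_t1), max(prob.values)
    return best_state, best_prob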
def generate_agent_assistance(preferred_assistance, agent_behaviour, n_game_state, n_attempt, alpha_action=0.1):
    agent_policy = [[0 for j in range(n_attempt)] for i in range(n_game_state)]
    previous_assistance = -1

    def get_alternative_action(agent_assistance, previous_assistance, agent_behaviour, alpha_action):
        agent_assistance_res = agent_assistance
        if previous_assistance == agent_assistance:
            if agent_behaviour == "challenge":
                if random.random() > alpha_action:
                    agent_assistance_res = min(max(0, agent_assistance - 1), 5)
                else:
                    agent_assistance_res = min(max(0, agent_assistance), 5)
            else:
                if random.random() > alpha_action:
                    agent_assistance_res = min(max(0, agent_assistance + 1), 5)
                else:
                    agent_assistance_res = min(max(0, agent_assistance), 5)
        return agent_assistance_res

    for gs in range(n_game_state):
        for att in range(n_attempt):
            if att == 0:
                if random.random() > alpha_action:
                    agent_policy[gs][att] = preferred_assistance
                    previous_assistance = agent_policy[gs][att]
                else:
                    if random.random() > 0.5:
                        agent_policy[gs][att] = min(max(0, preferred_assistance - 1), 5)
                        previous_assistance = agent_policy[gs][att]
                    else:
                        agent_policy[gs][att] = min(max(0, preferred_assistance + 1), 5)
                        previous_assistance = agent_policy[gs][att]
            else:
                if agent_behaviour == "challenge":
                    agent_policy[gs][att] = min(max(0, preferred_assistance - 1), 5)
                    agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action)
                    previous_assistance = agent_policy[gs][att]
                else:
                    agent_policy[gs][att] = min(max(0, preferred_assistance + 1), 5)
                    agent_policy[gs][att] = get_alternative_action(agent_policy[gs][att], previous_assistance, agent_behaviour, alpha_action)
                    previous_assistance = agent_policy[gs][att]

    return agent_policy


def compute_next_state(user_action, task_progress_counter, attempt_counter, correct_move_counter,
...
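
Note that this second generate_agent_assistance reuses the name of the per-state version defined earlier in the file, so only this table-returning variant is visible once the module is loaded. A quick usage sketch (the sizes below are assumptions; the actual call at the bottom of the file passes Game_State.counter.value and Attempt.counter.value):

# returns an n_game_state x n_attempt table of assistance levels in [0, 5]
agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help",
                                         n_game_state=3, n_attempt=4, alpha_action=0.1)
# simulation() later reads it as agent_policy[game_state_counter][attempt_counter - 1]
print(agent_policy)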
@@ -167,11 +168,12 @@ def compute_next_state(user_action, task_progress_counter, attempt_counter, corr

-def simulation(bn_model_user_action, var_user_action_target_action, bn_model_user_react_time, var_user_react_time_target_action,
-               user_memory_name, user_memory_value, user_attention_name, user_attention_value,
-               user_reactivity_name, user_reactivity_value,
-               task_progress_t0_name, task_progress_t1_name, game_attempt_t0_name, game_attempt_t1_name,
-               agent_assistance_name, agent_policy,
+def simulation(bn_model_user_action, var_user_action_target_action,
+               game_state_bn_name, attempt_bn_name, agent_assistance_bn_name, agent_feedback_bn_name,
+               user_pref_assistance, agent_behaviour, agent_policy,
                state_space, action_space,
                epochs=50, task_complexity=5, max_attempt_per_object=4, alpha_learning=0):
    '''
...
@@ -183,33 +185,15 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
    n_timeout_per_episode:
    '''
    #TODO: remove agent_assistance_vect and agent_feedback_vect
    #metrics we need, in order to compute afterwards the belief
    agent_feedback_per_action = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_Action.counter.value)]
    agent_assistance_per_action = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_Action.counter.value)]
    attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)]
    game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)]
    agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)]
    agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)]
    game_state_counter_per_agent_feedback = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Feedback.counter.value)]
    attempt_counter_per_agent_feedback = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Feedback.counter.value)]
    game_state_counter_per_agent_assistance = [[0 for i in range(Game_State.counter.value)] for j in range(Agent_Assistance.counter.value)]
    attempt_counter_per_agent_assistance = [[0 for i in range(Attempt.counter.value)] for j in range(Agent_Assistance.counter.value)]
    user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] for l in range(Game_State.counter.value)] for j in range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
    user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in range(Agent_Assistance.counter.value)]
    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
    user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)] for j in range(Agent_Assistance.counter.value)] for l in range(Agent_Feedback.counter.value)]
    attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
    game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)]

    #output variables:
    n_correct_per_episode = [0] * epochs
...
@@ -243,22 +227,11 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use
    #The following variables are used to update the BN at the end of the episode
-   user_action_dynamic_variables = {'attempt_t1': attempt_counter_per_user_action,
-                                    'game_state_t1': game_state_counter_per_user_action,
-                                    'user_action': user_action_per_game_state_attempt_counter_agent_assistance}
+   user_action_dynamic_variables = {'attempt': attempt_counter_per_user_action,
+                                    'game_state': game_state_counter_per_user_action,
+                                    'user_action': user_action_per_robot_feedback_robot_assistance}

    user_react_time_dynamic_variables = {'attempt': attempt_counter_per_react_time,
                                         'game_state': game_state_counter_per_react_time,
                                         'agent_assistance': agent_assistance_per_react_time,
                                         'agent_feedback': agent_feedback_per_react_time}

    agent_assistance_dynamic_variables = {'attempt': attempt_counter_per_agent_assistance,
                                          'game_state': game_state_counter_per_agent_assistance}

    agent_feedback_dynamic_variables = {'attempt': attempt_counter_per_agent_feedback,
                                        'game_state': game_state_counter_per_agent_feedback}

    #data structure to memorise the sequence of states (state, action, next_state)
    episode = []
    selected_user_action = 0
...
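
The *_dynamic_variables dictionaries map BN node names to the per-episode count tables initialised above; bn_functions.update_cpds_tables (not shown in this diff) presumably uses them to refresh the corresponding CPDs. A small sanity-check sketch, assuming bnlearn keeps a pgmpy network under the 'model' key, as the cpds[...] accesses further down suggest:

# check that every node we plan to update actually exists in the user-action network
for node in user_action_dynamic_variables:
    if node not in bn_model_user_action['model'].nodes():
        print("warning: '{}' is not a node of the user-action BN".format(node))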
@@ -267,60 +240,37 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use

        while(task_progress_counter <= task_complexity):

            current_state = (game_state_counter, attempt_counter, selected_user_action)
            current_state_index = ep.state_from_point_to_index(state_space, current_state)

            if agent_policy == []:
                selected_agent_feedback_action, selected_agent_assistance_action = \
                    generate_agent_assistance(preferred_assistance=user_pref_assistance,
                                              agent_behaviour=agent_behaviour,
                                              current_state=current_state_index,
                                              state_space=state_space,
                                              action_space=action_space)
            else:
                selected_agent_feedback_action, selected_agent_assistance_action = ep.state_from_index_to_point(action_space, agent_policy[current_state_index])
                selected_agent_assistance_action = agent_policy[game_state_counter][attempt_counter - 1]  #random.randint(0,5)
                selected_agent_feedback_action = 0  #random.randint(0,1)

            #counters for plots
            n_assistance_lev_per_episode[e][selected_agent_assistance_action] += 1

            current_agent_action = (selected_agent_feedback_action, selected_agent_assistance_action)
-            print("agent_assistance {}, attempt {}, game {}, agent_feedback {}".format(selected_agent_assistance_action, attempt_counter, game_state_counter, selected_agent_feedback_action))
+            print("agent_assistance {}, agent_feedback {}, attempt {}, game {}".format(selected_agent_assistance_action, selected_agent_feedback_action, attempt_counter, game_state_counter))

            ##########################QUERY FOR THE USER ACTION AND REACT TIME#####################################
            #compare the real user with the estimated Persona and returns a user action (0, 1a, 2)
            #return the user action in this state based on the Persona profile
-            vars_user_evidence = {task_progress_t0_name: game_state_counter,
-                                  game_attempt_t0_name: attempt_counter - 1,
-                                  task_progress_t1_name: game_state_counter,
-                                  game_attempt_t1_name: attempt_counter - 1,
-                                  agent_assistance_name: selected_agent_assistance_action,
+            vars_user_evidence = {game_state_bn_name: game_state_counter,
+                                  attempt_bn_name: attempt_counter - 1,
+                                  agent_assistance_bn_name: selected_agent_assistance_action,
+                                  agent_feedback_bn_name: selected_agent_feedback_action,
                                  }

-            query_user_action_prob = bn_functions.infer_prob_from_state(bn_model_user_action,
+            query_user_action_prob = bn_functions.infer_prob_from_state(user_bn_model=bn_model_user_action,
                                                                         infer_variable=var_user_action_target_action,
                                                                         evidence_variables=vars_user_evidence)
            # query_user_react_time_prob = bn_functions.infer_prob_from_state(bn_model_user_react_time,
            #                                                                 infer_variable=var_user_react_time_target_action,
            #                                                                 evidence_variables=vars_user_evidence)
            #
            #
            selected_user_action = bn_functions.get_stochastic_action(query_user_action_prob.values)
            # selected_user_react_time = bn_functions.get_stochastic_action(query_user_react_time_prob.values)
            # counters for plots
            # n_react_time_per_episode[e][selected_user_react_time] += 1

            #updates counters for user action
            user_action_per_game_state_attempt_counter_agent_assistance[selected_agent_assistance_action][attempt_counter - 1][game_state_counter][selected_user_action] += 1
            attempt_counter_per_user_action[selected_user_action][attempt_counter - 1] += 1
            game_state_counter_per_user_action[selected_user_action][game_state_counter] += 1
            user_action_per_agent_assistance[selected_agent_assistance_action][selected_user_action] += 1
            #update counter for user react time
            # agent_assistance_per_react_time[selected_user_react_time][selected_agent_assistance_action] += 1
            # attempt_counter_per_react_time[selected_user_react_time][attempt_counter-1] += 1
            # game_state_counter_per_react_time[selected_user_react_time][game_state_counter] += 1
            # agent_feedback_per_react_time[selected_user_react_time][selected_agent_feedback_action] += 1
            #update counter for agent feedback
            game_state_counter_per_agent_feedback[selected_agent_feedback_action][game_state_counter] += 1
            attempt_counter_per_agent_feedback[selected_agent_feedback_action][attempt_counter - 1] += 1
            #update counter for agent assistance
            game_state_counter_per_agent_assistance[selected_agent_assistance_action][game_state_counter] += 1
            attempt_counter_per_agent_assistance[selected_agent_assistance_action][attempt_counter - 1] += 1

            # updates counters for simulation
            # remap user_action index
...
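
bn_functions.get_stochastic_action is not part of this diff; conceptually it draws one user action from the inferred posterior rather than always taking the argmax. A minimal stand-alone sketch of that idea (an illustration using numpy, not the project's implementation):

import numpy as np

def sample_action(action_probs):
    # action_probs: 1-D array of posterior probabilities over user actions
    p = np.asarray(action_probs, dtype=float)
    p = p / p.sum()                      # guard against rounding drift
    return int(np.random.choice(len(p), p=p))

# e.g. selected = sample_action(query_user_action_prob.values)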
@@ -357,35 +307,17 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use

        #update user models
        bn_model_user_action = bn_functions.update_cpds_tables(bn_model_user_action, user_action_dynamic_variables, alpha_learning)
        bn_model_user_react_time = bn_functions.update_cpds_tables(bn_model_user_react_time, user_react_time_dynamic_variables)
        #update agent models

        print("user_given_game_attempt: ", bn_model_user_action['model'].cpds[0].values)
        print("user_given_robot: ", bn_model_user_action['model'].cpds[5].values)
        print("game_user: ", bn_model_user_action['model'].cpds[3].values)
        print("attempt_user: ", bn_model_user_action['model'].cpds[2].values)

        #reset counter
        user_action_per_game_state_attempt_counter_agent_assistance = [[[[0 for i in range(User_Action.counter.value)] for l in range(Game_State.counter.value)] for j in range(Attempt.counter.value)] for k in range(Agent_Assistance.counter.value)]
        user_action_per_agent_assistance = [[0 for i in range(User_Action.counter.value)] for j in range(Agent_Assistance.counter.value)]
        user_action_per_robot_feedback_robot_assistance = [[[0 for i in range(User_Action.counter.value)] for j in range(Agent_Assistance.counter.value)] for l in range(Agent_Feedback.counter.value)]
        attempt_counter_per_user_action = [[0 for i in range(Attempt.counter.value)] for j in range(User_Action.counter.value)]
        game_state_counter_per_user_action = [[0 for i in range(Game_State.counter.value)] for j in range(User_Action.counter.value)]
        attempt_counter_per_react_time = [[0 for i in range(Attempt.counter.value)] for j in range(User_React_time.counter.value)]
        game_state_counter_per_react_time = [[0 for i in range(Game_State.counter.value)] for j in range(User_React_time.counter.value)]
        agent_feedback_per_react_time = [[0 for i in range(Agent_Feedback.counter.value)] for j in range(User_React_time.counter.value)]
        agent_assistance_per_react_time = [[0 for i in range(Agent_Assistance.counter.value)] for j in range(User_React_time.counter.value)]

        #for plots
        n_correct_per_episode[e] = correct_move_counter
        n_wrong_per_episode[e] = wrong_move_counter
...
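
How update_cpds_tables folds these counts into the CPDs is not visible in this diff; one plausible reading of the alpha_learning argument, sketched here purely as an assumption, is an exponential blend between the old CPD entries and the normalised episode counts:

def blended_cpd(old_cpd_row, counts, alpha_learning):
    # Illustrative only: normalise the episode counts into a distribution and
    # blend with the previous CPD row; update_cpds_tables itself is not shown here.
    total = sum(counts)
    if total == 0:
        return list(old_cpd_row)
    new_row = [c / total for c in counts]
    return [(1 - alpha_learning) * o + alpha_learning * n
            for o, n in zip(old_cpd_row, new_row)]

# e.g. blended_cpd([0.6, 0.3, 0.1], [4, 1, 0], alpha_learning=0.1)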
@@ -408,57 +340,52 @@ def simulation(bn_model_user_action, var_user_action_target_action, bn_model_use

#############################################################################
agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.5)

# SIMULATION PARAMS
epochs = 20
scaling_factor = 1
# initialise the agent
bn_model_caregiver_assistance = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_assistive_model.bif')
bn_model_caregiver_feedback = None #bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_agent_model/agent_feedback_model.bif')
bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
bn_model_user_react_time = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/user_react_time_model.bif')

# initialise memory, attention and reactivity variables
persona_memory = 0; persona_attention = 0; persona_reactivity = 1;

# define state space struct for the irl algorithm
episode_instance = Episode()
# DEFINITION OF THE MDP
# define state space struct for the irl algorithm
attempt = [i for i in range(1, Attempt.counter.value + 1)]
# +1 (3,_,_) absorbing state
game_state = [i for i in range(0, Game_State.counter.value + 1)]
user_action = [i for i in range(-1, User_Action.counter.value - 1)]
state_space = (game_state, attempt, user_action)
states_space_list = list(itertools.product(*state_space))
state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
action_space = (agent_feedback_action, agent_assistance_action)
action_space_list = list(itertools.product(*action_space))
action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in range(len(user_action))]
initial_state = (1, 1, 0)
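
Episode.state_from_point_to_index and state_from_index_to_point are not shown in this diff; given how they are used here, they behave like a position lookup in the enumerated product space. A stand-alone sketch (the concrete list sizes are assumptions, not values from bn_variables):

import itertools

game_state = [0, 1, 2, 3]          # range(0, Game_State.counter.value + 1), assuming 3 game states
attempt = [1, 2, 3, 4]             # range(1, Attempt.counter.value + 1), assuming 4 attempts
user_action = [-1, 0, 1]           # range(-1, User_Action.counter.value - 1), assuming 3 actions
states_space_list = list(itertools.product(game_state, attempt, user_action))

def point_to_index(space_list, point):
    # index of a concrete (game_state, attempt, user_action) tuple in the enumeration
    return space_list.index(tuple(point))

def index_to_point(space_list, index):
    return space_list[index]

assert index_to_point(states_space_list, point_to_index(states_space_list, (1, 1, 0))) == (1, 1, 0)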
#1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
    simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
               bn_model_user_react_time=bn_model_user_react_time, var_user_react_time_target_action=['user_react_time'],
               user_memory_name="memory", user_memory_value=persona_memory,
               user_attention_name="attention", user_attention_value=persona_attention,
               user_reactivity_name="reactivity", user_reactivity_value=persona_reactivity,
               task_progress_t0_name="game_state_t0", task_progress_t1_name="game_state_t1",
               game_attempt_t0_name="attempt_t0", game_attempt_t1_name="attempt_t1",
               agent_assistance_name="agent_assistance", agent_policy=agent_policy,
               state_space=states_space_list, action_space=action_space_list,
               epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)

utils.plot2D_game_performance("/home/pal/Documents/Framework/bn_generative_model/results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
utils.plot2D_assistance("/home/pal/Documents/Framework/bn_generative_model/results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)
# agent_policy = generate_agent_assistance(preferred_assistance=2, agent_behaviour="help", n_game_state=Game_State.counter.value, n_attempt=Attempt.counter.value, alpha_action=0.1)
# print(agent_policy)
#
# # SIMULATION PARAMS
# epochs = 20
# scaling_factor = 1
# # initialise the agent
# bn_model_user_action = bnlearn.import_DAG('/home/pal/Documents/Framework/bn_generative_model/bn_persona_model/persona_model_test.bif')
#
# # initialise memory, attention and reactivity variables
# persona_memory = 0;
# persona_attention = 0;
# persona_reactivity = 1;
#
# # define state space struct for the irl algorithm
# episode_instance = Episode()
# # DEFINITION OF THE MDP
# # define state space struct for the irl algorithm
# attempt = [i for i in range(1, Attempt.counter.value + 1)]
# # +1 (3,_,_) absorbing state
# game_state = [i for i in range(0, Game_State.counter.value + 1)]
# user_action = [i for i in range(-1, User_Action.counter.value - 1)]
# state_space = (game_state, attempt, user_action)
# states_space_list = list(itertools.product(*state_space))
# state_space_index = [episode_instance.state_from_point_to_index(states_space_list, s) for s in states_space_list]
# agent_assistance_action = [i for i in range(Agent_Assistance.counter.value)]
# agent_feedback_action = [i for i in range(Agent_Feedback.counter.value)]
# action_space = (agent_feedback_action, agent_assistance_action)
# action_space_list = list(itertools.product(*action_space))
# action_space_index = [episode_instance.state_from_point_to_index(action_space_list, a) for a in action_space_list]
# terminal_state = [(Game_State.counter.value, i, user_action[j]) for i in range(1, Attempt.counter.value + 1) for j in
# range(len(user_action))]
# initial_state = (1, 1, 0)
#
# #1. RUN THE SIMULATION WITH THE PARAMS SET BY THE CAREGIVER
#
#
# game_performance_per_episode, react_time_per_episode, agent_assistance_per_episode, agent_feedback_per_episode, episodes_list = \
# simulation(bn_model_user_action=bn_model_user_action, var_user_action_target_action=['user_action'],
# game_state_bn_name="game_state",
# attempt_bn_name="attempt",
# agent_assistance_bn_name="agent_assistance",
# agent_feedback_bn_name="agent_feedback",
# agent_policy=agent_policy,
# state_space=states_space_list, action_space=action_space_list,
# epochs=epochs, task_complexity=5, max_attempt_per_object=4, alpha_learning=0.1)
#
# utils.plot2D_game_performance("results/user_performance.png", epochs, scaling_factor, game_performance_per_episode)
# utils.plot2D_assistance("results/agent_assistance.png", epochs, scaling_factor, agent_assistance_per_episode)