diff --git a/cognitive_game_env.py b/cognitive_game_env.py
index c40d882202c0d4a60cea61c5cdda0de5c89996b6..317854363ae99182aad1a1af3c2aa9baae325efd 100644
--- a/cognitive_game_env.py
+++ b/cognitive_game_env.py
@@ -307,9 +307,9 @@ class CognitiveGame:
         :param trajs: generated by the expert
         :return: Nx3 feature map
         """
-        max_attempt = self.n_solution*self.n_attempt
-        max_time = self.timeout*self.n_solution*self.n_attempt
-        N = self.n_solution * self.n_attempt * self.n_user_action
+        max_attempt = self.task_length*self.n_max_attempt
+        max_time = self.timeout*self.task_length*self.n_max_attempt
+        N = (self.task_length+1) * self.n_max_attempt * len(self.user_action)
 
         feat = np.ones([N, 4])
 
@@ -318,11 +318,17 @@ class CognitiveGame:
         for traj in trajs:
             for s1, a, s2 in traj._t:
                 ix, iy, iz = self.state_index_to_point(s1)
-                feat[s1, 0] = 0
-                feat[s1, 1] = 0#max_attempt*ix
-                feat[s1, 2] = ix
-                feat[s1, 3] = iy
-
+                # feat[s1, 0] = 0
+                # feat[s1, 1] = 0
+                feat[s1, 0] = (ix)
+                feat[s1, 1] = (iy)#*(ix+1)
+                feat[s1, 2] = iz
+                feat[s1, 3] = a
+                # for index in range(len(self.action_space)):
+                #     if index == a:
+                #         feat[s1, 3+index] = 1
+                #     else:
+                #         feat[s1, 3 + index] = 0
                 # for trans in traj:
                 #     if trans[0] == i:
                 #         current_react_time = trans[2]
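A minimal self-contained sketch of what the new feature construction computes, using illustrative values for `task_length`, `n_max_attempt`, and `user_action`, and an assumed row-major decomposition in `state_index_to_point` (the real method on `CognitiveGame` may order the axes differently):

```python
import numpy as np

# Illustrative values; the real ones come from the CognitiveGame instance.
task_length = 5           # assumed: number of tokens/solutions in the task
n_max_attempt = 4         # assumed: maximum attempts per token
user_action = [-1, 0, 1]  # assumed: encoded user actions

n_game_states = task_length + 1
N = n_game_states * n_max_attempt * len(user_action)

def state_index_to_point(s):
    """Assumed inverse of the state indexing: unpack a flat state index
    into (game_state, attempt, user_action) coordinates."""
    ix, rem = divmod(s, n_max_attempt * len(user_action))
    iy, iz = divmod(rem, len(user_action))
    return ix, iy, iz

# One fake expert transition (state, action, next_state), mirroring traj._t.
trajs = [[(7, 2, 8)]]

feat = np.ones([N, 4])
for traj in trajs:
    for s1, a, s2 in traj:
        ix, iy, iz = state_index_to_point(s1)
        feat[s1, 0] = ix   # game-state component
        feat[s1, 1] = iy   # attempt component
        feat[s1, 2] = iz   # user-action component
        feat[s1, 3] = a    # action taken in s1
```

Under these assumptions, each visited state's row carries its three state coordinates plus the expert action, instead of the previous zeroed columns; the commented-out loop in the diff hints at a one-hot action encoding as a possible alternative.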