diff --git a/episode.py b/episode.py
new file mode 100644
index 0000000000000000000000000000000000000000..dde545312e0ce1f5948413342f0770858f2c5920
--- /dev/null
+++ b/episode.py
@@ -0,0 +1,294 @@
+"""
+Episodes representing expert demonstrations and automated generation
+thereof.
+"""
+
+from Environment import Environment
+
+import numpy as np
+from itertools import chain
+import itertools
+import time
+import sys
+
+class Episode:
+    """
+    An episode consisting of states, corresponding actions, and outcomes.
+
+    Args:
+        transitions: The transitions of this episode as an iterable of
+            tuples `(state_from, action, state_to)`. Note that `state_to`
+            of an entry should always be equal to `state_from` of the next
+            entry.
+    """
+
+    def __init__(self, transitions=()):
+        # store transitions as a list of `(state_from, action, state_to)` tuples
+        self._t = [tuple(t) for t in transitions]
+
+    def transition(self, state_from, action, state_to):
+        """Append a single transition `(state_from, action, state_to)`."""
+        self._t.append((state_from, action, state_to))
+
+    def transitions(self):
+        """
+        The transitions of this episode.
+
+        Returns:
+            All transitions in this episode as array of tuples
+            `(state_from, action, state_to)`.
+        """
+        return self._t
+
+    def states(self):
+        """
+        The states visited in this episode.
+
+        Returns:
+            All states visited in this episode as an iterator, in the order
+            they are visited. If a state is visited multiple times, the
+            iterator yields it once per visit.
+        """
+        return map(lambda x: x[0], chain(self._t, [(self._t[-1][2], 0, 0)]))
+
+    def __repr__(self):
+        return "EpisodeGenerator({})".format(repr(self._t))
+
+    def __str__(self):
+        return "{}".format(self._t)
+
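+# A minimal usage sketch for `Episode` (illustrative only; the states and
+# actions below are made-up integer indices, not taken from any particular
+# environment):
+#
+#     ep = Episode([(0, 1, 2), (2, 0, 3)])
+#     ep.transition(3, 1, 4)
+#     ep.transitions()     # -> [(0, 1, 2), (2, 0, 3), (3, 1, 4)]
+#     list(ep.states())    # -> [0, 2, 3, 4]
+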
+
+def generate_episode(world, policy, start, final):
+    """
+    Generate a single episode.
+
+    Args:
+        world: The world for which the episode should be generated.
+        policy: A function `(state: Integer) -> (action: Integer)` mapping a
+            state to an action, specifying which action to take in which
+            state. This function may return different actions for repeated
+            invocations with the same state, i.e. it may make a
+            probabilistic decision, and it is invoked anew every time a
+            state is visited, whether for the first time or again.
+        start: The starting state (as Integer index).
+        final: A collection of terminal states. If an episode reaches a
+            terminal state, generation is complete and the episode is
+            returned.
+
+    Returns:
+        A generated Episode instance adhering to the given arguments.
+    """
+
+    state = start
+
+    episode = []
+    while state not in final:
+        action = policy(state)
+
+        # sample the next state from the world's transition model
+        next_s = range(world.n_states)
+        next_p = world.p_transition[state, :, action]
+
+        next_state = np.random.choice(next_s, p=next_p)
+
+        episode.append((state, action, next_state))
+        state = next_state
+
+    return Episode(episode)
+
+
+def policy_adapter(policy):
+    """
+    A policy adapter for deterministic policies.
+
+    Adapts a deterministic policy given as array or map
+    `policy[state] -> action` for the episode-generation functions.
+
+    Args:
+        policy: The policy as map/array
+            `policy[state: Integer] -> action: Integer`
+            representing the policy function p(state).
+
+    Returns:
+        A function `(state: Integer) -> action: Integer` acting out the
+        given policy.
+    """
+    return lambda state: policy[state]
+
+
+def stochastic_policy_adapter(policy):
+    """
+    A policy adapter for stochastic policies.
+
+    Adapts a stochastic policy given as array or map
+    `policy[state, action] -> probability` for the episode-generation
+    functions.
+
+    Args:
+        policy: The stochastic policy as map/array
+            `policy[state: Integer, action: Integer] -> probability`
+            representing the probability distribution p(action | state) of
+            an action given a state.
+
+    Returns:
+        A function `(state: Integer) -> action: Integer` acting out the
+        given policy, choosing an action randomly based on the distribution
+        defined by the given policy.
+    """
+    return lambda state: np.random.choice(policy.shape[1], p=policy[state, :])
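+
+
+# A minimal sketch of how the pieces above fit together (illustrative only;
+# `world` stands in for any object exposing `n_states` and a transition array
+# `p_transition[state_from, state_to, action]`, and `policy_array` is a
+# hypothetical `n_states x n_actions` probability matrix):
+#
+#     policy = stochastic_policy_adapter(policy_array)
+#     ep = generate_episode(world, policy, start=0, final={world.n_states - 1})
+#     print(ep.transitions())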
+
+
+def get_states(states, initial_state):
+    """Enumerate the state space as the Cartesian product of the per-dimension
+    value lists in `states`, with `initial_state` prepended."""
+    states_list = list(itertools.product(*states))
+    states_list.insert(0, initial_state)
+    return states_list
+
+
+def point_to_index(point, states):
+    """Return the index of the state `point` in the enumerated list `states`."""
+    return states.index(tuple(point))
+
+
+def state_from_index_to_coord(state_tuple, index):
+    """Return the state tuple (coordinates) stored at position `index`."""
+    return state_tuple[index]
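+
+
+# Illustrative example of the state-space helpers above (dimensions are
+# made-up and only meant to show the enumeration order; note that the
+# initial state is prepended even if it already appears in the product):
+#
+#     dims = ([1, 2], [0, 1])               # e.g. game_state x user_action
+#     states = get_states(dims, (1, 0))     # [(1, 0), (1, 0), (1, 1), (2, 0), (2, 1)]
+#     point_to_index((2, 1), states)        # -> 4
+#     state_from_index_to_coord(states, 4)  # -> (2, 1)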
+
+def load_episodes(file):
+    """
+    Load previously saved episodes from a file written with `np.save`.
+
+    :param file: path to the `.npy` file containing the saved episodes
+    :return: a list of Episode instances
+    """
+    print("LOADING...")
+
+    trajs = list()
+    with open(file, "rb") as f:
+        traj = np.load(f, allow_pickle=True)
+        for t in range(len(traj)):
+            trajs.append(Episode(traj[t]))
+            print("loaded traj ", t)
+    for t in trajs:
+        print(t._t)
+    return trajs
+
+
+def generate_statistics(state_list, action_space, episodes):
+    """
+    Compute the n_states x n_states x n_actions occurrence table that counts,
+    for every (state_from, action, state_to) triple, how many episodes contain
+    that transition. The table is later normalised into transition
+    probabilities by `compute_probabilities`.
+    """
+    print(state_list)
+    n_states = len(state_list)
+    n_actions = len(action_space)
+
+    # create a matrix state x state x action
+    table = np.zeros(shape=(n_states, n_states, n_actions))
+    start_time = time.time()
+    for s_from in range(n_states):
+        for act in range(n_actions):
+            for s_to in range(n_states):
+                # count every episode that contains this transition at least once
+                for traj in episodes:
+                    if (s_from, act, s_to) in traj._t:
+                        table[s_from, s_to, act] += 1
+    elapsed_time = time.time() - start_time
+    print("processing time:{}".format(elapsed_time))
+    return table
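+
+
+# Illustrative example of the occurrence table (two toy episodes over a
+# 3-state, 2-action problem; all values are made up):
+#
+#     eps = [Episode([(0, 1, 2)]), Episode([(0, 1, 2), (2, 0, 1)])]
+#     table = generate_statistics(range(3), ['a0', 'a1'], eps)
+#     table[0, 2, 1]   # -> 2.0, (0, a1, 2) appears in both episodes
+#     table[2, 1, 0]   # -> 1.0, (2, a0, 1) appears in one episode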
+
+
+def compute_probabilities(transition_matrix, terminal_states):
+    """
+    Normalise the occurrence table into transition probabilities for each
+    state_from -> action -> state_to.
+
+    :param transition_matrix: occurrence table of shape n_states x n_states x n_actions
+    :param terminal_states: indices of terminal states, made absorbing under action 0
+    :return: matrix of the same shape holding p(state_to | state_from, action)
+    """
+    n_state_from, n_state_to, n_actions = transition_matrix.shape
+    transition_matrix_with_prob = np.zeros((n_state_from, n_state_to, n_actions))
+
+    for s_from in range(n_state_from):
+        s_in_prob = list()
+        # for each action, normalise the column of destination-state counts so
+        # that the probabilities over state_to sum to 1
+        for a in range(n_actions):
+            trans_state_from = transition_matrix[s_from, :, a]
+            # guard against nan (0/0) when a (state_from, action) pair was never observed
+            sum_over_prob = trans_state_from.sum() if trans_state_from.sum() > 0 else sys.float_info.min
+            s_in_prob.append(trans_state_from / sum_over_prob)
+
+        transition_matrix_with_prob[s_from] = np.asarray(s_in_prob).T
+
+    # terminal states are made absorbing under action 0
+    for state in terminal_states:
+        transition_matrix_with_prob[state, state, 0] = 1
+
+    return transition_matrix_with_prob
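+
+
+# Tiny worked example of the normalisation (made-up counts, 2 states, 1 action):
+#
+#     occ = np.zeros((2, 2, 1))
+#     occ[0, 0, 0], occ[0, 1, 0] = 1, 3   # from state 0: reached 0 once, 1 three times
+#     prob = compute_probabilities(occ, terminal_states=[1])
+#     prob[0, :, 0]    # -> [0.25, 0.75]
+#     prob[1, 1, 0]    # -> 1.0 (terminal state made absorbing)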
+
+
+def read_trans_matrix(file):
+    """Load a previously saved transition matrix from a `.npy` file."""
+    print("Loading trans matrix...")
+    with open(file, "rb") as f:
+        trans_matrix = np.load(f, allow_pickle=True)
+    print("Done")
+    return trans_matrix
+
+
+def main():
+
+    file_path = "/home/aandriella/Documents/Codes/MY_FRAMEWORK/BN_GenerativeModel/results/1/episodes.npy"
+    episodes = load_episodes(file_path)
+    initial_state = (1, 1, 0)
+    n_max_attempt = 5
+    task_length = 6
+    # Environment setup for RL agent assistance
+    action_space = ['LEV_0', 'LEV_1', 'LEV_2', 'LEV_3', 'LEV_4', 'LEV_5']
+    user_actions_state = [-1, 0, 1]
+    final_states = [(task_length, a, u) for a in range(1, n_max_attempt) for u in range(-1, 2)]
+    # definition of the state space
+    attempt = [i for i in range(1, n_max_attempt)]
+    game_state = [i for i in range(1, task_length + 1)]
+    user_actions = list(user_actions_state)
+    states_space = (game_state, attempt, user_actions)
+
+    env = Environment(action_space, initial_state, final_states, user_actions, states_space,
+                      task_length, n_max_attempt, timeout=0, n_levels_assistance=6)
+    trans_matrix = generate_statistics(env.states, env.action_space, episodes)
+    path_trans_matrix_occ = "/home/aandriella/Documents/Codes/MY_FRAMEWORK/BN_GenerativeModel/results/1/trans_matrix_occ.npy"
+    path_trans_matrix_prob = "/home/aandriella/Documents/Codes/MY_FRAMEWORK/BN_GenerativeModel/results/1/trans_matrix_prob.npy"
+    terminal_states = [env.point_to_index(state) for state in final_states]
+
+
+    # save the occurrence matrix on a file; "wb" so a rerun overwrites any
+    # previous matrix instead of appending to it
+    with open(path_trans_matrix_occ, "wb") as f:
+        np.save(f, trans_matrix)
+    trans_matrix_occ = read_trans_matrix(path_trans_matrix_occ)
+    print(trans_matrix_occ.shape)
+    trans_matrix_prob = compute_probabilities(trans_matrix_occ, terminal_states)
+    # save the transition-probability matrix on a file
+    with open(path_trans_matrix_prob, "wb") as f:
+        np.save(f, trans_matrix_prob)
+
+
+if __name__ == "__main__":
+    main()