From b4d9fdba69c1f8233e0e77d410ef4c124ce77bb0 Mon Sep 17 00:00:00 2001
From: Antonio Andriella <aandriella@iri.upc.edu>
Date: Tue, 27 Oct 2020 08:29:10 +0100
Subject: [PATCH] add load_robot_policy, get_irl_state_action and get_random_state_action methods

---
 src/robot_behaviour/robot_reproducer.py | 28 +++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/robot_behaviour/robot_reproducer.py b/src/robot_behaviour/robot_reproducer.py
index f70596a..8733120 100644
--- a/src/robot_behaviour/robot_reproducer.py
+++ b/src/robot_behaviour/robot_reproducer.py
@@ -7,12 +7,14 @@ facial expression and gesture. Every time we check if the action
 import rospy
 import random
 import ast
+import pickle
+import numpy as np
 from robot_behaviour.face_reproducer import Face
 from robot_behaviour.speech_reproducer import Speech
 from robot_behaviour.gesture_reproducer import Gesture
 
 class Robot:
-  def __init__(self, speech, sentences_file, face=None, gesture=None):
+  def __init__(self, speech, sentences_file, action_policy_filename=None, face=None, gesture=None):
     '''
     :param speech: instance of class Speech
     :param sentences_file: the file where all the sentences are stored
@@ -23,6 +25,7 @@ class Robot:
     self.sentences = self.load_sentences(sentences_file)
     self.face = face
     self.gesture = gesture
+    self.action_policy = self.load_robot_policy(action_policy_filename)
 
     self.action = {
       "instruction": self.instruction,
@@ -52,10 +55,31 @@ class Robot:
       "neutral" : self.neutral
     }
 
+  def load_robot_policy(self, learned_policy_filename):
+    '''Unpickle the policy file (trusted files only); returns None if no file is given.'''
+    if learned_policy_filename is not None:  # __init__ defaults the filename to None
+      with open(learned_policy_filename, "rb") as f:
+        return pickle.load(f)
+
+  def get_irl_state_action(self, state_index, epsilon=0.1):
+    '''Epsilon-greedy choice over the values in self.action_policy[state_index].
+    :param epsilon: probability of returning the runner-up instead of the best
+    :return: index of the selected action'''
+    print("Select it between the following:", self.action_policy[state_index])
+    # Float copy: masking the best action must not alter the stored policy.
+    action_values = np.array(self.action_policy[state_index], dtype=float)
+    if random.random() < epsilon:
+      # -inf rather than 0, so the runner-up is right for values <= 0 too.
+      action_values[np.argmax(action_values)] = -np.inf
+    return np.argmax(action_values)
+
+  def get_random_state_action(self):
+    '''Return a pseudo-random action index between 0 and 6, both included.'''
+    return random.randrange(7)
+
   def send_to_rest(self):
     self.gesture.initial_pos()
 
-
   def load_sentences(self, file):
     file = open(file, "r")
     contents = file.read()
-- 
GitLab