
Fix convergence preliminarily #47

Merged · 1 commit · Sep 30, 2021
6 changes: 5 additions & 1 deletion examples/find_and_avoid/README.md
@@ -1,6 +1,6 @@
# Find target & avoid obstacles

NOTE: This agent does not converge
:warning: NOTE: This agent does not converge well

This is a typical find target and avoid obstacles task with a simple world
configuration. For this task the E-puck robot is used, which is a compact mobile
@@ -21,4 +21,8 @@ gas and wheel).
[implementation](https://github.com/philtabor/Actor-Critic-Methods-Paper-To-Code/tree/master/DDPG)
which is presented in [Youtube
video](https://www.youtube.com/watch?v=6Yd5WnYls_Y).

|Trained Agent Showcase|Reward Per Episode Plot|
|----------------------|-----------------------|
|![image](https://github.com/KelvinYang0320/deepworlds/blob/dev/examples/find_and_avoid/doc/demo.gif)|![image](https://github.com/KelvinYang0320/deepworlds/blob/dev/examples/find_and_avoid/doc/trend.png)|

24 changes: 12 additions & 12 deletions examples/find_and_avoid/controllers/robot/robot.py
@@ -30,12 +30,11 @@ def use_message_data(self, message):
        # Action 0 is turning
        wheel = float(message[0])

        # Normalize gas from [-1, 1] to [0.3, 1.3] to make robot always move forward
        gas *= 4
        # Clip it
        # gas = np.clip(gas, -0.5, 4.0)
        # Mapping gas from [-1, 1] to [0, 4] to make robot always move forward
        gas = (gas+1)*2
        gas = np.clip(gas, 0, 4.0)

        # Clip turning rate to [-1, 1]
        # Mapping turning rate from [-1, 1] to [-2, 2]
        wheel *= 2
        wheel = np.clip(wheel, -2, 2)

@@ -44,11 +43,10 @@ def use_message_data(self, message):
        self.motorSpeeds[1] = gas - wheel

        # Clip final motor speeds to [-4, 4] to be sure that motors get valid values
        self.motorSpeeds = np.clip(self.motorSpeeds, -4, 4)
        self.motorSpeeds = np.clip(self.motorSpeeds, 0, 6)

        # Apply motor speeds
        self.leftMotor.setVelocity(self.motorSpeeds[0])
        self.rightMotor.setVelocity(self.motorSpeeds[1])
        self._setVelocity(self.motorSpeeds[0], self.motorSpeeds[1])

    def setup_rangefinders(self, n_rangefinders):
        # Sensor setup
@@ -65,12 +63,14 @@ def setup_motors(self):
        # Motor setup
        self.leftMotor = self.robot.getDevice('left wheel motor')
        self.rightMotor = self.robot.getDevice('right wheel motor')
        self._setVelocity(0.0, 0.0)
        self.motorSpeeds = [0.0, 0.0]

    def _setVelocity(self, v1, v2):
        self.leftMotor.setPosition(float('inf'))
        self.rightMotor.setPosition(float('inf'))
        self.leftMotor.setVelocity(0.0)
        self.rightMotor.setVelocity(0.0)

        self.motorSpeeds = [0.0, 0.0]
        self.leftMotor.setVelocity(v1)
        self.rightMotor.setVelocity(v2)


robot_controller = FindTargetRobot(8)
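Below the robot controller diff, a minimal standalone sketch of the action mapping this commit settles on: gas is rescaled from [-1, 1] to [0, 4] so the robot always drives forward, turning is rescaled to [-2, 2], and the differential-drive wheel speeds are clipped to [0, 6]. The function name, argument order, and the plain array return are illustrative only, not part of the controller code.

```python
import numpy as np

def map_action_to_wheel_speeds(turning, gas):
    """Illustrative mapping of the two normalized DDPG actions to wheel speeds."""
    # Gas: [-1, 1] -> [0, 4] so the robot keeps moving forward
    gas = np.clip((gas + 1) * 2, 0, 4.0)
    # Turning: [-1, 1] -> [-2, 2]
    turning = np.clip(turning * 2, -2, 2)
    # Differential drive: turning is added to one wheel and subtracted from the other
    left = gas + turning
    right = gas - turning
    # Final clip to [0, 6], as in the updated controller
    return np.clip([left, right], 0, 6)

print(map_action_to_wheel_speeds(0.0, 0.0))   # neutral gas, no turn   -> [2. 2.]
print(map_action_to_wheel_speeds(1.0, -1.0))  # full turn, minimum gas -> [2. 0.]
```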
28 changes: 28 additions & 0 deletions examples/find_and_avoid/controllers/supervisor/checkConvergence.py
@@ -0,0 +1,28 @@
import matplotlib.pyplot as plt
import numpy as np
import time
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

def Plot(text_path="./exports/", dest_path="./exports/"):
    fp=open(text_path+"Episode-score.txt", "r")
    lines = fp.read().splitlines()
    scores = list(map(float, lines))
    episode = list(range(1, 1+len(scores)))
    plt.figure()
    plt.title("Episode scores over episode")
    plt.plot(episode, scores, label='Raw data')
    SMA = moving_average(scores, 50)
    plt.plot(SMA, label='SMA50')
    plt.xlabel("episode")
    plt.ylabel("episode score")

    plt.legend()
    plt.savefig(dest_path+'trend.png')
    plt.close()

if __name__ == '__main__':
    while(1):

        Plot()
        time.sleep(60)
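For reference, a small sketch of what `moving_average` produces: `np.convolve` in `'valid'` mode returns `len(x) - w + 1` points, which is why the SMA50 curve in trend.png is slightly shorter than the raw score series. The toy scores below are illustrative only; when run as a script, checkConvergence.py re-reads exports/Episode-score.txt and regenerates exports/trend.png every 60 seconds.

```python
import numpy as np

def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

scores = np.arange(10, dtype=float)   # pretend episode scores 0.0 .. 9.0
sma = moving_average(scores, 3)

print(len(scores), len(sma))          # 10 8  ('valid' drops w - 1 points)
print(sma[:3])                        # [1. 2. 3.]  (each value averages 3 consecutive scores)
```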
209 changes: 130 additions & 79 deletions examples/find_and_avoid/controllers/supervisor/supervisor.py
@@ -6,7 +6,7 @@

import utilities as utils
from models.networks import DDPG

import os
OBSERVATION_SPACE = 10
ACTION_SPACE = 2

@@ -26,12 +26,21 @@ def __init__(self, robot, target):
        self.target_name = target
        self.robot = self.getFromDef(robot)
        self.target = self.getFromDef(target)
        self.findThreshold = 0.12
        self.findThreshold = 0.05
        self.steps = 0
        self.steps_threshold = 600
        self.steps_threshold = 500
        self.message = []
        self.should_done = False

        self.pre_distance = None
        '''
        Get other 2 intermediate targets when training the robot in small_world.wbt instead of small_world_easy.wbt.

        self.mid1 = self.getFromDef("mid1")
        self.mid2 = self.getFromDef("mid2")
        '''
        self.is_solved = False

    def get_default_observation(self):
        return [0 for i in range(OBSERVATION_SPACE)]

@@ -80,32 +89,42 @@ def get_reward(self, action):
            return 0

        rf_values = np.array(self.message[:8])

        reward = 0

        if self.steps > self.steps_threshold:
            return -10

        if utils.get_distance_from_target(self.robot,
                                          self.target) < self.findThreshold:
            return +10

        if np.abs(action[1]) > 1.5 or np.abs(action[0]) > 1.5:
            if self.steps > 10:
                self.should_done = True
            return -1

        if np.max(rf_values) > 500:
            if self.steps > 10:
                self.should_done = True
            return -1
        elif np.max(rf_values) > 200:
            return -0.5

        # if (distance != 0):
        #     reward = 0.1 * np.round((0.6 / distance), 1)

        # # (1) Take too many steps
        # if self.steps > self.steps_threshold:
        #     return -10
        #     reward -= (self.steps / self.steps_threshold)

        # # (2) Reward according to distance
        target_ = self.target

        if self.pre_distance == None:
            self.pre_distance = utils.get_distance_from_target(self.robot, target_)
        else:
            cur_distance = utils.get_distance_from_target(self.robot, target_)
            reward = self.pre_distance - cur_distance
            self.pre_distance = cur_distance

        # # (3) Find the target
        # if utils.get_distance_from_target(self.robot, self.target) < self.findThreshold:
        #     reward += 5

        # # (4) Action 1 (gas) or Action 0 (turning) should <= 1.5
        # if np.abs(action[1]) > 1.5 or np.abs(action[0]) > 1.5:
        #     if self.steps > 10:
        #         self.should_done = True
        #     return -1

        # # (5) Stop or punish the agent when the robot is getting too close to an obstacle
        # if np.max(rf_values) > 500:
        #     if self.steps > 10:
        #         self.should_done = True
        #     return -1
        # elif np.max(rf_values) > 200:
        #     return -0.5

        return reward

    def is_done(self):
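A minimal sketch of the distance-delta shaping the rewritten get_reward relies on: each step's reward is the decrease in distance to the target since the previous step, so approaching the target yields a positive reward and retreating yields a negative one. The class and method names below are illustrative, not the supervisor's API.

```python
class DistanceDeltaReward:
    """Toy version of the distance-based shaping: reward = previous distance - current distance."""

    def __init__(self):
        self.pre_distance = None            # reset to None at the start of each episode

    def step_reward(self, current_distance):
        if self.pre_distance is None:       # first step: only record the baseline
            self.pre_distance = current_distance
            return 0.0
        reward = self.pre_distance - current_distance   # > 0 when moving toward the target
        self.pre_distance = current_distance
        return reward

shaper = DistanceDeltaReward()
print(shaper.step_reward(1.0))     # 0.0    (first step, no baseline yet)
print(shaper.step_reward(0.75))    # 0.25   (moved closer)
print(shaper.step_reward(0.875))   # -0.125 (moved away)
```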
@@ -114,74 +133,106 @@ def is_done(self):

        if distance < self.findThreshold:
            print("======== + Solved + ========")
            self.is_solved = True
            return True

        if self.steps > self.steps_threshold or self.should_done:

            return True

        return False

    def reset(self):
        self.steps = 0
        self.should_done = False
        self.pre_distance = None
        self.is_solved = False

        return super().reset()

    def get_info(self):
        pass

supervisor_pre = FindTargetSupervisor('robot', 'target')
supervisor_env = KeyboardPrinter(supervisor_pre)
agent = DDPG(lr_actor=0.00025,
             lr_critic=0.00025,
             input_dims=[10],
             gamma=0.99,
             tau=0.001,
             env=supervisor_env,
             batch_size=256,
             layer1_size=400,
             layer2_size=300,
             n_actions=2,
             load_models=False,
             save_dir='./models/saved/ddpg/')

score_history = []

np.random.seed(0)

for i in range(1, 500):
    done = False
    score = 0
    obs = list(map(float, supervisor_env.reset()))

    first_iter = True
    if i % 250 == 0:
        print("================= TESTING =================")
        while not done:
            act = agent.choose_action_test(obs).tolist()
            new_state, _, done, _ = supervisor_env.step(act)
            obs = list(map(float, new_state))
def create_path(path):
    try:
        os.makedirs(path)
    except OSError:
        print ("Creation of the directory %s failed" % path)
    else:
        print("================= TRAINING =================")
        while not done:
            if (not first_iter):
                act = agent.choose_action_train(obs).tolist()
            else:
                first_iter = False
                act = [0, 0]

            new_state, reward, done, info = supervisor_env.step(act)
            agent.remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward

            obs = list(map(float, new_state))

    score_history.append(score)
    print("===== Episode", i, "score %.2f" % score,
          "100 game average %.2f" % np.mean(score_history[-100:]))

    # if i % 100 == 0:
    #     agent.save_models()
        print ("Successfully created the directory %s " % path)

if __name__ == '__main__':
    create_path("./models/saved/ddpg/")
    create_path("./exports/")

    supervisor_pre = FindTargetSupervisor('robot', 'target')
    supervisor_env = KeyboardPrinter(supervisor_pre)
    agent = DDPG(lr_actor=0.00025,
                 lr_critic=0.00025,
                 input_dims=[10],
                 gamma=0.99,
                 tau=0.001,
                 env=supervisor_env,
                 batch_size=256,
                 layer1_size=400,
                 layer2_size=300,
                 layer3_size=200,
                 n_actions=2,
                 load_models=False,
                 save_dir='./models/saved/ddpg/')
    # # Load from checkpoint
    # agent.load_models(lr_critic=0.00025, lr_actor=0.00025,
    #                   input_dims=[10],
    #                   layer1_size=400,
    #                   layer2_size=300,
    #                   layer3_size=200,
    #                   n_actions=2,
    #                   load_dir='./models/saved/ddpg/')
    score_history = []

    np.random.seed(0)
    N_episode = 600
    for i in range(N_episode+1):
        done = False
        score = 0
        obs = list(map(float, supervisor_env.reset()))

        first_iter = True

        if score_history == [] or np.mean(score_history[-50:])<0.5 or score_history[-1]<0.65:
            print("================= TRAINING =================")
            while not done:
                if (not first_iter):
                    act = agent.choose_action_train(obs).tolist()
                else:
                    first_iter = False
                    act = [0, 0]

                new_state, reward, done, info = supervisor_env.step(act)
                agent.remember(obs, act, reward, new_state, int(done))
                agent.learn()
                score += reward

                obs = list(map(float, new_state))
        else:
            print("================= TESTING =================")
            while not done:
                if (not first_iter):
                    act = agent.choose_action_test(obs).tolist()
                else:
                    first_iter = False
                    act = [0, 0]

                new_state, _, done, _ = supervisor_env.step(act)
                obs = list(map(float, new_state))


        score_history.append(score)
        fp = open("./exports/Episode-score.txt","a")
        fp.write(str(score)+'\n')
        fp.close()
        print("===== Episode", i, "score %.2f" % score,
              "50 game average %.2f" % np.mean(score_history[-50:]))

        if supervisor_pre.is_solved == True:
            agent.save_models()
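For clarity, a hedged restatement of the gating condition in the new main loop: an episode runs in training mode until both the 50-episode average and the most recent score clear fixed thresholds, after which greedy test episodes are run. The helper name and keyword defaults below are illustrative; the supervisor hard-codes the 0.5 and 0.65 thresholds inline.

```python
import numpy as np

def should_train(score_history, avg_window=50, avg_threshold=0.5, last_threshold=0.65):
    """Return True while the agent should keep training rather than run a test episode."""
    if not score_history:                                      # nothing learned yet
        return True
    if np.mean(score_history[-avg_window:]) < avg_threshold:   # recent average still too low
        return True
    if score_history[-1] < last_threshold:                     # last episode regressed
        return True
    return False

print(should_train([]))                  # True  - no scores recorded yet
print(should_train([0.7] * 60))          # False - looks converged, run a test episode
print(should_train([0.7] * 59 + [0.2]))  # True  - latest episode regressed, keep training
```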
Binary file added examples/find_and_avoid/doc/demo.gif
Binary file added examples/find_and_avoid/doc/trend.png