
Fix convergence preliminarily #47

Merged · 1 commit · Sep 30, 2021
6 changes: 5 additions & 1 deletion examples/find_and_avoid/README.md
@@ -1,6 +1,6 @@
# Find target & avoid obstacles

NOTE: This agent does not converge
:warning: NOTE: This agent does not converge well

This is a typical find target and avoid obstacles task with a simple world
configuration. For this task the E-puck robot is used, which is a compact mobile
@@ -21,4 +21,8 @@ gas and wheel).
[implementation](https://github.com/philtabor/Actor-Critic-Methods-Paper-To-Code/tree/master/DDPG)
which is presented in [Youtube
video](https://www.youtube.com/watch?v=6Yd5WnYls_Y).

|Trained Agent Showcase|Reward Per Episode Plot|
|----------------------|-----------------------|
|![image](https://github.com/KelvinYang0320/deepworlds/blob/dev/examples/find_and_avoid/doc/demo.gif)|![image](https://github.com/KelvinYang0320/deepworlds/blob/dev/examples/find_and_avoid/doc/trend.png)|

24 changes: 12 additions & 12 deletions examples/find_and_avoid/controllers/robot/robot.py
@@ -30,12 +30,11 @@ def use_message_data(self, message):
        # Action 0 is turning
        wheel = float(message[0])

        # Normalize gas from [-1, 1] to [0.3, 1.3] to make robot always move forward
        gas *= 4
        # Clip it
        # gas = np.clip(gas, -0.5, 4.0)
        # Mapping gas from [-1, 1] to [0, 4] to make robot always move forward
        gas = (gas+1)*2
        gas = np.clip(gas, 0, 4.0)

        # Clip turning rate to [-1, 1]
        # Mapping turning rate from [-1, 1] to [-2, 2]
        wheel *= 2
        wheel = np.clip(wheel, -2, 2)

@@ -44,11 +43,10 @@ def use_message_data(self, message):
        self.motorSpeeds[1] = gas - wheel

        # Clip final motor speeds to [-4, 4] to be sure that motors get valid values
        self.motorSpeeds = np.clip(self.motorSpeeds, -4, 4)
        self.motorSpeeds = np.clip(self.motorSpeeds, 0, 6)

        # Apply motor speeds
        self.leftMotor.setVelocity(self.motorSpeeds[0])
        self.rightMotor.setVelocity(self.motorSpeeds[1])
        self._setVelocity(self.motorSpeeds[0], self.motorSpeeds[1])

    def setup_rangefinders(self, n_rangefinders):
        # Sensor setup
@@ -65,12 +63,14 @@ def setup_motors(self):
        # Motor setup
        self.leftMotor = self.robot.getDevice('left wheel motor')
        self.rightMotor = self.robot.getDevice('right wheel motor')
        self._setVelocity(0.0, 0.0)
        self.motorSpeeds = [0.0, 0.0]

    def _setVelocity(self, v1, v2):
        self.leftMotor.setPosition(float('inf'))
        self.rightMotor.setPosition(float('inf'))
        self.leftMotor.setVelocity(0.0)
        self.rightMotor.setVelocity(0.0)

        self.motorSpeeds = [0.0, 0.0]
        self.leftMotor.setVelocity(v1)
        self.rightMotor.setVelocity(v2)


robot_controller = FindTargetRobot(8)
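Below the robot controller diff, a minimal standalone sketch of the action mapping this commit settles on: gas is rescaled from [-1, 1] to [0, 4] so the robot always drives forward, turning is rescaled to [-2, 2], and the differential-drive wheel speeds are clipped to [0, 6]. The function name, argument order, and the plain array return are illustrative only, not part of the controller code.

```python
import numpy as np

def map_action_to_wheel_speeds(turning, gas):
    """Illustrative mapping of the two normalized DDPG actions to wheel speeds."""
    # Gas: [-1, 1] -> [0, 4] so the robot keeps moving forward
    gas = np.clip((gas + 1) * 2, 0, 4.0)
    # Turning: [-1, 1] -> [-2, 2]
    turning = np.clip(turning * 2, -2, 2)
    # Differential drive: turning is added to one wheel and subtracted from the other
    left = gas + turning
    right = gas - turning
    # Final clip to [0, 6], as in the updated controller
    return np.clip([left, right], 0, 6)

print(map_action_to_wheel_speeds(0.0, 0.0))   # neutral gas, no turn   -> [2. 2.]
print(map_action_to_wheel_speeds(1.0, -1.0))  # full turn, minimum gas -> [2. 0.]
```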
28 changes: 28 additions & 0 deletions examples/find_and_avoid/controllers/supervisor/checkConvergence.py
@@ -0,0 +1,28 @@
import matplotlib.pyplot as plt
import numpy as np
import time
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

def Plot(text_path="./exports/", dest_path="./exports/"):
    fp=open(text_path+"Episode-score.txt", "r")
    lines = fp.read().splitlines()
    scores = list(map(float, lines))
    episode = list(range(1, 1+len(scores)))
    plt.figure()
    plt.title("Episode scores over episode")
    plt.plot(episode, scores, label='Raw data')
    SMA = moving_average(scores, 50)
    plt.plot(SMA, label='SMA50')
    plt.xlabel("episode")
    plt.ylabel("episode score")

    plt.legend()
    plt.savefig(dest_path+'trend.png')
    plt.close()

if __name__ == '__main__':
    while(1):

        Plot()
        time.sleep(60)
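For reference, a small sketch of what `moving_average` produces: `np.convolve` in `'valid'` mode returns `len(x) - w + 1` points, which is why the SMA50 curve in trend.png is slightly shorter than the raw score series. The toy scores below are illustrative only; when run as a script, checkConvergence.py re-reads exports/Episode-score.txt and regenerates exports/trend.png every 60 seconds.

```python
import numpy as np

def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

scores = np.arange(10, dtype=float)   # pretend episode scores 0.0 .. 9.0
sma = moving_average(scores, 3)

print(len(scores), len(sma))          # 10 8  ('valid' drops w - 1 points)
print(sma[:3])                        # [1. 2. 3.]  (each value averages 3 consecutive scores)
```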
209 changes: 130 additions & 79 deletions examples/find_and_avoid/controllers/supervisor/supervisor.py
@@ -6,7 +6,7 @@

import utilities as utils
from models.networks import DDPG

import os
OBSERVATION_SPACE = 10
ACTION_SPACE = 2

@@ -26,12 +26,21 @@ def __init__(self, robot, target):
        self.target_name = target
        self.robot = self.getFromDef(robot)
        self.target = self.getFromDef(target)
        self.findThreshold = 0.12
        self.findThreshold = 0.05
        self.steps = 0
        self.steps_threshold = 600
        self.steps_threshold = 500
        self.message = []
        self.should_done = False

        self.pre_distance = None
        '''
        Get other 2 intermediate targets when training the robot in small_world.wbt instead of small_world_easy.wbt.

        self.mid1 = self.getFromDef("mid1")
        self.mid2 = self.getFromDef("mid2")
        '''
        self.is_solved = False

    def get_default_observation(self):
        return [0 for i in range(OBSERVATION_SPACE)]

@@ -80,32 +89,42 @@ def get_reward(self, action):
            return 0

        rf_values = np.array(self.message[:8])

        reward = 0

        if self.steps > self.steps_threshold:
            return -10

        if utils.get_distance_from_target(self.robot,
                                          self.target) < self.findThreshold:
            return +10

        if np.abs(action[1]) > 1.5 or np.abs(action[0]) > 1.5:
            if self.steps > 10:
                self.should_done = True
            return -1

        if np.max(rf_values) > 500:
            if self.steps > 10:
                self.should_done = True
            return -1
        elif np.max(rf_values) > 200:
            return -0.5

        # if (distance != 0):
        #     reward = 0.1 * np.round((0.6 / distance), 1)

        # # (1) Take too many steps
        # if self.steps > self.steps_threshold:
        #     return -10
        #     reward -= (self.steps / self.steps_threshold)

        # # (2) Reward according to distance
        target_ = self.target

        if self.pre_distance == None:
            self.pre_distance = utils.get_distance_from_target(self.robot, target_)
        else:
            cur_distance = utils.get_distance_from_target(self.robot, target_)
            reward = self.pre_distance - cur_distance
            self.pre_distance = cur_distance

        # # (3) Find the target
        # if utils.get_distance_from_target(self.robot, self.target) < self.findThreshold:
        #     reward += 5

        # # (4) Action 1 (gas) or Action 0 (turning) should <= 1.5
        # if np.abs(action[1]) > 1.5 or np.abs(action[0]) > 1.5:
        #     if self.steps > 10:
        #         self.should_done = True
        #     return -1

        # # (5) Stop or punish the agent when the robot is getting too close to an obstacle
        # if np.max(rf_values) > 500:
        #     if self.steps > 10:
        #         self.should_done = True
        #     return -1
        # elif np.max(rf_values) > 200:
        #     return -0.5

        return reward

    def is_done(self):
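A minimal sketch of the distance-delta shaping the rewritten get_reward relies on: each step's reward is the decrease in distance to the target since the previous step, so approaching the target yields a positive reward and retreating yields a negative one. The class and method names below are illustrative, not the supervisor's API.

```python
class DistanceDeltaReward:
    """Toy version of the distance-based shaping: reward = previous distance - current distance."""

    def __init__(self):
        self.pre_distance = None            # reset to None at the start of each episode

    def step_reward(self, current_distance):
        if self.pre_distance is None:       # first step: only record the baseline
            self.pre_distance = current_distance
            return 0.0
        reward = self.pre_distance - current_distance   # > 0 when moving toward the target
        self.pre_distance = current_distance
        return reward

shaper = DistanceDeltaReward()
print(shaper.step_reward(1.0))     # 0.0    (first step, no baseline yet)
print(shaper.step_reward(0.75))    # 0.25   (moved closer)
print(shaper.step_reward(0.875))   # -0.125 (moved away)
```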
@@ -114,74 +133,106 @@ def is_done(self):

        if distance < self.findThreshold:
            print("======== + Solved + ========")
            self.is_solved = True
            return True

        if self.steps > self.steps_threshold or self.should_done:

            return True

        return False

    def reset(self):
        self.steps = 0
        self.should_done = False
        self.pre_distance = None
        self.is_solved = False

        return super().reset()

    def get_info(self):
        pass

supervisor_pre = FindTargetSupervisor('robot', 'target')
supervisor_env = KeyboardPrinter(supervisor_pre)
agent = DDPG(lr_actor=0.00025,
             lr_critic=0.00025,
             input_dims=[10],
             gamma=0.99,
             tau=0.001,
             env=supervisor_env,
             batch_size=256,
             layer1_size=400,
             layer2_size=300,
             n_actions=2,
             load_models=False,
             save_dir='./models/saved/ddpg/')

score_history = []

np.random.seed(0)

for i in range(1, 500):
    done = False
    score = 0
    obs = list(map(float, supervisor_env.reset()))

    first_iter = True
    if i % 250 == 0:
        print("================= TESTING =================")
        while not done:
            act = agent.choose_action_test(obs).tolist()
            new_state, _, done, _ = supervisor_env.step(act)
            obs = list(map(float, new_state))
def create_path(path):
    try:
        os.makedirs(path)
    except OSError:
        print ("Creation of the directory %s failed" % path)
    else:
        print("================= TRAINING =================")
        while not done:
            if (not first_iter):
                act = agent.choose_action_train(obs).tolist()
            else:
                first_iter = False
                act = [0, 0]

            new_state, reward, done, info = supervisor_env.step(act)
            agent.remember(obs, act, reward, new_state, int(done))
            agent.learn()
            score += reward

            obs = list(map(float, new_state))

    score_history.append(score)
    print("===== Episode", i, "score %.2f" % score,
          "100 game average %.2f" % np.mean(score_history[-100:]))

    # if i % 100 == 0:
    #     agent.save_models()
        print ("Successfully created the directory %s " % path)

if __name__ == '__main__':
    create_path("./models/saved/ddpg/")
    create_path("./exports/")

    supervisor_pre = FindTargetSupervisor('robot', 'target')
    supervisor_env = KeyboardPrinter(supervisor_pre)
    agent = DDPG(lr_actor=0.00025,
                 lr_critic=0.00025,
                 input_dims=[10],
                 gamma=0.99,
                 tau=0.001,
                 env=supervisor_env,
                 batch_size=256,
                 layer1_size=400,
                 layer2_size=300,
                 layer3_size=200,
                 n_actions=2,
                 load_models=False,
                 save_dir='./models/saved/ddpg/')
    # # Load from checkpoint
    # agent.load_models(lr_critic=0.00025, lr_actor=0.00025,
    #                   input_dims=[10],
    #                   layer1_size=400,
    #                   layer2_size=300,
    #                   layer3_size=200,
    #                   n_actions=2,
    #                   load_dir='./models/saved/ddpg/')
    score_history = []

    np.random.seed(0)
    N_episode = 600
    for i in range(N_episode+1):
        done = False
        score = 0
        obs = list(map(float, supervisor_env.reset()))

        first_iter = True

        if score_history == [] or np.mean(score_history[-50:])<0.5 or score_history[-1]<0.65:
            print("================= TRAINING =================")
            while not done:
                if (not first_iter):
                    act = agent.choose_action_train(obs).tolist()
                else:
                    first_iter = False
                    act = [0, 0]

                new_state, reward, done, info = supervisor_env.step(act)
                agent.remember(obs, act, reward, new_state, int(done))
                agent.learn()
                score += reward

                obs = list(map(float, new_state))
        else:
            print("================= TESTING =================")
            while not done:
                if (not first_iter):
                    act = agent.choose_action_test(obs).tolist()
                else:
                    first_iter = False
                    act = [0, 0]

                new_state, _, done, _ = supervisor_env.step(act)
                obs = list(map(float, new_state))


        score_history.append(score)
        fp = open("./exports/Episode-score.txt","a")
        fp.write(str(score)+'\n')
        fp.close()
        print("===== Episode", i, "score %.2f" % score,
              "50 game average %.2f" % np.mean(score_history[-50:]))

        if supervisor_pre.is_solved == True:
            agent.save_models()
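For clarity, a hedged restatement of the gating condition in the new main loop: an episode runs in training mode until both the 50-episode average and the most recent score clear fixed thresholds, after which greedy test episodes are run. The helper name and keyword defaults below are illustrative; the supervisor hard-codes the 0.5 and 0.65 thresholds inline.

```python
import numpy as np

def should_train(score_history, avg_window=50, avg_threshold=0.5, last_threshold=0.65):
    """Return True while the agent should keep training rather than run a test episode."""
    if not score_history:                                      # nothing learned yet
        return True
    if np.mean(score_history[-avg_window:]) < avg_threshold:   # recent average still too low
        return True
    if score_history[-1] < last_threshold:                     # last episode regressed
        return True
    return False

print(should_train([]))                  # True  - no scores recorded yet
print(should_train([0.7] * 60))          # False - looks converged, run a test episode
print(should_train([0.7] * 59 + [0.2]))  # True  - latest episode regressed, keep training
```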
Binary file added examples/find_and_avoid/doc/demo.gif
Binary file added examples/find_and_avoid/doc/trend.png