main.py
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import gym
from keras.layers import Input
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from models import actor as actor_builder, critic as critic_builder  # aliased to avoid shadowing the module names below
############
# Setup
############
ENV_NAME = 'robot_env:robot-env-path-v0'
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.shape[0]
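# Optional sanity check of the environment's spaces before building the
# models; assumes a continuous (Box) action space, as DDPG requires.
print('observation shape:', env.observation_space.shape)
print('action shape:', env.action_space.shape,
      'bounds:', env.action_space.low, env.action_space.high)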
################
# Parameters
################
# Training parameters
batch_size = 64
lr = 1e-3
max_episode_steps = 100
limit = 100000
nb_steps = 30000  # total training steps, passed to agent.fit below
# Agent parameters
num_steps_warmup_critic = 100
num_steps_warmup_actor = 100
gamma = 0.99
###############
# Models
###############
# Input tensors for the critic. The leading dimension of the observation
# input must match the memory window_length configured below.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(batch_size,) + env.observation_space.shape, name='observation_input')
num_feat = 32  # feature size; defined here but not used in this script
actor = actor_builder.build_actor(
    batch_size=batch_size,
    nb_actions=nb_actions,
    env=env)
critic = critic_builder.build_critic(
    action_input=action_input,
    observation_input=observation_input)
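# Assuming build_actor/build_critic return standard Keras models (which
# DDPGAgent requires), their architectures can be inspected with summary();
# commented out here to keep the training log short:
# actor.summary()
# critic.summary()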
# Optimizer
opt = Adam(lr=lr, clipnorm=1.0)
# opt = Adam(lr=lr, clipnorm=0.01)
# Build and compile the agent
memory = SequentialMemory(
    limit=limit,
    window_length=batch_size)  # must equal the leading dim of observation_input above
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=.15,
    mu=0.,
    sigma=.3)
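# The OU process supplies temporally correlated exploration noise that the
# agent adds to the actor's deterministic output during training. A purely
# illustrative peek at one sample (shape (nb_actions,)); safe to remove:
# print('example OU noise sample:', random_process.sample())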
agent = DDPGAgent(
    nb_actions=nb_actions,
    actor=actor,
    critic=critic,
    critic_action_input=action_input,
    memory=memory,
    nb_steps_warmup_critic=num_steps_warmup_critic,
    nb_steps_warmup_actor=num_steps_warmup_actor,
    random_process=random_process,
    gamma=gamma,
    target_model_update=1e-2)
agent.compile(opt, metrics=['mae'])
history = agent.fit(
    env,
    nb_steps=nb_steps,
    visualize=True,
    verbose=0,
    nb_max_episode_steps=max_episode_steps)
print(history)
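# fit() returns a Keras History object; with keras-rl the per-episode metrics
# are usually exposed via history.history (key names such as 'episode_reward'
# may vary by version), so summarise defensively:
episode_rewards = history.history.get('episode_reward', [])
if episode_rewards:
    print('episodes: {}, mean reward: {:.2f}'.format(
        len(episode_rewards), float(np.mean(episode_rewards))))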
# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
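# The saved weights can later be restored into an identically constructed
# agent for evaluation without retraining, via the matching keras-rl call:
# agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))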
#%%
# Finally, evaluate our algorithm for 5 episodes.
agent.test(
    env,
    nb_episodes=5,
    visualize=True,
    nb_max_episode_steps=max_episode_steps)
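# Release any rendering resources once evaluation is finished (standard gym API).
env.close()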