# z-experiments.py
import argparse
import glob
import os
import re
import time

# for making a gif
import imageio
import numpy as np
import torch
from matplotlib import pyplot as plt
from multiagent import scenarios
from multiagent.environment import MultiAgentEnv

from MADDPG import MADDPG
# Simple experiments: vary the arguments of simple_tag and see what happens.
# TODO: add a method that iteratively runs experiments with different params.
# TODO: create a dictionary of experiment args to call it with.
def default_args(argv=None):
    """Parse the evaluation settings, falling back to the simple_tag defaults.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read the process command line, which is
            what this function always did before the parameter existed.

    Returns:
        An ``argparse.Namespace`` with ``env``, ``folder``,
        ``episode_length`` and ``episode_num`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env', type=str, default="simple_tag_4g_1b", help='name of the environment',
        # The default has to be listed as a choice as well: argparse does not
        # check defaults against `choices`, so it used to be accepted when
        # omitted but rejected when passed explicitly on the command line.
        choices=['simple_adversary', 'simple_crypto', 'simple_push', 'simple_reference',
                 'simple_speaker_listener', 'simple_spread', 'simple_tag',
                 'simple_world_comm', 'simple_tag_colab', 'simple_tag_4g_1b'],
    )
    parser.add_argument('--folder', type=str, default='2', help='name of the folder where model is saved')
    parser.add_argument('--episode-length', type=int, default=50, help='steps per episode')
    parser.add_argument('--episode-num', type=int, default=30, help='total number of episode')
    return parser.parse_args(argv)
def run_experiment():
    """Placeholder for running experiments over a list of params/args.

    Nothing is implemented yet: the call has no effect and returns ``None``.
    """
    # TODO: list params/ args
    return None
def _natural_sort_key(path):
    """Return a sort key that compares the digit runs in a file name as numbers."""
    parts = re.split(r'(\d+)', os.path.basename(path))
    # With a capturing group, re.split alternates text, digits, text, ... so
    # every odd index holds a digit run; keys therefore compare like with like.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


def create_gif(input_folder, output_gif):
    """Combine every PNG in ``input_folder`` into one animated GIF.

    Frames are ordered by the numbers embedded in their file names, so
    ``episode_0_step_2.png`` comes before ``episode_0_step_10.png`` (a plain
    string sort would put step 10 first and scramble the animation).

    Args:
        input_folder: Directory that holds the ``*.png`` frames.
        output_gif: Path of the GIF file to write.

    Raises:
        FileNotFoundError: If ``input_folder`` contains no PNG files.
    """
    # Gather all frames, in numeric (not lexicographic) order
    frame_files = sorted(glob.glob(os.path.join(input_folder, '*.png')),
                         key=_natural_sort_key)
    if not frame_files:
        # Fail with a clear message before handing an empty list to imageio.
        raise FileNotFoundError(f'no PNG frames found in {input_folder!r}')

    # Read frames into a list
    frames = [imageio.imread(frame_file) for frame_file in frame_files]

    # Save frames as a GIF
    imageio.mimsave(output_gif, frames)
def evaluate_model(args=None, save_video=True):
    """Roll out a saved MADDPG model and plot each agent's reward per episode.

    Actor weights are loaded from ``results/<args.env>/<args.folder>/model.pt``.
    The model is then run for ``args.episode_num`` episodes of
    ``args.episode_length`` steps and a reward plot is saved into the model
    directory. With ``save_video`` set, every step is also written as a PNG
    under ``<model_dir>/frames`` and the frames are combined into
    ``<model_dir>/animation.gif``.

    Args:
        args: Namespace with ``env``, ``folder``, ``episode_length`` and
            ``episode_num`` attributes. ``None`` means use ``default_args()``.
        save_video: When true, save frames and build the GIF; when false,
            render each step without saving anything.

    Raises:
        FileNotFoundError: If the model directory does not exist.
    """
    if args is None:
        args = default_args()

    # create env (once; this setup used to be executed twice in a row)
    scenario = scenarios.load(f'{args.env}.py').Scenario()
    world = scenario.make_world()
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)

    # get dimension info about observation and action
    obs_dim_list = [obs_space.shape[0] for obs_space in env.observation_space]  # Box
    act_dim_list = [act_space.n for act_space in env.action_space]  # Discrete

    # NOTE(review): the three zeros are presumably training-only settings that
    # evaluation does not need -- confirm against the MADDPG constructor.
    maddpg = MADDPG(obs_dim_list, act_dim_list, 0, 0, 0)

    model_dir = os.path.join('results', args.env, args.folder)
    if not os.path.exists(model_dir):
        # An explicit raise survives `python -O`, unlike the former assert.
        raise FileNotFoundError(f'model directory {model_dir!r} does not exist')

    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # that come from a trusted source.
    data = torch.load(os.path.join(model_dir, 'model.pt'))
    for agent, actor_parameter in zip(maddpg.agents, data):
        agent.actor.load_state_dict(actor_parameter)
    print(f'MADDPG load model.pt from {model_dir}')

    frame_dir = os.path.join(model_dir, "frames")
    # Create directories if they don't exist
    os.makedirs(frame_dir, exist_ok=True)

    total_reward = np.zeros((args.episode_num, env.n))  # reward of each episode
    for episode in range(args.episode_num):
        obs = env.reset()
        # record reward of each agent in this episode
        episode_reward = np.zeros((args.episode_length, env.n))
        for step in range(args.episode_length):  # interact with the env for an episode
            actions = maddpg.select_action(obs)
            next_obs, rewards, dones, infos = env.step(actions)
            episode_reward[step] = rewards

            if save_video:
                frame = env.render(mode='rgb_array')
                # NOTE(review): some MultiAgentEnv versions appear to return
                # one frame per viewer as a list; keep the first so imsave
                # gets a single image -- confirm against the env in use.
                if isinstance(frame, (list, tuple)):
                    frame = frame[0]
                name = f'episode_{episode}_step_{step}.png'
                plt.imsave(os.path.join(frame_dir, name), frame)
            else:
                env.render()
                # Slow the loop down only for on-screen viewing.
                time.sleep(0.02)
            obs = next_obs

        # episode finishes
        # calculate cumulative reward of each agent in this episode
        cumulative_reward = episode_reward.sum(axis=0)
        total_reward[episode] = cumulative_reward
        print(f'episode {episode + 1}: cumulative reward: {cumulative_reward}')

    # Create gif once, and only when this run actually saved frames
    if save_video:
        create_gif(frame_dir, os.path.join(model_dir, "animation.gif"))

    # all episodes performed, evaluate finishes
    fig, ax = plt.subplots()
    x = range(1, args.episode_num + 1)
    for agent in range(env.n):
        ax.plot(x, total_reward[:, agent], label=agent)
    ax.legend()
    ax.set_xlabel('episode')
    ax.set_ylabel('reward')
    title = f'evaluating result of maddpg solve {args.env}'
    ax.set_title(title)
    fig.savefig(os.path.join(model_dir, title))
    # Release the figure so repeated evaluations do not accumulate open figures.
    plt.close(fig)
if __name__ == "__main__":
    # Rebuild the GIF from frames already saved for the selected run. The path
    # is derived from the parsed args (previously parsed but ignored); with the
    # defaults it is still results/simple_tag_4g_1b/2.
    args = default_args()
    model_dir = os.path.join("results", args.env, args.folder)
    create_gif(os.path.join(model_dir, "frames"), os.path.join(model_dir, "animation.gif"))