From 6398c00be550ff47a7c67deaf81602c8118afe24 Mon Sep 17 00:00:00 2001 From: Kunal Kumar Sahoo Date: Tue, 6 Aug 2024 12:19:20 +0530 Subject: [PATCH 1/2] Fixed runtime issues of RLLib --- experiments/ppo_4x4grid.py | 1 + experiments/sb3_grid4x4.py | 30 ++++++------------------------ 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/experiments/ppo_4x4grid.py b/experiments/ppo_4x4grid.py index 5e75608b..fa8cbcce 100755 --- a/experiments/ppo_4x4grid.py +++ b/experiments/ppo_4x4grid.py @@ -20,6 +20,7 @@ if __name__ == "__main__": + print(os.getcwd()) ray.init() env_name = "4x4grid" diff --git a/experiments/sb3_grid4x4.py b/experiments/sb3_grid4x4.py index bb646fcc..a667db76 100644 --- a/experiments/sb3_grid4x4.py +++ b/experiments/sb3_grid4x4.py @@ -5,7 +5,6 @@ import numpy as np import supersuit as ss import traci -from pyvirtualdisplay.smartdisplay import SmartDisplay from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.evaluation import evaluate_policy @@ -16,9 +15,8 @@ if __name__ == "__main__": - RESOLUTION = (3200, 1800) - env = sumo_rl.grid4x4(use_gui=True, out_csv_name="outputs/grid4x4/ppo_test", virtual_display=RESOLUTION) + env = sumo_rl.grid4x4(use_gui=False, out_csv_name="outputs/grid4x4/ppo_train") max_time = env.unwrapped.env.sim_max_time delta_time = env.unwrapped.env.delta_time @@ -26,7 +24,7 @@ print("Environment created") env = ss.pettingzoo_env_to_vec_env_v1(env) - env = ss.concat_vec_envs_v1(env, 2, num_cpus=1, base_class="stable_baselines3") + env = ss.concat_vec_envs_v1(env, 2, num_cpus=16, base_class="stable_baselines3") env = VecMonitor(env) model = PPO( @@ -34,16 +32,9 @@ env, verbose=3, gamma=0.95, - n_steps=256, - ent_coef=0.0905168, learning_rate=0.00062211, - vf_coef=0.042202, - max_grad_norm=0.9, - gae_lambda=0.99, - n_epochs=5, - clip_range=0.3, batch_size=256, - tensorboard_log="./logs/grid4x4/ppo_test", + tensorboard_log="./logs/grid4x4/ppo_train", ) print("Starting training") @@ -55,28 +46,19 @@ print(mean_reward) print(std_reward) + model.save('ppo_output') + # Maximum number of steps before reset, +1 because I'm scared of OBOE print("Starting rendering") num_steps = (max_time // delta_time) + 1 obs = env.reset() - if os.path.exists("temp"): - shutil.rmtree("temp") - - os.mkdir("temp") - # img = disp.grab() - # img.save(f"temp/img0.jpg") - img = env.render() for t in trange(num_steps): actions, _ = model.predict(obs, state=None, deterministic=False) obs, reward, done, info = env.step(actions) - img = env.render() - img.save(f"temp/img{t}.jpg") - - subprocess.run(["ffmpeg", "-y", "-framerate", "5", "-i", "temp/img%d.jpg", "output.mp4"]) + env.render() print("All done, cleaning up") - shutil.rmtree("temp") env.close() From b4de5473490b6715d9ff901e0ea755fcc5495687 Mon Sep 17 00:00:00 2001 From: Kunal Kumar Sahoo Date: Tue, 6 Aug 2024 12:22:36 +0530 Subject: [PATCH 2/2] Added an extra attribute to ignore route errors --- sumo_rl/environment/env.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sumo_rl/environment/env.py b/sumo_rl/environment/env.py index 46fd1dbe..f8a42c85 100755 --- a/sumo_rl/environment/env.py +++ b/sumo_rl/environment/env.py @@ -104,6 +104,7 @@ def __init__( sumo_warnings: bool = True, additional_sumo_cmd: Optional[str] = None, render_mode: Optional[str] = None, + ignore_route_errors: Optional[bool] = True ) -> None: """Initialize the environment.""" assert render_mode is None or render_mode in self.metadata["render_modes"], "Invalid render mode." @@ -138,6 +139,7 @@ def __init__( self.additional_sumo_cmd = additional_sumo_cmd self.add_system_info = add_system_info self.add_per_agent_info = add_per_agent_info + self.ignore_route_errors = ignore_route_errors self.label = str(SumoEnvironment.CONNECTION_LABEL) SumoEnvironment.CONNECTION_LABEL += 1 self.sumo = None @@ -227,6 +229,8 @@ def _start_simulation(self): self.disp = SmartDisplay(size=self.virtual_display) self.disp.start() print("Virtual display started.") + if self.ignore_route_errors: + sumo_cmd.append("--ignore-route-errors") if LIBSUMO: traci.start(sumo_cmd)