from cfg.env.env to cfg.env.wrapper + update configs.md
belerico committed Sep 18, 2023
1 parent ee1607a commit 78b1edf
Showing 19 changed files with 89 additions and 34 deletions.
53 changes: 46 additions & 7 deletions howto/configs.md
@@ -97,10 +97,10 @@ defaults:
- buffer: default.yaml
- checkpoint: default.yaml
- env: default.yaml
- exp: ???
- fabric: default.yaml
- hydra: default.yaml
- metric: default.yaml
- hydra: default.yaml
- exp: ???

num_threads: 1
total_steps: ???
@@ -124,10 +124,6 @@ cnn_keys:
mlp_keys:
encoder: []
decoder: ${mlp_keys.encoder}

# Buffer
buffer:
memmap: True
```
### Algorithms
@@ -317,7 +313,50 @@ The environment configs can be found under the `sheeprl/configs/env` folders. Sh
* [MineRL (v0.4.4)](https://minerl.readthedocs.io/en/v0.4.4/)
* [MineDojo (v0.1.0)](https://docs.minedojo.org/)

In this way one can easily try out the overall framework with standard RL environments.
In this way one can easily try out the overall framework with standard RL environments. The `default.yaml` config contains all the environment parameters that are (possibly) shared by every environment:

```yaml
id: ???
num_envs: 4
frame_stack: 1
sync_env: False
screen_size: 64
action_repeat: 1
grayscale: False
clip_rewards: False
capture_video: True
frame_stack_dilation: 1
max_episode_steps: null
reward_as_observation: False
```
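Hydra composes the `defaults` list first and then applies the keys of the concrete file on top, so a custom env config only has to restate what it changes. A minimal sketch of that override semantics, using plain Python dicts as stand-ins for the real OmegaConf objects:

```python
# Shared defaults, mirroring `sheeprl/configs/env/default.yaml` above.
default_env = {
    "id": "???",
    "num_envs": 4,
    "frame_stack": 1,
    "sync_env": False,
    "screen_size": 64,
    "action_repeat": 1,
    "grayscale": False,
    "clip_rewards": False,
    "capture_video": True,
    "frame_stack_dilation": 1,
    "max_episode_steps": None,
    "reward_as_observation": False,
}

# Keys an atari-like config declares after `- default` in its defaults list.
atari_overrides = {
    "id": "PongNoFrameskip-v4",
    "action_repeat": 4,
    "max_episode_steps": 27000,
}

# Later entries in the composition win, so the concrete file's keys replace
# the defaults while everything else is inherited unchanged.
atari_env = {**default_env, **atari_overrides}
print(atari_env["action_repeat"])  # 4 (overridden)
print(atari_env["num_envs"])       # 4 (inherited from default)
```

Interpolations such as `${env.id}` are resolved later by OmegaConf and are not modeled in this sketch.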

Every custom environment must then "inherit" from this default config, override the relevant parameters, and define the `wrapper` field, which is the one directly instantiated at runtime. The `wrapper` field must define all the specific parameters to be passed to the `_target_` function when the wrapper is instantiated. Take for example the `atari.yaml` config:

```yaml
defaults:
- default
- _self_
# Override from `default` config
action_repeat: 4
id: PongNoFrameskip-v4
max_episode_steps: 27000

# Wrapper to be instantiated
wrapper:
_target_: gymnasium.wrappers.AtariPreprocessing # https://gymnasium.farama.org/api/wrappers/misc_wrappers/#gymnasium.wrappers.AtariPreprocessing
env:
_target_: gymnasium.make
id: ${env.id}
render_mode: rgb_array
noop_max: 30
terminal_on_life_loss: False
frame_skip: ${env.action_repeat}
screen_size: ${env.screen_size}
grayscale_obs: ${env.grayscale}
scale_obs: False
grayscale_newaxis: True
```
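At runtime the `wrapper` node is handed to `hydra.utils.instantiate`, which resolves `_target_`, recursively instantiates nested nodes (here the inner `env`), and calls the target with the remaining keys as keyword arguments. A toy sketch of that mechanism, with a local registry and stand-in classes instead of Hydra's dotted-path imports and the real gymnasium objects:

```python
# Local registry standing in for Hydra's dotted-path import of `_target_`.
REGISTRY = {}

def register(cls):
    REGISTRY[cls.__name__] = cls
    return cls

@register
class Make:  # stand-in for `gymnasium.make`
    def __init__(self, id, render_mode=None):
        self.id, self.render_mode = id, render_mode

@register
class AtariPreprocessing:  # stand-in for the real gymnasium wrapper
    def __init__(self, env, noop_max=30, frame_skip=4, screen_size=84,
                 grayscale_obs=True, scale_obs=False):
        self.env, self.frame_skip, self.screen_size = env, frame_skip, screen_size

def instantiate(node):
    # Recursively build nested `_target_` nodes, then call the target with
    # the remaining keys as kwargs -- the core of `hydra.utils.instantiate`.
    if isinstance(node, dict) and "_target_" in node:
        kwargs = {k: instantiate(v) for k, v in node.items() if k != "_target_"}
        return REGISTRY[node["_target_"]](**kwargs)
    return node

# Dict form of the `wrapper` node above, with interpolations already resolved.
wrapper_cfg = {
    "_target_": "AtariPreprocessing",
    "env": {"_target_": "Make", "id": "PongNoFrameskip-v4", "render_mode": "rgb_array"},
    "frame_skip": 4,    # would come from ${env.action_repeat}
    "screen_size": 64,  # would come from ${env.screen_size}
}

env = instantiate(wrapper_cfg)
print(type(env.env).__name__)  # Make
```

The nested `env` node is instantiated first and passed as the `env` kwarg, which is exactly why the inner `gymnasium.make` config can live inside the wrapper config.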
> **Warning**
>
2 changes: 1 addition & 1 deletion sheeprl/algos/ppo/ppo.py
@@ -105,7 +105,7 @@ def train(

@register_algorithm()
def main(fabric: Fabric, cfg: DictConfig):
if "minedojo" in cfg.env.env._target_.lower():
if "minedojo" in cfg.env.wrapper._target_.lower():
raise ValueError(
"MineDojo is not currently supported by PPO agent, since it does not take "
"into consideration the action masks provided by the environment, but needed "
2 changes: 1 addition & 1 deletion sheeprl/algos/ppo/ppo_decoupled.py
@@ -544,7 +544,7 @@ def main(fabric: Fabric, cfg: DictConfig):
"`python sheeprl.py exp=ppo_decoupled fabric.devices=2 ...`"
)

if "minedojo" in cfg.env.env._target_.lower():
if "minedojo" in cfg.env.wrapper._target_.lower():
raise ValueError(
"MineDojo is not currently supported by PPO agent, since it does not take "
"into consideration the action masks provided by the environment, but needed "
2 changes: 1 addition & 1 deletion sheeprl/algos/sac/sac_decoupled.py
@@ -446,7 +446,7 @@ def main(fabric: Fabric, cfg: DictConfig):
"`python sheeprl.py exp=sac_decoupled fabric.devices=2 ...`"
)

if "minedojo" in cfg.env.env._target_.lower():
if "minedojo" in cfg.env.wrapper._target_.lower():
raise ValueError(
"MineDojo is not currently supported by PPO agent, since it does not take "
"into consideration the action masks provided by the environment, but needed "
2 changes: 1 addition & 1 deletion sheeprl/algos/sac_ae/sac_ae.py
@@ -128,7 +128,7 @@ def train(

@register_algorithm()
def main(fabric: Fabric, cfg: DictConfig):
if "minedojo" in cfg.env.env._target_.lower():
if "minedojo" in cfg.env.wrapper._target_.lower():
raise ValueError(
"MineDojo is not currently supported by SAC-AE agent, since it does not take "
"into consideration the action masks provided by the environment, but needed "
9 changes: 6 additions & 3 deletions sheeprl/configs/env/atari.yaml
@@ -2,11 +2,14 @@ defaults:
- default
- _self_

id: PongNoFrameskip-v4
# Override from `default` config
action_repeat: 4
id: PongNoFrameskip-v4
max_episode_steps: 27000
env:
_target_: gymnasium.wrappers.AtariPreprocessing

# Wrapper to be instantiated
wrapper:
_target_: gymnasium.wrappers.AtariPreprocessing # https://gymnasium.farama.org/api/wrappers/misc_wrappers/#gymnasium.wrappers.AtariPreprocessing
env:
_target_: gymnasium.make
id: ${env.id}
3 changes: 2 additions & 1 deletion sheeprl/configs/env/default.yaml
@@ -1,6 +1,7 @@
id: ???
num_envs: 4
sync_env: False
frame_stack: 1
sync_env: False
screen_size: 64
action_repeat: 1
grayscale: False
6 changes: 4 additions & 2 deletions sheeprl/configs/env/diambra.yaml
@@ -2,12 +2,14 @@ defaults:
- default
- _self_

# Override from `default` config
id: doapp
action_repeat: 1
frame_stack: 4
sync_env: True
action_repeat: 1

env:
# Wrapper to be instantiated
wrapper:
_target_: sheeprl.envs.diambra.DiambraWrapper
id: ${env.id}
action_space: discrete
4 changes: 3 additions & 1 deletion sheeprl/configs/env/dmc.yaml
@@ -2,11 +2,13 @@ defaults:
- default
- _self_

# Override from `default` config
id: walker_walk
action_repeat: 1
max_episode_steps: 1000

env:
# Wrapper to be instantiated
wrapper:
_target_: sheeprl.envs.dmc.DMCWrapper
id: ${env.id}
width: ${env.screen_size}
4 changes: 3 additions & 1 deletion sheeprl/configs/env/dummy.yaml
@@ -2,8 +2,10 @@ defaults:
- default
- _self_

# Override from `default` config
id: discrete_dummy

env:
# Wrapper to be instantiated
wrapper:
_target_: sheeprl.utils.env.get_dummy_env
id: ${env.id}
4 changes: 3 additions & 1 deletion sheeprl/configs/env/gym.yaml
@@ -2,10 +2,12 @@ defaults:
- default
- _self_

# Override from `default` config
id: CartPole-v1
mask_velocities: False

env:
# Wrapper to be instantiated
wrapper:
_target_: gymnasium.make
id: ${env.id}
render_mode: rgb_array
6 changes: 3 additions & 3 deletions sheeprl/configs/env/minecraft.yaml
@@ -2,8 +2,8 @@ defaults:
- default
- _self_

min_pitch: -60
max_pitch: 60
break_speed_multiplier: 100
min_pitch: -60
sticky_jump: 10
sticky_attack: 30
sticky_jump: 10
break_speed_multiplier: 100
4 changes: 3 additions & 1 deletion sheeprl/configs/env/minedojo.yaml
@@ -2,10 +2,12 @@ defaults:
- minecraft
- _self_

# Override from `minecraft` config
id: open-ended
action_repeat: 1

env:
# Wrapper to be instantiated
wrapper:
_target_: sheeprl.envs.minedojo.MineDojoWrapper
id: ${env.id}
height: ${env.screen_size}
4 changes: 3 additions & 1 deletion sheeprl/configs/env/minerl.yaml
@@ -2,10 +2,12 @@ defaults:
- minecraft
- _self_

# Override from `minecraft` config
id: custom_navigate
action_repeat: 1

env:
# Wrapper to be instantiated
wrapper:
_target_: sheeprl.envs.minerl.MineRLWrapper
id: ${env.id}
height: ${env.screen_size}
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/dreamer_v3_L_doapp.yaml
@@ -14,7 +14,7 @@ env:
id: doapp
num_envs: 8
frame_stack: 1
env:
wrapper:
diambra_settings:
characters: Kasumi

@@ -18,7 +18,7 @@ env:
frame_stack: 1
screen_size: 128
reward_as_observation: True
env:
wrapper:
attack_but_combination: True
diambra_settings:
characters: Kasumi
@@ -81,4 +81,4 @@ algo:

# Metric
metric:
log_every: 10000
log_every: 10000
2 changes: 1 addition & 1 deletion sheeprl/configs/exp/dreamer_v3_L_navigate.yaml
@@ -14,7 +14,7 @@ env:
num_envs: 4
id: custom_navigate
reward_as_observation: True
env:
wrapper:
multihot_inventory: False

# Checkpoint
4 changes: 2 additions & 2 deletions sheeprl/configs/exp/dreamer_v3_dmc_walker_walk.yaml
@@ -20,7 +20,7 @@ env:
num_envs: 1
max_episode_steps: 1000
id: walker_walk
env:
wrapper:
from_vectors: True
from_pixels: True

@@ -52,4 +52,4 @@ algo:

# Metric
metric:
log_every: 5000
log_every: 5000
6 changes: 3 additions & 3 deletions sheeprl/utils/env.py
@@ -81,11 +81,11 @@ def thunk() -> gym.Env:
env_spec = ""

instantiate_kwargs = {}
if "seed" in cfg.env.env:
if "seed" in cfg.env.wrapper:
instantiate_kwargs["seed"] = seed
if "rank" in cfg.env.env:
if "rank" in cfg.env.wrapper:
instantiate_kwargs["rank"] = rank + vector_env_idx
env = hydra.utils.instantiate(cfg.env.env, **instantiate_kwargs)
env = hydra.utils.instantiate(cfg.env.wrapper, **instantiate_kwargs)

# action repeat
if (
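The hunk above forwards `seed` and `rank` to the wrapper's `_target_` only when the wrapper config actually declares those keys, so targets that do not accept these kwargs keep working. A small sketch of that pattern, with plain dicts and a toy `instantiate` standing in for OmegaConf and Hydra:

```python
def instantiate(cfg, **extra):
    # Toy stand-in for `hydra.utils.instantiate`: strip `_target_` and
    # return the merged kwargs instead of calling the real target.
    kwargs = {k: v for k, v in cfg.items() if k != "_target_"}
    kwargs.update(extra)
    return kwargs

def make_env_kwargs(wrapper_cfg, seed, rank, vector_env_idx):
    # Mirror of the logic in the hunk: inject only the keys the config declares.
    instantiate_kwargs = {}
    if "seed" in wrapper_cfg:
        instantiate_kwargs["seed"] = seed
    if "rank" in wrapper_cfg:
        instantiate_kwargs["rank"] = rank + vector_env_idx
    return instantiate(wrapper_cfg, **instantiate_kwargs)

# A wrapper that declares `seed` gets it injected; one that doesn't is untouched.
with_seed = make_env_kwargs({"_target_": "W", "seed": None}, seed=42, rank=1, vector_env_idx=2)
without = make_env_kwargs({"_target_": "W"}, seed=42, rank=1, vector_env_idx=2)
print(with_seed["seed"], "seed" in without)  # 42 False
```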
