[Envs] Add MPE tasks

Signed-off-by: Matteo Bettini <[email protected]>
facebookresearch · Oct 6, 2023 · e764b02 · e764b02
1 parent 0b08681
commit e764b02
Show file tree

Hide file tree

Showing 25 changed files with 331 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -205,13 +205,12 @@ determine the training strategy. Here is a table with the currently implemented
 challenge to solve.
 They differ based on many aspects, here is a table with the current environments in BenchMARL
 
-| Enviromnent | Tasks                                 | Cooperation               | Global state | Reward function               | 
-|-------------|---------------------------------------|---------------------------|--------------|-------------------------------|
-| [VMAS](https://github.com/proroklab/VectorizedMultiAgentSimulator) | [TBC](benchmarl/conf/task/vmas)       | Cooperative + Competitive | No           | Shared + Independent + Global |  
-| [SMAC](https://github.com/oxwhirl/smac)   | [TBC](benchmarl/conf/task/smacv2)     | Cooperative               | Yes          | Global                        |  
-| [SMACv2](https://github.com/oxwhirl/smacv2) | [TBC](benchmarl/conf/task/smacv2)     | Cooperative               | Yes          | Global                        |  
-| [MPE](https://github.com/openai/multiagent-particle-envs)     | [TBC](benchmarl/conf/task/pettingzoo) | Cooperative + Competitive | Yes          | Shared + Independent          |   
-| [SISL](https://github.com/sisl/MADRL)    | [TBC](benchmarl/conf/task/pettingzoo)       | Cooperative               | No           | Shared                        |  
+| Environment | Tasks                               | Cooperation               | Global state | Reward function               | 
+|-------------|-------------------------------------|---------------------------|--------------|-------------------------------|
+| [VMAS](https://github.com/proroklab/VectorizedMultiAgentSimulator) | [5](benchmarl/conf/task/vmas)       | Cooperative + Competitive | No           | Shared + Independent + Global |  
+| [SMACv2](https://github.com/oxwhirl/smacv2) | [15](benchmarl/conf/task/smacv2)    | Cooperative               | Yes          | Global                        |  
+| [MPE](https://github.com/openai/multiagent-particle-envs)     | [8](benchmarl/conf/task/pettingzoo) | Cooperative + Competitive | Yes          | Shared + Independent          |   
+| [SISL](https://github.com/sisl/MADRL)    | [3](benchmarl/conf/task/pettingzoo) | Cooperative               | No           | Shared                        |  
 
 > [!NOTE]  
 > BenchMARL uses the [TorchRL MARL API](https://github.com/pytorch/rl/issues/1463) for grouping agents.

diff --git a/benchmarl/conf/task/pettingzoo/multiwalker.yaml b/benchmarl/conf/task/pettingzoo/multiwalker.yaml
@@ -3,6 +3,24 @@ defaults:
   - pettingzoo_multiwalker_config
 
 task: "multiwalker_v9"
+# number of bipedal walker agents in environment
 n_walkers: 3
+# whether reward is distributed among all agents or allocated individually
 shared_reward: False
 max_cycles: 500
+# noise applied to neighbors and package positional observations
+position_noise: 0.001
+# noise applied to neighbors and package rotational observations
+angle_noise: 0.001
+# reward received is forward_reward * change in position of the package
+forward_reward: 1.0
+# reward applied when an agent falls
+fall_reward: -10
+# reward applied to each walker if they fail to carry the package to the right edge of the terrain
+terminate_reward: -100
+# If True (default), a single walker falling causes all agents to be done, and they all receive an additional terminate_reward. If False, then only the fallen agent(s) receive fall_reward, and the rest of the agents are not done i.e. the environment continues.
+terminate_on_fall: true
+# Remove a walker when it falls (only works when terminate_on_fall is False)
+remove_on_fall: true
+# length of terrain in number of steps
+terrain_length: 200
diff --git a/benchmarl/conf/task/pettingzoo/pursuit.yaml b/benchmarl/conf/task/pettingzoo/pursuit.yaml
@@ -0,0 +1,32 @@
+defaults:
+  - _self_
+  - pettingzoo_pursuit_config
+
+task: "pursuit_v4"
+# Size of environment world space
+x_size: 16
+y_size: 16
+# Whether the rewards should be distributed among all agents
+shared_reward: true
+# Number of evaders
+n_evaders: 30
+# Number of pursuers
+n_pursuers: 8
+# Size of the box around the agent that the agent observes.
+obs_range: 7
+# Number of pursuers required around an evader for it to be considered caught
+n_catch: 2
+# Toggles if evaders can move or not
+freeze_evaders: false
+# Reward for ‘tagging’, or being single evader.
+tag_reward: 0.01
+# Reward added when a pursuer or pursuers catch an evader
+catch_reward: 5.0
+# Reward added to each agent at every step
+urgency_reward: -0.1
+# Toggles whether evader is removed when surrounded, or when n_catch pursuers are on top of evader
+surround: true
+# Size of box (from center, in proportional units) which agents can randomly spawn into the environment world.
+# Default is 1.0, which means they can spawn anywhere on the map. A value of 0 means all agents spawn in the center.
+constraint_window: 1.0
+max_cycles: 500
diff --git a/benchmarl/conf/task/pettingzoo/simple_adversary.yaml b/benchmarl/conf/task/pettingzoo/simple_adversary.yaml
@@ -0,0 +1,7 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_adversary_config
+
+task: "simple_adversary_v3"
+N: 2
+max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/simple_crypto.yaml b/benchmarl/conf/task/pettingzoo/simple_crypto.yaml
@@ -0,0 +1,6 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_crypto_config
+
+task: "simple_crypto_v3"
+max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/simple_push.yaml b/benchmarl/conf/task/pettingzoo/simple_push.yaml
@@ -0,0 +1,6 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_push_config
+
+task: "simple_push_v3"
+max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/simple_reference.yaml b/benchmarl/conf/task/pettingzoo/simple_reference.yaml
@@ -0,0 +1,7 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_reference_config
+
+task: "simple_reference_v3"
+max_cycles: 100
+local_ratio: 0.5
diff --git a/benchmarl/conf/task/pettingzoo/simple_speaker_listener.yaml b/benchmarl/conf/task/pettingzoo/simple_speaker_listener.yaml
@@ -0,0 +1,6 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_speaker_listener_config
+
+task: "simple_speaker_listener_v4"
+max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/simple_spread.yaml b/benchmarl/conf/task/pettingzoo/simple_spread.yaml
@@ -0,0 +1,8 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_spread_config
+
+task: "simple_spread_v3"
+max_cycles: 100
+N: 3
+local_ratio: 0.5
diff --git a/benchmarl/conf/task/pettingzoo/simple_tag.yaml b/benchmarl/conf/task/pettingzoo/simple_tag.yaml
@@ -3,7 +3,7 @@ defaults:
   - pettingzoo_simple_tag_config
 
 task: "simple_tag_v3"
-num_good: 1
+num_good: 2
 num_adversaries: 3
 num_obstacles: 2
 max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/simple_world_comm.yaml b/benchmarl/conf/task/pettingzoo/simple_world_comm.yaml
@@ -0,0 +1,11 @@
+defaults:
+  - _self_
+  - pettingzoo_simple_world_comm_config
+
+task: "simple_world_comm_v3"
+num_good: 2
+num_adversaries: 4
+num_obstacles: 1
+num_food: 2
+num_forests: 2
+max_cycles: 100
diff --git a/benchmarl/conf/task/pettingzoo/waterworld.yaml b/benchmarl/conf/task/pettingzoo/waterworld.yaml
@@ -0,0 +1,25 @@
+defaults:
+  - _self_
+  - pettingzoo_waterworld_config
+
+task: "waterworld_v4"
+max_cycles: 500
+n_pursuers: 2
+n_evaders: 5
+n_poisons: 10
+n_obstacles: 1
+n_coop: 1
+n_sensors: 30
+sensor_range: 0.2
+radius: 0.015
+obstacle_radius: 0.1
+pursuer_max_accel: 0.5
+pursuer_speed: 0.2
+evader_speed: 0.1
+poison_speed: 0.1
+poison_reward: -1.0
+food_reward: 10.0
+encounter_reward: 0.01
+thrust_penalty: -0.5
+local_ratio: 1.0
+speed_features: True
diff --git a/benchmarl/environments/__init__.py b/benchmarl/environments/__init__.py
@@ -12,7 +12,18 @@
 
 
 from .pettingzoo.multiwalker import TaskConfig as MultiwalkerConfig
+from .pettingzoo.pursuit import TaskConfig as PursuitConfig
+from .pettingzoo.simple_adverasary import TaskConfig as SimpleAdversaryConfig
+from .pettingzoo.simple_crypto import TaskConfig as SimpleCryptoConfig
+from .pettingzoo.simple_push import TaskConfig as SimplePushConfig
+from .pettingzoo.simple_reference import TaskConfig as SimpleReferenceConfig
+from .pettingzoo.simple_speaker_listener import (
+    TaskConfig as SimpleSpeakerListenerConfig,
+)
+from .pettingzoo.simple_spread import TaskConfig as SimpleSpreadConfig
 from .pettingzoo.simple_tag import TaskConfig as SimpleTagConfig
+from .pettingzoo.simple_world_comm import TaskConfig as SimpleWorldComm
+from .pettingzoo.waterworld import TaskConfig as WaterworldConfig
 from .vmas.balance import TaskConfig as BalanceConfig
 from .vmas.navigation import TaskConfig as NavigationConfig
 from .vmas.sampling import TaskConfig as SamplingConfig
@@ -26,5 +37,14 @@
     "vmas_transport_config": TransportConfig,
     "vmas_wheel_config": WheelConfig,
     "pettingzoo_multiwalker_config": MultiwalkerConfig,
+    "pettingzoo_pursuit_config": PursuitConfig,
+    "pettingzoo_waterworld_config": WaterworldConfig,
+    "pettingzoo_simple_adversary_config": SimpleAdversaryConfig,
+    "pettingzoo_simple_crypto_config": SimpleCryptoConfig,
+    "pettingzoo_simple_push_config": SimplePushConfig,
+    "pettingzoo_simple_reference_config": SimpleReferenceConfig,
+    "pettingzoo_simple_speaker_listener_config": SimpleSpeakerListenerConfig,
+    "pettingzoo_simple_spread_config": SimpleSpreadConfig,
     "pettingzoo_simple_tag_config": SimpleTagConfig,
+    "pettingzoo_simple_world_comm_config": SimpleWorldComm,
 }
diff --git a/benchmarl/environments/pettingzoo/common.py b/benchmarl/environments/pettingzoo/common.py
@@ -10,7 +10,14 @@
 
 class PettingZooTask(Task):
     MULTIWALKER = None
+    SIMPLE_ADVERSARY = None
+    SIMPLE_CRYPTO = None
+    SIMPLE_PUSH = None
+    SIMPLE_REFERENCE = None
+    SIMPLE_SPEAKER_LISTENER = None
+    SIMPLE_SPREAD = None
     SIMPLE_TAG = None
+    SIMPLE_WORLD_COMM = None
 
     def get_env_fun(
         self,
@@ -33,17 +40,48 @@ def get_env_fun(
         )
 
     def supports_continuous_actions(self) -> bool:
-        if self in {PettingZooTask.MULTIWALKER, PettingZooTask.SIMPLE_TAG}:
+        """Return whether this task is listed as supporting continuous actions."""
+        if self in {
+            PettingZooTask.MULTIWALKER,
+            PettingZooTask.SIMPLE_ADVERSARY,
+            PettingZooTask.SIMPLE_CRYPTO,
+            PettingZooTask.SIMPLE_PUSH,
+            PettingZooTask.SIMPLE_REFERENCE,
+            PettingZooTask.SIMPLE_SPEAKER_LISTENER,
+            PettingZooTask.SIMPLE_SPREAD,
+            PettingZooTask.SIMPLE_TAG,
+            PettingZooTask.SIMPLE_WORLD_COMM,
+        }:
             return True
         return False
 
     def supports_discrete_actions(self) -> bool:
-        if self in {PettingZooTask.SIMPLE_TAG}:
+        """Return whether this task is listed as supporting discrete actions."""
+        if self in {
+            PettingZooTask.SIMPLE_ADVERSARY,
+            PettingZooTask.SIMPLE_CRYPTO,
+            PettingZooTask.SIMPLE_PUSH,
+            PettingZooTask.SIMPLE_REFERENCE,
+            PettingZooTask.SIMPLE_SPEAKER_LISTENER,
+            PettingZooTask.SIMPLE_SPREAD,
+            PettingZooTask.SIMPLE_TAG,
+            PettingZooTask.SIMPLE_WORLD_COMM,
+        }:
             return True
         return False
 
     def has_state(self) -> bool:
-        if self in {PettingZooTask.SIMPLE_TAG}:
+        """Return whether this task is listed as exposing a global state."""
+        if self in {
+            PettingZooTask.SIMPLE_ADVERSARY,
+            PettingZooTask.SIMPLE_CRYPTO,
+            PettingZooTask.SIMPLE_PUSH,
+            PettingZooTask.SIMPLE_REFERENCE,
+            PettingZooTask.SIMPLE_SPEAKER_LISTENER,
+            PettingZooTask.SIMPLE_SPREAD,
+            PettingZooTask.SIMPLE_TAG,
+            PettingZooTask.SIMPLE_WORLD_COMM,
+        }:
             return True
         return False
 

diff --git a/benchmarl/environments/pettingzoo/multiwalker.py b/benchmarl/environments/pettingzoo/multiwalker.py
@@ -7,3 +7,11 @@ class TaskConfig:
     n_walkers: int = MISSING
     shared_reward: bool = MISSING
     max_cycles: int = MISSING
+    position_noise: float = MISSING
+    angle_noise: float = MISSING
+    forward_reward: float = MISSING
+    fall_reward: float = MISSING
+    terminate_reward: float = MISSING
+    terminate_on_fall: bool = MISSING
+    remove_on_fall: bool = MISSING  # multiwalker.yaml: only effective when terminate_on_fall is False
+    terrain_length: int = MISSING  # a count of steps (200 in multiwalker.yaml), hence an integer
diff --git a/benchmarl/environments/pettingzoo/pursuit.py b/benchmarl/environments/pettingzoo/pursuit.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:
+    """Schema of pursuit.yaml; no field has a default, so all must be supplied."""
+    task: str = MISSING
+    shared_reward: bool = MISSING
+    max_cycles: int = MISSING
+    x_size: int = MISSING
+    y_size: int = MISSING
+    n_evaders: int = MISSING
+    n_pursuers: int = MISSING
+    obs_range: int = MISSING
+    n_catch: int = MISSING
+    freeze_evaders: bool = MISSING
+    tag_reward: float = MISSING
+    catch_reward: float = MISSING
+    urgency_reward: float = MISSING
+    surround: bool = MISSING
+    constraint_window: float = MISSING
diff --git a/benchmarl/environments/pettingzoo/simple_adverasary.py b/benchmarl/environments/pettingzoo/simple_adverasary.py
@@ -0,0 +1,8 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_simple_adversary_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    N: int = MISSING  # set to 2 in simple_adversary.yaml
+    max_cycles: int = MISSING  # set to 100 in simple_adversary.yaml
diff --git a/benchmarl/environments/pettingzoo/simple_crypto.py b/benchmarl/environments/pettingzoo/simple_crypto.py
@@ -0,0 +1,7 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_simple_crypto_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    max_cycles: int = MISSING  # set to 100 in simple_crypto.yaml
diff --git a/benchmarl/environments/pettingzoo/simple_push.py b/benchmarl/environments/pettingzoo/simple_push.py
@@ -0,0 +1,8 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:
+    """Schema of simple_push.yaml; no field has a default value."""
+    task: str = MISSING
+    max_cycles: int = MISSING
diff --git a/benchmarl/environments/pettingzoo/simple_reference.py b/benchmarl/environments/pettingzoo/simple_reference.py
@@ -0,0 +1,9 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:
+    """Schema of simple_reference.yaml; no field has a default value."""
+    task: str = MISSING
+    max_cycles: int = MISSING
+    local_ratio: float = MISSING  # set to 0.5 in simple_reference.yaml
diff --git a/benchmarl/environments/pettingzoo/simple_speaker_listener.py b/benchmarl/environments/pettingzoo/simple_speaker_listener.py
@@ -0,0 +1,7 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_simple_speaker_listener_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    max_cycles: int = MISSING  # set to 100 in simple_speaker_listener.yaml
diff --git a/benchmarl/environments/pettingzoo/simple_spread.py b/benchmarl/environments/pettingzoo/simple_spread.py
@@ -0,0 +1,9 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_simple_spread_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    max_cycles: int = MISSING  # set to 100 in simple_spread.yaml
+    local_ratio: float = MISSING  # set to 0.5 in simple_spread.yaml
+    N: int = MISSING  # set to 3 in simple_spread.yaml
diff --git a/benchmarl/environments/pettingzoo/simple_world_comm.py b/benchmarl/environments/pettingzoo/simple_world_comm.py
@@ -0,0 +1,12 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_simple_world_comm_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    max_cycles: int = MISSING  # set to 100 in simple_world_comm.yaml
+    num_good: int = MISSING  # set to 2 in simple_world_comm.yaml
+    num_adversaries: int = MISSING  # set to 4 in simple_world_comm.yaml
+    num_obstacles: int = MISSING  # set to 1 in simple_world_comm.yaml
+    num_food: int = MISSING  # set to 2 in simple_world_comm.yaml
+    num_forests: int = MISSING  # set to 2 in simple_world_comm.yaml
diff --git a/benchmarl/environments/pettingzoo/waterworld.py b/benchmarl/environments/pettingzoo/waterworld.py
@@ -0,0 +1,26 @@
+from dataclasses import dataclass, MISSING
+
+
+@dataclass
+class TaskConfig:  # schema registered as "pettingzoo_waterworld_config"
+    task: str = MISSING  # dataclasses.MISSING as default => the field has no default value
+    max_cycles: int = MISSING
+    n_pursuers: int = MISSING
+    n_evaders: int = MISSING
+    n_poisons: int = MISSING
+    n_obstacles: int = MISSING
+    n_coop: int = MISSING
+    n_sensors: int = MISSING
+    sensor_range: float = MISSING
+    radius: float = MISSING
+    obstacle_radius: float = MISSING
+    pursuer_max_accel: float = MISSING
+    pursuer_speed: float = MISSING
+    evader_speed: float = MISSING
+    poison_speed: float = MISSING
+    poison_reward: float = MISSING
+    food_reward: float = MISSING
+    encounter_reward: float = MISSING  # name must match the `encounter_reward` key in waterworld.yaml
+    thrust_penalty: float = MISSING
+    local_ratio: float = MISSING
+    speed_features: bool = MISSING