Commit

Update
[ghstack-poisoned]
vmoens committed Dec 2, 2024
1 parent 5535a05 commit 0f6187a
Showing 15 changed files with 693 additions and 69 deletions.
4 changes: 4 additions & 0 deletions docs/requirements.txt
@@ -28,3 +28,7 @@ vmas
 onnxscript
 onnxruntime
 onnx
+plotly
+igraph
+transformers
+datasets
Binary file added docs/source/_static/img/rollout-llm.png
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -105,6 +105,7 @@ Intermediate
 tutorials/dqn_with_rnn
 tutorials/rb_tutorial
 tutorials/export
+tutorials/beam_search_with_gpt
 
 Advanced
 --------
10 changes: 7 additions & 3 deletions test/mocking_classes.py
@@ -1776,14 +1776,18 @@ def __init__(self):
             tensor=Unbounded(3),
             non_tensor=NonTensor(shape=()),
         )
+        self._saved_obs_spec = self.observation_spec.clone()
         self.state_spec = Composite(
             non_tensor=NonTensor(shape=()),
         )
+        self._saved_state_spec = self.state_spec.clone()
         self.reward_spec = Unbounded(1)
+        self._saved_full_reward_spec = self.full_reward_spec.clone()
         self.action_spec = Unbounded(1)
+        self._saved_full_action_spec = self.full_action_spec.clone()
 
     def _reset(self, tensordict):
-        data = self.observation_spec.zero()
+        data = self._saved_obs_spec.zero()
         data.set_non_tensor("non_tensor", 0)
         data.update(self.full_done_spec.zero())
         return data
@@ -1792,10 +1796,10 @@ def _step(
         self,
         tensordict: TensorDictBase,
     ) -> TensorDictBase:
-        data = self.observation_spec.zero()
+        data = self._saved_obs_spec.zero()
         data.set_non_tensor("non_tensor", tensordict["non_tensor"] + 1)
         data.update(self.full_done_spec.zero())
-        data.update(self.full_reward_spec.zero())
+        data.update(self._saved_full_reward_spec.zero())
         return data
 
     def _set_seed(self, seed: Optional[int]):
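Editorial note on the change above (my reading of the diff): the mock now snapshots each spec right after assigning it, so `_reset`/`_step` keep building data from the original specs even if a caller, such as `auto_specs_` in the test below, later overwrites `env.observation_spec` and friends in place. A minimal standalone sketch of that clone-then-mutate pattern, using names from torchrl.data; the 3- and 5-dim shapes are illustrative only:

    from torchrl.data import Composite, Unbounded

    spec = Composite(observation=Unbounded(3))
    saved = spec.clone()                  # frozen snapshot
    spec["observation"] = Unbounded(5)    # later in-place mutation
    data = saved.zero()                   # still yields the original 3-dim zeros
    assert data["observation"].shape == (3,)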
11 changes: 8 additions & 3 deletions test/test_env.py
@@ -3526,8 +3526,13 @@ def test_single_env_spec():
     assert env.input_spec.is_in(env.input_spec_unbatched.zeros(env.shape))
 
 
-def test_auto_spec():
-    env = CountingEnv()
+@pytest.mark.parametrize("env_type", [CountingEnv, EnvWithMetadata])
+def test_auto_spec(env_type):
+    if env_type is EnvWithMetadata:
+        obs_vals = ["tensor", "non_tensor"]
+    else:
+        obs_vals = "observation"
+    env = env_type()
     td = env.reset()
 
     policy = lambda td, action_spec=env.full_action_spec.clone(): td.update(
@@ -3550,7 +3555,7 @@ def test_auto_spec():
         shape=env.full_state_spec.shape, device=env.full_state_spec.device
     )
     env._action_keys = ["action"]
-    env.auto_specs_(policy, tensordict=td.copy())
+    env.auto_specs_(policy, tensordict=td.copy(), observation_key=obs_vals)
     env.check_env_specs(tensordict=td.copy())


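Judging from the parametrization above, `auto_specs_` accepts either a single observation key or a list of keys, here the tensor and non-tensor entries of `EnvWithMetadata`. A hedged usage sketch mirroring only the visible pieces of the test (the collapsed middle of the test performs extra spec surgery that is omitted here):

    from mocking_classes import EnvWithMetadata

    env = EnvWithMetadata()
    td = env.reset()
    policy = lambda td, action_spec=env.full_action_spec.clone(): td.update(
        action_spec.zero()
    )
    env.auto_specs_(
        policy,
        tensordict=td.copy(),
        observation_key=["tensor", "non_tensor"],  # one spec entry per key
    )
    env.check_env_specs(tensordict=td.copy())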
1 change: 1 addition & 0 deletions torchrl/_utils.py
@@ -829,6 +829,7 @@ def _can_be_pickled(obj):
 def _make_ordinal_device(device: torch.device):
     if device is None:
         return device
+    device = torch.device(device)
     if device.type == "cuda" and device.index is None:
         return torch.device("cuda", index=torch.cuda.current_device())
     if device.type == "mps" and device.index is None:
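The single added line makes `_make_ordinal_device` tolerant of device-like inputs: without the cast, passing a plain string such as "cuda" would raise AttributeError on the `device.type` access. Since `torch.device()` is idempotent, real `torch.device` inputs pass through unchanged. A quick illustration (the int shorthand assumes a CUDA build):

    import torch

    torch.device(torch.device("cpu"))  # idempotent: device(type='cpu')
    torch.device("cuda")               # device(type='cuda'), index still None
    torch.device(0)                    # int shorthand: device(type='cuda', index=0)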
3 changes: 2 additions & 1 deletion torchrl/data/map/hash.py
@@ -75,7 +75,8 @@ def forward(self, features: torch.Tensor) -> torch.Tensor:
 class SipHash(Module):
     """A Module to Compute SipHash values for given tensors.
 
-    A hash function module based on SipHash implementation in python.
+    A hash function module based on SipHash implementation in python. Input tensors should have shape ``[batch_size, num_features]``
+    and the output shape will be ``[batch_size]``.
 
     Args:
         as_tensor (bool, optional): if ``True``, the bytes will be turned into integers
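A short sketch of the shape contract the docstring now spells out; it assumes the default `as_tensor=True` so the module returns a tensor of per-row hash values:

    import torch
    from torchrl.data import SipHash

    h = SipHash()
    x = torch.rand(4, 8)        # [batch_size=4, num_features=8]
    codes = h(x)                # one SipHash value per row
    assert codes.shape == (4,)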
20 changes: 17 additions & 3 deletions torchrl/data/map/tdstorage.py
@@ -177,7 +177,7 @@ def from_tensordict_pair(
         collate_fn: Callable[[Any], Any] | None = None,
         write_fn: Callable[[Any, Any], Any] | None = None,
         consolidated: bool | None = None,
-    ):
+    ) -> TensorDictMap:
         """Creates a new TensorDictStorage from a pair of tensordicts (source and dest) using pre-defined rules of thumb.
 
         Args:
@@ -308,7 +308,23 @@ def __setitem__(self, item: TensorDictBase, value: TensorDictBase):
         if not self._has_lazy_out_keys():
             # TODO: make this work with pytrees and avoid calling select if keys match
             value = value.select(*self.out_keys, strict=False)
+        item, value = self._maybe_add_batch(item, value)
+        index = self._to_index(item, extend=True)
+        if index.unique().numel() < index.numel():
+            # If multiple values point to the same place in the storage, we cannot process them by batch
+            # There could be a better way to deal with this, using unique ids.
+            vals = []
+            for it, val in zip(item.split(1), value.split(1)):
+                self[it] = val
+                vals.append(val)
+            # __setitem__ may affect the content of the input data
+            value.update(TensorDictBase.lazy_stack(vals))
+            return
         if self.write_fn is not None:
+            # We use this block in the following context: the value written in the storage is already present,
+            # but it needs to be updated.
+            # We first check if the value is already there using `contains`. If so, we pass the new value and the
+            # previous one to write_fn. The values that are not present are passed alone.
             if len(self):
                 modifiable = self.contains(item)
                 if modifiable.any():
@@ -322,8 +338,6 @@ def __setitem__(self, item: TensorDictBase, value: TensorDictBase):
                     value = self.write_fn(value)
             else:
                 value = self.write_fn(value)
-        item, value = self._maybe_add_batch(item, value)
-        index = self._to_index(item, extend=True)
         self.storage.set(index, value)
 
     def __len__(self):
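Why the new early-return branch matters (my reading of the diff): when two rows of a batch map to the same storage slot, a single vectorized `storage.set(index, value)` would keep an arbitrary winner, so the commit detects collisions and falls back to row-by-row writes, which also lets `write_fn` merge each incoming row with what is already stored. The collision test itself is just a uniqueness check:

    import torch

    index = torch.tensor([0, 3, 3, 7])             # rows 1 and 2 collide on slot 3
    assert index.unique().numel() < index.numel()  # triggers the row-by-row path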
(Diffs for the remaining 7 of the 15 changed files were not loaded.)