Skip to content

Commit

Permalink
distributed off policy algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
hnyu committed Dec 3, 2024
1 parent d30cdb4 commit 33f9352
Show file tree
Hide file tree
Showing 5 changed files with 648 additions and 16 deletions.
10 changes: 3 additions & 7 deletions alf/algorithms/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,9 @@ def observe_for_replay(self, exp):
:math:`[B, \ldots]`, where :math:`B` is the batch size of the
batched environment.
"""
- if not self._use_rollout_state:
- exp = exp._replace(state=())
- elif id(self.rollout_state_spec) != id(self.train_state_spec):
- # Prune exp's state (rollout_state) according to the train state spec
- exp = exp._replace(
- state=alf.nest.prune_nest_like(
- exp.state, self.train_state_spec, value_to_match=()))
+ exp = common.prune_exp_replay_state(exp, self._use_rollout_state,
+ self.rollout_state_spec,
+ self.train_state_spec)

if self._replay_buffer is None:
self._set_replay_buffer(exp)
Expand Down
Loading

0 comments on commit 33f9352

Please sign in to comment.