Skip to content

Commit

Permalink
cleanup
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucidrains committed Nov 18, 2024
1 parent d70530c commit 3d4084c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
19 changes: 9 additions & 10 deletions pi_zero_pytorch/pi_zero.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,8 +623,6 @@ def ode_fn(timestep, denoised_actions):
cond_scale = cond_scale,
remove_parallel_component = remove_parallel_component,
keep_parallel_frac = keep_parallel_frac,
-return_actions_flow = True,
-return_state_keys_values = True
)

if cache_kv:
Expand Down Expand Up @@ -664,19 +662,22 @@ def forward_with_reward_cfg(
cond_scale = 0.,
remove_parallel_component = False,
keep_parallel_frac = 0.,
-return_state_keys_values = True,

**kwargs
):
-assert return_state_keys_values, 'cached key values must be turned on'
assert self.can_cfg, 'you need to train with reward token dropout'

with_reward_cache, without_reward_cache = cached_state_keys_values

+forward_kwargs = dict(
+return_state_keys_values = True,
+return_actions_flow = True,
+)

maybe_reward_out = self.forward(
*args,
reward_tokens = reward_tokens,
cached_state_keys_values = with_reward_cache,
-return_state_keys_values = return_state_keys_values,
+**forward_kwargs,
**kwargs
)

Expand All @@ -685,15 +686,13 @@ def forward_with_reward_cfg(
if not exists(reward_tokens) or cond_scale == 0.:
return action_flow_with_reward, (with_reward_cache_kv, None)

-no_reward_out = self.forward(
+action_flow_without_reward, without_reward_cache_kv = self.forward(
*args,
cached_state_keys_values = without_reward_cache,
-return_state_keys_values = return_state_keys_values,
+**forward_kwargs,
**kwargs
)

-action_flow_without_reward, without_reward_cache_kv = no_reward_out

update = action_flow_with_reward - action_flow_without_reward

if remove_parallel_component:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "pi-zero-pytorch"
-version = "0.0.18"
+version = "0.0.19"
description = "π0 in Pytorch"
authors = [
{ name = "Phil Wang", email = "[email protected]" }
Expand Down

0 comments on commit 3d4084c

Please sign in to comment.