oops, need to use flex attention due to softclamping from gemma 2b
lucidrains committed Nov 7, 2024
1 parent 37962b2 commit 962b05b
Showing 2 changed files with 31 additions and 21 deletions.
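Why flex attention here: Gemma 2 style soft-capping squashes the attention logits with a tanh before the softmax, which the standard fused SDPA kernel cannot express, while flex attention can apply it inside the fused kernel through its score_mod hook. A minimal sketch of the idea, reusing the softclamp / softclamp_score_mod names that appear in the diff below (their exact bodies here are assumptions):

from torch.nn.attention.flex_attention import flex_attention

def softclamp(t, value):
    # tanh soft-capping: keeps attention logits within (-value, value)
    return (t / value).tanh() * value

def softclamp_score_mod(value):
    # score_mod is called per attention score inside the fused flex attention kernel
    def score_mod(score, batch, head, q_idx, kv_idx):
        return softclamp(score, value)
    return score_mod

# usage sketch: q, k, v are (batch, heads, seq_len, dim_head) CUDA tensors
# out = flex_attention(q, k, v, score_mod = softclamp_score_mod(50.))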
50 changes: 30 additions & 20 deletions pi_zero_pytorch/pi_zero.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+from typing import Callable
 from functools import partial
 
 import torch
@@ -107,7 +108,8 @@ def forward_actions_with_cached_state(
         actions,
         cached_state_keys_values: tuple[Tensor, Tensor],
         actions_value_residual: Tensor | None = None,
-        return_keys_values = False
+        return_keys_values = False,
+        flex_attn_fn: Callable | None = None
     ):
         aq, ak, av = self.to_actions_qkv(actions).chunk(3, dim = -1)
 
@@ -126,13 +128,18 @@ def forward_actions_with_cached_state(
 
         # attention
 
-        q = q * self.scale
-
-        sim = einsum(q, k, 'b h i d, b h j d -> b h i j')
-
-        attn = sim.softmax(dim = -1)
-
-        out = einsum(attn, v, 'b h i j, b h j d -> b h i d')
+        if exists(flex_attn_fn):
+            out = flex_attn_fn(q, k, v)
+        else:
+            q = q * self.scale
+
+            sim = einsum(q, k, 'b h i d, b h j d -> b h i j')
+
+            sim = softclamp(sim, self.softclamp_value)
+
+            attn = sim.softmax(dim = -1)
+
+            out = einsum(attn, v, 'b h i j, b h j d -> b h i d')
 
         # merge attention heads
 
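The new flex_attn_fn argument lets the caller hand in a pre-built flex attention callable instead of the eager softclamped attention above. One plausible way to build such a callable (the actual wiring in forward is elided in this diff, so the partial below is an assumption):

from functools import partial
from torch.nn.attention.flex_attention import flex_attention

# hypothetical wiring: bake the soft-capping score_mod into a 3-argument callable
# matching the `out = flex_attn_fn(q, k, v)` branch above; per the later hunk, no
# block mask is built when decoding actions against cached state keys / values
flex_attn_fn = partial(
    flex_attention,
    score_mod = softclamp_score_mod(50.)  # 50. stands in for self.attn_softclamp_value
)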
@@ -151,7 +158,7 @@ def forward(
         actions,
         actions_value_residual: Tensor | None = None,
         return_keys_values = False,
-        flex_attn_fn: callable | None = None
+        flex_attn_fn: Callable | None = None
     ):
         seq_len, device = multimodal_seq.shape[-2], multimodal_seq.device
 
@@ -474,8 +481,8 @@ def forward(
         cached_state_keys_values: list[tuple[Tensor, Tensor]] | None = None,
         **kwargs
     ):
-        received_state_cache = exists(cached_state_keys_values)
-        assert not (received_state_cache and not return_actions_flow), 'must be generating action trajectory if receiving cached state key values'
+        inferencing = exists(cached_state_keys_values)
+        assert not (inferencing and not return_actions_flow), 'must be generating action trajectory if receiving cached state key values'
 
         if not exists(actions):
             return self.sample(images, token_ids, joint_state, **kwargs)
@@ -505,7 +512,7 @@ def forward(
         time_cond = self.to_time_cond(times)
         action_tokens = self.to_action_tokens(actions)
 
-        if not received_state_cache:
+        if not inferencing:
             # language
 
             labels = token_ids[:, 1:]
@@ -545,17 +552,20 @@ def forward(
 
         flex_attn_fn = None
 
-        if self.use_flex_attn and state_tokens.is_cuda and not received_state_cache:
+        if self.use_flex_attn and state_tokens.is_cuda:
 
-            prefix_length = state_tokens.shape[-2]
-            seq_len = prefix_length + action_tokens.shape[-2]
+            block_mask = None
 
-            block_mask = create_block_mask(
-                create_pizero_attn_mask(prefix_length),
-                Q_LEN = seq_len,
-                KV_LEN = seq_len,
-                device = state_tokens.device
-            )
+            if not inferencing:
+                prefix_length = state_tokens.shape[-2]
+                seq_len = prefix_length + action_tokens.shape[-2]
+
+                block_mask = create_block_mask(
+                    create_pizero_attn_mask(prefix_length),
+                    Q_LEN = seq_len,
+                    KV_LEN = seq_len,
+                    device = state_tokens.device
+                )
 
             score_mod_fn = softclamp_score_mod(self.attn_softclamp_value)
 
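For reference, create_block_mask compiles a boolean mask_mod into the block-sparse mask that flex attention consumes; B = H = None broadcasts it over batch and heads. A rough sketch with a hypothetical prefix-style mask_mod (the real rule lives in create_pizero_attn_mask, which is not shown in this diff and may differ):

from torch.nn.attention.flex_attention import create_block_mask

def create_prefix_attn_mask(prefix_length):
    # hypothetical rule: prefix (vision / language / state) tokens attend only to the
    # prefix, while action tokens attend to everything
    def mask_mod(batch, head, q_idx, kv_idx):
        return (q_idx >= prefix_length) | (kv_idx < prefix_length)
    return mask_mod

block_mask = create_block_mask(
    create_prefix_attn_mask(prefix_length = 256),
    B = None,
    H = None,
    Q_LEN = 512,
    KV_LEN = 512,
    device = 'cuda'
)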
@@ -577,7 +587,7 @@ def forward(
 
         # transformer
 
-        if not received_state_cache:
+        if not inferencing:
             for (
                 (attn, state_ff, actions_ff),
                 (attn_ada_rmsnorm, attn_ada_layerscale, ff_ada_rmsnorm, ff_ada_layerscale)
@@ -629,7 +639,7 @@ def forward(
 
                 action_tokens = ff_ada_rmsnorm(action_tokens, time_cond)
 
-        if not received_state_cache:
+        if not inferencing:
             # unpack and unembed to predictions
 
             visual_tokens, tokens, _ = unpack(state_tokens, packed_shape, 'b * d')
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pi-zero-pytorch"
-version = "0.0.3"
+version = "0.0.5"
 description = "π0 in Pytorch"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }
