Add SamplerEval
PiperOrigin-RevId: 708307065
Conchylicultor authored and The gemma Authors committed Jan 2, 2025
1 parent 0d6ae85 commit 80685e1
Showing 2 changed files with 26 additions and 3 deletions.
gemma/sampler.py (10 changes: 9 additions & 1 deletion)
@@ -110,6 +110,7 @@ def __init__(
       transformer: transformer_lib.Transformer,
       vocab: spm.SentencePieceProcessor,
       params: params_lib.Params,
+      cache_length: int | None = None,
   ):
     """Initializes a sampler for a Gemma model.
@@ -122,6 +123,9 @@ def __init__(
     self.vocab = vocab
     self.params = params
     self._compiled_sample_fn = jax.jit(self._sample_fn)
+    self.cache_length = cache_length or transformer.config.max_cache_length
+    if self.cache_length is None:
+      raise ValueError('Sampler `cache_length` should be set.')

   @property
   def dtype(self) -> jnp.dtype:
@@ -192,7 +196,11 @@ def _sample_step(

   def init_cache(self, bsz) -> dict[str, modules.LayerCache]:
     """Initializes the attention cache for each layer."""
-    return self.transformer.config.init_cache(bsz, dtype=self.dtype)
+    return self.transformer.config.init_cache(
+        bsz,
+        dtype=self.dtype,
+        cache_length=self.cache_length,
+    )

   def init_sample_state(
       self,
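With this commit, the KV-cache length is configured on the Sampler itself instead of through TransformerConfig.max_cache_length. Below is a minimal usage sketch, not part of the diff: it assumes `model`, `vocab`, and `params` have already been loaded as in the repository's existing sampling examples, and the value 1024 is illustrative only.

# Sketch only: `model` is a transformer_lib.Transformer, `vocab` an
# spm.SentencePieceProcessor, and `params` the loaded checkpoint weights.
from gemma import sampler as sampler_lib

sampler = sampler_lib.Sampler(
    transformer=model,
    vocab=vocab,
    params=params,
    cache_length=1024,  # new argument added by this commit
)

If `cache_length` is omitted, the sampler falls back to `transformer.config.max_cache_length` and raises a ValueError when neither is set, which is exactly the check added to `__init__` above.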
gemma/transformer.py (19 changes: 17 additions & 2 deletions)
@@ -17,6 +17,7 @@
 import dataclasses
 import enum
 from typing import Iterable
+import warnings

 from flax import linen as nn
 from gemma import layers
@@ -71,6 +72,15 @@ class TransformerConfig:
   sliding_window_size: int | None = None
   transpose_gating_einsum: bool = False

+  def __post_init__(self):
+    if self.max_cache_length is not None:
+      warnings.warn(
+          'TransformerConfig.max_cache_length is deprecated and will be'
+          ' REMOVED!!! Instead, set the `cache_length` in the `Sampler` class.',
+          DeprecationWarning,
+          stacklevel=2,
+      )
+
   def query_pre_attn_scalar(self) -> float:
     """Returns the scalar to multiply the query by before attention."""
     match self.query_pre_attn_norm:
@@ -227,10 +237,15 @@ def init_cache(
       self,
       batch_size: int,
       dtype: jnp.dtype = jnp.bfloat16,
+      *,
+      cache_length: int | None = None,
   ) -> Cache:
     """Initializes a new Transformer cache."""
-    if self.max_cache_length is None:
-      raise ValueError('max_cache_length must be set to initialize cache.')
+    cache_length = cache_length or self.max_cache_length
+    if cache_length is None:
+      raise ValueError(
+          'Missing `cache_length=` kwarg when calling `init_cache()`.'
+      )
     cache = {
         f'layer_{i}': modules.Attention.init_cache(
             self.max_cache_length,
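Code that builds a cache directly goes through the same path: `init_cache` now takes a keyword-only `cache_length` and only falls back to the deprecated `max_cache_length`. A small sketch, assuming `config` is an already-constructed TransformerConfig and the sizes are illustrative:

import jax.numpy as jnp

# New keyword-only argument introduced by this commit; if omitted, init_cache
# falls back to config.max_cache_length and raises a ValueError when both are None.
cache = config.init_cache(
    batch_size=1,
    dtype=jnp.bfloat16,
    cache_length=1024,
)

Configs that still set `max_cache_length` keep working for now, but `__post_init__` emits a DeprecationWarning steering callers toward `Sampler(cache_length=...)`.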
