Commit
removed hard coded BLOCK_SIZE
Virginia Adams committed Feb 14, 2024
1 parent 4ed3217 commit 3de9e60
Showing 2 changed files with 1 addition and 2 deletions.
2 changes: 1 addition & 1 deletion src/levanter/models/flash_attention.py
@@ -18,7 +18,7 @@
from levanter.models.attention import AttentionMask, materialize_mask


-# TODO: tune
+# TODO: Tune
BLOCK_SIZE = 128


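The commit message says the hard-coded `BLOCK_SIZE` was removed; the usual pattern is to keep a tunable module-level default but let callers override it per call instead of baking the constant into function bodies. A minimal sketch of that pattern — the names `DEFAULT_BLOCK_SIZE` and `num_blocks` are illustrative, not Levanter's actual API:

```python
# Module-level default block size for tiled (flash) attention.
# Callers can override it per call rather than relying on a baked-in constant.
DEFAULT_BLOCK_SIZE = 128  # TODO: tune


def num_blocks(seq_len: int, block_size: int = DEFAULT_BLOCK_SIZE) -> int:
    """Number of tiles a sequence of length seq_len is split into."""
    # Ceiling division: a partial final tile still counts as one block.
    return -(-seq_len // block_size)
```

With this shape, experiments that tune the block size pass `block_size=...` explicitly, while existing call sites keep the old behavior through the default.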
1 change: 0 additions & 1 deletion src/levanter/models/gpt2.py
@@ -195,7 +195,6 @@ def __call__(self, x: NamedArray, mask: Optional[AttentionMask | NamedArray], la
prng=k_drop,
attention_dtype=jnp.float32 if self.config.upcast_attn else None,
)
-print(f"\n\nATTENTION OUTPUT: {attn_output}\n\n")
attn_output = self.c_proj(attn_output, key=k_out)

if self.config.upcast_attn:
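The `gpt2.py` hunk deletes a leftover debug `print` of the attention output. A lighter-weight alternative for this kind of inspection is a debug-level log, which stays silent unless explicitly enabled — a sketch under assumed names (`log_and_project`, `proj` stand in for the real call chain; this is not Levanter's actual code):

```python
import logging

logger = logging.getLogger("gpt2_attention")


def log_and_project(attn_output, proj):
    # logger.debug is a no-op unless the logger is configured at DEBUG level,
    # so no stray output leaks into training runs the way a print would.
    logger.debug("attention output: %r", attn_output)
    return proj(attn_output)
```

The `%r` lazy-formatting style also avoids paying the cost of stringifying a large array when the message is filtered out.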
