
Commit 01f1792
ok bf16 works for attention now
dlwh committed Dec 19, 2024
1 parent 95f793e commit 01f1792
Showing 1 changed file with 3 additions and 3 deletions.
src/levanter/models/attention.py (3 additions, 3 deletions)

@@ -806,10 +806,10 @@ def _tpu_splash_attention(
     if bias is not None:
         raise NotImplementedError("Splash attention does not support bias")
 
-    if attention_dtype is not None and attention_dtype != jnp.float32:
-        warnings.warn("Splash attention only supports float32. Switching to float32.")
+    # if attention_dtype is not None and attention_dtype != jnp.float32:
+    #     warnings.warn("Splash attention only supports float32. Switching to float32.")
 
-    attention_dtype = jnp.float32
+    # attention_dtype = jnp.float32
 
     q_class, k_class, v_class = _bin_and_group_axes_by_function(query, key, value, QPos, KPos, Key)
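
The effect of the change, in brief: before this commit, _tpu_splash_attention warned on any non-float32 attention_dtype and coerced it to jnp.float32; after it, the requested dtype (e.g. jnp.bfloat16) is passed through unchanged. The sketch below illustrates that before/after behavior only; _resolve_attention_dtype and its old_behavior flag are hypothetical names for illustration, not part of levanter's API.

import warnings

import jax.numpy as jnp


def _resolve_attention_dtype(attention_dtype, old_behavior: bool = False):
    # Hypothetical helper sketching this commit's change. With old_behavior=True
    # (the pre-commit code path), any non-float32 request triggers a warning and
    # is coerced to float32; with the new behavior, the requested dtype is
    # returned as-is, so bf16 splash attention is possible.
    if old_behavior:
        if attention_dtype is not None and attention_dtype != jnp.float32:
            warnings.warn("Splash attention only supports float32. Switching to float32.")
        return jnp.float32
    return attention_dtype


# Pre-commit path: a bf16 request was silently downgraded to float32.
assert _resolve_attention_dtype(jnp.bfloat16, old_behavior=True) == jnp.float32
# Post-commit path: the bf16 request survives.
assert _resolve_attention_dtype(jnp.bfloat16) == jnp.bfloat16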
