[WIP]: Shashank/flexattention #1675

Draft: ShashankMosaicML wants to merge 41 commits into mosaicml:main from shashank/flexattention
41 commits
All commits are authored by ShashankMosaicML.

f416539  adding flex attention (Nov 18, 2024)
ac3a884  registrifying score mods (Nov 18, 2024)
31b27e2  registrifying attention mask mods (Nov 18, 2024)
c8fffa5  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 18, 2024)
86dce3b  bug_fix (Nov 19, 2024)
cb8f4a6  bug_fix (Nov 19, 2024)
902850a  lint (Nov 19, 2024)
9c9708d  configuring test (Nov 19, 2024)
f1ff430  configuring tests (Nov 19, 2024)
e537f5a  bug fix (Nov 19, 2024)
c527dd7  fixing alibi (Nov 19, 2024)
15e303e  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 19, 2024)
c4ef5d9  configuring further tests (Nov 19, 2024)
6b37427  refactoring (Nov 19, 2024)
e30fe7a  adding warnings and errors (Nov 19, 2024)
924a53c  gating tests on torch version (Nov 19, 2024)
57048e3  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 19, 2024)
67a2aea  reorganizing function defs (Nov 19, 2024)
04f3a62  refactoring (Nov 19, 2024)
ab6c58c  passing in dicts of mask and score mods (Nov 19, 2024)
3b3827d  making mask and score mods configurable via yaml (Nov 19, 2024)
be43e8d  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 19, 2024)
2264f91  adding torch.compile (Nov 20, 2024)
e274d9f  .. (Nov 20, 2024)
a26bb4f  .. (Nov 20, 2024)
d5ab7d3  undoing comment out (Nov 20, 2024)
d40e978  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 26, 2024)
5f13e7b  adding torch comile (Nov 26, 2024)
ca8e173  temporary commit commenting out block mask and score mod (Nov 26, 2024)
f5486ff  undoing prev temp commit (Nov 26, 2024)
fdced3a  Merge branch 'mosaicml:main' into shashank/flexattention (Nov 26, 2024)
c53db63  speeding up block mask generation (Nov 27, 2024)
ec5900d  precompilining create block mask (Nov 27, 2024)
02ad3b6  minor (Nov 27, 2024)
13a5fc8  compiling mask and flex attn once for the entire model (Nov 27, 2024)
2ae6027  .. (Nov 27, 2024)
0c5150a  .. (Nov 27, 2024)
ff28304  making sequence id transforms configurable (Nov 27, 2024)
23ba20f  .. (Nov 27, 2024)
72c45ae  .. (Nov 27, 2024)
73066a4  .. (Nov 27, 2024)
llmfoundry/layers_registry.py: 60 additions, 0 deletions
@@ -176,6 +176,63 @@
description=_attention_implementations_description,
)

_flex_attention_score_mods_description = (
"""The flex_attention_score_mods registry is used to register functions that implement flex attention score mods.

One example is 'alibi'. See attention.py for examples.

Args:
kwargs: Dict[str, Any]: Additional keyword arguments the implementation accepts.
Returns:
Callable[[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]: The score mod function (see https://github.com/pytorch/pytorch/blob/main/torch/nn/attention/flex_attention.py)
"""
)
flex_attention_score_mods = create_registry(
'llmfoundry',
'flex_attention_score_mods',
generic_type=Callable,
entry_points=True,
description=_flex_attention_score_mods_description,
)

_flex_attention_mask_mods_description = (
"""The flex_attention_masks registry is used to register functions that implement flex attention mask mods.

One example is 'sequence_id'. See attention.py for examples.

Args:
kwargs: Dict[str, Any]: Additional keyword arguments the implementation accepts.
Returns:
Callable[[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]: The mask mod function (see https://github.com/pytorch/pytorch/blob/main/torch/nn/attention/flex_attention.py)
"""
)
flex_attention_mask_mods = create_registry(
'llmfoundry',
'flex_attention_mask_mods',
generic_type=Callable,
entry_points=True,
description=_flex_attention_mask_mods_description,
)

_sequence_id_transformer_registry_description = (
"""The sequence_id_transformer_registry is used to register functions that implement sequence id transformations.

One example is 'attention_mask_in_length' in modeling_mpt.py.

Args:
sequence_id (torch.Tensor): The sequence id tensor.
Returns:
Any: The transformed sequence id.
"""
)
sequence_id_transformer_registry = create_registry(
'llmfoundry',
'sequence_id_transformer_registry',
generic_type=Callable,
entry_points=True,
description=_sequence_id_transformer_registry_description,
)

_param_init_fns_description = (
"""The param_init_fns registry is used to register functions that initialize parameters.

@@ -231,5 +288,8 @@
'ffns_with_megablocks',
'attention_classes',
'attention_implementations',
'flex_attention_score_mods',
'flex_attention_mask_mods',
'sequence_id_transformer_registry',
'fcs',
]
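
For orientation, here is a minimal usage sketch (an editorial illustration, not part of this PR's diff) of how a score mod might be registered with the new flex_attention_score_mods registry, assuming it exposes the same decorator-style register used by other llm-foundry registries. Per the description above, the registered callable accepts keyword arguments and returns a score mod with the (score, b, h, q_idx, kv_idx) signature from torch.nn.attention.flex_attention. The name 'distance_bias_example' and the 'slope' kwarg are hypothetical.

from llmfoundry.layers_registry import flex_attention_score_mods

@flex_attention_score_mods.register('distance_bias_example')
def make_distance_bias_score_mod(**kwargs):
    # Hypothetical factory: accepts config kwargs, returns a score mod.
    slope = kwargs.get('slope', 1.0)

    def score_mod(score, b, h, q_idx, kv_idx):
        # Subtract a bias proportional to the query/key index distance,
        # loosely in the spirit of the 'alibi' example mentioned above.
        return score - slope * (q_idx - kv_idx).abs()

    return score_mod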
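
Likewise, a hedged sketch of a mask mod factory for the flex_attention_mask_mods registry. A mask mod returns a boolean per (b, h, q_idx, kv_idx) position, as defined by torch.nn.attention.flex_attention; here it simply keeps causal positions. The name 'causal_example' is an assumption for illustration.

from llmfoundry.layers_registry import flex_attention_mask_mods

@flex_attention_mask_mods.register('causal_example')
def make_causal_mask_mod(**kwargs):
    # Hypothetical factory: returns a mask mod that keeps only causal positions.
    def mask_mod(b, h, q_idx, kv_idx):
        return q_idx >= kv_idx

    return mask_mod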
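
Finally, a sketch of an entry for sequence_id_transformer_registry, which per its description maps the sequence id tensor to any derived structure (the PR's real example is 'attention_mask_in_length' in modeling_mpt.py). The name 'sequence_lengths_example', the output format, and the assumption of non-negative sequence ids are all illustrative, not taken from this PR.

import torch

from llmfoundry.layers_registry import sequence_id_transformer_registry

@sequence_id_transformer_registry.register('sequence_lengths_example')
def sequence_lengths(sequence_id: torch.Tensor) -> torch.Tensor:
    # sequence_id: (batch, seq_len) int tensor of per-token sequence indices
    # (assumed non-negative). Returns (batch, num_sequences) token counts.
    num_sequences = int(sequence_id.max().item()) + 1
    return torch.stack([
        torch.bincount(row, minlength=num_sequences) for row in sequence_id
    ])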