Added the ability to use safe priors for hierarchical models #331

Merged
34 commits merged on Dec 16, 2023
Changes from 8 commits
Commits (34)
130ecc6
modified hssm.py to add special case for HDDM
digicosmos86 Nov 28, 2023
1401861
plugged in a few functions from bambi to handle default priors
digicosmos86 Nov 28, 2023
06795f3
added logic to modify the class to add default priors
digicosmos86 Nov 28, 2023
0dc986a
moved merge_dict to param.py to avoid circular import
digicosmos86 Nov 28, 2023
08a92e1
ensures that tests pass
digicosmos86 Nov 28, 2023
2a4587c
update software versions
digicosmos86 Nov 28, 2023
ce3e067
adjust how bounds are passed
digicosmos86 Nov 30, 2023
c3eeb14
add ruff ignore item, place warnings
digicosmos86 Nov 30, 2023
76a8411
update ci workflow
digicosmos86 Dec 5, 2023
2725e05
exclude ddm_sdv and ddm_full
digicosmos86 Dec 5, 2023
5136e04
fixed minor bugs in param.py
digicosmos86 Dec 5, 2023
24ab25e
use deepcopy to avoid errors
digicosmos86 Dec 5, 2023
c8d1981
added tests for safe prior strategy
digicosmos86 Dec 5, 2023
68cacd5
suppress jax warning
digicosmos86 Dec 5, 2023
01ba4dc
specify float type for each test file
digicosmos86 Dec 5, 2023
bcda159
update hssm version
digicosmos86 Dec 5, 2023
2567e32
Updated default parameter specifications
digicosmos86 Dec 7, 2023
84f1d58
suppress some warnings
digicosmos86 Dec 7, 2023
7a50254
bump ssm-simulators version
digicosmos86 Dec 7, 2023
6c935d0
update ssm-simulators
digicosmos86 Dec 7, 2023
d9b2827
update ssm-simulators
digicosmos86 Dec 7, 2023
f457631
fix a test
digicosmos86 Dec 7, 2023
8d006a0
Merge branch 'safe-prior-strategy' of https://github.com/lnccbrown/HS…
digicosmos86 Dec 7, 2023
509c0e2
set default init to
digicosmos86 Dec 7, 2023
2cb9608
bump ssm-simulators
digicosmos86 Dec 12, 2023
e7da626
Merge branch 'safe-prior-strategy' into update-documentation-020
digicosmos86 Dec 13, 2023
e0bac72
added string representation for generalized logit
digicosmos86 Dec 13, 2023
67575a0
fixed a bug where link_settings does not work in hssm
digicosmos86 Dec 13, 2023
6ae87ad
added documentation for GPU support
digicosmos86 Dec 13, 2023
820ff00
fix bugs in param.py
digicosmos86 Dec 13, 2023
e605503
added documentation for hierachical modeling
digicosmos86 Dec 15, 2023
0d4c507
added changelog
digicosmos86 Dec 15, 2023
5061f44
Merge branch 'main' into safe-prior-strategy
digicosmos86 Dec 15, 2023
32b6af1
changed version to 0.2.0b1
digicosmos86 Dec 15, 2023
15 changes: 5 additions & 10 deletions pyproject.toml
@@ -30,7 +30,7 @@ bambi = "^0.12.0"
numpyro = "^0.12.1"
hddm-wfpt = "^0.1.1"
seaborn = "^0.13.0"
pytensor = "<=2.17.3"
pytensor = "<2.17.4"

[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
@@ -69,7 +69,7 @@ profile = "black"

[tool.ruff]
line-length = 88
target-version = "py39"
target-version = "py310"
unfixable = ["E711"]

select = [
@@ -132,6 +132,8 @@ ignore = [
"B020",
# Function definition does not bind loop variable
"B023",
# `zip()` without an explicit `strict=`
"B905",
# Functions defined inside a loop must not use variables redefined in the loop
# "B301", # not yet implemented
# Too many arguments to function call
@@ -166,14 +168,7 @@ ignore = [
"TID252",
]

exclude = [
".github",
"docs",
"notebook",
"tests",
"src/hssm/likelihoods/hddm_wfpt/cdfdif_wrapper.c",
"src/hssm/likelihoods/hddm_wfpt/wfpt.cpp",
]
exclude = [".github", "docs", "notebook", "tests"]

[tool.ruff.pydocstyle]
convention = "numpy"
34 changes: 31 additions & 3 deletions src/hssm/hssm.py
@@ -24,6 +24,7 @@
import seaborn as sns
import xarray as xr
from bambi.model_components import DistributionalComponent
from bambi.transformations import transformations_namespace
Collaborator:
What is that one for, actually?

Collaborator (Author):
I think there is a default namespace that has to be included. It can be found in the Bambi source code here:

https://github.com/bambinos/bambi/blob/312afa24b25385f5fee9e0331e88052598c39b59/bambi/models.py#L149-L155
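
To make the point above concrete, here is a minimal sketch of how the new extra_namespace argument is meant to be used; the toy data, the standardize helper, and the v formula are illustrative placeholders, not part of this PR. Whatever is passed in is merged on top of Bambi's built-in transformations_namespace and becomes callable when the model formula is evaluated.

```python
import numpy as np
import pandas as pd

import hssm  # assumes the hssm package built from this branch


def standardize(x):
    """Hypothetical user-supplied transformation to call inside a formula."""
    return (x - np.mean(x)) / np.std(x)


# Toy data with the columns HSSM expects (rt, response) plus a covariate.
rng = np.random.default_rng(0)
data = pd.DataFrame(
    {
        "rt": rng.uniform(0.3, 2.0, size=200),
        "response": rng.choice([-1, 1], size=200),
        "x": rng.normal(size=200),
        "participant_id": rng.integers(0, 10, size=200),
    }
)

model = hssm.HSSM(
    data=data,
    model="ddm",
    include=[{"name": "v", "formula": "v ~ standardize(x) + (1|participant_id)"}],
    # Merged into the formula evaluation environment on top of
    # bambi.transformations.transformations_namespace.
    extra_namespace={"standardize": standardize},
)
```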


from hssm.defaults import (
LoglikKind,
@@ -164,6 +165,9 @@ class HSSM:
recommended when you are using hierarchical models.
The default value is `None` when `hierarchical` is `False` and `"safe"` when
`hierarchical` is `True`.
extra_namespace : optional
Additional user supplied variables with transformations or data to include in
the environment where the formula is evaluated. Defaults to `None`.
**kwargs
Additional arguments passed to the `bmb.Model` object.

@@ -214,6 +218,7 @@ def __init__(
hierarchical: bool = False,
link_settings: Literal["log_logit"] | None = None,
prior_settings: Literal["safe"] | None = None,
extra_namespace: dict[str, Any] | None = None,
**kwargs,
):
self.data = data
@@ -232,6 +237,11 @@
self.link_settings = link_settings
self.prior_settings = prior_settings

additional_namespace = transformations_namespace.copy()
if extra_namespace is not None:
additional_namespace.update(extra_namespace)
self.additional_namespace = additional_namespace

responses = self.data["response"].unique().astype(int)
self.n_responses = len(responses)
if self.n_responses == 2:
@@ -312,7 +322,12 @@ def __init__(
)

self.model = bmb.Model(
self.formula, data, family=self.family, priors=self.priors, **other_kwargs
self.formula,
data=data,
family=self.family,
priors=self.priors,
extra_namespace=extra_namespace,
**other_kwargs,
)

self._aliases = get_alias_dict(self.model, self._parent_param)
@@ -852,6 +867,8 @@ def _add_kwargs_and_p_outlier_to_include(
"""Process kwargs and p_outlier and add them to include."""
if include is None:
include = []
else:
include = include.copy()
params_in_include = [param["name"] for param in include]

# Process kwargs
@@ -913,7 +930,7 @@ def _preprocess_rest(self, processed: dict[str, Param]) -> dict[str, Param]:
bounds = self.model_config.bounds.get(param_str)
param = Param(
param_str,
formula="1 + (1|participant_id)",
formula=f"{param_str} ~ 1 + (1|participant_id)",
bounds=bounds,
)
else:
@@ -956,15 +973,26 @@ def _find_parent(self) -> tuple[str, Param]:

def _override_defaults(self):
"""Override the default priors or links."""
is_ddm = (
self.model_name in ["ddm", "ddm_sdv"] and self.loglik_kind == "analytical"
) or (self.model_name == "ddm_full" and self.loglik_kind == "blackbox")
for param in self.list_params:
param_obj = self.params[param]
if self.prior_settings == "safe":
param_obj.override_default_priors(self.data)
if is_ddm:
param_obj.override_default_priors_ddm(
self.data, self.additional_namespace
)
else:
param_obj.override_default_priors(
self.data, self.additional_namespace
)
elif self.link_settings == "log_logit":
param_obj.override_default_link()

def _process_all(self):
"""Process all params."""
assert self.list_params is not None
for param in self.list_params:
self.params[param].convert()

160 changes: 147 additions & 13 deletions src/hssm/param.py
@@ -1,14 +1,16 @@
"""The Param utility class."""

import logging
from typing import Any, Union, cast
from copy import deepcopy
from typing import Any, Literal, Union, cast

import bambi as bmb
import numpy as np
import pandas as pd
from formulae import design_matrices

from .link import Link
from .prior import Prior
from .prior import Prior, get_default_prior, get_hddm_default_prior

# PEP604 union operator "|" not supported by pylint
# Fall back to old syntax
@@ -98,14 +100,7 @@ def override_default_link(self):

This is most likely because both default prior and default bounds are supplied.
"""
if self._is_converted:
raise ValueError(
(
"Cannot override the default link function for parameter %s."
+ " The object has already been processed."
)
% self.name,
)
self._ensure_not_converted(context="link")

if not self.is_regression or self._link_specified:
return # do nothing
@@ -136,8 +131,62 @@ def override_default_link(self):
upper,
)

def override_default_priors(self, data: pd.DataFrame):
"""Override the default priors.
def override_default_priors(self, data: pd.DataFrame, eval_env: dict[str, Any]):
"""Override the default priors - the general case.

By supplying priors for all parameters in the regression, we can override the
defaults that Bambi uses.

Parameters
----------
data
The data used to fit the model.
eval_env
The environment used to evaluate the formula.
"""
self._ensure_not_converted(context="prior")

if not self.is_regression:
return

override_priors = {}
dm = self._get_design_matrices(data, eval_env)

has_common_intercept = False
for name, term in dm.common.terms.items():
if term.kind == "intercept":
has_common_intercept = True
override_priors[name] = get_default_prior(
"common_intercept", self.bounds
)
else:
override_priors[name] = get_default_prior("common", bounds=None)

for name, term in dm.group.terms.items():
if term.kind == "intercept":
if has_common_intercept:
override_priors[name] = get_default_prior("group_intercept", None)
else:
# treat the term as any other group-specific term
_logger.warning(
f"No common intercept. Bounds for parameter {self.name} is not"
+ " applied due to a current limitation of Bambi."
+ " This will change in the future."
)
override_priors[name] = get_default_prior(
"group_specific", bounds=None
)
else:
override_priors[name] = get_default_prior("group_specific", bounds=None)

if not self.prior:
self.prior = override_priors
else:
prior = cast(dict[str, ParamSpec], self.prior)
self.prior = merge_dicts(override_priors, prior)

def override_default_priors_ddm(self, data: pd.DataFrame, eval_env: dict[str, Any]):
"""Override the default priors - the ddm case.

By supplying priors for all parameters in the regression, we can override the
defaults that Bambi uses.
@@ -146,8 +195,82 @@ def override_default_priors(self, data: pd.DataFrame):
----------
data
The data used to fit the model.
eval_env
The environment used to evaluate the formula.
"""
self._ensure_not_converted(context="prior")
assert self.name is not None

if not self.is_regression:
return

override_priors = {}
dm = self._get_design_matrices(data, eval_env)

has_common_intercept = False
for name, term in dm.common.terms.items():
if term.kind == "intercept":
has_common_intercept = True
override_priors[name] = get_hddm_default_prior(
"common_intercept", self.name, self.bounds
)
else:
override_priors[name] = get_hddm_default_prior(
"common", self.name, bounds=None
)

for name, term in dm.group.terms.items():
if term.kind == "intercept":
if has_common_intercept:
override_priors[name] = get_hddm_default_prior(
"group_intercept", self.name, bounds=None
)
else:
# treat the term as any other group-specific term
_logger.warning(
f"No common intercept. Bounds for parameter {self.name} is not"
+ " applied due to a current limitation of Bambi."
+ " This will change in the future."
)
override_priors[name] = get_hddm_default_prior(
"group_intercept", self.name, bounds=None
)
else:
override_priors[name] = get_hddm_default_prior(
"group_specific", self.name, bounds=None
)

if not self.prior:
self.prior = override_priors
else:
prior = cast(dict[str, ParamSpec], self.prior)
self.prior = merge_dicts(override_priors, prior)

def _get_design_matrices(self, data: pd.DataFrame, extra_namespace: dict[str, Any]):
"""Get the design matrices for the regression.

Parameters
----------
data
A pandas DataFrame
eval_env
The evaluation environment
"""
return # Will implement in the next PR
formula = cast(str, self.formula)
rhs = formula.split("~")[1]
formula = "rt ~ " + rhs
dm = design_matrices(formula, data=data, extra_namespace=extra_namespace)

return dm

def _ensure_not_converted(self, context=Literal["link", "prior"]):
"""Ensure that the object has not been converted."""
if self._is_converted:
context = "link function" if context == "link" else "priors"
raise ValueError(
f"Cannot override the default {context} for parameter {self.name}."
+ " The object has already been processed."
)

def set_parent(self):
"""Set the Param as parent."""
@@ -531,3 +654,14 @@ def _make_default_prior(bounds: tuple[float, float]) -> bmb.Prior:
return bmb.Prior("TruncatedNormal", mu=lower, lower=lower, sigma=2.0)
else:
return bmb.Prior(name="Uniform", lower=lower, upper=upper)


def merge_dicts(dict1: dict, dict2: dict) -> dict:
Collaborator:
Strictly speaking this is not a merge, right? It seems that if a key is in merged but the values are not dicts, you override the value from dict1 with whatever you find in dict2. So maybe something like override_dict?

Collaborator (Author):
This actually raises a question: should the override be recursive? For example, if 1|participant_id has a user-defined prior with only sigma having a defined hyperprior, should we also add our default mu? It seems that if we don't, a Bambi or PyMC default will be applied.

Collaborator:
I would say no, because even then we would not cover the case in full generality. The logic would only apply if the user-defined distribution for 1|participant_id matches the one for which we have hyperpriors available. If users define their own prior, they should define it all the way down, I would say.

"""Recursively merge two dictionaries."""
merged = deepcopy(dict1)
for key, value in dict2.items():
if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
merged[key] = merge_dicts(merged[key], value)
else:
merged[key] = value
return merged
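
To make the recursive behavior discussed in the thread above concrete, here is a small illustration; the prior dictionaries are made up, and it assumes merge_dicts is importable from hssm.param as defined in this diff. Nested dictionaries are merged level by level, and for non-dict values the entry from the second dictionary wins.

```python
from hssm.param import merge_dicts  # assumes the module path from this diff

defaults = {
    "Intercept": {"name": "Normal", "mu": 0.0, "sigma": 2.0},
    "1|participant_id": {
        "name": "Normal",
        "mu": 0.0,
        "sigma": {"name": "HalfNormal", "sigma": 1.0},
    },
}

# The user only overrides the hyperprior's sigma for the group-specific intercept.
user = {"1|participant_id": {"sigma": {"sigma": 0.5}}}

merged = merge_dicts(defaults, user)
assert merged["1|participant_id"]["sigma"] == {"name": "HalfNormal", "sigma": 0.5}
assert merged["1|participant_id"]["mu"] == 0.0       # kept from the defaults
assert merged["Intercept"] == defaults["Intercept"]  # untouched
```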