From 407cad84ee9f5dcbf921cb9794be4772be723e9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paul-Christian=20B=C3=BCrkner?= <paul.buerkner@gmail.com>
Date: Thu, 12 Sep 2024 16:31:21 +0200
Subject: [PATCH] fix issue #1652

---
 R/data-helpers.R                 | 4 ++++
 R/prepare_predictions.R          | 9 +++++----
 man/fitted.brmsfit.Rd            | 9 +++++----
 man/get_refmodel.brmsfit.Rd      | 9 +++++----
 man/log_lik.brmsfit.Rd           | 9 +++++----
 man/loo_moment_match.brmsfit.Rd  | 9 +++++----
 man/posterior_epred.brmsfit.Rd   | 9 +++++----
 man/posterior_linpred.brmsfit.Rd | 9 +++++----
 man/posterior_predict.brmsfit.Rd | 9 +++++----
 man/pp_check.brmsfit.Rd          | 9 +++++----
 man/pp_mixture.brmsfit.Rd        | 9 +++++----
 man/predict.brmsfit.Rd           | 9 +++++----
 man/predictive_error.brmsfit.Rd  | 9 +++++----
 man/prepare_predictions.Rd       | 9 +++++----
 man/psis.brmsfit.Rd              | 9 +++++----
 man/reloo.brmsfit.Rd             | 9 +++++----
 man/residuals.brmsfit.Rd         | 9 +++++----
 man/standata.brmsfit.Rd          | 9 +++++----
 18 files changed, 89 insertions(+), 68 deletions(-)

diff --git a/R/data-helpers.R b/R/data-helpers.R
index b8a5eca7f..d63d9a81e 100644
--- a/R/data-helpers.R
+++ b/R/data-helpers.R
@@ -558,6 +558,10 @@ validate_newdata <- function(
     new_levels <- get_levels(bterms, data = newdata)
     for (g in names(old_levels)) {
       unknown_levels <- setdiff(new_levels[[g]], old_levels[[g]])
+      # NA is not found by get_levels but still behaves like a new level (#1652)
+      if (anyNA(newdata[[g]])) {
+        c(unknown_levels) <- NA
+      }
       if (length(unknown_levels)) {
         unknown_levels <- collapse_comma(unknown_levels)
         stop2(
diff --git a/R/prepare_predictions.R b/R/prepare_predictions.R
index 90c15ef6f..508d81725 100644
--- a/R/prepare_predictions.R
+++ b/R/prepare_predictions.R
@@ -1173,10 +1173,11 @@ is.bprepnl <- function(x) {
 #'
 #' @param x An \R object typically of class \code{'brmsfit'}.
 #' @param newdata An optional data.frame for which to evaluate predictions. If
-#'   \code{NULL} (default), the original data of the model is used.
-#'   \code{NA} values within factors are interpreted as if all dummy
-#'   variables of this factor are zero. This allows, for instance, to make
-#'   predictions of the grand mean when using sum coding.
+#'   \code{NULL} (default), the original data of the model is used. \code{NA}
+#'   values within factors (excluding grouping variables) are interpreted as if
+#'   all dummy variables of this factor are zero. This allows, for instance, to
+#'   make predictions of the grand mean when using sum coding. \code{NA} values
+#'   within grouping variables are treated as a new level.
 #' @param re_formula formula containing group-level effects to be considered in
 #'   the prediction. If \code{NULL} (default), include all group-level effects;
 #'   if \code{NA} or \code{~0}, include no group-level effects.
diff --git a/man/fitted.brmsfit.Rd b/man/fitted.brmsfit.Rd
index 730fbc4ae..9811eef2e 100644
--- a/man/fitted.brmsfit.Rd
+++ b/man/fitted.brmsfit.Rd
@@ -25,10 +25,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/get_refmodel.brmsfit.Rd b/man/get_refmodel.brmsfit.Rd
index 9a6050272..60ec62f2c 100644
--- a/man/get_refmodel.brmsfit.Rd
+++ b/man/get_refmodel.brmsfit.Rd
@@ -19,10 +19,11 @@ get_refmodel.brmsfit(
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{resp}{Optional names of response variables. If specified, predictions
 are performed only for the specified response variables.}
diff --git a/man/log_lik.brmsfit.Rd b/man/log_lik.brmsfit.Rd
index d7a18d26a..67dafa487 100644
--- a/man/log_lik.brmsfit.Rd
+++ b/man/log_lik.brmsfit.Rd
@@ -24,10 +24,11 @@
 \item{object}{A fitted model object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/loo_moment_match.brmsfit.Rd b/man/loo_moment_match.brmsfit.Rd
index fd0f45595..e6e6a418e 100644
--- a/man/loo_moment_match.brmsfit.Rd
+++ b/man/loo_moment_match.brmsfit.Rd
@@ -33,10 +33,11 @@ See \code{\link[loo:pareto-k-diagnostic]{pareto_k_ids}}
 for more details.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{resp}{Optional names of response variables. If specified, predictions
 are performed only for the specified response variables.}
diff --git a/man/posterior_epred.brmsfit.Rd b/man/posterior_epred.brmsfit.Rd
index a8e57ebc4..75cc93724 100644
--- a/man/posterior_epred.brmsfit.Rd
+++ b/man/posterior_epred.brmsfit.Rd
@@ -24,10 +24,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/posterior_linpred.brmsfit.Rd b/man/posterior_linpred.brmsfit.Rd
index 06f19879e..683fa82ff 100644
--- a/man/posterior_linpred.brmsfit.Rd
+++ b/man/posterior_linpred.brmsfit.Rd
@@ -30,10 +30,11 @@ If \code{TRUE}, draws of the transformed linear predictor,
 that is, after applying the inverse link function are returned.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/posterior_predict.brmsfit.Rd b/man/posterior_predict.brmsfit.Rd
index 0a860ebb4..3e3f89740 100644
--- a/man/posterior_predict.brmsfit.Rd
+++ b/man/posterior_predict.brmsfit.Rd
@@ -25,10 +25,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/pp_check.brmsfit.Rd b/man/pp_check.brmsfit.Rd
index fadb2f1d2..737333f86 100644
--- a/man/pp_check.brmsfit.Rd
+++ b/man/pp_check.brmsfit.Rd
@@ -49,10 +49,11 @@ Only used for ppc types having an \code{x} argument
 and ignored otherwise.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{resp}{Optional names of response variables. If specified, predictions
 are performed only for the specified response variables.}
diff --git a/man/pp_mixture.brmsfit.Rd b/man/pp_mixture.brmsfit.Rd
index 82ff3f6a8..e7be94760 100644
--- a/man/pp_mixture.brmsfit.Rd
+++ b/man/pp_mixture.brmsfit.Rd
@@ -25,10 +25,11 @@ pp_mixture(x, ...)
 \item{x}{An \R object usually of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/predict.brmsfit.Rd b/man/predict.brmsfit.Rd
index c24e15411..a197c395f 100644
--- a/man/predict.brmsfit.Rd
+++ b/man/predict.brmsfit.Rd
@@ -26,10 +26,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/predictive_error.brmsfit.Rd b/man/predictive_error.brmsfit.Rd
index 520dc36f1..6e3485630 100644
--- a/man/predictive_error.brmsfit.Rd
+++ b/man/predictive_error.brmsfit.Rd
@@ -22,10 +22,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/prepare_predictions.Rd b/man/prepare_predictions.Rd
index ec75a1975..4062895fd 100644
--- a/man/prepare_predictions.Rd
+++ b/man/prepare_predictions.Rd
@@ -35,10 +35,11 @@ prepare_predictions(x, ...)
 \item{x}{An \R object typically of class \code{'brmsfit'}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/psis.brmsfit.Rd b/man/psis.brmsfit.Rd
index d0f0b90f7..2af401ca9 100644
--- a/man/psis.brmsfit.Rd
+++ b/man/psis.brmsfit.Rd
@@ -13,10 +13,11 @@ Argument is named "log_ratios" to match the argument name of the
 \code{\link[loo:psis]{loo::psis}} generic function.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{resp}{Optional names of response variables. If specified, predictions
 are performed only for the specified response variables.}
diff --git a/man/reloo.brmsfit.Rd b/man/reloo.brmsfit.Rd
index 8a51e82f5..0e9100f32 100644
--- a/man/reloo.brmsfit.Rd
+++ b/man/reloo.brmsfit.Rd
@@ -36,10 +36,11 @@ See \code{\link[loo:pareto-k-diagnostic]{pareto_k_ids}}
 for more details.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{resp}{Optional names of response variables. If specified, predictions
 are performed only for the specified response variables.}
diff --git a/man/residuals.brmsfit.Rd b/man/residuals.brmsfit.Rd
index b124b81a5..30bcca1bf 100644
--- a/man/residuals.brmsfit.Rd
+++ b/man/residuals.brmsfit.Rd
@@ -24,10 +24,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;
diff --git a/man/standata.brmsfit.Rd b/man/standata.brmsfit.Rd
index a5b1161c7..4af4dc9f1 100644
--- a/man/standata.brmsfit.Rd
+++ b/man/standata.brmsfit.Rd
@@ -18,10 +18,11 @@
 \item{object}{An object of class \code{brmsfit}.}
 
 \item{newdata}{An optional data.frame for which to evaluate predictions. If
-\code{NULL} (default), the original data of the model is used.
-\code{NA} values within factors are interpreted as if all dummy
-variables of this factor are zero. This allows, for instance, to make
-predictions of the grand mean when using sum coding.}
+\code{NULL} (default), the original data of the model is used. \code{NA}
+values within factors (excluding grouping variables) are interpreted as if
+all dummy variables of this factor are zero. This allows, for instance, to
+make predictions of the grand mean when using sum coding. \code{NA} values
+within grouping variables are treated as a new level.}
 
 \item{re_formula}{formula containing group-level effects to be considered in
 the prediction. If \code{NULL} (default), include all group-level effects;