From d22efc5f684c92bd585bdbe71eaa126c64c75256 Mon Sep 17 00:00:00 2001 From: "Daniel J. McDonald" Date: Fri, 22 Dec 2023 13:32:19 -0800 Subject: [PATCH] prepro and models vignette edits * fix bolixed math * pivot simply * deal with training = NULL in `prep()` --- vignettes/preprocessing-and-models.Rmd | 47 +++++++++++++------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/vignettes/preprocessing-and-models.Rmd b/vignettes/preprocessing-and-models.Rmd index efc0024d8..f1cdb3c87 100644 --- a/vignettes/preprocessing-and-models.Rmd +++ b/vignettes/preprocessing-and-models.Rmd @@ -98,10 +98,11 @@ intercept coefficients, we can allow for an intercept shift between states. The model takes the form \begin{aligned} -\log\left( \mu*{t+7} \right) &= \beta_0 + \delta_1 s*{\text{state}_1} + -\delta_2 s_{\text{state}_2} + \cdots + \nonumber \\ &\quad\beta_1 \text{deaths}_{t} + -\beta*2 \text{deaths}*{t-7} + \beta*3 \text{cases}*{t} + -\beta*4 \text{cases}*{t-7}, +\log\left( \mu_{t+7} \right) &= \beta_0 + \delta_1 s_{\text{state}_1} + +\delta_2 s_{\text{state}_2} + \cdots + \nonumber \\ +&\quad\beta_1 \text{deaths}_{t} + +\beta_2 \text{deaths}_{t-7} + \beta_3 \text{cases}_{t} + +\beta_4 \text{cases}_{t-7}, \end{aligned} where $\mu_{t+7} = \mathbb{E}(y_{t+7})$, and $y_{t+7}$ is assumed to follow a Poisson distribution with mean $\mu_{t+7}$; $s_{\text{state}}$ are dummy @@ -195,11 +196,13 @@ rates, by incorporating offset terms in the model. 
To model death rates, the Poisson regression would be expressed as: \begin{aligned} -\log\left( \mu*{t+7} \right) &= \log(\text{population}) + -\beta_0 + \delta_1 s*{\text{state}_1} + -\delta_2 s_{\text{state}_2} + \cdots + \nonumber \\ &\quad\beta_1 \text{deaths}_{t} + -\beta*2 \text{deaths}*{t-7} + \beta*3 \text{cases}*{t} + -\beta*4 \text{cases}*{t-7}\end{aligned} +\log\left( \mu_{t+7} \right) &= \log(\text{population}) + +\beta_0 + \delta_1 s_{\text{state}_1} + +\delta_2 s_{\text{state}_2} + \cdots + \nonumber \\ +&\quad\beta_1 \text{deaths}_{t} + +\beta_2 \text{deaths}_{t-7} + \beta_3 \text{cases}_{t} + +\beta_4 \text{cases}_{t-7} +\end{aligned} where $\log(\text{population})$ is the log of the state population that was used to scale the count data on the left-hand side of the equation. This offset is simply a predictor with coefficient fixed at 1 rather than estimated. @@ -371,9 +374,7 @@ To look at the prediction intervals: ```{r} p %>% select(geo_value, target_date, .pred_scaled, .pred_distn_scaled) %>% - mutate(.pred_distn_scaled = nested_quantiles(.pred_distn_scaled)) %>% - unnest(.pred_distn_scaled) %>% - pivot_wider(names_from = quantile_levels, values_from = values) + pivot_quantiles_wider(.pred_distn_scaled) ``` Last but not least, let's take a look at the regression fit and check the @@ -425,16 +426,16 @@ $$ where $j$ is either down, flat, or up \begin{aligned} -g*{\text{down}}(x) &= 0.\\ -g*{\text{flat}}(x)&= \text{ln}\left(\frac{Pr(Z*{\ell,t}=\text{flat}|x)}{Pr(Z*{\ell,t}=\text{down}|x)}\right) = -\beta*{10} + \beta*{11}t + \delta*{10} s*{\text{state*1}} + -\delta*{11} s*{\text{state_2}} + \cdots \nonumber \\ -&\quad + \beta*{12} Y^{\Delta}_{\ell, t} + +g_{\text{down}}(x) &= 0.\\ +g_{\text{flat}}(x) &= \log\left(\frac{Pr(Z_{\ell,t}=\text{flat}\mid x)}{Pr(Z_{\ell,t}=\text{down}\mid x)}\right) = +\beta_{10} + \beta_{11} t + \delta_{10} s_{\text{state}_1} + +\delta_{11} s_{\text{state}_2} + \cdots \nonumber \\ +&\quad + \beta_{12} Y^{\Delta}_{\ell, t} 
+ \beta_{13} Y^{\Delta}_{\ell, t-7} \\ -g_{\text{flat}}(x) &= \text{ln}\left(\frac{Pr(Z*{\ell,t}=\text{up}|x)}{Pr(Z*{\ell,t}=\text{down}|x)}\right) = -\beta*{20} + \beta*{21}t + \delta*{20} s*{\text{state*1}} + -\delta*{21} s*{\text{state}\_2} + \cdots \nonumber \\ -&\quad + \beta*{22} Y^{\Delta}_{\ell, t} + +g_{\text{up}}(x) &= \log\left(\frac{Pr(Z_{\ell,t}=\text{up}\mid x)}{Pr(Z_{\ell,t}=\text{down}\mid x)}\right) = +\beta_{20} + \beta_{21} t + \delta_{20} s_{\text{state}_1} + +\delta_{21} s_{\text{state}_2} + \cdots \nonumber \\ +&\quad + \beta_{22} Y^{\Delta}_{\ell, t} + \beta_{23} Y^{\Delta}\_{\ell, t-7} \end{aligned} @@ -533,7 +534,7 @@ p1 <- epi_recipe(ex) %>% step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% step_epi_ahead(death_rate, ahead = 7, role = "outcome") %>% step_epi_naomit() %>% - prep() + prep(ex) b1 <- bake(p1, ex) b1 @@ -550,7 +551,7 @@ p2 <- epi_recipe(ex) %>% ahead7death_rate = lead(death_rate, 7) ) %>% step_epi_naomit() %>% - prep() + prep(ex) b2 <- bake(p2, ex) b2