Skip to content

Commit

Permalink
updates to codebase to reflect recent dev (#93)
Browse files Browse the repository at this point in the history
  • Loading branch information
kaitejohnson authored Jun 26, 2024
1 parent 5e6eb64 commit b3a3f1a
Show file tree
Hide file tree
Showing 26 changed files with 4,000 additions and 65 deletions.
6 changes: 0 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,6 @@ secrets.yaml
# Azure batch configuration
batch_config.toml

# batch
batch/
Makefile
Containerfile
.containerignore

# rendered documents ignored by default
*.html
*.pdf
Expand Down
21 changes: 20 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,26 @@ repos:
hooks:
- id: style-files
- id: lintr
#####

########
# Python
- repo: https://github.com/psf/black
rev: 23.10.0
hooks:
- id: black
args: ['--line-length', '79']
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
args: ['--profile', 'black',
'--line-length', '79']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.0
hooks:
- id: ruff

#########
# Secrets
- repo: https://github.com/Yelp/detect-secrets
rev: v1.4.0
Expand Down
6 changes: 4 additions & 2 deletions _targets_eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,8 @@ downstream_targets <- list(
name = plot_summarized_scores_w_data,
command = get_plot_scores_w_data(
grouped_submission_scores,
eval_hosp_data
eval_hosp_data,
eval_config$figure_dir
),
pattern = map(grouped_submission_scores),
iteration = "list",
Expand Down Expand Up @@ -900,7 +901,8 @@ downstream_targets <- list(
name = plot_quantile_comparison,
command = get_plot_quantile_comparison(
all_hosp_quantiles,
eval_hosp_data
eval_hosp_data,
eval_config$figure_dir
),
pattern = map(all_hosp_quantiles),
iteration = "list"
Expand Down
195 changes: 165 additions & 30 deletions _targets_eval_postprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ upstream_targets <- list(
last_hosp_data_date = eval_config$eval_date,
ww_data_mapping = eval_config$ww_data_mapping
)
),
# Returns a dataframe with each location and date and a corresponding
# epidemic phase
tar_target(
name = epidemic_phases,
command = get_epidemic_phases_from_rt(
locations = unique(eval_config$location_ww),
retro_rt_path = eval_config$retro_rt_path
)
)
)

Expand Down Expand Up @@ -141,14 +150,15 @@ combined_targets <- list(
## Flags------------------------------------------------------------------
tar_target(
name = all_flags_ww,
command = combine_outputs(
output_type = "flags",
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_dates,
locations = eval_config$location_ww,
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
command =
combine_outputs(
output_type = "flags",
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_date_ww,
locations = eval_config$location_ww,
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
),
tar_target(
name = all_flags_hosp,
Expand All @@ -161,6 +171,25 @@ combined_targets <- list(
model_type = "hosp"
)
),
tar_target(
name = all_flags,
command = dplyr::bind_rows(all_flags_ww, all_flags_hosp)
),
tar_target(
name = convergence_df_ww,
command = get_convergence_df(
all_flags_ww,
default_scenario = "status_quo"
) |>
dplyr::rename(any_flags_ww = any_flags)
),
tar_target(
name = convergence_df_hosp,
command = get_convergence_df(all_flags_hosp,
default_scenario = "no_wastewater"
) |>
dplyr::rename(any_flags_hosp = any_flags)
),
### Scores from quantiles-------------------------------------------------
tar_target(
name = all_ww_scores_quantiles,
Expand All @@ -169,7 +198,7 @@ combined_targets <- list(
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_date_ww,
locations = eval_config$location_ww,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
),
Expand All @@ -180,7 +209,7 @@ combined_targets <- list(
scenarios = "no_wastewater",
forecast_dates = eval_config$forecast_date_hosp,
locations = eval_config$location_hosp,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "hosp"
)
),
Expand All @@ -192,7 +221,7 @@ combined_targets <- list(
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_date_ww,
locations = eval_config$location_ww,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
),
Expand All @@ -203,7 +232,7 @@ combined_targets <- list(
scenarios = "no_wastewater",
forecast_dates = eval_config$forecast_date_hosp,
locations = eval_config$location_hosp,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "hosp"
)
),
Expand All @@ -214,7 +243,7 @@ combined_targets <- list(
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_date_ww,
locations = eval_config$location_ww,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
),
Expand All @@ -226,7 +255,7 @@ combined_targets <- list(
scenarios = eval_config$scenario,
forecast_dates = eval_config$forecast_date_ww,
locations = eval_config$location_ww,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "ww"
)
),
Expand All @@ -237,15 +266,131 @@ combined_targets <- list(
scenarios = "no_wastewater",
forecast_dates = eval_config$forecast_date_hosp,
locations = eval_config$location_hosp,
eval_output_subdir = file.path("output", "eval"),
eval_output_subdir = eval_config$output_dir,
model_type = "hosp"
)
)
)

# Head-to-head comparison targets-------------------------------------------
# This set of targets will be conditioned on the presence of sufficient
# wastewater, whereas the below targets assume that for every location and
# forecast date we had to submit a forecast, and so we used the hospital
# admissions only model if wastewater was missing.
# These are only relevant for the status quo scenario
head_to_head_targets <- list(
tar_target(
name = all_ww_quantiles_sq,
command = all_ww_quantiles |>
dplyr::filter(scenario == "status_quo")
),
# Get a table of locations and forecast dates with sufficient wastewater
tar_target(
name = table_of_loc_dates_w_ww,
command = get_table_sufficient_ww(all_ww_quantiles)
),
# Get a table indicating whether there are locations and forecast dates with
# convergence issues
tar_target(
name = convergence_df,
command = convergence_df_hosp |>
dplyr::left_join(convergence_df_ww,
by = c("location", "forecast_date")
)
),
# Get the full set of quantiles, filtered down to only states and
# forecast dates with sufficient wastewater for both ww model and hosp only
# model. Then join the convergence df
tar_target(
name = hosp_quantiles_filtered,
command = dplyr::bind_rows(
all_ww_hosp_quantiles,
all_hosp_model_quantiles
) |>
dplyr::left_join(table_of_loc_dates_w_ww,
by = c("location", "forecast_date")
) |>
dplyr::filter(
ww_sufficient # filters to location forecast dates with sufficient ww
) |>
dplyr::left_join(
convergence_df,
by = c(
"location",
"forecast_date"
)
) |>
dplyr::left_join(
epidemic_phases,
by = c(
"location" = "state_abbr",
"date" = "reference_date"
)
)
),
# Do the same thing for the sampled scores, combining ww and hosp under
# the status quo scenario, filtering to the locations and forecast dates
# with sufficient wastewater, and then joining the convergence flags
tar_target(
name = scores_filtered,
command = dplyr::bind_rows(
all_hosp_scores,
all_ww_scores |>
dplyr::filter(scenario == "status_quo")
) |>
dplyr::left_join(table_of_loc_dates_w_ww,
by = c("location", "forecast_date")
) |>
dplyr::filter(ww_sufficient) |>
dplyr::left_join(
convergence_df,
by = c(
"location",
"forecast_date"
)
) |>
dplyr::left_join(
epidemic_phases,
by = c(
"location" = "state_abbr",
"date" = "reference_date"
)
)
),
# Repeat for the quantile-based scores
tar_target(
name = scores_quantiles_filtered,
command = dplyr::bind_rows(
all_hosp_scores_quantiles,
all_ww_scores_quantiles |>
dplyr::filter(scenario == "status_quo")
) |>
dplyr::left_join(table_of_loc_dates_w_ww,
by = c("location", "forecast_date")
) |>
dplyr::filter(
isTRUE(ww_sufficient)
) |>
dplyr::left_join(
convergence_df,
by = c(
"location",
"forecast_date"
)
) |>
dplyr::left_join(
epidemic_phases,
by = c(
"location" = "state_abbr",
"date" = "reference_date"
)
)
)
)


# Head-to-head-scenario targets------------------------------------------------
head_to_head_scenario_targets <- list(
# Scenario targets------------------------------------------------
scenario_targets <- list(
tar_target(
name = all_raw_scores,
command = data.table::as.data.table(
Expand All @@ -262,18 +407,7 @@ head_to_head_scenario_targets <- list(
name = all_errors,
command = dplyr::bind_rows(all_hosp_errors, all_ww_errors)
),
tar_target(
name = all_flags,
command = dplyr::bind_rows(all_flags_ww, all_flags_hosp)
),
tar_target(
name = nonconverge_df,
command = all_flags |> distinct(
location, forecast_date, model, scenario
) |>
dplyr::mutate(convergence = FALSE) # Add a column that indicates did not pass
# convergence. We will use this in the head-to-head comparison to statify by convergence
),


## Raw scores-----------------------------------------
# These are the scores from each scenario and location without buffering
Expand Down Expand Up @@ -653,7 +787,8 @@ hub_comparison_plots <- list(
list(
upstream_targets,
combined_targets,
head_to_head_scenario_targets,
head_to_head_targets,
scenario_targets,
hub_targets,
hub_comparison_plots
)
4 changes: 3 additions & 1 deletion model_definition.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ where $\gamma$ is the _infection feedback term_ controlling the strength of the

Following other semi-mechanistic renewal frameworks, we model the _expected_ hospital admissions per capita $H(t)$ as a convolution of the _expected_ latent incident infections per capita $I(t)$, and a discrete infection to hospitalization distribution $d(\tau)$, scaled by the probability of being hospitalized $p_\mathrm{hosp}(t)$.

To account for day-of-week effects in hospital reporting, we use an estimated _weekday effect_ $\omega(t)$. If $t$ and $t'$ are the same day of the week, $\omega(t) = \omega(t')$. The seven values that $\omega(t)$ takes on are constrained to have mean 1.
To account for day-of-week effects in hospital reporting, we use an estimated _weekday effect_ $\omega(t)$. If $t$ and $t'$ are the same day of the week, $\omega(t) = \omega(t')$.
The seven values that $\omega(t)$ takes on are constrained to be non-negative and have a mean of 1.
This allows us to model the possibility that certain days of the week could have systematically high or low admissions reporting while holding the predicted weekly total reported admissions constant (i.e. the predicted weekly total is the same with and without these day-of-week reporting effects).

$$H(t) = \omega(t) p_\mathrm{hosp}(t) \sum_{\tau = 0}^{T_d} d(\tau) I(t-\tau)$$

Expand Down
24 changes: 24 additions & 0 deletions scratch/ar1_example.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
ar1 <- function(mu, ac, sd, z) {
n <- length(z)
x <- rep(0, n)
tvd <- rep(0, n)

tvd[1] <- sd * z[1]
x[1] <- mu[1] + tvd[1]

for (i in 2:n) {
tvd[i] <- ac * tvd[i - 1] + sd * z[i]
x[i] <- mu[i] + tvd[i]
}
return(x)
}

p_hosp_mean <- rep(0.01, 26)
p_hosp_logit <- qlogis(p_hosp_mean)
ac <- 0.01
sd <- 0.3
z <- rnorm(26)

p_hosp_t_logit <- ar1(p_hosp_logit, ac, sd, z)

plot(plogis(p_hosp_t_logit))
Loading

0 comments on commit b3a3f1a

Please sign in to comment.