Skip to content

Commit

Permalink
Updates before WA runthrough
Browse files Browse the repository at this point in the history
  • Loading branch information
arranhamlet committed Nov 12, 2024
1 parent 7afa768 commit e5b581e
Show file tree
Hide file tree
Showing 8 changed files with 363 additions and 66 deletions.
2 changes: 1 addition & 1 deletion R/1_assess_nonspatial_state.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pop_data <- import(here("data", "WA", "simulated_catchment_populations.csv"))
#Compile model - have to specify a specific location for windows computers where there is no space (i.e. no /Program Files/)
#Added the argument update_files so that if the prior version of the package is different to the current, it does a fresh
#move of the files. The stan files can be different between package versions.
compiled_model <- compile_model_upd(update_files = prior != current)
compiled_model <- compile_model_upd(update_files = T)

#Specify fit options
fit_this <- get_mcmc_options(
Expand Down
2 changes: 1 addition & 1 deletion R/3_assess_spatial_state.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pop_data <- import(here("data", "WA", "simulated_catchment_populations.csv"))
facility_distance <- import(here("data", "WA", "simulated_facility_distances.csv"))

#Compile model - have to specify a specific location for windows computers where there is no space (i.e. no /Program Files/)
compiled_model <- compile_model_upd(update_files = prior != current)
compiled_model <- compile_model_upd(update_files = T)

#Specify fit options
fit_this <- get_mcmc_options(
Expand Down
114 changes: 92 additions & 22 deletions R/functions/WA_nonspatial_run.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,53 @@ WA_nonspatial_run <- function(
)

# Create figures and score models -----------------------------------------
hospitalization_forecasts_object <- processed_outputs[[1]] %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit"
)
)

wastewater_forecasts_object <- processed_outputs[[4]] %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "site"
)
)

attr(hospitalization_forecasts_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)

attr(wastewater_forecasts_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)


#Score models on hospitalization forecasting
hospitalization_forecasts <- processed_outputs[[1]] %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
hospitalization_forecasts <- hospitalization_forecasts_object %>%
score() %>%
summarise_scores(by = c("model",
"forecast_or_fit"))

#Score model on wastewater forecasting
wastewater_forecasts <- processed_outputs[[4]] %>%
drop_na(true_value) %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
summarise_scores(by = "forecast_or_fit")
wastewater_forecasts <- wastewater_forecasts_object %>%
score() %>%
summarise_scores(by = c("model", "forecast_or_fit"))

#Plot hospitalization forecast
hospitalization_forecast <- ggplot(
Expand Down Expand Up @@ -289,7 +324,7 @@ WA_nonspatial_run <- function(
run = x
),
file = paste0(folder_full, "run_", x, "_of_", length(random_dates), "_hosp_rawpredictions.csv"),
encoding = "gzip")
compress = "gzip")

#Output full raw predictions for wastewater data - using fwrite and gzip compression to reduce filesize by 100x
#however it means that you cant open the file directly in excel
Expand All @@ -298,7 +333,7 @@ WA_nonspatial_run <- function(
run = x
),
file = paste0(folder_full, "run_", x, "_of_", length(random_dates), "_ww_rawpredictions.csv"),
encoding = "gzip")
compress = "gzip")

#Output model scores
write.csv(x = hospitalization_forecasts %>%
Expand Down Expand Up @@ -359,23 +394,58 @@ WA_nonspatial_run <- function(
}, simplify = FALSE))

#Model score
model_score_raw_hosp <- all_rawpred_hosp %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
#Model score
all_rawpred_hosp_object <- all_rawpred_hosp %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "run"
)
)

attr(all_rawpred_hosp_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)

model_score_raw_hosp <- all_rawpred_hosp_object %>%
score() %>%
summarise_scores(by = c("model", "forecast_or_fit")) %>%
rbind(all_rawpred_hosp %>%
score() %>%
summarise_scores(by = "model") %>%
mutate(forecast_or_fit = "All")) %>%
arrange(forecast_or_fit, model)

model_score_raw_ww <- all_rawpred_ww %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
summarise_scores(by = c("site", "forecast_or_fit")) %>%
rbind(all_rawpred_ww %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
summarise_scores(by = "site") %>%
mutate(forecast_or_fit = "All")) %>%
arrange(forecast_or_fit, site)
#Now for wastewater
all_rawpred_ww_object <- all_rawpred_ww %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "run", "site"
)
)

model_score_raw_ww <- all_rawpred_ww_object %>%
score() %>%
summarise_scores(by = c("site", "forecast_or_fit", "model")) %>%
arrange(forecast_or_fit, site, model)

model_score_raw_all <- all_rawpred_ww_object %>%
score() %>%
summarise_scores(by = c("forecast_or_fit", "model")) %>%
mutate(site = "All") %>%
arrange(forecast_or_fit, site, model)

model_score_raw_ww <- rbind(
model_score_raw_ww,
model_score_raw_all
)

#What is the order of runs in time
order_of_runs_date <- data.frame(run = all_rawpred_hosp$run, date = all_rawpred_hosp$date) %>%
Expand Down
120 changes: 97 additions & 23 deletions R/functions/WA_spatial_run.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,17 +197,52 @@ WA_spatial_run <- function(
)

# Create figures and score models -----------------------------------------
hospitalization_forecasts_object <- processed_outputs[[1]] %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit"
)
)

wastewater_forecasts_object <- processed_outputs[[4]] %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "site"
)
)

attr(hospitalization_forecasts_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)

attr(wastewater_forecasts_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)


#Score models on hospitalization forecasting
hospitalization_forecasts <- processed_outputs[[1]] %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
hospitalization_forecasts <- hospitalization_forecasts_object %>%
score() %>%
summarise_scores(by = c("model",
"forecast_or_fit"))

#Score model on wastewater forecasting
wastewater_forecasts <- processed_outputs[[4]] %>%
drop_na(true_value) %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
wastewater_forecasts <- wastewater_forecasts_object %>%
score() %>%
summarise_scores(by = c("model", "forecast_or_fit"))

#Plot hospitalization forecast
Expand Down Expand Up @@ -443,24 +478,58 @@ WA_spatial_run <- function(
}, simplify = FALSE))

#Model score
model_score_raw_hosp <- all_rawpred_hosp %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
all_rawpred_hosp_object <- all_rawpred_hosp %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "run"
)
)

attr(all_rawpred_hosp_object, "metrics") <- c(
"bias", "dss", "crps", "overprediction",
"underprediction", "dispersion", "log_score",
"mad", "ae_median", "se_mean"
)

model_score_raw_hosp <- all_rawpred_hosp_object %>%
score() %>%
summarise_scores(by = c("model", "forecast_or_fit")) %>%
rbind(all_rawpred_hosp %>%
score() %>%
summarise_scores(by = "model") %>%
mutate(forecast_or_fit = "All")) %>%
arrange(forecast_or_fit, model)

model_score_raw_ww <- all_rawpred_ww %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
#Now for wastewater
all_rawpred_ww_object <- all_rawpred_ww %>%
rename(
observed = true_value,
predicted = prediction,
sample_id = sample
) %>%
as_forecast_sample(
forecast_unit = c(
"date", "model", "forecast_or_fit", "run", "site"
)
)

model_score_raw_ww <- all_rawpred_ww_object %>%
score() %>%
summarise_scores(by = c("site", "forecast_or_fit", "model")) %>%
rbind(all_rawpred_ww %>%
score(metrics = c("mad", "bias", "dss", "crps", "ae_median", "se_mean", "log_score")) %>%
summarise_scores(by = c("site", "model")) %>%
mutate(forecast_or_fit = "All")) %>%
arrange(forecast_or_fit, site, model)

model_score_raw_all <- all_rawpred_ww_object %>%
score() %>%
summarise_scores(by = c("forecast_or_fit", "model")) %>%
mutate(site = "All") %>%
arrange(forecast_or_fit, site, model)

model_score_raw_ww <- rbind(
model_score_raw_ww,
model_score_raw_all
)

#What is the order of runs in time
order_of_runs_date <- data.frame(run = all_rawpred_hosp$run, date = all_rawpred_hosp$date) %>%
group_by(run) %>%
Expand Down Expand Up @@ -542,15 +611,15 @@ WA_spatial_run <- function(

#Wastewater improvement
ww_score_gathered <- model_score_raw_ww %>%
gather(mad:se_mean,
gather(bias:se_mean,
key = "metric",
value = "value") %>%
group_by(site, forecast_or_fit, metric) %>%
mutate(no_correlation_value = value[model == "No correlation"])

ww_score_gathered_format <- ww_score_gathered %>%
group_by(site, metric, forecast_or_fit, model) %>%
mutate(difference = 100 * (no_correlation_value - value)/value) %>%
mutate(difference = 100 * (abs(no_correlation_value) - abs(value))/abs(value)) %>%
filter(model != "No correlation") %>%
mutate(metric_full = case_when(
metric == "mad" ~ "Mean absolute deviation",
Expand All @@ -559,17 +628,21 @@ WA_spatial_run <- function(
metric == "crps" ~ "Ranked Probability score",
metric == "ae_median" ~ "Absolute error (median)",
metric == "se_mean" ~ "Standard error (mean)",
metric == "log_score" ~ "Log score"
metric == "log_score" ~ "Log score",
metric == "overprediction" ~ "Overprediction",
metric == "underprediction" ~ "Underprediction",
metric == "dispersion" ~ "Dispersion"
)) %>%
mutate(
site = factor(site, levels = sort(as.numeric(unique(ww_score_gathered$site)))),
site = factor(site, levels = c("All", sort(as.numeric(unique(ww_score_gathered$site))))),
metric_model = paste(metric, model, sep = " - ")
)

#Generate overall plot
metric_better_plot <- ggplot(
data = ww_score_gathered_format %>%
filter(forecast_or_fit == "Forecast"),
filter(forecast_or_fit == "Forecast" &
!metric %in% c("overprediction", "underprediction")),
mapping = aes(
x = site,
y = metric_full,
Expand Down Expand Up @@ -598,7 +671,8 @@ WA_spatial_run <- function(

#Output
ggsave(paste0(folder_summary, "all_hospitalization_forecasts.jpg"), hospitalization_forecast, width = 10, height = 6)
ggsave(paste0(folder_summary, "spatial_score_change.jpg"), metric_better_plot, width = 10, height = 10)

ggsave(paste0(folder_summary, "spatial_score_change.jpg"), metric_better_plot, width = 10, height = 7)

openxlsx::write.xlsx(x = as_tibble(model_score_raw_hosp),
file = paste0(folder_summary, "scoring_hospitalization.xlsx"))
Expand Down
8 changes: 5 additions & 3 deletions R/functions/compile_model_upd.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ compile_model_upd <- function(custom_location = F, update_files = F){
#Put back together above the portion with a
new_folder <- paste0(if(custom_location == T) custom_location else reconstructed_path, "/wastewater_stan_files/")

#Create directory
if(!dir.exists(new_folder)) dir.create(new_folder)

#Delete directory if it exists
unlink(new_folder, recursive = TRUE)
#Recreate directory
dir.create(new_folder)

#List files to move
these_files_to_move <- list.files(dirname(stan_location), full.names = T)

Expand Down
18 changes: 2 additions & 16 deletions R/functions/summarize_outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,7 @@ summarize_outputs <- function(
if(does_ww_data_exist){

#Set up wastewater dataset
ww_raw_data <- if(packageDescription("wwinference")$GithubRef == "spatial-main"){
get_draws_df(model_list[[x]]) %>% filter(name == "predicted wastewater") %>%
rename(
site = lab
)
} else {
get_draws(model_list[[x]])$predicted_ww
}
ww_raw_data <- get_draws(model_list[[x]])$predicted_ww

ww_draws <- ww_raw_data %>%
rename(
Expand Down Expand Up @@ -76,14 +69,7 @@ summarize_outputs <- function(
)

#Get model draws
hosp_raw_data <- if(packageDescription("wwinference")$GithubRef == "spatial-main"){
get_draws_df(model_list[[x]]) %>% filter(name == "predicted counts") %>%
rename(
site = lab
)
} else {
get_draws(model_list[[x]])$predicted_counts
}
hosp_raw_data <- get_draws(model_list[[x]])$predicted_counts

draws <- hosp_raw_data %>%
mutate(
Expand Down
Loading

0 comments on commit e5b581e

Please sign in to comment.