From 0213cf993f1bebd2234cafd876c1091af73d97d4 Mon Sep 17 00:00:00 2001 From: javierps Date: Thu, 29 Aug 2024 11:57:30 +0200 Subject: [PATCH] more cleanup on final figures script --- Analysis/R/make_final_figures_and_tables.R | 156 +++++++++--------- packages/taxdat/DESCRIPTION | 2 +- packages/taxdat/NAMESPACE | 1 + packages/taxdat/R/output_figures_and_tables.R | 10 +- 4 files changed, 91 insertions(+), 78 deletions(-) diff --git a/Analysis/R/make_final_figures_and_tables.R b/Analysis/R/make_final_figures_and_tables.R index e357599a2..7f262309f 100644 --- a/Analysis/R/make_final_figures_and_tables.R +++ b/Analysis/R/make_final_figures_and_tables.R @@ -963,38 +963,6 @@ if (opt$redo | !file.exists(opt$bundle_filename)) { mai_adm_all <- mai_adm_all %>% inner_join(population %>% select(location_period_id, pop = mean, period)) - # ## Incidence ratios around rivers and lakes ----- - # - # dist_sf <- list("rivers" = rivers_sf, - # "lakes" = lakes_sf, - # "coasts" = coasts_sf, - # "freshwater" = bind_rows( - # rivers_sf, - # lakes_sf - # ), - # "water" = bind_rows( - # rivers_sf, - # lakes_sf, - # coasts_sf - # )) - # - # - # irr_dat <- map_df(seq_along(dist_sf), function(x) { - # map_df(c("within", "category"), function(y) { - # cat("---- ", names(dist_sf)[x], y, "\n") - # - # irr_dist(dist_sf[[x]], - # dist_vec = seq(5e3, 100e3, by = 10e3), - # mai_adm = mai_adm, - # mai_adm_all = mai_adm_all, - # grid_cases = grid_cases, - # afr_sf = afr_sf, - # dist_definition = y, - # by_region = T) %>% - # mutate(what = names(dist_sf)[x]) - # }) - # }) - ## Compute stats for targetting analysis --------------------------------------- make_adm2_results <- function(p) { @@ -1337,7 +1305,7 @@ if (opt$redo | !file.exists(opt$bundle_filename)) { # Figure 1: cases --------------------------------------------------------- -## Figure 1A: gridded cases by time period -------------------------------- +## Figure 1A: gridded cases by time period --------- # Gridded maps of cases p_fig1A <- output_plot_map(sf_obj = grid_cases %>% @@ -1365,7 +1333,7 @@ if (opt$save_panel_figures) { dpi = 300) } -## Figure 1B: cases by region and time period -------------------------------- +## Figure 1B: cases by region and time period --------- # Horizontal barplot of cases by region p_fig1B <- cases_by_region %>% mutate(AFRO_region = factor(AFRO_region, levels = get_AFRO_region_levels()), @@ -1418,12 +1386,13 @@ p_fig1 <- cowplot::plot_grid( rel_heights = c(.35, 1) ) + theme(panel.background = element_rect(fill = "white", color = "white")) -p_fig1_v2 <- ggdraw() + +# Add inset of WHO regions +p_fig1 <- ggdraw() + draw_plot(p_fig1) + draw_plot(p_fig_BC_regions, x = 0.78, y = .77, width = .23, height = .23) # Save -ggsave(plot = p_fig1_v2, +ggsave(plot = p_fig1, filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1.png"), width = 10, height = 7.5, @@ -1687,6 +1656,7 @@ risk_pop_regions <- pop_at_risk_regions %>% mutate(risk_cat = ifelse(risk_cat == ">100","\u2265100",risk_cat)) %>% mutate(risk_cat = factor(risk_cat,levels = c("<1","1-10","10-20","20-50","50-100","\u2265100" ))) +# Figure 3A p_fig3A <- risk_pop_regions %>% ggplot(aes(y = risk_cat, x = mean)) + geom_bar(aes(fill = AFRO_region), stat = "identity", width = .5) + @@ -1767,8 +1737,9 @@ ggsave(plot = p_fig3, dpi = 600) -# Figure 4 ---------------------------------------------------------------- +# Figure 4: 10-year risk categories ------------------------------------- +# Compute 10-year risk categories endemicity_df_50_v2 <- risk_pop_50_adm2 %>% mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6], low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% @@ -1789,28 +1760,8 @@ endemicity_df_50_v2 <- risk_pop_50_adm2 %>% saveRDS(endemicity_df_50_v2, file = str_glue("{opt$output_dir}/endemicity_50.rds")) -endemicity_df_95_v2 <- risk_pop_95_adm2 %>% - mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6], - low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% - group_by(country, location_period_id) %>% - summarise( - endemicity = case_when( - sum(high_risk) == 2 ~ "high-both", - sum(low_risk) == 2 ~ "low-both", - sum(high_risk) == 1 ~ "high-either", - T ~ "mix"), - pop = max(pop) - ) %>% - mutate( - endemicity = factor(endemicity, - levels = c("high-both", "high-either", "mix", "low-both"), - labels = c("sustained high", "history of high", "history of moderate", "sustained low"))) - -saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds")) - - -## Figure 4A: Alluvial plot ---- +## Figure 4A: Alluvial plot --------- # Process data for alluvial plot for_alluvial <- risk_pop_50_adm2 %>% as_tibble() %>% @@ -1862,7 +1813,7 @@ p_fig4A <- for_alluvial %>% guides(fill = "none") -## Figure 4B: Map of 10-year categories ---- +## Figure 4B: Map of 10-year categories --------- # Tile for legend hrisk_cat <- taxdat::get_risk_cat_dict()[-c(1:2)] @@ -1908,6 +1859,7 @@ p_fig4B <- endemicity_df_50_v2 %>% panel.background = element_rect(fill = "white", color = "white")) + guides(fill = guide_legend("10-year incidence\ncategory")) + # Add legend to map p_fig4B_legend <- ggdraw( p_fig4B + @@ -1929,7 +1881,7 @@ if (opt$save_panel_figures) { } -## Assemble Figure 4 ---- +## Assemble Figure 4 --------- p_fig4 <- plot_grid( p_fig4A + @@ -1961,8 +1913,7 @@ load(str_c(opt$output_dir, "/recent_cholera_outbreaks_res.rdata")) final_joins <- final_joins %>% mutate(admin_level = str_c("ADM", admin_level)) - -## Figure 5A: Map of cholera occurrence locations ---- +## Figure 5A: Map of cholera occurrence locations --------- p_ob_map <- endemicity_df_50_v2 %>% inner_join(u_space_sf, .) %>% select(-admin_level) %>% @@ -2003,7 +1954,7 @@ if (opt$save_panel_figures) { dpi = 100) } -## Figure 5A: Distribution of 10-year risk categories among ADM2 locations ---- +## Figure 5A: Distribution of 10-year risk categories among ADM2 locations --------- ob_count_dat <- obs_outbreaks %>% mutate(endemicity = stringr::str_remove_all(endemicity, " risk")) %>% mutate(occurrence = " cholera\n observed") %>% @@ -2033,7 +1984,7 @@ p_frac_overall <- ob_count_dat %>% coord_flip() -## Figure 5B: Cholera occurrence model estimates ---- +## Figure 5B: Cholera occurrence model estimates --------- pd1 <- position_dodge(.4) pd2 <- position_dodge(.3) @@ -2105,7 +2056,7 @@ p_or <- plot_grid( theme(plot.margin = unit(c(1, 3, 1, 3), units = "lines")) -## Assemble Figure 5 ---- +## Assemble Figure 5 --------- p_fig5 <- plot_grid( plot_grid( @@ -2132,11 +2083,11 @@ ggsave(plot = p_fig5, dpi = 300) - # Figure 6: targetting ---------------------------------------------------- - +# Compare strategies to target populations target_pop_levels <- c(1e7, seq(5e7, 4e8, by = 5e7)) +# Compute population covered by interventions pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) { print(x) cumul_pop_frac_stats_2016_2020 %>% @@ -2148,6 +2099,7 @@ pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) { }) }) +# Compute fraction of population covered by interventions case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) { print(x) cumul_case_frac_stats_2016_2020 %>% @@ -2159,6 +2111,7 @@ case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) { }) }) +# Compute fraction of population covered by interventions case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) { print(x) cumul_case_frac_stats_2011_2015 %>% @@ -2171,14 +2124,6 @@ case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) { }) # Plot -colors_ranking <- function() { - c("2011-2015" = "purple", - "2016-2020" = "orange", - "2011-2020" = "darkgreen", - "2022-2023"= "darkgray", - "optimal"= "gray") -} - pd <- position_dodge(width = .8, preserve = "single") data_for_figure6_2016_2020 <- bind_rows( @@ -2203,6 +2148,7 @@ data_for_figure6_2016_2020 <- bind_rows( leg_title <- "Period used for\ntargeting" pd <- position_dodge(width = 2.5e7, preserve = "single") +# Bar plots of coverage p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>% mutate(ranking = case_when(ranking == "optimal" ~ "2022-2023", T ~ ranking) %>% @@ -2257,6 +2203,7 @@ p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>% ggpattern::scale_pattern_manual(values = c(oracle = "stripe", prospective = "none")) +# Arrow labels data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>% filter(target_pop == 1e8, str_detect(what, "cases")) %>% @@ -2280,6 +2227,7 @@ data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>% str_glue("unreached {what_label[1]}"))) %>% slice(1) +## Assemble Figure 6 ---------- p_targets2_2016_2020 <- p_targets_v2_2016_2020 + theme(legend.background = element_blank()) + geom_segment( @@ -2302,13 +2250,15 @@ p_targets2_2016_2020 <- p_targets_v2_2016_2020 + geom_hline(aes(yintercept = 1), color = "darkgray", lty = 2, lwd = .6) +# Save ggsave(p_targets2_2016_2020, filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6.png"), width = 12, height = 5.5, dpi = 300) -# Supplementary figures --------------------------------------------------- +# Supplementary figures --------------------------------------------------- + ## Model fit ---- # Scatter plot of ADM0 level units # Scatter plot of all admin units for mean of observations @@ -2498,6 +2448,27 @@ ggsave(plot = p_fig3_95, dpi = 600) ### Figure 4 supplement figures (95% cutoff) ---- + +endemicity_df_95_v2 <- risk_pop_95_adm2 %>% + mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6], + low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% + group_by(country, location_period_id) %>% + summarise( + endemicity = case_when( + sum(high_risk) == 2 ~ "high-both", + sum(low_risk) == 2 ~ "low-both", + sum(high_risk) == 1 ~ "high-either", + T ~ "mix"), + pop = max(pop) + ) %>% + mutate( + endemicity = factor(endemicity, + levels = c("high-both", "high-either", "mix", "low-both"), + labels = c("sustained high", "history of high", "history of moderate", "sustained low"))) + +saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds")) + + p_fig4C_95 <- endemicity_df_95_v2 %>% inner_join(u_space_sf, .) %>% output_plot_map(sf_obj = ., @@ -2952,7 +2923,40 @@ p_targets_v2_2011_2015_supp <- data_for_figure6_2011_2015 %>% ggsave(p_targets_v2_2011_2015_supp, filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6_supp.png"), width = 7.5, height = 5.5, dpi = 300) + + # Scraps ------------------------------------------------------------------ +# Incidence ratios around rivers and lakes +# +# dist_sf <- list("rivers" = rivers_sf, +# "lakes" = lakes_sf, +# "coasts" = coasts_sf, +# "freshwater" = bind_rows( +# rivers_sf, +# lakes_sf +# ), +# "water" = bind_rows( +# rivers_sf, +# lakes_sf, +# coasts_sf +# )) +# +# +# irr_dat <- map_df(seq_along(dist_sf), function(x) { +# map_df(c("within", "category"), function(y) { +# cat("---- ", names(dist_sf)[x], y, "\n") +# +# irr_dist(dist_sf[[x]], +# dist_vec = seq(5e3, 100e3, by = 10e3), +# mai_adm = mai_adm, +# mai_adm_all = mai_adm_all, +# grid_cases = grid_cases, +# afr_sf = afr_sf, +# dist_definition = y, +# by_region = T) %>% +# mutate(what = names(dist_sf)[x]) +# }) +# }) # compute_irr <- function(target_cells_sf, # grid_cases, # grid_rates, diff --git a/packages/taxdat/DESCRIPTION b/packages/taxdat/DESCRIPTION index 2e5809f59..a50603932 100644 --- a/packages/taxdat/DESCRIPTION +++ b/packages/taxdat/DESCRIPTION @@ -10,7 +10,7 @@ Encoding: UTF-8 License: GPL (>= 2) Depends: R (>= 3.4),ISOcodes,magrittr, dplyr, tidyr, igraph, geodata Suggests: readxl, sf, sp, raster, rgdal, testthat, knitr, rmarkdown -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Collate: 'plot_cache_function.R' 'get_genquant.R' diff --git a/packages/taxdat/NAMESPACE b/packages/taxdat/NAMESPACE index f5b98ce60..19a2294e7 100644 --- a/packages/taxdat/NAMESPACE +++ b/packages/taxdat/NAMESPACE @@ -110,6 +110,7 @@ export(close_parallel_setup) export(collapse_grid) export(color_scale) export(colors_afro_regions) +export(colors_ranking) export(compute_adjustment_UN_population) export(compute_cumul_proportion_thresh) export(compute_mean_rate) diff --git a/packages/taxdat/R/output_figures_and_tables.R b/packages/taxdat/R/output_figures_and_tables.R index 25dc21dae..b11c7c7df 100644 --- a/packages/taxdat/R/output_figures_and_tables.R +++ b/packages/taxdat/R/output_figures_and_tables.R @@ -24,7 +24,6 @@ colors_endemicity <- function(){ c("#993404", "#D95F0E", "#FED98E", "gray") } - colors_periods <- function(){c("purple", "orange")} colors_risk_categories <- function() { @@ -41,6 +40,15 @@ colors_afro_regions <- function(){ colors } + +#' @export +colors_ranking <- function() { + c("2011-2015" = "purple", + "2016-2020" = "orange", + "2011-2020" = "darkgreen", + "2022-2023"= "darkgray", + "optimal"= "gray") +} # Figure functions -------------------------------------------------------- #' output_plot_map