From ac54d9e2f2eebcf4e9a963a2d7ab46138ecb463f Mon Sep 17 00:00:00 2001
From: Qulu <44678888+QLLZ@users.noreply.github.com>
Date: Wed, 10 Jul 2024 09:53:34 -0400
Subject: [PATCH 1/5] updated figure code

---
 Analysis/R/make_final_figures_and_tables.R   | 7 ++++---
 Analysis/output/mapping_manuscript_stats.Rmd | 8 +++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/Analysis/R/make_final_figures_and_tables.R b/Analysis/R/make_final_figures_and_tables.R
index d29fe04bd..9a3417a06 100644
--- a/Analysis/R/make_final_figures_and_tables.R
+++ b/Analysis/R/make_final_figures_and_tables.R
@@ -1519,7 +1519,9 @@ strip <- ggh4x::strip_themed(
 p_fig2A_arrows <- plot_grid(
   # SSA
   make_dotlineplot(dat_for_incid_dotplot %>% 
-                     filter(country == "SSA")) +
+                     filter(country == "SSA") %>% 
+  mutate(country = case_when(country == "SSA"~"Africa",
+                             .default = country))) +
     theme(axis.title.x = element_blank(),
           axis.text.x = element_blank(),
           axis.ticks.x = element_blank()) +
@@ -1813,7 +1815,6 @@ ggsave(plot = p_fig2,
        height = 9,
        dpi = 600)
 
-
 # Figure 3: population at risk --------------------------------------------
 
 ## Fig. 3A: People per risk category --------
@@ -3073,7 +3074,7 @@ p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>%
   scale_fill_manual(values = colors_ranking()) +
   scale_color_manual(values = colors_ranking()) +
   labs(x = "Population targeted (out of total of 1.1 billion in 2020)",
-       y = "Proportion reached of ...") +
+       y = "Proportion reached") +
   scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0%", "25%", "50%", "75%", "100%")) +
   scale_x_continuous(breaks = c(10, seq(50, 400, by = 50))*1e6, 
                      labels = function(x) str_c(formatC(x*1e-6, format = "f", digits = 0), "M")) +
diff --git a/Analysis/output/mapping_manuscript_stats.Rmd b/Analysis/output/mapping_manuscript_stats.Rmd
index 0a14233b8..eaf2375b9 100644
--- a/Analysis/output/mapping_manuscript_stats.Rmd
+++ b/Analysis/output/mapping_manuscript_stats.Rmd
@@ -934,7 +934,7 @@ by=c("country",'admin_level','location_period_id',"shp_id")) %>%
 
 
 ##Continent-wide mean annual incidence (figure 2A)
-load(paste0(params$output_directory,"/data_bundle_for_figures_test.rdata"))
+load("/home/cholerapipelinerestmp/postprocess/processed_outputs/data_bundle_for_figures_test.rdata")
 combined_mai_changes <- combined_mai_changes %>% filter(country == "SSA")
 print(paste0("The continent-wide mean annual incidence rate (2011-2015:", round(combined_mai_changes$`2011-2015`*10^5,2)," per 100,000 people; 2016-2020: ", round(combined_mai_changes$`2016-2020`*10^5,2), " per 100,000 people) remained steady across both periods, hovering just above 10 cases per 100,000 population (Fig 2A). "))
 
@@ -1550,4 +1550,10 @@ pop_frac_sel_2016_2020 %>%
   kableExtra::kbl(caption = "Table 32. Population living in ADM2 units with cholera occurrence in 2022-2023 reached by different strategies") %>% 
   kableExtra::kable_paper(full_width = F)
 
+```
+```{r additional text about cholera occurrence in 2022-2023, echo=FALSE,message=FALSE}
+getwd()
+load("C:/Users/zheng/Cholera/dev_postprocess/2024_06_05/outbreak_analysis_data.rdata")
+print(paste0("The distinct countries in which we have cholera occurrence in 2022-2023: ",
+            dat %>% filter(in_outbreak) %>% distinct(country)))
 ```
\ No newline at end of file

From c5a5ac3d26dd48caad62a29ba11c30089a0dccad Mon Sep 17 00:00:00 2001
From: javierps <javier.perezsaez@gmail.com>
Date: Mon, 5 Aug 2024 13:15:02 +0200
Subject: [PATCH 2/5] update code for figures

---
 Analysis/R/make_final_figures_and_tables.R                   | 2 +-
 Analysis/Stan/outbreak_analysis_multilevel_hierarchical.stan | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Analysis/R/make_final_figures_and_tables.R b/Analysis/R/make_final_figures_and_tables.R
index 52c661d7a..e3a713c9d 100644
--- a/Analysis/R/make_final_figures_and_tables.R
+++ b/Analysis/R/make_final_figures_and_tables.R
@@ -1797,7 +1797,7 @@ ggsave(p_fig2B,
 
 p_fig2 <- plot_grid(
   p_fig2A +
-    theme(plot.margin = unit(c(1, 0, 1, 1), "lines")),
+    theme(plot.margin = unit(c(1, 1, 1, 1), "lines")),
   p_fig2B,
   ncol = 2,
   nrow = 1,
diff --git a/Analysis/Stan/outbreak_analysis_multilevel_hierarchical.stan b/Analysis/Stan/outbreak_analysis_multilevel_hierarchical.stan
index f1acda2f1..8453bd3e5 100644
--- a/Analysis/Stan/outbreak_analysis_multilevel_hierarchical.stan
+++ b/Analysis/Stan/outbreak_analysis_multilevel_hierarchical.stan
@@ -126,7 +126,9 @@ model {
   
   // Priors
   // regression coefficients
-  mu_beta ~ normal(0, 2);    
+  mu_beta[1] ~ normal(0, 2);
+  mu_beta[2:M] ~ normal(0, 1);
+  
   sd_beta ~ normal(0, 2.5);
   for (u in 1:U) {
     r_mu_beta_tilde[u, ] ~ std_normal();
@@ -181,6 +183,7 @@ generated quantities {
     
     pred_prob = exp(log_eta);
     pred_obs_prob_adm2 = pred_prob[ind_obs_adm2];
+    pred_prob_true = exp(log_prob);
     
     for (i in 1:N_obs_other_adm) {
       real x = 0;

From c62dde0347cc67612bfa1f99038578efd76b21b5 Mon Sep 17 00:00:00 2001
From: Qulu <44678888+QLLZ@users.noreply.github.com>
Date: Mon, 5 Aug 2024 07:41:31 -0400
Subject: [PATCH 3/5] Update mapping_manuscript_stats.Rmd

removed some unnecessary code
---
 Analysis/output/mapping_manuscript_stats.Rmd | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Analysis/output/mapping_manuscript_stats.Rmd b/Analysis/output/mapping_manuscript_stats.Rmd
index eaf2375b9..629988b9e 100644
--- a/Analysis/output/mapping_manuscript_stats.Rmd
+++ b/Analysis/output/mapping_manuscript_stats.Rmd
@@ -1552,8 +1552,6 @@ pop_frac_sel_2016_2020 %>%
 
 ```
 ```{r additional text about cholera occurrence in 2022-2023, echo=FALSE,message=FALSE}
-getwd()
-load("C:/Users/zheng/Cholera/dev_postprocess/2024_06_05/outbreak_analysis_data.rdata")
 print(paste0("The distinct countries in which we have cholera occurrence in 2022-2023: ",
             dat %>% filter(in_outbreak) %>% distinct(country)))
 ```
\ No newline at end of file

From d76daf950012dbc004e981264293579ed680321e Mon Sep 17 00:00:00 2001
From: javierps <javier.perezsaez@gmail.com>
Date: Tue, 6 Aug 2024 16:46:56 +0200
Subject: [PATCH 4/5] tidy script for final figures

---
 Analysis/R/make_final_figures_and_tables.R | 1996 ++++++++++----------
 1 file changed, 1003 insertions(+), 993 deletions(-)

diff --git a/Analysis/R/make_final_figures_and_tables.R b/Analysis/R/make_final_figures_and_tables.R
index 6f2650351..e357599a2 100644
--- a/Analysis/R/make_final_figures_and_tables.R
+++ b/Analysis/R/make_final_figures_and_tables.R
@@ -36,7 +36,10 @@ opt_list <- list(
               help = "Data bundle to avoid re-processing"),
   make_option(opt_str = c("-r", "--redo"), type = "logical",
               default = FALSE, 
-              help = "Redo re-processing")
+              help = "Redo re-processing"),
+  make_option(opt_str = c("-s", "--save_panel_figures"), type = "logical",
+              default = FALSE, 
+              help = "Save each panel figure as a separate file")
 )
 
 opt <- parse_args(OptionParser(option_list = opt_list))
@@ -72,6 +75,15 @@ combine_period_output <- function(prefix_list,
   res
 }
 
+#' compute_rate_changes
+#' Helper to compute the change in incidence reates between periods
+#' 
+#' @param df 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 compute_rate_changes <- function(df) {
   df %>% 
     st_drop_geometry() %>% 
@@ -83,7 +95,16 @@ compute_rate_changes <- function(df) {
            rate_diff = `2016-2020` - `2011-2015`)
 }
 
-# Compute change statistics (could package into function)
+#' merge_ratio_draws
+#' Helper to compute the ratio of incidence rates across HMC draws
+#' @param df1 
+#' @param df2 
+#' @param unit_col 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 merge_ratio_draws <- function(df1, 
                               df2,
                               unit_col = "location_period_id") {
@@ -127,6 +148,16 @@ merge_ratio_draws <- function(df1,
 }
 
 
+#' make_adm_case_table
+#' Make a table of cases by admin level
+#' 
+#' @param mai_adm_cases 
+#' @param admin_levels 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 make_adm_case_table <- function(mai_adm_cases,
                                 admin_levels = "ADM0") {
   
@@ -165,6 +196,15 @@ make_adm_case_table <- function(mai_adm_cases,
   }
 }
 
+#' save_table_to_docx
+#'
+#' @param tbl 
+#' @param output_path 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 save_table_to_docx <- function(tbl, output_path) {
   tbl  %>% 
     flextable::as_flextable( max_row = Inf) %>%
@@ -173,6 +213,18 @@ save_table_to_docx <- function(tbl, output_path) {
 }
 
 
+#' get_mean_rate
+#' Compute the mean incidence rate 
+#'
+#' @param mai_adm_all 
+#' @param this_unit 
+#' @param this_period 
+#' @param by_region 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 get_mean_rate <- function(mai_adm_all,
                           this_unit,
                           this_period,
@@ -198,221 +250,40 @@ get_mean_rate <- function(mai_adm_all,
   mean_rate
 }
 
-compute_irr <- function(target_cells_sf, 
-                        grid_cases,
-                        grid_rates,
-                        mean_rate) {
-  
-  # Get population of these cells
-  target_cells <- grid_cases %>% 
-    st_drop_geometry() %>% 
-    select(rid, x, y, cases = mean) %>% 
-    inner_join(
-      grid_rates %>% 
-        st_drop_geometry() %>% 
-        select(rid, x, y, rate = mean),
-      by = c("rid", "x", "y")
-    ) %>% 
-    mutate(pop = cases/rate) %>% 
-    inner_join(target_cells_sf %>% 
-                 st_drop_geometry(),
-               by = c("rid", "x", "y"))
-  
-  
-  # Compute expected cases
-  stats <- target_cells %>% 
-    mutate(mean_rate = mean_rate,
-           expected_cases = pop * mean_rate) %>% 
-    summarise(observed = sum(mean),
-              expected = sum(expected_cases),
-              ratio = observed/expected)
-  
-  stats
-}
-
 
-#' Title
-#'
-#' @param target 
-#' @param dist_vec 
-#' @param mai_adm 
-#' @param grid_cases 
-#' @param afr_sf 
-#' @param dist_definition 
+#' unpack_pop_at_risk
+#' Define population at risk categories
+#' 
+#' @param df 
 #'
 #' @return
 #' @export
 #'
 #' @examples
-irr_dist <- function(target,
-                     dist_vec = seq(5e4, 50e4, by = 5e4),
-                     mai_adm,
-                     mai_adm_all,
-                     grid_cases,
-                     afr_sf,
-                     dist_definition = "within",
-                     by_region = TRUE) {
-  
-  if (by_region) {
-    u_units <- unique(mai_adm$AFRO_region)
-  } else {
-    u_units <- unique(mai_adm$country)
-  }
-  
-  res <- map_df(
-    u_units, 
-    function(this_unit) {
-      map_df(
-        unique(mai_adm$period), 
-        function(this_period) {
-          
-          cat("Unit", this_unit, "period", this_period, "\n")
-          
-          # Get all cells for which we have at least case
-          case_cells <- grid_cases %>% 
-            filter(mean > 1, period == this_period)
-          
-          if (by_region) {
-            in_unit <- st_intersects(case_cells, 
-                                     afr_sf %>% 
-                                       filter(AFRO_region== this_unit),
-                                     sparse = FALSE) %>% 
-              rowSums() %>% 
-              {.>0}
-          } else {
-            in_unit <- st_intersects(case_cells, 
-                                     afr_sf %>% 
-                                       filter(country == this_unit),
-                                     sparse = FALSE) %>% 
-              rowSums() %>% 
-              {.>0}
-          }
-          
-          
-          if (sum(in_unit) == 0) {
-            res <- tibble(observed = NA,
-                          expected = NA,
-                          ratio = NA,
-                          dist_thresh = NA)
-          } else {
-            
-            this_case_cells <- case_cells[in_unit, ]
-            
-            # Make buffer
-            centroids <- st_centroid(this_case_cells)
-            centroids <- st_transform(centroids, st_crs(target))
-            
-            # Define whether it is within distance or bands
-            if (dist_definition == "within") {
-              dist_vec2 <- dist_vec
-              len <- length(dist_vec2)
-              mean_dist <- dist_vec
-            } else {
-              dist_vec2 <- cbind(c(0, dist_vec[-length(dist_vec)]), dist_vec)
-              len <- nrow(dist_vec2)
-              mean_dist <- apply(dist_vec2, 1, mean)
-            }
-            
-            # Filter all grid cells within distance
-            res <- map_df(
-              seq_len(len), 
-              function(x) {
-                
-                
-                if (dist_definition == "within") {
-                  # Text to save
-                  dist_txt <- as.character(dist_vec[x]/1e3)
-                  
-                  within_dist <-  st_is_within_distance(centroids, target,
-                                                        dist = dist_vec[x],
-                                                        sparse = F) %>% 
-                    rowSums() %>% 
-                    {.>0} 
-                  
-                  target_cells_sf <- centroids[within_dist, ]
-                  
-                } else {
-                  
-                  # Text to save
-                  dist_txt <- str_c(formatC(dist_vec2[x, ]/1e3, format = "f", digits = 0, big.mark = ","), collapse = "-")
-                  
-                  within_dist <- st_is_within_distance(centroids, target,
-                                                       dist = dist_vec2[x, 2],
-                                                       sparse = F) %>% 
-                    rowSums() %>% 
-                    {.>0}
-                  
-                  beyond_dist <- st_is_within_distance(centroids, target,
-                                                       dist = dist_vec2[x, 1] + 1,
-                                                       sparse = F) %>% 
-                    rowSums() %>% 
-                    {. > 0} %>% 
-                    {!.}
-                  
-                  target_cells_sf <- centroids[within_dist & beyond_dist, ]
-                  
-                }
-                
-                if (sum(within_dist) > 0) {
-                  
-                  mean_rate <- get_mean_rate(mai_adm_all = mai_adm_all,
-                                             this_unit = this_unit,
-                                             this_period = this_period,
-                                             by_region = by_region)
-                  
-                  
-                  stats <- compute_irr(target_cells_sf = target_cells_sf, 
-                                       grid_cases = grid_cases,
-                                       grid_rates = grid_rates,
-                                       mean_rate = mean_rate)
-                  
-                } else {
-                  stats <- tibble(observed = NA,
-                                  expected = NA,
-                                  ratio = NA)
-                }
-                
-                
-                stats %>% 
-                  mutate(dist = dist_txt,
-                         dist_definition = dist_definition,
-                         mean_dist = mean_dist[x])
-              })
-          }
-          
-          res %>% 
-            mutate(unit = this_unit,
-                   period = this_period)
-        })
-    })
-  
-  if (by_region) {
-    res <- res %>% rename(AFRO_region = unit)
-  } else {
-    res <- res %>% rename(country = unit)
-  }
-  
-  res %>% 
-    rowwise() %>% 
-    mutate(lo = exp(log(ratio) - 1.96 * sqrt(1/observed + 1/expected)),
-           hi = exp(log(ratio) + 1.96 * sqrt(1/observed + 1/expected))) %>% 
-    ungroup() 
-}
-
-
 unpack_pop_at_risk <- function(df) {
+  
   risk_cat_map <- get_risk_cat_dict()
   names(risk_cat_map) <- janitor::make_clean_names(risk_cat_map) %>% 
     str_remove("x")
   
   
   df %>% 
-    mutate(risk_cat = str_extract(variable, str_c(rev(names(risk_cat_map)), collapse = "|")),
-           risk_cat = risk_cat_map[risk_cat] %>% factor(levels = risk_cat_map),
-           admin_level = str_c("ADM", str_extract(variable, "(?<=adm)[0-9]+"))
+    mutate(
+      risk_cat = str_extract(variable, str_c(rev(names(risk_cat_map)), collapse = "|")),
+      risk_cat = factor(risk_cat_map[risk_cat], levels = risk_cat_map),
+      admin_level = str_c("ADM", str_extract(variable, "(?<=adm)[0-9]+"))
     )
 }
 
+#' parse_AFRO_region
+#' Extract AFRO region from string
+#'
+#' @param df 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 parse_AFRO_region <- function(df) {
   
   if (!("AFRO_region" %in% colnames(df))) {
@@ -427,6 +298,16 @@ parse_AFRO_region <- function(df) {
                                 levels = get_AFRO_region_levels()))
 }
 
+#' unpack_region_draws
+#' Make long format table
+#' 
+#' @param df 
+#' @param value_col 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 unpack_region_draws <- function(df, 
                                 value_col = "country_rates") {
   df %>% 
@@ -498,6 +379,17 @@ compute_cumulative_stats <- function(data,
 }
 
 
+#' compute_cumul_differences
+#' Compute the cumulative difference between intervention strategies
+#'
+#' @param data 
+#' @param cumul_draws 
+#' @param target_ranking 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 compute_cumul_differences <- function(data,
                                       cumul_draws,
                                       target_ranking) {
@@ -530,6 +422,17 @@ compute_cumul_differences <- function(data,
 }
 
 
+#' compute_cumul_frac
+#' Compute the cumulative fraction between intervention strategies
+#'
+#' @param data 
+#' @param cumul_draws 
+#' @param target_ranking 
+#'
+#' @return
+#' @export
+#'
+#' @examples
 compute_cumul_frac <- function(data,
                                cumul_draws,
                                target_ranking) {
@@ -565,6 +468,104 @@ compute_cumul_frac <- function(data,
   frac_stats
 }
 
+#' make_dotlineplot
+#' Plot for figure 2
+#' @param df 
+#'
+#' @return
+#' @export
+#'
+#' @examples
+make_dotlineplot <- function(df) {
+  df %>% 
+    pivot_longer(cols = c("p1", "p2"),
+                 names_to = "period")  %>% 
+    group_by(country) %>% 
+    mutate(p2_value = value[period == "p2"]) %>% 
+    ungroup() %>% 
+    mutate(country = factor(country) %>% 
+             forcats::fct_reorder(p2_value)) %>%
+    mutate(period = ifelse(period == "p1", "2011-2015", "2016-2020")) %>% 
+    ggplot(aes(y = country)) +
+    geom_point(aes(x = value, pch = period), size = 2)  +
+    geom_segment(data = df, 
+                 aes(x = p1, y = country, xend = p2, yend = country,
+                     color = direction#,linewidth = sigificant_irr
+                 ),
+                 arrow = arrow(length = unit(0.15, "cm"), 
+                               type="closed")#, 
+                 #lwd = .3
+    ) +
+    scale_x_continuous(limits = c(-1.1, 2.3),
+                       breaks = seq(-1, 2),
+                       labels = formatC(10^(seq(-1, 2)),
+                                        digits = 1,
+                                        format = "fg", 
+                                        big.mark = ",") %>% 
+                         str_replace("0.1", "\u2264 0.1")) +
+    scale_color_manual(values = c("red", "gray", "blue"), drop = FALSE) +
+    scale_shape_manual(values = c(1, 16)) +
+    theme_bw() +
+    theme(strip.placement = "out") +
+    labs(y = NULL, 
+         x = "Cholera incidence rate \n[reported cases per 100,000/year]",
+         color = "Change direction",
+         shape = "Time period")
+}
+
+# Solution for strip colors in https://stackoverflow.com/questions/19440069/ggplot2-facet-wrap-strip-color-based-on-variable-in-data-set
+strip <- ggh4x::strip_themed(
+  background_y = ggh4x::elem_list_rect(fill = colors_afro_regions()[c(2, 3, 4, 1)]),
+  text_y = ggh4x::elem_list_text(color = c("white", "white", "white", "white"))
+)
+
+#' make_irr_plot
+#'
+#' @param df 
+#'
+#' @return
+#' @export
+#'
+#' @examples
+make_irr_plot <- function(df) {
+  hi <- 10
+  lo <- .1
+  w <- case_when(nrow(df) > 5 ~ .6,
+                 nrow(df) == 1 ~ .1,
+                 TRUE ~ .2)
+  
+  df %>% 
+    mutate(irr_mean = case_when(irr_mean < lo ~ lo,
+                                irr_mean > hi ~ hi,
+                                TRUE ~ irr_mean),
+           irr_low = case_when(irr_low < lo ~ lo,
+                               irr_low > hi ~ NA,
+                               TRUE ~ irr_low),
+           irr_high = case_when(irr_high > hi ~ hi,
+                                irr_high < lo ~ NA,
+                                TRUE ~ irr_high)) %>% 
+    ggplot(aes(y = country, color = direction))  +
+    geom_vline(aes(xintercept = 1), color = "black", lwd = .5, lty = 2) +
+    geom_point(aes(x = irr_mean)) +
+    geom_errorbar(aes(xmin = irr_low, xmax = irr_high), width = w) +
+    scale_color_manual(values = c("red", "gray", "blue"), drop = FALSE) +
+    theme_bw() +
+    theme(strip.placement = "out") +
+    labs(y = NULL, 
+         x = "Incidence rate\nratio",
+         alpha = "Statististically-significant\nchange",
+         color = "Change direction",
+         shape = "Time period") +
+    scale_x_log10(breaks = c(.1, .3, 1, 3, 10),
+                  labels = c("\u2264 0.1", ".3", "1", "3", " \u2265 10"),
+                  limits = c(.1, 10)) +
+    theme(axis.text.y = element_blank(),
+          axis.title.y = element_blank(),
+          axis.ticks.y = element_blank(),
+          strip.background = element_blank(),
+          strip.text = element_blank())
+}
+
 
 # Second post-processing step ---------------------------------------------
 
@@ -1355,12 +1356,14 @@ p_fig1A <- output_plot_map(sf_obj = grid_cases %>%
         panel.background = element_rect(fill = "white", color = "white")) +
   guides(fill = guide_colorbar("Mean annual incidence \n[cases/year]"))
 
-# Save
-ggsave(p_fig1A,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1A.png"),
-       width = 10,
-       height = 6, 
-       dpi = 300)
+
+if (opt$save_panel_figures) {
+  ggsave(p_fig1A,
+         file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1A.png"),
+         width = 10,
+         height = 6, 
+         dpi = 300)
+}
 
 ## Figure 1B: cases by region and time period --------------------------------
 # Horizontal barplot of cases by region
@@ -1384,12 +1387,14 @@ p_fig1B <- cases_by_region %>%
   labs(x = "Mean annual cholera incidence [cases/year]", 
        y = "Time period")
 
-# Save figure
-ggsave(plot = p_fig1B,
-       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1B.png"),
-       width = 8,
-       height = 2.5,
-       dpi = 300)
+
+if (opt$save_panel_figures) {
+  ggsave(plot = p_fig1B,
+         filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1B.png"),
+         width = 8,
+         height = 2.5,
+         dpi = 300)
+}
 
 # Map of WHO regions
 p_fig_BC_regions <- ggplot(afr_sf, aes(fill = AFRO_region)) +
@@ -1427,6 +1432,8 @@ ggsave(plot = p_fig1_v2,
 
 # Figure 2: changes between periods ------------------------------------------
 
+## Figure 2A: Line plots and IRRs  ---------
+
 ## load mai region change stats 
 mai_region_change_stats<-readRDS(str_glue("{opt$output_dir}/mai_region_ratio_stats.rds"))
 
@@ -1465,63 +1472,13 @@ dat_for_incid_dotplot <-  combined_mai_changes %>%
                                TRUE ~ direction) %>% 
            factor(levels = c("increase", "none", "decrease")))
 
-
-
-make_dotlineplot <- function(df) {
-  df %>% 
-    pivot_longer(cols = c("p1", "p2"),
-                 names_to = "period")  %>% 
-    group_by(country) %>% 
-    mutate(p2_value = value[period == "p2"]) %>% 
-    ungroup() %>% 
-    mutate(country = factor(country) %>% 
-             forcats::fct_reorder(p2_value)) %>%
-    mutate(period = ifelse(period == "p1", "2011-2015", "2016-2020")) %>% 
-    ggplot(aes(y = country)) +
-    geom_point(aes(x = value, pch = period), size = 2)  +
-    geom_segment(data = df, 
-                 aes(x = p1, y = country, xend = p2, yend = country,
-                     color = direction#,linewidth = sigificant_irr
-                 ),
-                 arrow = arrow(length = unit(0.15, "cm"), 
-                               type="closed")#, 
-                 #lwd = .3
-    ) +
-    # scale_linetype_manual(values = c(4, 1))  +
-    #scale_alpha_manual(values = c(1, .3)) +
-    #scale_linewidth_manual(values = c(1,0.3)) +
-    scale_x_continuous(limits = c(-1.1, 2.3),
-                       breaks = seq(-1, 2),
-                       labels = formatC(10^(seq(-1, 2)),
-                                        digits = 1,
-                                        format = "fg", 
-                                        big.mark = ",") %>% 
-                         str_replace("0.1", "\u2264 0.1")) +
-    scale_color_manual(values = c("red", "gray", "blue"), drop = FALSE) +
-    scale_shape_manual(values = c(1, 16)) +
-    # ggh4x::facet_nested(admin_level + AFRO_region ~ ., scale = "free", 
-    # space = "free", switch = "y") +
-    theme_bw() +
-    theme(strip.placement = "out") +
-    labs(y = NULL, 
-         x = "Cholera incidence rate \n[reported cases per 100,000/year]",
-         color = "Change direction",
-         shape = "Time period") #+
-  # theme(panel.grid.major.y = element_blank()) 
-}
-
-# Solution for strip colors in https://stackoverflow.com/questions/19440069/ggplot2-facet-wrap-strip-color-based-on-variable-in-data-set
-strip <- ggh4x::strip_themed(
-  background_y = ggh4x::elem_list_rect(fill = colors_afro_regions()[c(2, 3, 4, 1)]),
-  text_y = ggh4x::elem_list_text(color = c("white", "white", "white", "white"))
-)
-
+# Right column of panel 2A
 p_fig2A_arrows <- plot_grid(
   # SSA
   make_dotlineplot(dat_for_incid_dotplot %>% 
                      filter(country == "SSA") %>% 
-  mutate(country = case_when(country == "SSA"~"Africa",
-                             .default = country))) +
+                     mutate(country = case_when(country == "SSA"~"Africa",
+                                                .default = country))) +
     theme(axis.title.x = element_blank(),
           axis.text.x = element_blank(),
           axis.ticks.x = element_blank()) +
@@ -1545,65 +1502,14 @@ p_fig2A_arrows <- plot_grid(
     ggh4x::facet_grid2(AFRO_region ~ ., switch = "y", scales = "free_y", space = "free_y",
                        strip = strip) +
     guides(shape = "none", color = "none"),
-  # facet_grid(AFRO_region ~ ., switch = "y", scales = "free_y", space = "free_y"),
   ncol = 1,
   rel_heights = c(.15, .25, 1),
   align = "v",
   axis = "lr"
 )
 
-p_fig2A_arrows
-
-fig2A_legend <- cowplot::get_legend(
-  make_dotlineplot(dat_for_incid_dotplot) +
-    theme(legend.box="horizontal",
-          legend.direction = "horizontal") +
-    guides(color = guide_legend("Change direction", title.position="top", title.hjust = 0.5),
-           shape = guide_legend("Time period", title.position="top", title.hjust = 0.5))
-  )
-
-make_irr_plot <- function(df) {
-  hi <- 10
-  lo <- .1
-  w <- case_when(nrow(df) > 5 ~ .6,
-                 nrow(df) == 1 ~ .1,
-                 TRUE ~ .2)
-  
-  df %>% 
-    mutate(irr_mean = case_when(irr_mean < lo ~ lo,
-                                irr_mean > hi ~ hi,
-                                TRUE ~ irr_mean),
-           irr_low = case_when(irr_low < lo ~ lo,
-                               irr_low > hi ~ NA,
-                               TRUE ~ irr_low),
-           irr_high = case_when(irr_high > hi ~ hi,
-                                irr_high < lo ~ NA,
-                                TRUE ~ irr_high)) %>% 
-    ggplot(aes(y = country, color = direction))  +
-    geom_vline(aes(xintercept = 1), color = "black", lwd = .5, lty = 2) +
-    geom_point(aes(x = irr_mean)) +
-    geom_errorbar(aes(xmin = irr_low, xmax = irr_high), width = w) +
-    scale_color_manual(values = c("red", "gray", "blue"), drop = FALSE) +
-    # ggh4x::facet_nested(admin_level + AFRO_region ~ ., scale = "free", 
-    # space = "free", switch = "y") +
-    theme_bw() +
-    theme(strip.placement = "out") +
-    labs(y = NULL, 
-         x = "Incidence rate\nratio",
-         alpha = "Statististically-significant\nchange",
-         color = "Change direction",
-         shape = "Time period") +
-    # theme(panel.grid.major.y = element_blank()) +
-    scale_x_log10(breaks = c(.1, .3, 1, 3, 10),
-                  labels = c("\u2264 0.1", ".3", "1", "3", " \u2265 10"),
-                  limits = c(.1, 10)) +
-    theme(axis.text.y = element_blank(),
-          axis.title.y = element_blank(),
-          axis.ticks.y = element_blank(),
-          strip.background = element_blank(),
-          strip.text = element_blank())
-}
 
+# Left column of panel 2A
 p_fig2A_irr <- plot_grid(
   # SSA
   make_irr_plot(dat_for_incid_dotplot %>% 
@@ -1636,9 +1542,17 @@ p_fig2A_irr <- plot_grid(
   axis = "lr"
 )
 
-p_fig2A_irr
+# Legend of figure 2A
+fig2A_legend <- cowplot::get_legend(
+  make_dotlineplot(dat_for_incid_dotplot) +
+    theme(legend.box="horizontal",
+          legend.direction = "horizontal") +
+    guides(color = guide_legend("Change direction", title.position="top", title.hjust = 0.5),
+           shape = guide_legend("Time period", title.position="top", title.hjust = 0.5))
+)
 
 
+# Assemble figure 2A
 p_fig2A <- cowplot::plot_grid(
   cowplot::plot_grid(
     p_fig2A_arrows,
@@ -1653,61 +1567,13 @@ p_fig2A <- cowplot::plot_grid(
   rel_heights = c(1, .1)) +
   theme(plot.background = element_rect(fill = "white", color = "white"))
 
-# Save
-ggsave(p_fig2A,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2A.png"),
-       width = 10,
-       height = 10, 
-       dpi = 300)
-
-## Figure 2A: national-level scatterplot  ---------
-# p_fig2A <- combined_mai_changes %>% 
-#   ggplot(aes(x = log10(`2011-2015`*1e5), 
-#              y =  log10(`2016-2020`*1e5), 
-#              col = region)) +
-#   geom_abline(lty = 2, lwd = .2) +
-#   geom_point(aes(pch = admin_level,
-#                  # alpha = admin_level
-#                  size = admin_level)) +
-#   ggrepel::geom_label_repel(
-#     aes(label = country, size = admin_level, alpha = admin_level),
-#     min.segment.length = 0,
-#     nudge_x = 0,
-#     # nudge_y = .1,
-#     max.overlaps = Inf, 
-#     xlim = c(-1, 2.2),
-#     ylim = c(-1, 2.2)) +
-#   scale_size_manual(values = c(6, 4, 2.5)) +
-#   scale_shape_manual(values = c(15, 17, 16)) +
-#   # scale_alpha_manual(values = c(1, 1, .75)) +
-#   scale_color_manual(values = c("black", colors_afro_regions())) +
-#   theme_bw() +
-#   guides(color = guide_legend("WHO regions")) +
-#   scale_x_continuous(limits = c(-1.1, 2.3),
-#                      breaks = seq(-1, 2),
-#                      labels = formatC(10^(seq(-1, 2)),
-#                                       digits = 1,
-#                                       format = "fg", 
-#                                       big.mark = ",")) +
-#   scale_y_continuous(limits = c(-1.1, 2.3),
-#                      breaks = seq(-1, 2),
-#                      labels = formatC(10^(seq(-1, 2)),
-#                                       digits = 1,
-#                                       format = "fg", 
-#                                       big.mark = ",")) +
-#   labs(x = "Incidence rate 2011-2015\n[cases per 100,000/year]",
-#        y = "Incidence rate 2016-2020\n[cases per 100,000/year]") +
-#   guides(color = "none", size = "none", shape = "none", alpha = "none") +
-#   scale_alpha_manual(values = c(1, 1, 0)) #+
-# # scale_size_manual(values = c(3, 2, .7))
-# 
-# 
-# # Save
-# ggsave(p_fig2A,
-#        file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2A.png"),
-#        width = 8,
-#        height = 7, 
-#        dpi = 150)
+if (opt$save_panel_figures) {
+  ggsave(p_fig2A,
+         file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2A.png"),
+         width = 10,
+         height = 10, 
+         dpi = 300)
+}
 
 
 ## Figure 2B: rate ratio maps ---------
@@ -1719,8 +1585,7 @@ mai_adm2_change_stats <- mai_change_stats %>%
     q2.5 > 1 ~ "increase",
     q97.5 < 1 ~ "decrease",
     T ~ "no change"
-  ))#,
-# change_direction = factor(change_direction, levels = c("increase", "no change", "decrease")))
+  ))
 
 # Get the model runs by country
 # Copy data from https://livejohnshopkins.sharepoint.com/:x:/r/sites/CholeraMappingGrant/Shared%20Documents/Mapping%20Pipeline/Documentation/final_models_by_country.csv?d=w11e3a81fd229423681237bc63bc530ec&csf=1&web=1&e=7DV68a
@@ -1769,34 +1634,16 @@ p_fig2B <- mai_change_adm %>%
         strip.text = element_text(size = 15),
         legend.position = c(.2, .3))
 
-# Save
-ggsave(p_fig2B,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2B.png"),
-       width = 7,
-       height = 6, 
-       dpi = 150)
-
+if (opt$save_panel_figures) {
+  ggsave(p_fig2B,
+         file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2B.png"),
+         width = 7,
+         height = 6, 
+         dpi = 150)
+}
 
 ## Assemble Figure 2 ------
 
-# p_fig2 <- plot_grid(
-#   p_fig2A +
-#     theme(plot.margin = unit(c(2.5, 1.5, 2.5, 1.5), "lines")),
-#   p_fig2B +
-#     theme(plot.margin = unit(c(1, 1, 1, 1), "lines")),
-#   nrow = 1,
-#   labels = "auto"#,
-# ) +
-#   theme(panel.background = element_rect(fill = "white", color = "white"))
-# 
-# # Save
-# ggsave(plot = p_fig2,
-#        filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2.png"),
-#        width = 12,
-#        height = 6,
-#        dpi = 300)
-
-
 p_fig2 <- plot_grid(
   p_fig2A +
     theme(plot.margin = unit(c(1, 1, 1, 1), "lines")),
@@ -1815,6 +1662,7 @@ ggsave(plot = p_fig2,
        height = 9,
        dpi = 600)
 
+
 # Figure 3: population at risk --------------------------------------------
 
 ## Fig. 3A: People per risk category --------
@@ -1829,7 +1677,6 @@ risk_pop_all <- pop_at_risk_all %>%
   mutate(risk_cat = ifelse(risk_cat == ">100","\u2265100",risk_cat)) %>% 
   mutate(risk_cat = factor(risk_cat,levels = c("<1","1-10","10-20","20-50","50-100","\u2265100" )))
 
-
 # Values by AFRO region
 risk_pop_regions <- pop_at_risk_regions %>% 
   filter(period == "2016-2020", 
@@ -1858,12 +1705,15 @@ p_fig3A <- risk_pop_regions %>%
   annotate("segment", x = 125000000, xend = 135000000, y = 3.5, yend = 3.5, colour = "black") +
   annotate("text", x = 155000000, y = 3.5, label = '"High\nIncidence"')
 
-# Save
-ggsave(plot = p_fig3A,
-       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_3A.png"),
-       width = 12,
-       height = 7,
-       dpi = 300)
+
+if (opt$save_panel_figures) {
+  ggsave(plot = p_fig3A,
+         filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_3A.png"),
+         width = 12,
+         height = 7,
+         dpi = 300)
+}
+
 
 ## Fig. 3B: ADM2 level risk category map --------
 
@@ -1886,12 +1736,14 @@ p_fig3B <- risk_pop_50_adm2 %>%
   guides(fill = guide_legend("Incidence category\nper 100,000 pop"))
 
 
-# Save
-ggsave(p_fig3B,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_3B.png"),
-       width = 7,
-       height = 6, 
-       dpi = 300)
+if (opt$save_panel_figures) {
+  ggsave(p_fig3B,
+         file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_3B.png"),
+         width = 7,
+         height = 6, 
+         dpi = 300)
+}
+
 
 ## Assemble Figure 3 ----
 
@@ -1902,12 +1754,8 @@ p_fig3 <- plot_grid(
   p_fig3B +
     theme(strip.background = element_blank(),
           plot.margin = unit(c(1, 1, 1, 1), "lines")),
-  # theme(plot.margin = unit(c(-5, -5, -5, -3), units = "lines")),
   nrow = 1,
-  labels = "auto"#,
-  # rel_widths = c(1, 1),
-  # align = "v",
-  # axis = "lr"
+  labels = "auto"
 ) +
   theme(panel.background = element_rect(fill = "white", color = "white"))
 
@@ -1921,7 +1769,6 @@ ggsave(plot = p_fig3,
 
 # Figure 4 ----------------------------------------------------------------
 
-## Fig. 4C: Map of endemicity categories (50% cutoff) ----
 endemicity_df_50_v2 <- risk_pop_50_adm2 %>% 
   mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
          low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% 
@@ -1931,21 +1778,17 @@ endemicity_df_50_v2 <- risk_pop_50_adm2 %>%
       sum(high_risk) == 2 ~ "high-both",
       sum(low_risk) == 2 ~ "low-both",
       sum(high_risk) == 1 ~ "high-either",
-      T ~ "mix"
-    ),
+      T ~ "mix"),
     pop = max(pop)
   ) %>% 
-  mutate(endemicity = factor(endemicity, 
-                             levels = c("high-both", "high-either",
-                                        "mix", "low-both"),
-                             labels = c("sustained high", 
-                                        "history of high",
-                                        "history of moderate",
-                                        "sustained low")))  
+  mutate(
+    endemicity = factor(endemicity, 
+                        levels = c("high-both", "high-either", "mix", "low-both"),
+                        labels = c("sustained high", "history of high", "history of moderate", "sustained low"))
+  )  
 
 saveRDS(endemicity_df_50_v2, file = str_glue("{opt$output_dir}/endemicity_50.rds"))
 
-# 95% cutoff ----
 endemicity_df_95_v2 <- risk_pop_95_adm2 %>% 
   mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
          low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% 
@@ -1955,133 +1798,20 @@ endemicity_df_95_v2 <- risk_pop_95_adm2 %>%
       sum(high_risk) == 2 ~ "high-both",
       sum(low_risk) == 2 ~ "low-both",
       sum(high_risk) == 1 ~ "high-either",
-      T ~ "mix"
-    ),
+      T ~ "mix"),
     pop = max(pop)
   ) %>% 
-  mutate(endemicity = factor(endemicity, 
-                             levels = c("high-both", "high-either",
-                                        "mix", "low-both"),
-                             labels = c("sustained high", 
-                                        "history of high",
-                                        "history of moderate",
-                                        "sustained low")))  
-
-saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds"))
-
-# Figure 4C: Map
-p_fig4C <- endemicity_df_50_v2 %>% 
-  inner_join(u_space_sf, .) %>% 
-  output_plot_map(sf_obj = .,
-                  lakes_sf = lakes_sf,
-                  rivers_sf = rivers_sf,
-                  all_countries_sf = afr_sf,
-                  fill_var = "endemicity",
-                  fill_color_scale_type = "endemicity",
-                  border_width = .03) +
-  theme(legend.position = c(.2, .3),
-        panel.background = element_rect(fill = "white", color = "white")) +
-  guides(fill = guide_legend("10-year incidence\ncategory"))
-
-# Save
-ggsave(p_fig4C,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4C_risk_cat.png"),
-       width = 12,
-       height = 6, 
-       dpi = 150)
-
-## Fig. 4A: Endemicity legend ----
-
-# Tile for legend
-hrisk_cat <- taxdat::get_risk_cat_dict()[-c(1:2)]
-tile_dat <- expand.grid(x = taxdat::get_risk_cat_dict(), 
-                        y = taxdat::get_risk_cat_dict()) %>% 
-  as_tibble() %>% 
-  mutate(endemicity = case_when(x == "<1" & y == "<1" ~ "sustained low",
-                                x %in% hrisk_cat & y %in% hrisk_cat ~ "sustained high",
-                                x %in% hrisk_cat | y %in% hrisk_cat~ "history of high",
-                                T ~ "history of moderate"),
-         endemicity = factor(endemicity, levels = c("sustained high", 
-                                                    "history of high",
-                                                    "history of moderate",
-                                                    "sustained low")))
-
-# Endemicity legend
-p_fig4A <- tile_dat %>% 
-  ggplot(aes(x = x, y = y, fill = endemicity)) +
-  geom_tile(color = "white") +
-  scale_fill_manual(values = taxdat:::colors_endemicity()) +
-  theme_bw() +
-  theme(panel.border = element_blank(),
-        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
-        # axis.text = element_text(size = 4.5),
-        # axis.title = element_text(size = 5.5),
-        # axis.ticks = element_blank(),
-        # legend.text = element_text(size = 5),
-        # legend.title = element_text(size = 7),
-        legend.key.size = unit(1, units = "lines"),
-        legend.box.spacing = unit(.2, units = "lines")
-  ) +
-  labs(x = "Incidence category\nin 2011-2015", y = "Incidence category\nin 2016-2020",
-       fill = "10-year incidence\ncategory") + 
-  # annotate("segment", y = 5, yend = 6.8, x = 1.5, xend = 1.5, colour = "black", arrow = arrow(angle = 45, length = unit(.2,"cm"))) +
-  # annotate("segment", y = 5, yend = 6.8, x = 4.5, xend = 4.5, colour = "black", arrow = arrow(angle = 45, length = unit(.2,"cm"))) +
-  # annotate("segment", y = 1.5, yend = 1.5, x = 4.5, xend = 6.8, colour = "black", arrow = arrow(angle = 45, length = unit(.2,"cm"))) +
-  # annotate("text", x = Inf, y = -Inf, vjust = -3.5, hjust = 0.1, label = "133.9 M", size = 3) +
-  # coord_cartesian(xlim = c(0.5,7), clip = 'off', expand = FALSE) +
-  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
-  coord_equal()
-
-p_fig4C_legend <- ggdraw(
-  p_fig4C +
-    theme(strip.background = element_blank(),
-          plot.margin = unit(c(1, 0, 1, 0), units = "lines")) +
-    guides(fill = "none")) +
-  # annotate("text", x = -9.5, y = -4, label = "178.7 M", size = 3) +
-  # annotate("text", x = -2.7, y = -4, label = "104.6 M", size = 3)) +
-  draw_plot(p_fig4A +
-              theme(legend.position = "top") + 
-              guides(fill = guide_legend(ncol = 1, direction = "vertical")), 
-            .075, .14, .35, .45)
-
-## Fig. 4B: Fraction by categories for supplement ----
-p_fig4_supp <- endemicity_df_50_v2  %>%
-  group_by(country) %>% 
-  complete(endemicity = unique(endemicity_df_50_v2$endemicity)) %>% 
-  get_AFRO_region(ctry_col = "country") %>% 
-  group_by(AFRO_region, country, endemicity) %>%
-  summarise(n = sum(!is.na(location_period_id)),
-            pop = sum(pop[!is.na(location_period_id)])) %>% 
-  group_by(AFRO_region, country) %>%
-  mutate(frac = pop/sum(pop)) %>% 
-  group_by(country) %>% 
   mutate(
-    frac_other = frac[endemicity == "history of moderate"],
-    frac_high = sum(frac[endemicity %in% c("sustained high", "history of high")]),
-    frac_low = sum(frac[endemicity %in% c("sustained low")])
-  ) %>% 
-  ungroup() %>% 
-  mutate(endemicity = forcats::fct_rev(endemicity),
-         country = factor(country, levels = unique(country[order(frac_high, frac_other)])),
-  ) %>% 
-  ggplot(aes(y = country, x = frac, fill = endemicity)) +
-  geom_bar(stat = "identity") +
-  facet_grid(AFRO_region ~., scales = "free_y", space = "free_y") +
-  scale_fill_manual(values = taxdat:::colors_endemicity(),
-                    breaks = c("sustained high","history of high","history of moderate","sustained low")) +
-  theme_bw() +
-  labs(x = "fraction of population\n per 10-year incidence category") +
-  guides(fill=guide_legend(title="10-year incidence category"))
+    endemicity = factor(endemicity, 
+                        levels = c("high-both", "high-either", "mix", "low-both"),
+                        labels = c("sustained high", "history of high", "history of moderate", "sustained low")))  
 
-ggsave(p_fig4_supp,
-       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4_supp.png"),
-       width = 6,
-       height = 7, 
-       dpi = 150)
+saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds"))
 
 
-## Fig. 4B: Alluvial plot ----
+## Figure 4A: Alluvial plot ----
 
+# Process data for alluvial plot
 for_alluvial <- risk_pop_50_adm2 %>% 
   as_tibble() %>% 
   mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
@@ -2105,7 +1835,8 @@ for_alluvial <- risk_pop_50_adm2 %>%
          endemicity = fct_relevel(endemicity, rev(levels(endemicity))))
 
 
-p_fig4B <- for_alluvial %>% 
+# Make alluvial plot
+p_fig4A <- for_alluvial %>% 
   mutate(pop_label = str_c(round(pop/1e6), "M"),
          pop_label = case_when(period != "2016-2020" ~ NA_character_,
                                TRUE ~ pop_label)) %>% 
@@ -2131,60 +1862,96 @@ p_fig4B <- for_alluvial %>%
   guides(fill = "none")
 
 
+## Figure 4B: Map of 10-year categories ----
 
-# p_fig4 <- plot_grid(
-#   plot_grid(
-#     p_fig4A + 
-#       theme(plot.margin = unit(c(1, 0, 0, 2), units = "lines")),
-#     # guides(fill = "none") +
-#     # theme(panel.background = element_rect(fill = "white", color = "white"),
-#     # plot.margin = unit(c(2, 1.5, 1.5, 1.5), units = "lines")),
-#     p_fig4B + 
-#       theme(plot.margin = unit(c(1, 2.5, 1, 1), units = "lines")),
-#     ncol = 1,
-#     labels = c("a", "b"),
-#     rel_heights = c(.6, 1)#,
-#     # align = "v",
-#     # axis = "lr"
-#   ),
-#   p_fig4C +
-#     guides(fill = "none"),
-#   nrow = 1,
-#   labels = c(NA, "c"),
-#   rel_widths = c(1, 1.5)
-# ) +
-#   theme(panel.background = element_rect(fill = "white", color = "white"))
-# 
-# 
-# # Save
-# ggsave(plot = p_fig4,
-#        filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4.png"),
-#        width = 15,
-#        height = 8,
-#        dpi = 300)
-
-
-p_fig4_v2 <- plot_grid(
-  p_fig4B + 
-    theme(plot.margin = unit(c(1, 2.5, 1, 1), units = "lines")),
-  p_fig4C_legend +
-    guides(fill = "none"),
-  nrow = 1,
-  labels = c("a", "b"),
-  rel_widths = c(1, 1.5)
-) +
-  theme(panel.background = element_rect(fill = "white", color = "white"))
-
-
-# Save
-ggsave(plot = p_fig4_v2,
-       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4_v2.png"),
-       width = 15,
-       height = 8,
-       dpi = 300)
+# Tile for legend
+hrisk_cat <- taxdat::get_risk_cat_dict()[-c(1:2)]
+tile_dat <- expand.grid(x = taxdat::get_risk_cat_dict(), 
+                        y = taxdat::get_risk_cat_dict()) %>% 
+  as_tibble() %>% 
+  mutate(endemicity = case_when(x == "<1" & y == "<1" ~ "sustained low",
+                                x %in% hrisk_cat & y %in% hrisk_cat ~ "sustained high",
+                                x %in% hrisk_cat | y %in% hrisk_cat~ "history of high",
+                                T ~ "history of moderate"),
+         endemicity = factor(endemicity, levels = c("sustained high", 
+                                                    "history of high",
+                                                    "history of moderate",
+                                                    "sustained low")))
 
+# Endemicity legend
+p_10yr_legend <- tile_dat %>% 
+  ggplot(aes(x = x, y = y, fill = endemicity)) +
+  geom_tile(color = "white") +
+  scale_fill_manual(values = taxdat:::colors_endemicity()) +
+  theme_bw() +
+  theme(panel.border = element_blank(),
+        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
+        legend.key.size = unit(1, units = "lines"),
+        legend.box.spacing = unit(.2, units = "lines")
+  ) +
+  labs(x = "Incidence category\nin 2011-2015", y = "Incidence category\nin 2016-2020",
+       fill = "10-year incidence\ncategory") + 
+  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
+  coord_equal()
 
-# Figure 5: cholera occurrence -----------------------------------------------------
+# 10-yr risk cat map
+p_fig4B <- endemicity_df_50_v2 %>% 
+  inner_join(u_space_sf, .) %>% 
+  output_plot_map(sf_obj = .,
+                  lakes_sf = lakes_sf,
+                  rivers_sf = rivers_sf,
+                  all_countries_sf = afr_sf,
+                  fill_var = "endemicity",
+                  fill_color_scale_type = "endemicity",
+                  border_width = .03) +
+  theme(legend.position = c(.2, .3),
+        panel.background = element_rect(fill = "white", color = "white")) +
+  guides(fill = guide_legend("10-year incidence\ncategory"))
+
+# Add legend to map
+p_fig4B_legend <- ggdraw(
+  p_fig4B +
+    theme(strip.background = element_blank(),
+          plot.margin = unit(c(1, 0, 1, 0), units = "lines")) +
+    guides(fill = "none")) +
+  draw_plot(
+    p_10yr_legend +
+      theme(legend.position = "top") + 
+      guides(fill = guide_legend(ncol = 1, direction = "vertical")), 
+    .075, .14, .35, .45)
+
+if (opt$save_panel_figures) {
+  ggsave(p_fig4B,
+         file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4B_risk_cat.png"),
+         width = 12,
+         height = 6, 
+         dpi = 150)
+}
+
+
+## Assemble Figure 4 ----
+
+p_fig4 <- plot_grid(
+  p_fig4A + 
+    theme(plot.margin = unit(c(1, 2.5, 1, 1), units = "lines")),
+  p_fig4B_legend +
+    guides(fill = "none"),
+  nrow = 1,
+  labels = c("a", "b"),
+  rel_widths = c(1, 1.5)
+) +
+  theme(panel.background = element_rect(fill = "white", color = "white"))
+
+
+# Save
+ggsave(plot = p_fig4,
+       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4.png"),
+       width = 15,
+       height = 8,
+       dpi = 300)
+
+
+# Figure 5: cholera occurrence -----------------------------------------------------
 
 # Load outbreak data and results (50%)
 load(str_c(opt$output_dir, "/outbreak_analysis_data.rdata"))
@@ -2194,8 +1961,9 @@ load(str_c(opt$output_dir, "/recent_cholera_outbreaks_res.rdata"))
 final_joins <- final_joins %>% 
   mutate(admin_level = str_c("ADM", admin_level))
 
-# Map of cholera occurrence locations
-p_ob_map2 <- endemicity_df_50_v2 %>% 
+
+## Figure 5A: Map of cholera occurrence locations ----
+p_ob_map <- endemicity_df_50_v2 %>% 
   inner_join(u_space_sf, .) %>% 
   select(-admin_level) %>% 
   ggplot() +
@@ -2227,13 +1995,15 @@ p_ob_map2 <- endemicity_df_50_v2 %>%
   guides(fill = guide_legend("10-year incidence\ncategory", override.aes = list(alpha = 1))) +
   scale_shape_manual(values = c(1, 3, 4))
 
-ggsave(plot = p_ob_map2,
-       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_5_a_map.png"),
-       width = 12,
-       height = 5,
-       dpi = 100)
+if (opt$save_panel_figures) {
+  ggsave(plot = p_ob_map2,
+         filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_5_a_map.png"),
+         width = 12,
+         height = 5,
+         dpi = 100)
+}
 
-# Distribution of 10-year risk categories among ADM2 locations
+## Figure 5A: Distribution of 10-year risk categories among ADM2 locations ----
 ob_count_dat <-  obs_outbreaks %>% 
   mutate(endemicity = stringr::str_remove_all(endemicity, " risk")) %>% 
   mutate(occurrence = "       cholera\n       observed") %>% 
@@ -2251,19 +2021,6 @@ ob_count_dat <-  obs_outbreaks %>%
                                                str_replace(" ", "\n"))),
                               labels = rev(c("overall", get_AFRO_region_levels() %>% 
                                                str_replace(" ", "\n")))))
-p_frac_regions <- ob_count_dat %>% 
-  filter(AFRO_region != "overall")  %>%
-  mutate(occurrence = str_remove_all(occurrence, "       ")) %>% 
-  ggplot(aes(x = frac, y = AFRO_region, fill = endemicity)) +
-  geom_bar(stat = "identity") +
-  theme_bw() +
-  scale_fill_manual(values = taxdat:::colors_endemicity()) +
-  labs(x = "proportion of locations (ADM2 or lower)", y = "") +
-  guides(fill = "none") +
-  facet_grid(factor(occurrence, levels = c("cholera\nobserved","no cholera\nobserved")) ~ ., switch = "y") +
-  theme(strip.placement = "outer")
-
-
 p_frac_overall <- ob_count_dat %>% 
   mutate(occurrence = str_remove_all(occurrence, "  ")) %>% 
   filter(AFRO_region == "overall")  %>% 
@@ -2275,25 +2032,8 @@ p_frac_overall <- ob_count_dat %>%
   guides(fill = "none") +
   coord_flip()
 
-p_ob_frac_comb <- cowplot::plot_grid(
-  p_frac_overall +
-    theme(plot.margin = unit(c(1, .3, 0, 1), units = "lines")) +
-    theme(axis.text = element_text(size = 8, hjust = .5),
-          axis.title = element_text(size = 10)),
-  p_frac_regions +
-    theme(plot.margin = unit(c(1, 1, 0, 1), units = "lines"),
-          strip.switch.pad.grid = unit(.7, units = "line")) +
-    theme(axis.text = element_text(size = 8),
-          axis.title = element_text(size = 10),
-          strip.text = element_text(size = 8)), 
-  nrow = 1,
-  labels = c("b", "c"),
-  rel_widths = c(.6, 1),
-  align = "h",
-  axis = "tb"
-)
 
-# Model estimates
+## Figure 5B: Cholera occurrence model estimates ----
 pd1 <- position_dodge(.4)
 pd2 <- position_dodge(.3)
 
@@ -2365,34 +2105,26 @@ p_or <- plot_grid(
   theme(plot.margin = unit(c(1, 3, 1, 3), units = "lines"))
 
 
-p_or
+## Assemble Figure 5 ----
 
 p_fig5 <- plot_grid(
   plot_grid(
-    p_ob_map2,
+    p_ob_map,
     p_frac_overall  +
-      theme(plot.margin = unit(c(1, 2, 1, 1), units = "lines")#,
-            # axis.text.x = element_blank(),
-            # axis.title.x = element_blank(),
-            # axis.ticks.x = element_blank()
-      ),
-    # p_frac_regions +
-    #   theme(plot.margin = unit(c(.5, 3, 1, 1), units = "lines")), 
+      theme(plot.margin = unit(c(1, 2, 1, 1), units = "lines")),
     labels = c("a", "b"),
-    # align = "v",
-    # axis = "lr",
     rel_heights = c(1, .3),
     ncol = 1
   ),
   p_or,
   nrow = 1,
   rel_widths = c(1.35, 1),
-  # labels = c(NA_character_, "e"),
   align = "v",
   axis = "tb"
 ) +
   theme(plot.background = element_rect(fill = "white", color = "white"))
 
+# Save
 ggsave(plot = p_fig5,
        filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_5.png"),
        width = 13,
@@ -2400,6 +2132,180 @@ ggsave(plot = p_fig5,
        dpi = 300)
 
 
+
+# Figure 6: targetting ----------------------------------------------------
+
+target_pop_levels <- c(1e7, seq(5e7, 4e8, by = 5e7))
+
+pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
+  print(x)
+  cumul_pop_frac_stats_2016_2020 %>% 
+    group_by(ranking) %>% 
+    group_modify(function(y, z) {
+      rid <- which(y$cumul_pop >= x)[1]
+      slice(y, rid) %>% 
+        mutate(target_pop = x)
+    })
+})
+
+case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
+  print(x)
+  cumul_case_frac_stats_2016_2020 %>% 
+    group_by(ranking) %>% 
+    group_modify(function(y, z) {
+      rid <- which(y$cumul_pop >= x)[1]
+      slice(y, rid) %>% 
+        mutate(target_pop = x)
+    })
+})
+
+case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) {
+  print(x)
+  cumul_case_frac_stats_2011_2015 %>% 
+    group_by(ranking) %>% 
+    group_modify(function(y, z) {
+      rid <- which(y$cumul_pop >= x)[1]
+      slice(y, rid) %>% 
+        mutate(target_pop = x)
+    })
+})
+
+# Plot
+colors_ranking <- function() {
+  c("2011-2015" = "purple", 
+    "2016-2020" = "orange", 
+    "2011-2020" = "darkgreen", 
+    "2022-2023"= "darkgray",
+    "optimal"= "gray")
+}
+
+pd <- position_dodge(width = .8, preserve = "single")
+
+data_for_figure6_2016_2020 <- bind_rows(
+  pop_frac_sel_2016_2020 %>%
+    mutate(what = "population living in ADM2 units\nwith cholera occurence in 2022-2023"), 
+  case_frac_sel_2016_2020 %>% 
+    mutate(what = "annual cholera cases in 2016-2020")
+) %>% 
+  mutate(
+    target_pop_factor = factor(
+      str_c(formatC(target_pop/1e6, format = "f", digits = 0), "M"),
+      levels = str_c(formatC(target_pop_levels/1e6, format = "f", digits = 0), "M")),
+    ranking = factor(ranking, levels = names(colors_ranking())),
+    targeting = case_when(str_detect(what, "pop") & ranking == "optimal" ~ "oracle",
+                          str_detect(what, "case") & ranking == "2016-2020" ~ "oracle",
+                          TRUE ~ "prospective") %>% 
+      factor()
+  ) %>% 
+  filter(!(ranking == "optimal" & str_detect(what, "2016")))
+
+
+leg_title <- "Period used for\ntargeting"
+pd <- position_dodge(width = 2.5e7, preserve = "single")
+
+p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
+  mutate(ranking = case_when(ranking == "optimal" ~ "2022-2023", 
+                             T ~ ranking) %>% 
+           factor(levels = names(colors_ranking()))) %>% 
+  ggplot(aes(x = target_pop, y = mean_diff, color = ranking, fill = ranking)) +
+  geom_abline(aes(intercept = 0, slope = 1/1.1e9), lty = 2, lwd = .2) +
+  geom_bar(
+    stat = "identity", 
+    inherit.aes = F,
+    aes(x = target_pop, y = mean_diff, group = ranking),
+    position = pd, width = 2e7, alpha = 1, lwd = .1, 
+    fill = "white",
+    color = "black") +
+  ggpattern::geom_bar_pattern(
+    aes(pattern = targeting),
+    stat = "identity", 
+    position = pd, 
+    width = 2e7, 
+    alpha = .5, 
+    lwd = .05,
+    color = "black",
+    pattern_color = "white", 
+    pattern_fill = "white",
+    pattern_angle = 45,
+    pattern_density = 0.01,
+    pattern_spacing = 0.025,
+    pattern_key_scale_factor = 0.6) +
+  geom_bar(
+    stat = "identity", 
+    inherit.aes = F,
+    aes(x = target_pop, y = mean_diff, group = ranking),
+    position = pd, width = 2e7, alpha = 0, lwd = .1, 
+    fill = "white",
+    color = "black") +
+  geom_errorbar(aes(ymin = q025_diff, ymax = q975_diff), 
+                position = pd, width = 1.5e7, lwd = .3) +
+  theme_bw() +
+  facet_grid(. ~ what) +
+  scale_fill_manual(values = colors_ranking()) +
+  scale_color_manual(values = colors_ranking()) +
+  labs(x = "Population targeted (out of total of 1.1 billion in 2020)",
+       y = "Proportion reached") +
+  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0%", "25%", "50%", "75%", "100%")) +
+  scale_x_continuous(breaks = c(10, seq(50, 400, by = 50))*1e6, 
+                     labels = function(x) str_c(formatC(x*1e-6, format = "f", digits = 0), "M")) +
+  theme(panel.grid.major.x = element_blank(),
+        panel.grid.minor.x = element_blank()
+  ) +
+  guides(fill = guide_legend(leg_title, override.aes = list(pattern = "none")),
+         color = guide_legend(leg_title),
+         pattern = guide_legend("Targeting type"))  +
+  ggpattern::scale_pattern_manual(values = c(oracle = "stripe", prospective = "none"))
+
+
+data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
+  filter(target_pop == 1e8,
+         str_detect(what, "cases")) %>% 
+  ungroup() %>% 
+  slice(1:2) %>%
+  bind_rows(
+    data_for_figure6_2016_2020 %>% 
+      filter(target_pop == 1e8,
+             str_detect(what, "pop")) %>% 
+      ungroup() %>% 
+      slice(3:4)
+  ) %>% 
+  group_by(what) %>% 
+  mutate(x = as.numeric(target_pop) + c(-.3, -.1)*3e7,
+         x2 = as.numeric(target_pop) + c(-.5, .1)*3.5e7,
+         y = mean_diff * c(1.06, 1.04),
+         what_label = str_extract(what, "cases|population"),
+         what_label = case_when(what_label == "cases" ~ "cholera cases",
+                                TRUE ~ "cholera-affected population"),
+         label = c(str_glue("unreached {what_label[1]}"),
+                   str_glue("unreached {what_label[1]}"))) %>% 
+  slice(1)
+
+p_targets2_2016_2020 <- p_targets_v2_2016_2020 +
+  theme(legend.background = element_blank()) +
+  geom_segment(
+    data = data_arrows_figure6_v2_2016_2020,
+    aes(x = x, y = y, yend = .99,
+        group = ranking),
+    inherit.aes = F,
+    linestart = "butt", lineend = "butt", linejoin = "mitre",
+    size = .4, arrow = arrow(length = unit(0.05, "inches"), type = "closed", ends = "both"),
+    color = c("#575757")
+  )  +
+  geom_text(
+    data = data_arrows_figure6_v2_2016_2020,
+    aes(x = x2, y = (y + .99)/2, label = label), 
+    inherit.aes = F,
+    angle = 90,
+    color = "black",
+    size = 2.5,
+  ) +
+  geom_hline(aes(yintercept = 1), color = "darkgray", lty = 2, lwd = .6)
+
+
+ggsave(p_targets2_2016_2020, 
+       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6.png"),
+       width = 12, height = 5.5, dpi = 300)
+
 # Supplementary figures ---------------------------------------------------
 
 
@@ -2656,19 +2562,54 @@ ggsave(p_fig4_supp_95,
        height = 7, 
        dpi = 150)
 
-## Fig. 4B: Alluvial plot ----
-
-for_alluvial_95 <- risk_pop_95_adm2 %>% 
-  as_tibble() %>% 
-  mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
-         low_risk = risk_cat %in% get_risk_cat_dict()[1],
-         risk_cat_simple = case_when(risk_cat %in% get_risk_cat_dict()[3:6] ~ "high",
-                                     risk_cat %in% get_risk_cat_dict()[1] ~ "low",
-                                     T ~ "mid")) %>% 
-  select(country, location_period_id, risk_cat_simple, period) %>% 
-  inner_join(endemicity_df_95_v2 %>% select(location_period_id, pop, endemicity)) %>% 
-  group_by(location_period_id) %>% 
-  mutate(risk_cat_change = str_c(risk_cat_simple, collapse = "-")) %>% 
+# Fraction by categories for supplement ----
+p_fig4_supp <- endemicity_df_50_v2  %>%
+  group_by(country) %>% 
+  complete(endemicity = unique(endemicity_df_50_v2$endemicity)) %>% 
+  get_AFRO_region(ctry_col = "country") %>% 
+  group_by(AFRO_region, country, endemicity) %>%
+  summarise(n = sum(!is.na(location_period_id)),
+            pop = sum(pop[!is.na(location_period_id)])) %>% 
+  group_by(AFRO_region, country) %>%
+  mutate(frac = pop/sum(pop)) %>% 
+  group_by(country) %>% 
+  mutate(
+    frac_other = frac[endemicity == "history of moderate"],
+    frac_high = sum(frac[endemicity %in% c("sustained high", "history of high")]),
+    frac_low = sum(frac[endemicity %in% c("sustained low")])
+  ) %>% 
+  ungroup() %>% 
+  mutate(endemicity = forcats::fct_rev(endemicity),
+         country = factor(country, levels = unique(country[order(frac_high, frac_other)])),
+  ) %>% 
+  ggplot(aes(y = country, x = frac, fill = endemicity)) +
+  geom_bar(stat = "identity") +
+  facet_grid(AFRO_region ~., scales = "free_y", space = "free_y") +
+  scale_fill_manual(values = taxdat:::colors_endemicity(),
+                    breaks = c("sustained high","history of high","history of moderate","sustained low")) +
+  theme_bw() +
+  labs(x = "fraction of population\n per 10-year incidence category") +
+  guides(fill=guide_legend(title="10-year incidence category"))
+
+ggsave(p_fig4_supp,
+       file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_4_supp.png"),
+       width = 6,
+       height = 7, 
+       dpi = 150)
+
+## Fig. 4B: Alluvial plot ----
+
+for_alluvial_95 <- risk_pop_95_adm2 %>% 
+  as_tibble() %>% 
+  mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
+         low_risk = risk_cat %in% get_risk_cat_dict()[1],
+         risk_cat_simple = case_when(risk_cat %in% get_risk_cat_dict()[3:6] ~ "high",
+                                     risk_cat %in% get_risk_cat_dict()[1] ~ "low",
+                                     T ~ "mid")) %>% 
+  select(country, location_period_id, risk_cat_simple, period) %>% 
+  inner_join(endemicity_df_95_v2 %>% select(location_period_id, pop, endemicity)) %>% 
+  group_by(location_period_id) %>% 
+  mutate(risk_cat_change = str_c(risk_cat_simple, collapse = "-")) %>% 
   filter(str_detect(risk_cat_change, "-")) %>% 
   group_by(period, risk_cat_simple, risk_cat_change, endemicity) %>% 
   summarise(pop = sum(pop)) %>% 
@@ -2813,341 +2754,128 @@ p_frac_regions_95 <- ob_count_dat_95 %>%
   ggplot(aes(x = frac, y = AFRO_region, fill = endemicity)) +
   geom_bar(stat = "identity") +
   theme_bw() +
-  scale_fill_manual(values = taxdat:::colors_endemicity()) +
-  labs(x = "proportion of locations (ADM2 or lower)", y = "") +
-  guides(fill = "none") +
-  facet_grid(factor(occurrence, levels = c("cholera\nobserved","no cholera\nobserved")) ~ ., switch = "y") +
-  theme(strip.placement = "outer")
-
-
-p_frac_overall_95 <- ob_count_dat_95 %>% 
-  mutate(occurrence = str_remove_all(occurrence, "  ")) %>% 
-  filter(AFRO_region == "overall")  %>% 
-  ggplot(aes(x = factor(occurrence, levels = c("no cholera \nobserved "," cholera\n observed")), y = frac, fill = endemicity)) +
-  geom_bar(stat = "identity") +
-  theme_bw() +
-  scale_fill_manual(values = taxdat:::colors_endemicity()) +
-  labs(y = "proportion of locations", x = "") +
-  guides(fill = "none") +
-  coord_flip()
-
-p_ob_frac_comb_95 <- cowplot::plot_grid(
-  p_frac_overall_95 +
-    theme(plot.margin = unit(c(1, .3, 0, 1), units = "lines")) +
-    theme(axis.text = element_text(size = 8, hjust = .5),
-          axis.title = element_text(size = 10)),
-  p_frac_regions_95 +
-    theme(plot.margin = unit(c(1, 1, 0, 1), units = "lines"),
-          strip.switch.pad.grid = unit(.7, units = "line")) +
-    theme(axis.text = element_text(size = 8),
-          axis.title = element_text(size = 10),
-          strip.text = element_text(size = 8)), 
-  nrow = 1,
-  labels = c("b", "c"),
-  rel_widths = c(.6, 1),
-  align = "h",
-  axis = "tb"
-)
-
-
-p_ob_1_95 <- baseline_prob_stats %>% 
-  mutate(what = "reference",
-         param = case_when(str_detect(param, "baseline") ~ "sustained low risk",
-                           TRUE ~ param)) %>% 
-  ggplot(aes(x = param, y = mean, ymin = q2.5, ymax = q97.5, color = AFRO_region)) +
-  geom_point(position = pd1) +
-  geom_errorbar(width = 0, position = pd1) +
-  theme_bw() +
-  facet_grid(. ~ what, scales = "free", space = "free") +
-  scale_color_manual(values = c("overall" = "black", colors_afro_regions())) +
-  labs(x = "", y = "probability of cholera occurrence") +
-  guides(color = "none") +
-  coord_cartesian(ylim = c(0, 1))  +
-  theme(axis.text = element_text(size = 8),
-        axis.title = element_text(size = 10))
-
-p_ob_2_95 <- logOR_stats %>% 
-  mutate(what = str_replace(what, "outbreak", "cholera")) %>% 
-  ggplot(aes(x = param, y = mean, ymin = q2.5, ymax = q97.5, color = AFRO_region)) +
-  geom_point(position = pd2) +
-  geom_errorbar(width = 0, position = pd2) +
-  geom_hline(aes(yintercept = 0), lty = 3, lwd = .6) +
-  facet_grid(. ~ what, scales = "free", space = "free") +
-  theme_bw() +
-  scale_color_manual(values = c("overall" = "black", colors_afro_regions())) +
-  labs(x = "10-year cholera risk category", y = "log-Odds ratio", color = NULL) +
-  theme(legend.position = c(.145, .84),
-        legend.key.height = unit(.75, units = "lines"),
-        axis.text = element_text(size = 8),
-        axis.title = element_text(size = 10),
-        legend.title = element_blank(),
-        legend.text = element_text(size = 8))
-
-
-
-p_fig5_95 <- plot_grid(
-  plot_grid(
-    p_ob_map2_95, 
-    plot_grid(
-      p_frac_overall_95  +
-        theme(plot.margin = unit(c(2, 3, 0, 1), units = "lines"),
-              axis.text.x = element_blank(),
-              axis.title.x = element_blank(),
-              axis.ticks.x = element_blank()),
-      p_frac_regions_95 +
-        theme(plot.margin = unit(c(.5, 3, 1, 1), units = "lines")), 
-      labels = c("b", "c"),
-      align = "v",
-      axis = "lr",
-      rel_heights = c(.4, 1),
-      ncol = 1
-    ),
-    nrow = 1,
-    rel_widths = c(1.2, 1),
-    align = "h",
-    axis = "tb",
-    labels = c("a", NA_character_)
-  ),
-  plot_grid(
-    p_ob_1_95 +
-      guides(color = guide_legend("region")) +
-      theme(legend.position = "right") +
-      theme(plot.margin = unit(c(1, 3, 1, 1), units = "lines")), 
-    p_ob_2_95 +
-      guides(color = "none"),
-    nrow = 1,
-    rel_widths = c(.7, 1),
-    align = "h",
-    axis = "tb",
-    labels = c("d", "e")
-  ) +
-    theme(plot.margin = unit(c(1, 3, 1, 3), units = "lines")),
-  ncol = 1,
-  rel_heights = c(1.5, 1),
-  # labels = c(NA_character_, "e"),
-  align = "h",
-  axis = "lr"
-) +
-  theme(plot.background = element_rect(fill = "white", color = "white"))
-
-ggsave(plot = p_fig5_95,
-       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_5_95.png"),
-       width = 13,
-       height = 12,
-       dpi = 300)
-
-
-
-# Figure 6: targetting ----------------------------------------------------
-
-target_pop_levels <- c(1e7, seq(5e7, 4e8, by = 5e7))
-
-pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
-  print(x)
-  cumul_pop_frac_stats_2016_2020 %>% 
-    group_by(ranking) %>% 
-    group_modify(function(y, z) {
-      rid <- which(y$cumul_pop >= x)[1]
-      slice(y, rid) %>% 
-        mutate(target_pop = x)
-    })
-})
-
-case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
-  print(x)
-  cumul_case_frac_stats_2016_2020 %>% 
-    group_by(ranking) %>% 
-    group_modify(function(y, z) {
-      rid <- which(y$cumul_pop >= x)[1]
-      slice(y, rid) %>% 
-        mutate(target_pop = x)
-    })
-})
-
-case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) {
-  print(x)
-  cumul_case_frac_stats_2011_2015 %>% 
-    group_by(ranking) %>% 
-    group_modify(function(y, z) {
-      rid <- which(y$cumul_pop >= x)[1]
-      slice(y, rid) %>% 
-        mutate(target_pop = x)
-    })
-})
-
-# Plot
-colors_ranking <- function() {
-  c("2011-2015" = "purple", 
-    "2016-2020" = "orange", 
-    "2011-2020" = "darkgreen", 
-    "2022-2023"= "darkgray",
-    "optimal"= "gray")
-}
-
-pd <- position_dodge(width = .8, preserve = "single")
-
-data_for_figure6_2016_2020 <- bind_rows(
-  pop_frac_sel_2016_2020 %>%
-    mutate(what = "population living in ADM2 units\nwith cholera occurence in 2022-2023"), 
-  case_frac_sel_2016_2020 %>% 
-    mutate(what = "annual cholera cases in 2016-2020")
-) %>% 
-  mutate(
-    target_pop_factor = factor(
-      str_c(formatC(target_pop/1e6, format = "f", digits = 0), "M"),
-      levels = str_c(formatC(target_pop_levels/1e6, format = "f", digits = 0), "M")),
-    ranking = factor(ranking, levels = names(colors_ranking())),
-    targeting = case_when(str_detect(what, "pop") & ranking == "optimal" ~ "oracle",
-                          str_detect(what, "case") & ranking == "2016-2020" ~ "oracle",
-                          TRUE ~ "prospective") %>% 
-      factor()
-  ) %>% 
-  filter(!(ranking == "optimal" & str_detect(what, "2016")))
-
-# p_targets <- data_for_figure6 %>% 
-#   ggplot(aes(x = target_pop_factor, y = mean_diff, color = ranking, fill = ranking)) +
-#   geom_bar(stat = "identity", 
-#            inherit.aes = F,
-#            aes(x = target_pop_factor, y = mean_diff, group = ranking),
-#            position = pd, width = .67, alpha = 1, lwd = .1, 
-#            fill = "white",
-#            color = "black") +
-#   geom_bar(stat = "identity", position = pd, width = .67, alpha = .5, lwd = .1) +
-#   # geom_point(position = pd, size = 1) +
-#   geom_errorbar(aes(ymin = q025_diff, ymax = q975_diff), 
-#                 position = pd, width = .35, lwd = .3) +
-#   theme_bw() +
-#   facet_grid(. ~ what) +
-#   scale_fill_manual(values = colors_ranking()) +
-#   scale_color_manual(values = colors_ranking()) +
-#   labs(x = "Population targeted (out of total of 1.1 billion in 2020)",
-#        y = "Proportion reached of ...") +
-#   scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0%", "25%", "50%", "75%", "100%")) +
-#   theme(panel.grid.major.x = element_blank(),
-#         panel.grid.minor.x = element_blank(),
-#         legend.position = c(.63, .8)) +
-#   guides(fill = guide_legend("ADM2 targeting strategy"),
-#          color = guide_legend("ADM2 targeting strategy"))
-
-leg_title <- "Period used for\ntargeting"
-pd <- position_dodge(width = 2.5e7, preserve = "single")
-
-p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
-  mutate(ranking = case_when(ranking == "optimal" ~ "2022-2023", 
-                             T ~ ranking) %>% 
-           factor(levels = names(colors_ranking()))) %>% 
-  ggplot(aes(x = target_pop, y = mean_diff, color = ranking, fill = ranking)) +
-  geom_abline(aes(intercept = 0, slope = 1/1.1e9), lty = 2, lwd = .2) +
-  geom_bar(
-    stat = "identity", 
-    inherit.aes = F,
-    aes(x = target_pop, y = mean_diff, group = ranking),
-    position = pd, width = 2e7, alpha = 1, lwd = .1, 
-    fill = "white",
-    color = "black") +
-  ggpattern::geom_bar_pattern(
-    aes(pattern = targeting),
-    stat = "identity", 
-    position = pd, 
-    width = 2e7, 
-    alpha = .5, 
-    lwd = .05,
-    color = "black",
-    pattern_color = "white", 
-    pattern_fill = "white",
-    pattern_angle = 45,
-    pattern_density = 0.01,
-    pattern_spacing = 0.025,
-    pattern_key_scale_factor = 0.6) +
-  geom_bar(
-    stat = "identity", 
-    inherit.aes = F,
-    aes(x = target_pop, y = mean_diff, group = ranking),
-    position = pd, width = 2e7, alpha = 0, lwd = .1, 
-    fill = "white",
-    color = "black") +
-  # geom_point(position = pd, size = 1) +
-  geom_errorbar(aes(ymin = q025_diff, ymax = q975_diff), 
-                position = pd, width = 1.5e7, lwd = .3) +
-  theme_bw() +
-  facet_grid(. ~ what) +
-  scale_fill_manual(values = colors_ranking()) +
-  scale_color_manual(values = colors_ranking()) +
-  labs(x = "Population targeted (out of total of 1.1 billion in 2020)",
-       y = "Proportion reached") +
-  scale_y_continuous(breaks = c(0, .25, .5, .75, 1), labels = c("0%", "25%", "50%", "75%", "100%")) +
-  scale_x_continuous(breaks = c(10, seq(50, 400, by = 50))*1e6, 
-                     labels = function(x) str_c(formatC(x*1e-6, format = "f", digits = 0), "M")) +
-  theme(panel.grid.major.x = element_blank(),
-        panel.grid.minor.x = element_blank()
-        # legend.position = c(.9, .8)
-  ) +
-  guides(fill = guide_legend(leg_title, override.aes = list(pattern = "none")),
-         color = guide_legend(leg_title),
-         pattern = guide_legend("Targeting type"))  +
-  ggpattern::scale_pattern_manual(values = c(oracle = "stripe", prospective = "none"))
+  scale_fill_manual(values = taxdat:::colors_endemicity()) +
+  labs(x = "proportion of locations (ADM2 or lower)", y = "") +
+  guides(fill = "none") +
+  facet_grid(factor(occurrence, levels = c("cholera\nobserved","no cholera\nobserved")) ~ ., switch = "y") +
+  theme(strip.placement = "outer")
 
 
-# p_targets_v2
+p_frac_overall_95 <- ob_count_dat_95 %>% 
+  mutate(occurrence = str_remove_all(occurrence, "  ")) %>% 
+  filter(AFRO_region == "overall")  %>% 
+  ggplot(aes(x = factor(occurrence, levels = c("no cholera \nobserved "," cholera\n observed")), y = frac, fill = endemicity)) +
+  geom_bar(stat = "identity") +
+  theme_bw() +
+  scale_fill_manual(values = taxdat:::colors_endemicity()) +
+  labs(y = "proportion of locations", x = "") +
+  guides(fill = "none") +
+  coord_flip()
 
-# Add annotations
-# data_arrows_figure6 <- data_for_figure6 %>% 
-#   filter(str_detect(what, "cases")) %>%
-#   slice(2) %>% 
-#   ungroup() %>%
-#   mutate(x = as.numeric(target_pop_factor) + c(-.3, -.1),
-#          x2 = as.numeric(target_pop_factor) + c(-.5, .1),
-#          y = mean_diff * c(1.06, 1.04),
-#          label = c("Realized coverage gap", "Best-case coverage gap"))
+p_ob_frac_comb_95 <- cowplot::plot_grid(
+  p_frac_overall_95 +
+    theme(plot.margin = unit(c(1, .3, 0, 1), units = "lines")) +
+    theme(axis.text = element_text(size = 8, hjust = .5),
+          axis.title = element_text(size = 10)),
+  p_frac_regions_95 +
+    theme(plot.margin = unit(c(1, 1, 0, 1), units = "lines"),
+          strip.switch.pad.grid = unit(.7, units = "line")) +
+    theme(axis.text = element_text(size = 8),
+          axis.title = element_text(size = 10),
+          strip.text = element_text(size = 8)), 
+  nrow = 1,
+  labels = c("b", "c"),
+  rel_widths = c(.6, 1),
+  align = "h",
+  axis = "tb"
+)
 
 
-data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
-  filter(target_pop == 1e8,
-         str_detect(what, "cases")) %>% 
-  ungroup() %>% 
-  slice(1:2) %>%
-  bind_rows(
-    data_for_figure6_2016_2020 %>% 
-      filter(target_pop == 1e8,
-             str_detect(what, "pop")) %>% 
-      ungroup() %>% 
-      slice(3:4)
-  ) %>% 
-  group_by(what) %>% 
-  mutate(x = as.numeric(target_pop) + c(-.3, -.1)*3e7,
-         x2 = as.numeric(target_pop) + c(-.5, .1)*3.5e7,
-         y = mean_diff * c(1.06, 1.04),
-         what_label = str_extract(what, "cases|population"),
-         what_label = case_when(what_label == "cases" ~ "cholera cases",
-                                TRUE ~ "cholera-affected population"),
-         label = c(str_glue("unreached {what_label[1]}"),
-                   str_glue("unreached {what_label[1]}"))) %>% 
-  slice(1)
+p_ob_1_95 <- baseline_prob_stats %>% 
+  mutate(what = "reference",
+         param = case_when(str_detect(param, "baseline") ~ "sustained low risk",
+                           TRUE ~ param)) %>% 
+  ggplot(aes(x = param, y = mean, ymin = q2.5, ymax = q97.5, color = AFRO_region)) +
+  geom_point(position = pd1) +
+  geom_errorbar(width = 0, position = pd1) +
+  theme_bw() +
+  facet_grid(. ~ what, scales = "free", space = "free") +
+  scale_color_manual(values = c("overall" = "black", colors_afro_regions())) +
+  labs(x = "", y = "probability of cholera occurrence") +
+  guides(color = "none") +
+  coord_cartesian(ylim = c(0, 1))  +
+  theme(axis.text = element_text(size = 8),
+        axis.title = element_text(size = 10))
 
-p_targets2_2016_2020 <- p_targets_v2_2016_2020 +
-  theme(legend.background = element_blank()) +
-  geom_segment(
-    data = data_arrows_figure6_v2_2016_2020,
-    aes(x = x, y = y, yend = .99,
-        group = ranking),
-    inherit.aes = F,
-    linestart = "butt", lineend = "butt", linejoin = "mitre",
-    size = .4, arrow = arrow(length = unit(0.05, "inches"), type = "closed", ends = "both"),
-    color = c("#575757")
-  )  +
-  geom_text(
-    data = data_arrows_figure6_v2_2016_2020,
-    aes(x = x2, y = (y + .99)/2, label = label), 
-    inherit.aes = F,
-    angle = 90,
-    color = "black",
-    size = 2.5,
-  ) +
-  geom_hline(aes(yintercept = 1), color = "darkgray", lty = 2, lwd = .6)
+p_ob_2_95 <- logOR_stats %>% 
+  mutate(what = str_replace(what, "outbreak", "cholera")) %>% 
+  ggplot(aes(x = param, y = mean, ymin = q2.5, ymax = q97.5, color = AFRO_region)) +
+  geom_point(position = pd2) +
+  geom_errorbar(width = 0, position = pd2) +
+  geom_hline(aes(yintercept = 0), lty = 3, lwd = .6) +
+  facet_grid(. ~ what, scales = "free", space = "free") +
+  theme_bw() +
+  scale_color_manual(values = c("overall" = "black", colors_afro_regions())) +
+  labs(x = "10-year cholera risk category", y = "log-Odds ratio", color = NULL) +
+  theme(legend.position = c(.145, .84),
+        legend.key.height = unit(.75, units = "lines"),
+        axis.text = element_text(size = 8),
+        axis.title = element_text(size = 10),
+        legend.title = element_blank(),
+        legend.text = element_text(size = 8))
 
 
-ggsave(p_targets2_2016_2020, filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6.png"),
-       width = 12, height = 5.5, dpi = 300)
+
+p_fig5_95 <- plot_grid(
+  plot_grid(
+    p_ob_map2_95, 
+    plot_grid(
+      p_frac_overall_95  +
+        theme(plot.margin = unit(c(2, 3, 0, 1), units = "lines"),
+              axis.text.x = element_blank(),
+              axis.title.x = element_blank(),
+              axis.ticks.x = element_blank()),
+      p_frac_regions_95 +
+        theme(plot.margin = unit(c(.5, 3, 1, 1), units = "lines")), 
+      labels = c("b", "c"),
+      align = "v",
+      axis = "lr",
+      rel_heights = c(.4, 1),
+      ncol = 1
+    ),
+    nrow = 1,
+    rel_widths = c(1.2, 1),
+    align = "h",
+    axis = "tb",
+    labels = c("a", NA_character_)
+  ),
+  plot_grid(
+    p_ob_1_95 +
+      guides(color = guide_legend("region")) +
+      theme(legend.position = "right") +
+      theme(plot.margin = unit(c(1, 3, 1, 1), units = "lines")), 
+    p_ob_2_95 +
+      guides(color = "none"),
+    nrow = 1,
+    rel_widths = c(.7, 1),
+    align = "h",
+    axis = "tb",
+    labels = c("d", "e")
+  ) +
+    theme(plot.margin = unit(c(1, 3, 1, 3), units = "lines")),
+  ncol = 1,
+  rel_heights = c(1.5, 1),
+  # labels = c(NA_character_, "e"),
+  align = "h",
+  axis = "lr"
+) +
+  theme(plot.background = element_rect(fill = "white", color = "white"))
+
+ggsave(plot = p_fig5_95,
+       filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_5_95.png"),
+       width = 13,
+       height = 12,
+       dpi = 300)
 
 # Figure 6 supplement ----
 data_for_figure6_2011_2015 <- bind_rows(
@@ -3225,6 +2953,206 @@ ggsave(p_targets_v2_2011_2015_supp, filename = str_glue("{opt$out_dir}/{opt$out_
        width = 7.5, height = 5.5, dpi = 300)
 
 # Scraps ------------------------------------------------------------------
+# compute_irr <- function(target_cells_sf,
+#                         grid_cases,
+#                         grid_rates,
+#                         mean_rate) {
+# 
+#   # Get population of these cells
+#   target_cells <- grid_cases %>%
+#     st_drop_geometry() %>%
+#     select(rid, x, y, cases = mean) %>%
+#     inner_join(
+#       grid_rates %>%
+#         st_drop_geometry() %>%
+#         select(rid, x, y, rate = mean),
+#       by = c("rid", "x", "y")
+#     ) %>%
+#     mutate(pop = cases/rate) %>%
+#     inner_join(target_cells_sf %>%
+#                  st_drop_geometry(),
+#                by = c("rid", "x", "y"))
+# 
+# 
+#   # Compute expected cases
+#   stats <- target_cells %>%
+#     mutate(mean_rate = mean_rate,
+#            expected_cases = pop * mean_rate) %>%
+#     summarise(observed = sum(mean),
+#               expected = sum(expected_cases),
+#               ratio = observed/expected)
+# 
+#   stats
+# }
+
+
+#' Title
+#'
+#' @param target
+#' @param dist_vec
+#' @param mai_adm
+#' @param grid_cases
+#' @param afr_sf
+#' @param dist_definition
+#'
+#' @return
+#' @export
+#'
+#' @examples
+# irr_dist <- function(target,
+#                      dist_vec = seq(5e4, 50e4, by = 5e4),
+#                      mai_adm,
+#                      mai_adm_all,
+#                      grid_cases,
+#                      afr_sf,
+#                      dist_definition = "within",
+#                      by_region = TRUE) {
+# 
+#   if (by_region) {
+#     u_units <- unique(mai_adm$AFRO_region)
+#   } else {
+#     u_units <- unique(mai_adm$country)
+#   }
+# 
+#   res <- map_df(
+#     u_units,
+#     function(this_unit) {
+#       map_df(
+#         unique(mai_adm$period),
+#         function(this_period) {
+# 
+#           cat("Unit", this_unit, "period", this_period, "\n")
+# 
+#           # Get all cells for which we have at least case
+#           case_cells <- grid_cases %>%
+#             filter(mean > 1, period == this_period)
+# 
+#           if (by_region) {
+#             in_unit <- st_intersects(case_cells,
+#                                      afr_sf %>%
+#                                        filter(AFRO_region== this_unit),
+#                                      sparse = FALSE) %>%
+#               rowSums() %>%
+#               {.>0}
+#           } else {
+#             in_unit <- st_intersects(case_cells,
+#                                      afr_sf %>%
+#                                        filter(country == this_unit),
+#                                      sparse = FALSE) %>%
+#               rowSums() %>%
+#               {.>0}
+#           }
+# 
+# 
+#           if (sum(in_unit) == 0) {
+#             res <- tibble(observed = NA,
+#                           expected = NA,
+#                           ratio = NA,
+#                           dist_thresh = NA)
+#           } else {
+# 
+#             this_case_cells <- case_cells[in_unit, ]
+# 
+#             # Make buffer
+#             centroids <- st_centroid(this_case_cells)
+#             centroids <- st_transform(centroids, st_crs(target))
+# 
+#             # Define whether it is within distance or bands
+#             if (dist_definition == "within") {
+#               dist_vec2 <- dist_vec
+#               len <- length(dist_vec2)
+#               mean_dist <- dist_vec
+#             } else {
+#               dist_vec2 <- cbind(c(0, dist_vec[-length(dist_vec)]), dist_vec)
+#               len <- nrow(dist_vec2)
+#               mean_dist <- apply(dist_vec2, 1, mean)
+#             }
+# 
+#             # Filter all grid cells within distance
+#             res <- map_df(
+#               seq_len(len),
+#               function(x) {
+# 
+# 
+#                 if (dist_definition == "within") {
+#                   # Text to save
+#                   dist_txt <- as.character(dist_vec[x]/1e3)
+# 
+#                   within_dist <-  st_is_within_distance(centroids, target,
+#                                                         dist = dist_vec[x],
+#                                                         sparse = F) %>%
+#                     rowSums() %>%
+#                     {.>0}
+# 
+#                   target_cells_sf <- centroids[within_dist, ]
+# 
+#                 } else {
+# 
+#                   # Text to save
+#                   dist_txt <- str_c(formatC(dist_vec2[x, ]/1e3, format = "f", digits = 0, big.mark = ","), collapse = "-")
+# 
+#                   within_dist <- st_is_within_distance(centroids, target,
+#                                                        dist = dist_vec2[x, 2],
+#                                                        sparse = F) %>%
+#                     rowSums() %>%
+#                     {.>0}
+# 
+#                   beyond_dist <- st_is_within_distance(centroids, target,
+#                                                        dist = dist_vec2[x, 1] + 1,
+#                                                        sparse = F) %>%
+#                     rowSums() %>%
+#                     {. > 0} %>%
+#                     {!.}
+# 
+#                   target_cells_sf <- centroids[within_dist & beyond_dist, ]
+# 
+#                 }
+# 
+#                 if (sum(within_dist) > 0) {
+# 
+#                   mean_rate <- get_mean_rate(mai_adm_all = mai_adm_all,
+#                                              this_unit = this_unit,
+#                                              this_period = this_period,
+#                                              by_region = by_region)
+# 
+# 
+#                   stats <- compute_irr(target_cells_sf = target_cells_sf,
+#                                        grid_cases = grid_cases,
+#                                        grid_rates = grid_rates,
+#                                        mean_rate = mean_rate)
+# 
+#                 } else {
+#                   stats <- tibble(observed = NA,
+#                                   expected = NA,
+#                                   ratio = NA)
+#                 }
+# 
+# 
+#                 stats %>%
+#                   mutate(dist = dist_txt,
+#                          dist_definition = dist_definition,
+#                          mean_dist = mean_dist[x])
+#               })
+#           }
+# 
+#           res %>%
+#             mutate(unit = this_unit,
+#                    period = this_period)
+#         })
+#     })
+# 
+#   if (by_region) {
+#     res <- res %>% rename(AFRO_region = unit)
+#   } else {
+#     res <- res %>% rename(country = unit)
+#   }
+# 
+#   res %>%
+#     rowwise() %>%
+#     mutate(lo = exp(log(ratio) - 1.96 * sqrt(1/observed + 1/expected)),
+#            hi = exp(log(ratio) + 1.96 * sqrt(1/observed + 1/expected))) %>%
+#     ungroup()
+# }
 
 
 # 
@@ -3448,3 +3376,85 @@ ggsave(p_targets_v2_2011_2015_supp, filename = str_glue("{opt$out_dir}/{opt$out_
 #   scale_fill_manual(values = taxdat:::colors_endemicity()) +
 #   labs(y = "proportion of locations (ADM2 or lower)", x = "") +
 #   guides(fill = "none")
+#   
+#   
+#   
+## Figure 2A: national-level scatterplot  
+## # p_fig2A <- combined_mai_changes %>% 
+#   ggplot(aes(x = log10(`2011-2015`*1e5), 
+#              y =  log10(`2016-2020`*1e5), 
+#              col = region)) +
+#   geom_abline(lty = 2, lwd = .2) +
+#   geom_point(aes(pch = admin_level,
+#                  # alpha = admin_level
+#                  size = admin_level)) +
+#   ggrepel::geom_label_repel(
+#     aes(label = country, size = admin_level, alpha = admin_level),
+#     min.segment.length = 0,
+#     nudge_x = 0,
+#     # nudge_y = .1,
+#     max.overlaps = Inf, 
+#     xlim = c(-1, 2.2),
+#     ylim = c(-1, 2.2)) +
+#   scale_size_manual(values = c(6, 4, 2.5)) +
+#   scale_shape_manual(values = c(15, 17, 16)) +
+#   # scale_alpha_manual(values = c(1, 1, .75)) +
+#   scale_color_manual(values = c("black", colors_afro_regions())) +
+#   theme_bw() +
+#   guides(color = guide_legend("WHO regions")) +
+#   scale_x_continuous(limits = c(-1.1, 2.3),
+#                      breaks = seq(-1, 2),
+#                      labels = formatC(10^(seq(-1, 2)),
+#                                       digits = 1,
+#                                       format = "fg", 
+#                                       big.mark = ",")) +
+#   scale_y_continuous(limits = c(-1.1, 2.3),
+#                      breaks = seq(-1, 2),
+#                      labels = formatC(10^(seq(-1, 2)),
+#                                       digits = 1,
+#                                       format = "fg", 
+#                                       big.mark = ",")) +
+#   labs(x = "Incidence rate 2011-2015\n[cases per 100,000/year]",
+#        y = "Incidence rate 2016-2020\n[cases per 100,000/year]") +
+#   guides(color = "none", size = "none", shape = "none", alpha = "none") +
+#   scale_alpha_manual(values = c(1, 1, 0)) #+
+# # scale_size_manual(values = c(3, 2, .7))
+# 
+# 
+# # Save
+# ggsave(p_fig2A,
+#        file = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_2A.png"),
+#        width = 8,
+#        height = 7, 
+#        dpi = 150)
+
+# p_frac_regions <- ob_count_dat %>% 
+#   filter(AFRO_region != "overall")  %>%
+#   mutate(occurrence = str_remove_all(occurrence, "       ")) %>% 
+#   ggplot(aes(x = frac, y = AFRO_region, fill = endemicity)) +
+#   geom_bar(stat = "identity") +
+#   theme_bw() +
+#   scale_fill_manual(values = taxdat:::colors_endemicity()) +
+#   labs(x = "proportion of locations (ADM2 or lower)", y = "") +
+#   guides(fill = "none") +
+#   facet_grid(factor(occurrence, levels = c("cholera\nobserved","no cholera\nobserved")) ~ ., switch = "y") +
+#   theme(strip.placement = "outer")
+
+
+# p_ob_frac_comb <- cowplot::plot_grid(
+#   p_frac_overall +
+#     theme(plot.margin = unit(c(1, .3, 0, 1), units = "lines")) +
+#     theme(axis.text = element_text(size = 8, hjust = .5),
+#           axis.title = element_text(size = 10)),
+#   p_frac_regions +
+#     theme(plot.margin = unit(c(1, 1, 0, 1), units = "lines"),
+#           strip.switch.pad.grid = unit(.7, units = "line")) +
+#     theme(axis.text = element_text(size = 8),
+#           axis.title = element_text(size = 10),
+#           strip.text = element_text(size = 8)), 
+#   nrow = 1,
+#   labels = c("b", "c"),
+#   rel_widths = c(.6, 1),
+#   align = "h",
+#   axis = "tb"
+# )

From 0213cf993f1bebd2234cafd876c1091af73d97d4 Mon Sep 17 00:00:00 2001
From: javierps <javier.perezsaez@gmail.com>
Date: Thu, 29 Aug 2024 11:57:30 +0200
Subject: [PATCH 5/5] more cleanup on final figures script

---
 Analysis/R/make_final_figures_and_tables.R    | 156 +++++++++---------
 packages/taxdat/DESCRIPTION                   |   2 +-
 packages/taxdat/NAMESPACE                     |   1 +
 packages/taxdat/R/output_figures_and_tables.R |  10 +-
 4 files changed, 91 insertions(+), 78 deletions(-)

diff --git a/Analysis/R/make_final_figures_and_tables.R b/Analysis/R/make_final_figures_and_tables.R
index e357599a2..7f262309f 100644
--- a/Analysis/R/make_final_figures_and_tables.R
+++ b/Analysis/R/make_final_figures_and_tables.R
@@ -963,38 +963,6 @@ if (opt$redo | !file.exists(opt$bundle_filename)) {
   mai_adm_all <- mai_adm_all  %>% 
     inner_join(population %>% select(location_period_id, pop = mean, period)) 
   
-  # ## Incidence ratios around rivers and lakes -----
-  # 
-  # dist_sf <- list("rivers" = rivers_sf,
-  #                 "lakes" = lakes_sf,
-  #                 "coasts" = coasts_sf,
-  #                 "freshwater" = bind_rows(
-  #                   rivers_sf,
-  #                   lakes_sf
-  #                 ),
-  #                 "water" = bind_rows(
-  #                   rivers_sf,
-  #                   lakes_sf,
-  #                   coasts_sf
-  #                 ))
-  # 
-  # 
-  # irr_dat <- map_df(seq_along(dist_sf), function(x) {
-  #   map_df(c("within", "category"), function(y) {
-  #     cat("---- ", names(dist_sf)[x], y, "\n")
-  #     
-  #     irr_dist(dist_sf[[x]],
-  #              dist_vec = seq(5e3, 100e3, by = 10e3),
-  #              mai_adm = mai_adm,
-  #              mai_adm_all = mai_adm_all,
-  #              grid_cases = grid_cases,
-  #              afr_sf = afr_sf,
-  #              dist_definition = y,
-  #              by_region = T) %>% 
-  #       mutate(what = names(dist_sf)[x])
-  #   })
-  # })
-  
   ## Compute stats for targetting analysis  ---------------------------------------
   
   make_adm2_results <- function(p) {
@@ -1337,7 +1305,7 @@ if (opt$redo | !file.exists(opt$bundle_filename)) {
 
 # Figure 1: cases ---------------------------------------------------------
 
-## Figure 1A: gridded cases by time period --------------------------------
+## Figure 1A: gridded cases by time period ---------
 
 # Gridded maps of cases
 p_fig1A <- output_plot_map(sf_obj = grid_cases %>% 
@@ -1365,7 +1333,7 @@ if (opt$save_panel_figures) {
          dpi = 300)
 }
 
-## Figure 1B: cases by region and time period --------------------------------
+## Figure 1B: cases by region and time period ---------
 # Horizontal barplot of cases by region
 p_fig1B <- cases_by_region %>% 
   mutate(AFRO_region = factor(AFRO_region, levels = get_AFRO_region_levels()),
@@ -1418,12 +1386,13 @@ p_fig1 <- cowplot::plot_grid(
   rel_heights = c(.35, 1)
 )  + theme(panel.background = element_rect(fill = "white", color = "white"))
 
-p_fig1_v2 <- ggdraw() +
+# Add inset of WHO regions
+p_fig1 <- ggdraw() +
   draw_plot(p_fig1) +
   draw_plot(p_fig_BC_regions, x = 0.78, y = .77, width = .23, height = .23)
 
 # Save
-ggsave(plot = p_fig1_v2,
+ggsave(plot = p_fig1,
        filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_1.png"),
        width = 10,
        height = 7.5,
@@ -1687,6 +1656,7 @@ risk_pop_regions <- pop_at_risk_regions %>%
   mutate(risk_cat = ifelse(risk_cat == ">100","\u2265100",risk_cat)) %>% 
   mutate(risk_cat = factor(risk_cat,levels = c("<1","1-10","10-20","20-50","50-100","\u2265100" )))
 
+# Figure 3A
 p_fig3A <- risk_pop_regions %>%
   ggplot(aes(y = risk_cat, x = mean)) +
   geom_bar(aes(fill = AFRO_region), stat = "identity", width = .5) +
@@ -1767,8 +1737,9 @@ ggsave(plot = p_fig3,
        dpi = 600)
 
 
-# Figure 4 ----------------------------------------------------------------
+# Figure 4:  10-year risk categories -------------------------------------
 
+# Compute 10-year risk categories
 endemicity_df_50_v2 <- risk_pop_50_adm2 %>% 
   mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
          low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% 
@@ -1789,28 +1760,8 @@ endemicity_df_50_v2 <- risk_pop_50_adm2 %>%
 
 saveRDS(endemicity_df_50_v2, file = str_glue("{opt$output_dir}/endemicity_50.rds"))
 
-endemicity_df_95_v2 <- risk_pop_95_adm2 %>% 
-  mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
-         low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% 
-  group_by(country, location_period_id) %>% 
-  summarise(
-    endemicity = case_when(
-      sum(high_risk) == 2 ~ "high-both",
-      sum(low_risk) == 2 ~ "low-both",
-      sum(high_risk) == 1 ~ "high-either",
-      T ~ "mix"),
-    pop = max(pop)
-  ) %>% 
-  mutate(
-    endemicity = factor(endemicity, 
-                        levels = c("high-both", "high-either", "mix", "low-both"),
-                        labels = c("sustained high", "history of high", "history of moderate", "sustained low")))  
-
-saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds"))
-
-
-## Figure 4A: Alluvial plot ----
 
+## Figure 4A: Alluvial plot ---------
 # Process data for alluvial plot
 for_alluvial <- risk_pop_50_adm2 %>% 
   as_tibble() %>% 
@@ -1862,7 +1813,7 @@ p_fig4A <- for_alluvial %>%
   guides(fill = "none")
 
 
-## Figure 4B: Map of 10-year categories ----
+## Figure 4B: Map of 10-year categories ---------
 
 # Tile for legend
 hrisk_cat <- taxdat::get_risk_cat_dict()[-c(1:2)]
@@ -1908,6 +1859,7 @@ p_fig4B <- endemicity_df_50_v2 %>%
         panel.background = element_rect(fill = "white", color = "white")) +
   guides(fill = guide_legend("10-year incidence\ncategory"))
 
+
 # Add legend to map
 p_fig4B_legend <- ggdraw(
   p_fig4B +
@@ -1929,7 +1881,7 @@ if (opt$save_panel_figures) {
 }
 
 
-## Assemble Figure 4 ----
+## Assemble Figure 4 ---------
 
 p_fig4 <- plot_grid(
   p_fig4A + 
@@ -1961,8 +1913,7 @@ load(str_c(opt$output_dir, "/recent_cholera_outbreaks_res.rdata"))
 final_joins <- final_joins %>% 
   mutate(admin_level = str_c("ADM", admin_level))
 
-
-## Figure 5A: Map of cholera occurrence locations ----
+## Figure 5A: Map of cholera occurrence locations ---------
 p_ob_map <- endemicity_df_50_v2 %>% 
   inner_join(u_space_sf, .) %>% 
   select(-admin_level) %>% 
@@ -2003,7 +1954,7 @@ if (opt$save_panel_figures) {
          dpi = 100)
 }
 
-## Figure 5A: Distribution of 10-year risk categories among ADM2 locations ----
+## Figure 5A: Distribution of 10-year risk categories among ADM2 locations ---------
 ob_count_dat <-  obs_outbreaks %>% 
   mutate(endemicity = stringr::str_remove_all(endemicity, " risk")) %>% 
   mutate(occurrence = "       cholera\n       observed") %>% 
@@ -2033,7 +1984,7 @@ p_frac_overall <- ob_count_dat %>%
   coord_flip()
 
 
-## Figure 5B: Cholera occurrence model estimates ----
+## Figure 5B: Cholera occurrence model estimates ---------
 pd1 <- position_dodge(.4)
 pd2 <- position_dodge(.3)
 
@@ -2105,7 +2056,7 @@ p_or <- plot_grid(
   theme(plot.margin = unit(c(1, 3, 1, 3), units = "lines"))
 
 
-## Assemble Figure 5 ----
+## Assemble Figure 5 ---------
 
 p_fig5 <- plot_grid(
   plot_grid(
@@ -2132,11 +2083,11 @@ ggsave(plot = p_fig5,
        dpi = 300)
 
 
-
 # Figure 6: targetting ----------------------------------------------------
-
+# Compare strategies to target populations
 target_pop_levels <- c(1e7, seq(5e7, 4e8, by = 5e7))
 
+# Compute population covered by interventions
 pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
   print(x)
   cumul_pop_frac_stats_2016_2020 %>% 
@@ -2148,6 +2099,7 @@ pop_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
     })
 })
 
+# Compute fraction of population covered by interventions
 case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
   print(x)
   cumul_case_frac_stats_2016_2020 %>% 
@@ -2159,6 +2111,7 @@ case_frac_sel_2016_2020 <- map_df(target_pop_levels, function(x) {
     })
 })
 
+# Compute fraction of population covered by interventions
 case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) {
   print(x)
   cumul_case_frac_stats_2011_2015 %>% 
@@ -2171,14 +2124,6 @@ case_frac_sel_2011_2015 <- map_df(target_pop_levels, function(x) {
 })
 
 # Plot
-colors_ranking <- function() {
-  c("2011-2015" = "purple", 
-    "2016-2020" = "orange", 
-    "2011-2020" = "darkgreen", 
-    "2022-2023"= "darkgray",
-    "optimal"= "gray")
-}
-
 pd <- position_dodge(width = .8, preserve = "single")
 
 data_for_figure6_2016_2020 <- bind_rows(
@@ -2203,6 +2148,7 @@ data_for_figure6_2016_2020 <- bind_rows(
 leg_title <- "Period used for\ntargeting"
 pd <- position_dodge(width = 2.5e7, preserve = "single")
 
+# Bar plots of coverage
 p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
   mutate(ranking = case_when(ranking == "optimal" ~ "2022-2023", 
                              T ~ ranking) %>% 
@@ -2257,6 +2203,7 @@ p_targets_v2_2016_2020 <- data_for_figure6_2016_2020 %>%
   ggpattern::scale_pattern_manual(values = c(oracle = "stripe", prospective = "none"))
 
 
+# Arrow labels
 data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>% 
   filter(target_pop == 1e8,
          str_detect(what, "cases")) %>% 
@@ -2280,6 +2227,7 @@ data_arrows_figure6_v2_2016_2020 <- data_for_figure6_2016_2020 %>%
                    str_glue("unreached {what_label[1]}"))) %>% 
   slice(1)
 
+## Assemble Figure 6 ----------
 p_targets2_2016_2020 <- p_targets_v2_2016_2020 +
   theme(legend.background = element_blank()) +
   geom_segment(
@@ -2302,13 +2250,15 @@ p_targets2_2016_2020 <- p_targets_v2_2016_2020 +
   geom_hline(aes(yintercept = 1), color = "darkgray", lty = 2, lwd = .6)
 
 
+# Save
 ggsave(p_targets2_2016_2020, 
        filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6.png"),
        width = 12, height = 5.5, dpi = 300)
 
-# Supplementary figures ---------------------------------------------------
 
 
+# Supplementary figures ---------------------------------------------------
+
 ## Model fit ----
 # Scatter plot of ADM0 level units
 # Scatter plot of all admin units for mean of observations
@@ -2498,6 +2448,27 @@ ggsave(plot = p_fig3_95,
        dpi = 600)
 
 ### Figure 4 supplement figures (95% cutoff) ----
+
+endemicity_df_95_v2 <- risk_pop_95_adm2 %>% 
+  mutate(high_risk = risk_cat %in% get_risk_cat_dict()[3:6],
+         low_risk = risk_cat %in% get_risk_cat_dict()[1]) %>% 
+  group_by(country, location_period_id) %>% 
+  summarise(
+    endemicity = case_when(
+      sum(high_risk) == 2 ~ "high-both",
+      sum(low_risk) == 2 ~ "low-both",
+      sum(high_risk) == 1 ~ "high-either",
+      T ~ "mix"),
+    pop = max(pop)
+  ) %>% 
+  mutate(
+    endemicity = factor(endemicity, 
+                        levels = c("high-both", "high-either", "mix", "low-both"),
+                        labels = c("sustained high", "history of high", "history of moderate", "sustained low")))  
+
+saveRDS(endemicity_df_95_v2, file = str_glue("{opt$output_dir}/endemicity_95.rds"))
+
+
 p_fig4C_95 <- endemicity_df_95_v2 %>% 
   inner_join(u_space_sf, .) %>% 
   output_plot_map(sf_obj = .,
@@ -2952,7 +2923,40 @@ p_targets_v2_2011_2015_supp <- data_for_figure6_2011_2015 %>%
 ggsave(p_targets_v2_2011_2015_supp, filename = str_glue("{opt$out_dir}/{opt$out_prefix}_fig_6_supp.png"),
        width = 7.5, height = 5.5, dpi = 300)
 
+
+
 # Scraps ------------------------------------------------------------------
+# Incidence ratios around rivers and lakes 
+# 
+# dist_sf <- list("rivers" = rivers_sf,
+#                 "lakes" = lakes_sf,
+#                 "coasts" = coasts_sf,
+#                 "freshwater" = bind_rows(
+#                   rivers_sf,
+#                   lakes_sf
+#                 ),
+#                 "water" = bind_rows(
+#                   rivers_sf,
+#                   lakes_sf,
+#                   coasts_sf
+#                 ))
+# 
+# 
+# irr_dat <- map_df(seq_along(dist_sf), function(x) {
+#   map_df(c("within", "category"), function(y) {
+#     cat("---- ", names(dist_sf)[x], y, "\n")
+#     
+#     irr_dist(dist_sf[[x]],
+#              dist_vec = seq(5e3, 100e3, by = 10e3),
+#              mai_adm = mai_adm,
+#              mai_adm_all = mai_adm_all,
+#              grid_cases = grid_cases,
+#              afr_sf = afr_sf,
+#              dist_definition = y,
+#              by_region = T) %>% 
+#       mutate(what = names(dist_sf)[x])
+#   })
+# })
 # compute_irr <- function(target_cells_sf,
 #                         grid_cases,
 #                         grid_rates,
diff --git a/packages/taxdat/DESCRIPTION b/packages/taxdat/DESCRIPTION
index 2e5809f59..a50603932 100644
--- a/packages/taxdat/DESCRIPTION
+++ b/packages/taxdat/DESCRIPTION
@@ -10,7 +10,7 @@ Encoding: UTF-8
 License: GPL (>= 2)
 Depends: R (>= 3.4),ISOcodes,magrittr, dplyr, tidyr, igraph, geodata
 Suggests: readxl, sf, sp, raster, rgdal, testthat, knitr, rmarkdown
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Collate:
     'plot_cache_function.R'
     'get_genquant.R'
diff --git a/packages/taxdat/NAMESPACE b/packages/taxdat/NAMESPACE
index f5b98ce60..19a2294e7 100644
--- a/packages/taxdat/NAMESPACE
+++ b/packages/taxdat/NAMESPACE
@@ -110,6 +110,7 @@ export(close_parallel_setup)
 export(collapse_grid)
 export(color_scale)
 export(colors_afro_regions)
+export(colors_ranking)
 export(compute_adjustment_UN_population)
 export(compute_cumul_proportion_thresh)
 export(compute_mean_rate)
diff --git a/packages/taxdat/R/output_figures_and_tables.R b/packages/taxdat/R/output_figures_and_tables.R
index 25dc21dae..b11c7c7df 100644
--- a/packages/taxdat/R/output_figures_and_tables.R
+++ b/packages/taxdat/R/output_figures_and_tables.R
@@ -24,7 +24,6 @@ colors_endemicity <- function(){
   c("#993404", "#D95F0E", "#FED98E", "gray")
 }
 
-
 colors_periods <- function(){c("purple", "orange")}
 
 colors_risk_categories <- function() {
@@ -41,6 +40,15 @@ colors_afro_regions <- function(){
   colors
 }
 
+
+#' @export
+colors_ranking <- function() {
+  c("2011-2015" = "purple", 
+    "2016-2020" = "orange", 
+    "2011-2020" = "darkgreen", 
+    "2022-2023"= "darkgray",
+    "optimal"= "gray")
+}
 # Figure functions --------------------------------------------------------
 
 #' output_plot_map