From b8895ae79712a78ec076606847da254d7233f1b7 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Thu, 22 Feb 2024 17:45:21 +0100 Subject: [PATCH 01/18] wip --- .Rbuildignore | 2 + .gitignore | 4 + DESCRIPTION | 10 ++ R/plotHeatmaps.R | 93 ++++++++++++++++ R/sysdata.rda | Bin 0 -> 107 bytes R/validateScenarios.R | 181 +++++++++++++++++++++++++++++++ R/zzz.R | 3 + README.md | 7 ++ inst/config/validationConfig.csv | 21 ++++ piamValidation.Rproj | 20 ++++ 10 files changed, 341 insertions(+) create mode 100644 .gitignore create mode 100644 R/plotHeatmaps.R create mode 100644 R/sysdata.rda create mode 100644 R/validateScenarios.R create mode 100644 R/zzz.R create mode 100644 inst/config/validationConfig.csv create mode 100644 piamValidation.Rproj diff --git a/.Rbuildignore b/.Rbuildignore index 4c1e99a..298f71d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,5 @@ ^Makefile$ ^workflow$ ^.*CITATION.cff$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION index 5641a1b..22d83c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,17 @@ Authors@R: Description: The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. License: LGPL-3 URL: https://github.com/pik-piam/piamValidation +Imports: + dplyr (>= 1.1.1), + ggplot2, + ggthemes, + plotly, + quitte (>= 0.3123.0), + readxl, + tidyr Suggests: testthat Encoding: UTF-8 RoxygenNote: 7.3.1 +Depends: + R (>= 2.10) diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R new file mode 100644 index 0000000..0ab5416 --- /dev/null +++ b/R/plotHeatmaps.R @@ -0,0 +1,93 @@ + + +validationHeatmap <- function(d, var, cat, metric, interactive = T) { + + # possible extension: when giving multiple vars, plot as facets in same row + + d <- filter(d, variable == var) + + d$period <- as.character(d$period) + + # text for tooltip, category 1 + if (cat == 1) { + d <- d %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Ref_Value: ", round(ref_value,2), "\n", + "Ref_Source: ", ref_model, "\n", + "Thresholds: \n", + ifelse(metric == "relative", + paste0("Max: ", max_yel*100, "% / ", max_red*100, "%"), + paste0("Max: ", max_yel, " / ", max_red)) + ) + ) + } + + # text for tooltip, category 2 + if (cat == 2) { + d <- d %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + ifelse(metric == "relative", + paste0("Ref Value: ", round(ref_value,2), "\n", + "Rel Deviation: ", round(check_value,2)*100, "% \n", + "Thresholds: \n", + "Min: ", min_yel*100, "% / ", min_red*100,"% \n", + "Max: ", max_yel*100, "% / ", max_red*100, "%"), + # for metric == "absolute" + paste0("Thresholds: \n", + "Min: ", min_yel, " / ", min_red,"\n", + "Max: ", max_yel, " / ", max_red) + ) + ) + ) + } + + if (cat == 3) { + d <- d %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Avg. growth/yr: ", round(check_value,2)*100, "% \n", + "Absolute value: ", round(value,2), " \n", + "Thresholds: \n", + paste0("Min: ", min_yel*100, "% / ", min_red*100, "% \n", + "Max: ", max_yel*100, "% / ", max_red*100, "%") + ) + ) + } + + # classic ggplot, with text in aes + p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) + + geom_tile(color="white", linewidth=0.0) + + scale_fill_manual(values = colors, breaks = colors) + + facet_grid(scenario~.) + # make it beautiful + # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2/ + p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]")) + p <- p + theme_tufte(base_family="Helvetica") + p <- p + theme(axis.ticks=element_blank()) + p <- p + theme(axis.text=element_text(size=7)) + p <- p + coord_equal() + p <- p + theme(legend.position = "none") + + + # p + theme(panel.spacing = unit(2, "lines")) + + # not great, only works with "World" being the first region + if("World" %in% d$region) { + p <- p + geom_vline(xintercept = 1.5, linewidth = 0.8, color = "white") + } + fig <- ggplotly(p, tooltip="text") + + # improve plotly layout, kinda works but very manual + #fig <- fig %>% subplot(heights = 0.3) %>% + # layout(title = list(y=0.64)) + if (interactive) { + return(fig) + } else { + return(p) + } + +} diff --git a/R/sysdata.rda b/R/sysdata.rda new file mode 100644 index 0000000000000000000000000000000000000000..26ce3b89e068b8251b177ee456e6eadf44977611 GIT binary patch literal 107 zcmV-x0F?hiT4*^jL0KkKS#gl0Apigye}McjL;wH*5C9AT5I`@djQ{`vAOKV~O&E*> z(WXPupwLkLP&5xn05LR}UrHb~g=+$YQ0yH-XC_UbsDH4F=e-l>w+%rMG8oZUz)78Q NF64@Ep&;WSNJ8#sB|QKD literal 0 HcmV?d00001 diff --git a/R/validateScenarios.R b/R/validateScenarios.R new file mode 100644 index 0000000..7fd1416 --- /dev/null +++ b/R/validateScenarios.R @@ -0,0 +1,181 @@ +# import data +# supported formats: +# - .mif +# - + + +# dev helpers +#configName <- "validationConfig.csv" +#usethis::use_data(configName, internal = T) + +importData <- function() { + # for dev purposes + if (file.exists("mif.rds")) { + data <- readRDS("mif.rds") + } else { + path <- "C:/Users/pascalwe/Data/vs/" + data <- remind2::deletePlus(quitte::read.quitte( + c(paste0(path, "REMIND_generic_Bal.mif"), + paste0(path, "REMIND_generic_NPi.mif")) + ) + ) + saveRDS(data, "mif.rds") + } + + # change ordering of factors, put last element ("World") first + # TODO: will not work if World is not last + new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1)) + data$region <- factor(data$region, levels = new_order) + + return(data) +} + +importReferenceData <- function() { + # for dev purposes + if (file.exists("hist.rds")) { + hist <- readRDS("hist.rds") + } else { + hist <- quitte::read.quitte(paste0(path, "historical.mif")) + hist <- filter(hist, period >= 1990) + saveRDS(hist, "hist.rds") + } + + # remove all historical data before 1990 + hist <- filter(hist, period >= 1990) + + return(hist) +} + +# required columns: +# - category: "historic" or "scenario" +# - can be empty, contain one element or multiple: +# - model, scenario, region, period +importConfig <- function(configName) { + path <- system.file(paste0("config/", configName) , package = "piamValidation") + cfg <- read.csv2(path, na.strings = "") + return(cfg) +} + + +cleanConfig <- function(cfg) { + # fill empty threshold columns with Infinity for easier evaluation + cfg <- cfg %>% + mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)), + min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)), + max_yel = as.numeric(ifelse(is.na(max_yel), Inf, max_yel)), + max_red = as.numeric(ifelse(is.na(max_red), Inf, max_red)) + ) + + # insert tests here to check if there are forbidden fields in config + return(cfg) +} + + +combineData <- function(data, hist, cfg) { + + # dimensions of data + by <- c("variable", "region", "period", "scenario") + + # full dimensions and important slices + all_reg <- unique(data$region) + all_per <- unique(data$period) # not a factor, convert? + all_per <- all_per[all_per <= 2100] + hist_per <- c(2005, 2010, 2015, 2020) + all_sce <- unique(data$scenario) + + # revert the sorting of factors for better plots + # data$region <- factor(data$region, levels = rev(levels(data$region))) + + # define colors here or later + colors <- c(green = "#008450", yellow = "#EFB700", red = "#B81D13", grey = "#808080") + + ### here, category specifics start + + # category 1 data objects + d1 <- data.frame() + c1 <- cfg[cfg$category == 1, ] + + # assemble data by row of cfg + for (i in 1:nrow(c1)) { + # create filters + # check whether regions, periods, scenarios are specified + # TODO: extend to support excluding elements, multiple elements + reg <- if (is.na(c1[i, "region"])) all_reg else strsplit(c1[i, "region"], split = ", |,")[[1]] + per <- if (is.na(c1[i, "period"])) hist_per else strsplit(as.character(c1[i, "period"]), split = ", |,")[[1]] + sce <- if (is.na(c1[i, "scenario"])) all_sce else strsplit(c1[i, "scenario"], split = ", |, ")[[1]] + + # filter data for each row in cfg + # REMIND data for line i + d <- data %>% + filter(variable == c1[i, "variable"], + region %in% reg, + period %in% per, + scenario %in% sce) + # here?: skip if empty + + # historical data for line i + h <- hist %>% + filter(variable == c1[i, "variable"], + region %in% reg, + period %in% per) %>% + mutate(ref_value = value, ref_model = model) %>% + select(-c("scenario", "unit", "value", "model")) + # remove NA here? + + # in case one or more sources are specified, filter for them + if (!is.na(c1[i, "ref_model"])) { + h <- filter(h, ref_model %in% strsplit(c1[i, "ref_model"], split = ", |,")[[1]]) + } + + # in case of multiple sources, calculate mean (using all available sources) + if (length(unique(h$ref_model)) > 1) { + h_mean <- group_by(h, period, region) %>% + summarise(ref_value = mean(ref_value, na.rm = TRUE)) + h <- mutate(h_mean, variable = c1[i, "variable"], ref_model = "multiple") + } + + # Merge REMIND, historical and config data + # (works as long as we only have one set of thresholds per cfg row) + d <- merge(d, h) %>% + mutate(min_red = c1[i, ]$min_red, + min_yel = c1[i, ]$min_yel, + max_yel = c1[i, ]$max_yel, + max_red = c1[i, ]$max_red, + metric = c1[i, ]$metric) + + d1 <- rbind(d1, d) + } + + return(df) +} + + +evaluateThreshold <- function(df) { + + return(df) +} + + + +# bringing it all together +validateScenario <- function() { + + data <- importData() + + hist <- importReferenceData() + + cfg <- importConfig(configName) + + cfg <- cleanConfig() + + df <- combineData(data, cfg) + + evaluateThreshold(df) + + df <- createTooltip(df) + + return(df) +} + + + diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..6f1da16 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,3 @@ +.onAttach <- function(libname, pkgname) { + packageStartupMessage("Welcome to piamValidation") +} diff --git a/README.md b/README.md index 1585caf..a902907 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,13 @@ R package **piamValidation**, version **0.0.2** The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. +## How to use + +The following categories/metrics require allow the following columns in the cfg: + + + + ## Installation For installation of the most recent package version an additional repository has to be added in R: diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv new file mode 100644 index 0000000..36eb4e3 --- /dev/null +++ b/inst/config/validationConfig.csv @@ -0,0 +1,21 @@ +category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;ref_period;ref_model;ref_scenario +1;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;;EDGAR6; +1;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;;EDGAR6; +1;;;PE|Coal;;;;;;0.1;relative;;BP, IEA; +1;;;PE|Coal;World;;;;;0.2;relative;;BP, IEA; +1;;;PE|Oil;;;;;;2;absolute;;; +1;;;PE|Oil;World;;;;;5;absolute;;; +1;;Bal;FE|Electricity;;;;;;0.1;relative;;; +1;;Bal;FE|Electricity;World;;;;;0.2;relative;;; +2;;;Price|Carbon;;2030;;;300;600;absolute;;; +2;;;Price|Carbon;;2050;;;;1000;absolute;;; +2;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;;; +2;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;;; +2;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;;; +2;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;;; +2;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;;; +2;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;;; +2;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;2020;; +2;;;FE;;2030, 2040;-0.4;-0.2;;;relative;2020;; +2;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;;; +3;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;;; diff --git a/piamValidation.Rproj b/piamValidation.Rproj new file mode 100644 index 0000000..497f8bf --- /dev/null +++ b/piamValidation.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source From 5cd7780b800a3a16beb66a098a762b14516db4d5 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Fri, 23 Feb 2024 16:56:47 +0100 Subject: [PATCH 02/18] combineData, resolveDuplicates, wip --- NAMESPACE | 6 ++ R/combineData.R | 121 +++++++++++++++++++++++++++++++ R/piamValidation-package.R | 6 -- R/plotHeatmaps.R | 26 ++++--- R/resolveDuplicates.R | 40 ++++++++++ R/validateScenarios.R | 102 +++++--------------------- inst/config/validationConfig.csv | 43 +++++------ man/piamValidation-package.Rd | 22 ------ 8 files changed, 223 insertions(+), 143 deletions(-) create mode 100644 R/combineData.R delete mode 100644 R/piamValidation-package.R create mode 100644 R/resolveDuplicates.R delete mode 100644 man/piamValidation-package.Rd diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..00dd4dd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,8 @@ # Generated by roxygen2: do not edit by hand +importFrom(dplyr,"%>%") +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(dplyr,summarise) diff --git a/R/combineData.R b/R/combineData.R new file mode 100644 index 0000000..e4419b4 --- /dev/null +++ b/R/combineData.R @@ -0,0 +1,121 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% + +# for one row of cfg: filter and merge relevant scenario data with cfg +combineData <- function(data, hist, cfg_row) { + + # shorten as it will be used a lot + c <- cfg_row + + # full dimensions and important slices + all_reg <- unique(data$region) + all_per <- unique(data$period) # not a factor, convert? + all_per <- all_per[all_per <= 2100] + hist_per <- c(2005, 2010, 2015, 2020) + all_sce <- unique(data$scenario) + + # create filters + # check whether regions, periods, scenarios are specified + reg <- if (is.na(c$region)) all_reg else + strsplit(c$region, split = ", |,")[[1]] + sce <- if (is.na(c$scenario)) all_sce else + strsplit(c$scenario, split = ", |, ")[[1]] + + # empty "period" field means different years for historic or scenario category + if (c$category == "historic") { + per <- if (is.na(c$period)) hist_per else + strsplit(as.character(c$period), split = ", |,")[[1]] + } else { + per <- if (is.na(c$period)) all_per else + strsplit(as.character(c$period), split = ", |,")[[1]] + } + + # filter scenario data for row in cfg + d <- data %>% + filter(variable == c$variable, + region %in% reg, + period %in% per, + scenario %in% sce) + + # attach respective cfg information to data slice + d <- d %>% + mutate(min_red = c$min_red, # could be set to NA as it is not used + min_yel = c$min_yel, # could be set to NA as it is not used + max_yel = c$max_yel, + max_red = c$max_red, + category = c$category, + metric = c$metric, + critical = c$critical) + + # depending on category: filter and attach reference values if they are needed + if (c$category == "historic") { + # historic data for relevant variable and dimensions (all sources) + h <- hist %>% + filter(variable == c$variable, + region %in% reg, + period %in% per) %>% + mutate(ref_value = value, ref_model = model) %>% + select(-c("scenario", "unit", "value", "model")) + + # TODO: insert test here if ref_model exists and has data to compare to + + # in case one or more sources are specified, filter for them + if (!is.na(c$ref_model)) { + h <- filter( + h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]] + ) + } + + # in case of multiple sources, calculate mean (using all available sources) + if (length(unique(h$ref_model)) > 1) { + h_mean <- group_by(h, period, region) %>% + summarise(ref_value = mean(ref_value, na.rm = TRUE)) + h <- mutate(h_mean, variable = c$variable, ref_model = "multiple") + } + + # merge with historical data adds columns ref_value and ref_model + df <- merge(d, h) + + # add columns which are not used in this category, fill NA + df <- df %>% mutate(ref_period = NA, ref_scenario = NA) + + # filter and attach reference values if they are needed; scenario data + } else if (c$category == "scenario") { + + # no reference values needed for these metrics, fill NA + if (c$metric %in% c("absolute", "growthrate")) { + df <- d %>% + mutate(ref_value = NA, + ref_model = NA, + ref_period = NA, + ref_scenario = NA) + + # get reference values for these metrics + } else if (c$metric %in% c("relative", "difference")) { + # TODO: only works for ref_period, add support for model/scenario as ref + # TODO: support choosing ref_period AND model/scenario + ref <- data %>% + filter(variable == c$variable, + region %in% reg, + period == c$ref_period, + scenario %in% sce) %>% + mutate(ref_value = value, ref_period = period) %>% + select(-c(period, value)) + df <- merge(d, ref) + + # add columns which are not used in this category, fill NA + df <- df %>% + mutate(ref_model = NA, + ref_scenario = NA) + + } else { + # TODO: have this warning here or earlier when cleaning config? + warning("'metric' for category 'scenario' must be either 'absolute', + 'relative', 'difference' or 'growthrate'.") + } + + } else { + warning("'category' must be either 'historic' or 'scenario'.") + } + + return(df) +} diff --git a/R/piamValidation-package.R b/R/piamValidation-package.R deleted file mode 100644 index a65cf64..0000000 --- a/R/piamValidation-package.R +++ /dev/null @@ -1,6 +0,0 @@ -#' @keywords internal -"_PACKAGE" - -## usethis namespace: start -## usethis namespace: end -NULL diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R index 0ab5416..6e22c2a 100644 --- a/R/plotHeatmaps.R +++ b/R/plotHeatmaps.R @@ -1,13 +1,5 @@ - - -validationHeatmap <- function(d, var, cat, metric, interactive = T) { - - # possible extension: when giving multiple vars, plot as facets in same row - - d <- filter(d, variable == var) - - d$period <- as.character(d$period) - +# construct tooltips for interactive plots +appendTooltip <- function(df) { # text for tooltip, category 1 if (cat == 1) { d <- d %>% @@ -58,6 +50,20 @@ validationHeatmap <- function(d, var, cat, metric, interactive = T) { ) } + return(df) +} + + +validationHeatmap <- function(d, var, cat, metric, interactive = T) { + + # possible extension: when giving multiple vars, plot as facets in same row + + d <- filter(d, variable == var) + + d$period <- as.character(d$period) + + + # classic ggplot, with text in aes p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) + geom_tile(color="white", linewidth=0.0) + diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R new file mode 100644 index 0000000..260452e --- /dev/null +++ b/R/resolveDuplicates.R @@ -0,0 +1,40 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% + +resolveDuplicates <- function(df) { + + # TODO: needs to be double checked for edge-cases + # might be easier or safer to handle categories separately + + # check for duplicates + # these are the duplicates that are from the lower rows and should be kept + duplicates <- df[duplicated(df[c("model", "scenario", "variable", + "region", "period", "category", "metric")]), ] + + # here all instances of the data that was duplicated is removed + no_dupl <- dplyr::anti_join(df, duplicates_all, by = c("model", "scenario", "variable", + "region", "period", "category", "metric")) + + df <- rbind(no_dupl, duplicates) + + # different approach: allows partial overwriting + # chosen to not use this anymore, less robust + + # these are missing the duplicated entries that were in the lower rows + # no_dupl <- dplyr::anti_join(df, duplicates_all) + + # overwrite "earlier" values using "later" ones + # BEWARE: requires specific country to come after "all regions" + # d1 <- merge(no_dupl, duplicates_all, by=c("variable", "region", "period", + # "scenario", "unit", "model", "value", + # "ref_model", "ref_value","ref_scenario", + # "ref_period", "category", "metric", "critical"), + # all.x = T) %>% + # mutate(min_red = ifelse(is.na(min_red.y), min_red.x, min_red.y)) %>% + # mutate(min_yel = ifelse(is.na(min_yel.y), min_yel.x, min_yel.y)) %>% + # mutate(max_yel = ifelse(is.na(max_yel.y), max_yel.x, max_yel.y)) %>% + # mutate(max_red = ifelse(is.na(max_red.y), max_red.x, max_red.y)) %>% + # select(-c("min_red.x", "min_yel.x", "max_yel.x", "max_red.x", + # "min_red.y", "min_yel.y", "max_yel.y", "max_red.y")) + + return(df) +} diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 7fd1416..14bc6eb 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,3 +1,5 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% + # import data # supported formats: # - .mif @@ -8,6 +10,8 @@ #configName <- "validationConfig.csv" #usethis::use_data(configName, internal = T) + + importData <- function() { # for dev purposes if (file.exists("mif.rds")) { @@ -50,7 +54,8 @@ importReferenceData <- function() { # - category: "historic" or "scenario" # - can be empty, contain one element or multiple: # - model, scenario, region, period -importConfig <- function(configName) { +# ... +loadConfig <- function(configName) { path <- system.file(paste0("config/", configName) , package = "piamValidation") cfg <- read.csv2(path, na.strings = "") return(cfg) @@ -71,85 +76,6 @@ cleanConfig <- function(cfg) { } -combineData <- function(data, hist, cfg) { - - # dimensions of data - by <- c("variable", "region", "period", "scenario") - - # full dimensions and important slices - all_reg <- unique(data$region) - all_per <- unique(data$period) # not a factor, convert? - all_per <- all_per[all_per <= 2100] - hist_per <- c(2005, 2010, 2015, 2020) - all_sce <- unique(data$scenario) - - # revert the sorting of factors for better plots - # data$region <- factor(data$region, levels = rev(levels(data$region))) - - # define colors here or later - colors <- c(green = "#008450", yellow = "#EFB700", red = "#B81D13", grey = "#808080") - - ### here, category specifics start - - # category 1 data objects - d1 <- data.frame() - c1 <- cfg[cfg$category == 1, ] - - # assemble data by row of cfg - for (i in 1:nrow(c1)) { - # create filters - # check whether regions, periods, scenarios are specified - # TODO: extend to support excluding elements, multiple elements - reg <- if (is.na(c1[i, "region"])) all_reg else strsplit(c1[i, "region"], split = ", |,")[[1]] - per <- if (is.na(c1[i, "period"])) hist_per else strsplit(as.character(c1[i, "period"]), split = ", |,")[[1]] - sce <- if (is.na(c1[i, "scenario"])) all_sce else strsplit(c1[i, "scenario"], split = ", |, ")[[1]] - - # filter data for each row in cfg - # REMIND data for line i - d <- data %>% - filter(variable == c1[i, "variable"], - region %in% reg, - period %in% per, - scenario %in% sce) - # here?: skip if empty - - # historical data for line i - h <- hist %>% - filter(variable == c1[i, "variable"], - region %in% reg, - period %in% per) %>% - mutate(ref_value = value, ref_model = model) %>% - select(-c("scenario", "unit", "value", "model")) - # remove NA here? - - # in case one or more sources are specified, filter for them - if (!is.na(c1[i, "ref_model"])) { - h <- filter(h, ref_model %in% strsplit(c1[i, "ref_model"], split = ", |,")[[1]]) - } - - # in case of multiple sources, calculate mean (using all available sources) - if (length(unique(h$ref_model)) > 1) { - h_mean <- group_by(h, period, region) %>% - summarise(ref_value = mean(ref_value, na.rm = TRUE)) - h <- mutate(h_mean, variable = c1[i, "variable"], ref_model = "multiple") - } - - # Merge REMIND, historical and config data - # (works as long as we only have one set of thresholds per cfg row) - d <- merge(d, h) %>% - mutate(min_red = c1[i, ]$min_red, - min_yel = c1[i, ]$min_yel, - max_yel = c1[i, ]$max_yel, - max_red = c1[i, ]$max_red, - metric = c1[i, ]$metric) - - d1 <- rbind(d1, d) - } - - return(df) -} - - evaluateThreshold <- function(df) { return(df) @@ -158,19 +84,27 @@ evaluateThreshold <- function(df) { # bringing it all together +# TODO: introduce file path and name as argument validateScenario <- function() { data <- importData() hist <- importReferenceData() - cfg <- importConfig(configName) + cfg <- loadConfig(configName) - cfg <- cleanConfig() + cfg <- cleanConfig(cfg) - df <- combineData(data, cfg) - evaluateThreshold(df) + # combine data for each row of the config and bind together + df <- data.frame() + for (i in 1:nrow(cfg)) { + # TODO: hist should be optional, validation should work without "historical" + d <- combineData(data, hist, cfg[i, ]) + df <- rbind(df, d) + } + + df <- evaluateThreshold(df) df <- createTooltip(df) diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv index 36eb4e3..e88860f 100644 --- a/inst/config/validationConfig.csv +++ b/inst/config/validationConfig.csv @@ -1,21 +1,22 @@ -category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;ref_period;ref_model;ref_scenario -1;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;;EDGAR6; -1;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;;EDGAR6; -1;;;PE|Coal;;;;;;0.1;relative;;BP, IEA; -1;;;PE|Coal;World;;;;;0.2;relative;;BP, IEA; -1;;;PE|Oil;;;;;;2;absolute;;; -1;;;PE|Oil;World;;;;;5;absolute;;; -1;;Bal;FE|Electricity;;;;;;0.1;relative;;; -1;;Bal;FE|Electricity;World;;;;;0.2;relative;;; -2;;;Price|Carbon;;2030;;;300;600;absolute;;; -2;;;Price|Carbon;;2050;;;;1000;absolute;;; -2;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;;; -2;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;;; -2;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;;; -2;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;;; -2;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;;; -2;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;;; -2;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;2020;; -2;;;FE;;2030, 2040;-0.4;-0.2;;;relative;2020;; -2;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;;; -3;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;;; +category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;critical;ref_period;ref_model;ref_scenario +historic;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;yes;;EDGAR6; +historic;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;yes;;EDGAR6; +historic;;;PE|Coal;;;;;;0.1;relative;yes;;BP, IEA; +historic;;;PE|Coal;World;;;;;0.2;relative;yes;;BP, IEA; +historic;;;PE|Oil;;;;;;2;difference;yes;;; +historic;;;PE|Oil;World;;;;;5;difference;yes;;; +historic;;Bal;FE|Electricity;;;;;;0.1;relative;yes;;; +historic;;Bal;FE|Electricity;World;;;;;0.2;relative;yes;;; +scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;; +scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;; +scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;; +scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;; +scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;yes;;; +scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;; +scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;yes;;; +scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;; +scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;; +scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;; +scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;; +scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;; +scenario;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;yes;;; diff --git a/man/piamValidation-package.Rd b/man/piamValidation-package.Rd deleted file mode 100644 index 962bd73..0000000 --- a/man/piamValidation-package.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/piamValidation-package.R -\docType{package} -\name{piamValidation-package} -\alias{piamValidation} -\alias{piamValidation-package} -\title{piamValidation: Validation Tools for PIK-PIAM} -\description{ -The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. -} -\seealso{ -Useful links: -\itemize{ - \item \url{https://github.com/pik-piam/piamValidation} -} - -} -\author{ -\strong{Maintainer}: Pascal Weigmann \email{pascal.weigmann@pik-potsdam.de} - -} -\keyword{internal} From 8df0e19ea1df751e639ceec7d36a7f38615cf443 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Tue, 27 Feb 2024 17:56:08 +0100 Subject: [PATCH 03/18] vs wip --- NAMESPACE | 3 + R/appendTooltips.R | 86 +++++++++++++++++++++++++++++ R/combineData.R | 1 + R/evaluateThresholds.R | 94 ++++++++++++++++++++++++++++++++ R/plotHeatmaps.R | 87 ++++++++++------------------- R/resolveDuplicates.R | 3 +- R/validateScenarios.R | 15 +++-- inst/config/validationConfig.csv | 2 +- 8 files changed, 225 insertions(+), 66 deletions(-) create mode 100644 R/appendTooltips.R create mode 100644 R/evaluateThresholds.R diff --git a/NAMESPACE b/NAMESPACE index 00dd4dd..502b1b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,11 @@ # Generated by roxygen2: do not edit by hand +import(ggplot2) importFrom(dplyr,"%>%") importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,summarise) +importFrom(ggthemes,theme_tufte) +importFrom(plotly,ggplotly) diff --git a/R/appendTooltips.R b/R/appendTooltips.R new file mode 100644 index 0000000..9470b2a --- /dev/null +++ b/R/appendTooltips.R @@ -0,0 +1,86 @@ +# construct tooltips for interactive plots +appendTooltips <- function(df) { + + df$text <- NA + + # historic - relative + df[df$category == "historic" & df$metric == "relative", ] <- + df[df$category == "historic" & df$metric == "relative", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Ref_Value: ", round(ref_value,2), "\n", + "Ref_Source: ", ref_model, "\n", + "Thresholds: \n", + paste0("Max: ", max_yel*100, "% / ", max_red*100, "%") + ) + ) + + # historic - difference + df[df$category == "historic" & df$metric == "difference", ] <- + df[df$category == "historic" & df$metric == "difference", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Ref_Value: ", round(ref_value,2), "\n", + "Ref_Source: ", ref_model, "\n", + "Thresholds: \n", + "Max: ", max_yel, " / ", max_red + ) + ) + + # scenario - relative + df[df$category == "scenario" & df$metric == "relative", ] <- + df[df$category == "scenario" & df$metric == "relative", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Ref Value: ", round(ref_value,2), "\n", + "Rel Deviation: ", round(check_value,2)*100, "% \n", + "Thresholds: \n", + "Min: ", min_yel*100, "% / ", min_red*100,"% \n", + "Max: ", max_yel*100, "% / ", max_red*100, "%" + ) + ) + + # scenario - difference + df[df$category == "scenario" & df$metric == "difference", ] <- + df[df$category == "scenario" & df$metric == "difference", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Ref Value: ", round(ref_value,2), "\n", + "Difference: ", round(check_value,2), "\n", + "Thresholds: \n", + "Min: ", min_yel, " / ", min_red,"\n", + "Max: ", max_yel, " / ", max_red + ) + ) + + # scenario - absolute + df[df$category == "scenario" & df$metric == "absolute", ] <- + df[df$category == "scenario" & df$metric == "absolute", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Value: ", round(value,2), "\n", + "Thresholds: \n", + "Min: ", min_yel, " / ", min_red,"\n", + "Max: ", max_yel, " / ", max_red + ) + ) + + # scenario - growthrate + df[df$category == "scenario" & df$metric == "absolute", ] <- + df[df$category == "scenario" & df$metric == "absolute", ] %>% + mutate(text = paste0(region, "\n", + period, "\n", + "Avg. growth/yr: ", round(check_value,2)*100, "% \n", + "Absolute value: ", round(value,2), " \n", + "Thresholds: \n", + "Min: ", min_yel*100, "% / ", min_red*100, "% \n", + "Max: ", max_yel*100, "% / ", max_red*100, "%" + ) + ) + + return(df) +} diff --git a/R/combineData.R b/R/combineData.R index e4419b4..7b8e26c 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -1,6 +1,7 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% # for one row of cfg: filter and merge relevant scenario data with cfg +# results in one df that contains scenario data, reference data and thresholds combineData <- function(data, hist, cfg_row) { # shorten as it will be used a lot diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R new file mode 100644 index 0000000..125992c --- /dev/null +++ b/R/evaluateThresholds.R @@ -0,0 +1,94 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% + +evaluateThresholds <- function(df) { + + # perform evaluation for each category and metric separately + # (shortcuts are possible but will decrease readability of the code for minor + # performance improvements) + + # calculate values that will be compared to thresholds + # historic - relative + his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>% + mutate(check_value = ifelse( + is.na(ref_value), + NA, + # absolute relative deviation above/below reference + abs((value - ref_value) / ref_value) + ) + ) + + # historic - difference + his_dif <- df[df$category == "historic" & df$metric == "difference", ] %>% + # absolute difference to reference + mutate(check_value = abs(value - ref_value)) + + # scenario - relative + sce_rel <- df[df$category == "scenario" & df$metric == "relative", ] %>% + mutate(check_value = ifelse( + is.na(ref_value), + NA, + # relative deviation above/below reference + (value - ref_value)/ref_value + ) + ) + + # scenario - difference + sce_dif <- df[df$category == "scenario" & df$metric == "difference", ] %>% + mutate(check_value = ifelse( + is.na(ref_value), + NA, + # difference to reference + value - ref_value + ) + ) + + # scenario - absolute + sce_abs <- df[df$category == "scenario" & df$metric == "absolute", ] %>% + mutate(check_value = value) + + # scenario - growthrate + # calculate average growth rate of the last 5 years between 2010 and 2060 + # TODO: in case of a need to look further than 2060, split df and add + # calculations for 10-year step periods + sce_gro <- filter(df[df$category == "scenario" & df$metric == "growthrate", ], + period <= 2060 & period >= 2010) + + # add a column with the value 5 years ago for later calculations + tmp <- sce_gro + sce_gro$period <- sce_gro$period + 5 + sce_gro <- sce_gro %>% + mutate(value_5y_ago = value) %>% + select(-value) %>% + merge(tmp) + + # calculate the yearly average growth rate + sce_gro <- sce_gro %>% + mutate(check_value = (value/value_5y_ago)^(1/5) - 1) %>% + select(-value_5y_ago) + + + # reassemble data.frame + df <- do.call("rbind", + list(his_rel, his_dif, sce_rel, sce_dif, sce_abs, sce_gro)) + + # perform comparison to thresholds for whole data.frame at once + df <- df %>% + mutate(check = ifelse(is.na(check_value), + "grey", + ifelse( + # first check whether red threshold is violated... + check_value > max_red | check_value < min_red, + "red", + # otherwise check if yellow threshold is violated... + ifelse( + check_value > max_yel | check_value < min_yel, + "yellow", + # ... else green + "green" + ) + ) + ) + ) + + return(df) +} diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R index 6e22c2a..5284642 100644 --- a/R/plotHeatmaps.R +++ b/R/plotHeatmaps.R @@ -1,67 +1,38 @@ -# construct tooltips for interactive plots -appendTooltip <- function(df) { - # text for tooltip, category 1 - if (cat == 1) { - d <- d %>% - mutate(text = paste0(region, "\n", - period, "\n", - "Value: ", round(value,2), "\n", - "Ref_Value: ", round(ref_value,2), "\n", - "Ref_Source: ", ref_model, "\n", - "Thresholds: \n", - ifelse(metric == "relative", - paste0("Max: ", max_yel*100, "% / ", max_red*100, "%"), - paste0("Max: ", max_yel, " / ", max_red)) - ) - ) - } - - # text for tooltip, category 2 - if (cat == 2) { - d <- d %>% - mutate(text = paste0(region, "\n", - period, "\n", - "Value: ", round(value,2), "\n", - ifelse(metric == "relative", - paste0("Ref Value: ", round(ref_value,2), "\n", - "Rel Deviation: ", round(check_value,2)*100, "% \n", - "Thresholds: \n", - "Min: ", min_yel*100, "% / ", min_red*100,"% \n", - "Max: ", max_yel*100, "% / ", max_red*100, "%"), - # for metric == "absolute" - paste0("Thresholds: \n", - "Min: ", min_yel, " / ", min_red,"\n", - "Max: ", max_yel, " / ", max_red) - ) - ) - ) - } +#' @importFrom dplyr filter select mutate %>% +#' @import ggplot2 +#' @importFrom ggthemes theme_tufte +#' @importFrom plotly ggplotly - if (cat == 3) { - d <- d %>% - mutate(text = paste0(region, "\n", - period, "\n", - "Avg. growth/yr: ", round(check_value,2)*100, "% \n", - "Absolute value: ", round(value,2), " \n", - "Thresholds: \n", - paste0("Min: ", min_yel*100, "% / ", min_red*100, "% \n", - "Max: ", max_yel*100, "% / ", max_red*100, "%") - ) - ) - } +# takes the output of "validateScenarios()" and plots heatmaps per variable +validationHeatmap <- function(df, var, cat, met, interactive = T) { - return(df) -} + # possible extension: when giving multiple vars, plot as facets in same row + # prepare data slice + d <- df %>% + filter(variable == var, + category == cat, + metric == met) -validationHeatmap <- function(d, var, cat, metric, interactive = T) { + if (nrow(d) == 0) { - # possible extension: when giving multiple vars, plot as facets in same row + df <- df %>% mutate(cm = paste(category, metric, sep = "-")) + unique(df[df$variable == "PE|Coal", "cm"]) - d <- filter(d, variable == var) + stop(paste0( + "No data found for variable in this category and metric.\n + variable ", var ," is available for the following category-metric + combinations: ", unique(df[df$variable == "PE|Coal", "cm"] + ) + ) + ) + } d$period <- as.character(d$period) - + colors <- c(green = "#008450", + yellow = "#EFB700", + red = "#B81D13", + grey = "#808080") # classic ggplot, with text in aes @@ -70,9 +41,9 @@ validationHeatmap <- function(d, var, cat, metric, interactive = T) { scale_fill_manual(values = colors, breaks = colors) + facet_grid(scenario~.) # make it beautiful - # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2/ + # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2 p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]")) - p <- p + theme_tufte(base_family="Helvetica") + p <- p + theme_tufte(base_family="Helvetica") # creates warnings p <- p + theme(axis.ticks=element_blank()) p <- p + theme(axis.text=element_text(size=7)) p <- p + coord_equal() diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R index 260452e..0d46c63 100644 --- a/R/resolveDuplicates.R +++ b/R/resolveDuplicates.R @@ -11,9 +11,10 @@ resolveDuplicates <- function(df) { "region", "period", "category", "metric")]), ] # here all instances of the data that was duplicated is removed - no_dupl <- dplyr::anti_join(df, duplicates_all, by = c("model", "scenario", "variable", + no_dupl <- dplyr::anti_join(df, duplicates, by = c("model", "scenario", "variable", "region", "period", "category", "metric")) + # reattach one instance to get a complete data set without duplicates df <- rbind(no_dupl, duplicates) # different approach: allows partial overwriting diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 14bc6eb..f72b822 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -85,7 +85,7 @@ evaluateThreshold <- function(df) { # bringing it all together # TODO: introduce file path and name as argument -validateScenario <- function() { +validateScenarios <- function() { data <- importData() @@ -99,14 +99,17 @@ validateScenario <- function() { # combine data for each row of the config and bind together df <- data.frame() for (i in 1:nrow(cfg)) { - # TODO: hist should be optional, validation should work without "historical" - d <- combineData(data, hist, cfg[i, ]) - df <- rbind(df, d) + # TODO: hist should only be needed if category "historical" is in config + # validation generally should work without hist data + df_row <- combineData(data, hist, cfg[i, ]) + df <- rbind(df, df_row) } - df <- evaluateThreshold(df) + df <- resolveDuplicates(df) - df <- createTooltip(df) + df <- evaluateThresholds(df) + + df <- appendTooltips(df) return(df) } diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv index e88860f..6767678 100644 --- a/inst/config/validationConfig.csv +++ b/inst/config/validationConfig.csv @@ -19,4 +19,4 @@ scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;; scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;; scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;; scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;; -scenario;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;yes;;; +scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;; From 9c06a01c62ec19ec7c6bcd5bd86f23da6234348a Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 28 Feb 2024 16:35:45 +0100 Subject: [PATCH 04/18] vs wip --- NAMESPACE | 2 + R/appendTooltips.R | 5 +- R/evaluateThresholds.R | 4 +- R/validateScenarios.R | 30 ++--- R/{plotHeatmaps.R => validationHeatmap.R} | 9 +- inst/markdown/REMINDvalidation.Rmd | 139 ++++++++++++++++++++++ 6 files changed, 160 insertions(+), 29 deletions(-) rename R/{plotHeatmaps.R => validationHeatmap.R} (89%) create mode 100644 inst/markdown/REMINDvalidation.Rmd diff --git a/NAMESPACE b/NAMESPACE index 502b1b0..deb06a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(validateScenarios) +export(validationHeatmap) import(ggplot2) importFrom(dplyr,"%>%") importFrom(dplyr,filter) diff --git a/R/appendTooltips.R b/R/appendTooltips.R index 9470b2a..cafc984 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -8,9 +8,10 @@ appendTooltips <- function(df) { df[df$category == "historic" & df$metric == "relative", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Value: ", round(value,2), "\n", - "Ref_Value: ", round(ref_value,2), "\n", + "Value: ", round(value, 2), "\n", + "Ref_Value: ", round(ref_value, 2), "\n", "Ref_Source: ", ref_model, "\n", + "Deviation:", round(check_value, 2)*100, "%\n", "Thresholds: \n", paste0("Max: ", max_yel*100, "% / ", max_red*100, "%") ) diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R index 125992c..a15d807 100644 --- a/R/evaluateThresholds.R +++ b/R/evaluateThresholds.R @@ -1,6 +1,8 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -evaluateThresholds <- function(df) { +# cleanInf = TRUE: choose to replace "Inf" and "-Inf" which were introduced +# for ease of calculations with "-" +evaluateThresholds <- function(df, cleanInf = TRUE) { # perform evaluation for each category and metric separately # (shortcuts are possible but will decrease readability of the code for minor diff --git a/R/validateScenarios.R b/R/validateScenarios.R index f72b822..993398f 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,22 +1,12 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -# import data -# supported formats: -# - .mif -# - - - -# dev helpers -#configName <- "validationConfig.csv" -#usethis::use_data(configName, internal = T) - - - importData <- function() { # for dev purposes if (file.exists("mif.rds")) { + cat("loading scenario data from cache\n") data <- readRDS("mif.rds") } else { + cat("importing scenario data from file(s)\n") path <- "C:/Users/pascalwe/Data/vs/" data <- remind2::deletePlus(quitte::read.quitte( c(paste0(path, "REMIND_generic_Bal.mif"), @@ -37,8 +27,11 @@ importData <- function() { importReferenceData <- function() { # for dev purposes if (file.exists("hist.rds")) { + cat("loading reference data from cache\n") hist <- readRDS("hist.rds") } else { + cat("importing reference data from file\n") + path <- "C:/Users/pascalwe/Data/vs/" hist <- quitte::read.quitte(paste0(path, "historical.mif")) hist <- filter(hist, period >= 1990) saveRDS(hist, "hist.rds") @@ -56,7 +49,7 @@ importReferenceData <- function() { # - model, scenario, region, period # ... loadConfig <- function(configName) { - path <- system.file(paste0("config/", configName) , package = "piamValidation") + path <- system.file(paste0("config/", configName), package = "piamValidation") cfg <- read.csv2(path, na.strings = "") return(cfg) } @@ -75,16 +68,9 @@ cleanConfig <- function(cfg) { return(cfg) } - -evaluateThreshold <- function(df) { - - return(df) -} - - - # bringing it all together -# TODO: introduce file path and name as argument +# TODO: introduce file paths and names as arguments +#' @export validateScenarios <- function() { data <- importData() diff --git a/R/plotHeatmaps.R b/R/validationHeatmap.R similarity index 89% rename from R/plotHeatmaps.R rename to R/validationHeatmap.R index 5284642..5234eb6 100644 --- a/R/plotHeatmaps.R +++ b/R/validationHeatmap.R @@ -2,6 +2,7 @@ #' @import ggplot2 #' @importFrom ggthemes theme_tufte #' @importFrom plotly ggplotly +#' @export # takes the output of "validateScenarios()" and plots heatmaps per variable validationHeatmap <- function(df, var, cat, met, interactive = T) { @@ -16,13 +17,12 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { if (nrow(d) == 0) { - df <- df %>% mutate(cm = paste(category, metric, sep = "-")) - unique(df[df$variable == "PE|Coal", "cm"]) + df$cm <- paste(category, metric, sep = "-") stop(paste0( "No data found for variable in this category and metric.\n variable ", var ," is available for the following category-metric - combinations: ", unique(df[df$variable == "PE|Coal", "cm"] + combinations: ", unique(df[df$variable == var, "cm"] ) ) ) @@ -58,7 +58,8 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { } fig <- ggplotly(p, tooltip="text") - # improve plotly layout, kinda works but very manual + # improve plotly layout, kinda works but very manual, + # TODO: can this be extended to a general useful function? #fig <- fig %>% subplot(heights = 0.3) %>% # layout(title = list(y=0.64)) if (interactive) { diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd new file mode 100644 index 0000000..6d42e19 --- /dev/null +++ b/inst/markdown/REMINDvalidation.Rmd @@ -0,0 +1,139 @@ +--- +title: "piamValidation: REMIND" +output: + html_document: + toc: true + toc_float: true + code_folding: hide +params: + gdx: "fulldata.gdx" + warning: false + message: false + figWidth: 8 +--- + +```{r include=FALSE} +devtools::load_all(".") +library(knitr) +library(ggplot2) +library(ggthemes) +library(plotly) + +knitr::opts_chunk$set( + echo = FALSE, + error = TRUE, + #fig.width = params$figWidth, + message = params$message, + warning = params$warning +) +``` + +## Import and Prepare Data + + +```{r} +# Data Preparation +df <- validateScenarios() + +``` +## Plots + +### Historic - Relative +Relative deviation to historical reference data. Absolute numbers, so no +difference is made between being above or below the reference value. +```{r} +c <- "historic" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Historic - Difference +Difference to historical reference data +```{r} +c <- "historic" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Relative +```{r} +c <- "scenario" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Difference +```{r} +c <- "scenario" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Absolute +```{r} +c <- "scenario" +m <- "absolute" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + + +### Scenario - Growthrate +```{r} +c <- "scenario" +m <- "growthrate" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` From 2e017660ca2f7a5d2b9d855dfb9c23927cac7858 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Thu, 29 Feb 2024 11:18:44 +0100 Subject: [PATCH 05/18] improve file handling --- .gitignore | 2 + R/combineData.R | 2 +- R/importFunctions.R | 68 +++++++++++++++++++++++++ R/sysdata.rda | Bin 107 -> 0 bytes R/validateScenarios.R | 79 ++--------------------------- inst/markdown/REMINDvalidation.Rmd | 19 +++++-- 6 files changed, 91 insertions(+), 79 deletions(-) create mode 100644 R/importFunctions.R delete mode 100644 R/sysdata.rda diff --git a/.gitignore b/.gitignore index 5b6a065..958b5bf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ .Rhistory .RData .Ruserdata +.rds +.html diff --git a/R/combineData.R b/R/combineData.R index 7b8e26c..a90227b 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -93,7 +93,7 @@ combineData <- function(data, hist, cfg_row) { # get reference values for these metrics } else if (c$metric %in% c("relative", "difference")) { # TODO: only works for ref_period, add support for model/scenario as ref - # TODO: support choosing ref_period AND model/scenario + # TODO: support choosing ref_period AND model/scenario? ref <- data %>% filter(variable == c$variable, region %in% reg, diff --git a/R/importFunctions.R b/R/importFunctions.R new file mode 100644 index 0000000..93baedc --- /dev/null +++ b/R/importFunctions.R @@ -0,0 +1,68 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% + +# scenarioData: one or multiple paths to .mif or .csv file(s) containing +# scenario data in IAM format +importData <- function(scenarioPath) { + # for dev purposes use cache + if (file.exists("mif.rds")) { + cat("loading scenario data from cache\n") + data <- readRDS("mif.rds") + } else { + cat("importing scenario data from file(s)\n") + data <- remind2::deletePlus(quitte::as.quitte(scenarioPath)) + saveRDS(data, "mif.rds") + } + + # change ordering of factors, put last element ("World") first + # TODO: will not work if World is not last, put somewhere else? + new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1)) + data$region <- factor(data$region, levels = new_order) + + return(data) +} + +importReferenceData <- function(referencePath) { + # for dev purposes use cache + if (file.exists("hist.rds")) { + cat("loading reference data from cache\n") + hist <- readRDS("hist.rds") + } else { + cat("importing reference data from file\n") + hist <- quitte::as.quitte(referencePath) + hist <- filter(hist, period >= 1990) + saveRDS(hist, "hist.rds") + } + + # remove all historical data before 1990, (only historic?) + hist <- filter(hist, period >= 1990) + + return(hist) +} + +# required columns: +# - category: "historic" or "scenario" +# - can be empty, contain one element or multiple: +# - model, scenario, region, period +# ... +getConfig <- function(configName) { + # TODO: check if config is available, otherwise is it a local file? if yes load that one + path <- system.file(paste0("config/", configName), package = "piamValidation") + cfg <- read.csv2(path, na.strings = "") + return(cfg) +} + + +cleanConfig <- function(cfg) { + # TODO: insert functionality to expand a row of the config, that should be + # used for multiple variables, via "Var|*" + # fill empty threshold columns with Infinity for easier evaluation + cfg <- cfg %>% + mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)), + min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)), + max_yel = as.numeric(ifelse(is.na(max_yel), Inf, max_yel)), + max_red = as.numeric(ifelse(is.na(max_red), Inf, max_red)) + ) + + # insert tests here to check if there are forbidden fields in config + return(cfg) +} diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index 26ce3b89e068b8251b177ee456e6eadf44977611..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 107 zcmV-x0F?hiT4*^jL0KkKS#gl0Apigye}McjL;wH*5C9AT5I`@djQ{`vAOKV~O&E*> z(WXPupwLkLP&5xn05LR}UrHb~g=+$YQ0yH-XC_UbsDH4F=e-l>w+%rMG8oZUz)78Q NF64@Ep&;WSNJ8#sB|QKD diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 993398f..565b455 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,83 +1,14 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -importData <- function() { - # for dev purposes - if (file.exists("mif.rds")) { - cat("loading scenario data from cache\n") - data <- readRDS("mif.rds") - } else { - cat("importing scenario data from file(s)\n") - path <- "C:/Users/pascalwe/Data/vs/" - data <- remind2::deletePlus(quitte::read.quitte( - c(paste0(path, "REMIND_generic_Bal.mif"), - paste0(path, "REMIND_generic_NPi.mif")) - ) - ) - saveRDS(data, "mif.rds") - } - - # change ordering of factors, put last element ("World") first - # TODO: will not work if World is not last - new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1)) - data$region <- factor(data$region, levels = new_order) - - return(data) -} - -importReferenceData <- function() { - # for dev purposes - if (file.exists("hist.rds")) { - cat("loading reference data from cache\n") - hist <- readRDS("hist.rds") - } else { - cat("importing reference data from file\n") - path <- "C:/Users/pascalwe/Data/vs/" - hist <- quitte::read.quitte(paste0(path, "historical.mif")) - hist <- filter(hist, period >= 1990) - saveRDS(hist, "hist.rds") - } - - # remove all historical data before 1990 - hist <- filter(hist, period >= 1990) - - return(hist) -} - -# required columns: -# - category: "historic" or "scenario" -# - can be empty, contain one element or multiple: -# - model, scenario, region, period -# ... -loadConfig <- function(configName) { - path <- system.file(paste0("config/", configName), package = "piamValidation") - cfg <- read.csv2(path, na.strings = "") - return(cfg) -} - - -cleanConfig <- function(cfg) { - # fill empty threshold columns with Infinity for easier evaluation - cfg <- cfg %>% - mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)), - min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)), - max_yel = as.numeric(ifelse(is.na(max_yel), Inf, max_yel)), - max_red = as.numeric(ifelse(is.na(max_red), Inf, max_red)) - ) - - # insert tests here to check if there are forbidden fields in config - return(cfg) -} - # bringing it all together -# TODO: introduce file paths and names as arguments #' @export -validateScenarios <- function() { +validateScenarios <- function(scenarioPath, referencePath, configName) { - data <- importData() + data <- importData(scenarioPath) - hist <- importReferenceData() + hist <- importReferenceData(referencePath) - cfg <- loadConfig(configName) + cfg <- getConfig(configName) cfg <- cleanConfig(cfg) @@ -95,8 +26,6 @@ validateScenarios <- function() { df <- evaluateThresholds(df) - df <- appendTooltips(df) - return(df) } diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index 6d42e19..2fd8679 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -6,7 +6,9 @@ output: toc_float: true code_folding: hide params: - gdx: "fulldata.gdx" + mif: "" + ref: "" + cfg: "" warning: false message: false figWidth: 8 @@ -33,7 +35,16 @@ knitr::opts_chunk$set( ```{r} # Data Preparation -df <- validateScenarios() +path <- "C:/Users/pascalwe/Data/vs/" +mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), + paste0(path, "REMIND_generic_NPi.mif")) + +hmif <- paste0(path, "historical.mif") + +cfg <- "validationConfig.csv" + +df <- validateScenarios(mifs, hmif, cfg) +df <- appendTooltips(df) ``` ## Plots @@ -57,7 +68,7 @@ if (nrow(d) > 0) { ``` ### Historic - Difference -Difference to historical reference data +Absolute difference to historical reference data ```{r} c <- "historic" m <- "difference" @@ -74,6 +85,8 @@ if (nrow(d) > 0) { ``` ### Scenario - Relative +Relative deviation to data point from either: +- scenario ```{r} c <- "scenario" m <- "relative" From 50e0f9cb8471f883f3acbeabed22462fdab5d47c Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Thu, 29 Feb 2024 12:06:44 +0100 Subject: [PATCH 06/18] support reference scenarios --- R/appendTooltips.R | 22 +++++++++------- R/combineData.R | 40 ++++++++++++++++++++---------- R/importFunctions.R | 5 ++-- R/validateScenarios.R | 4 +-- inst/config/validationConfig.csv | 1 + inst/markdown/REMINDvalidation.Rmd | 9 +++++-- 6 files changed, 53 insertions(+), 28 deletions(-) diff --git a/R/appendTooltips.R b/R/appendTooltips.R index cafc984..1b1367b 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -22,8 +22,8 @@ appendTooltips <- function(df) { df[df$category == "historic" & df$metric == "difference", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Value: ", round(value,2), "\n", - "Ref_Value: ", round(ref_value,2), "\n", + "Value: ", round(value, 2), "\n", + "Ref_Value: ", round(ref_value, 2), "\n", "Ref_Source: ", ref_model, "\n", "Thresholds: \n", "Max: ", max_yel, " / ", max_red @@ -35,9 +35,13 @@ appendTooltips <- function(df) { df[df$category == "scenario" & df$metric == "relative", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Value: ", round(value,2), "\n", - "Ref Value: ", round(ref_value,2), "\n", - "Rel Deviation: ", round(check_value,2)*100, "% \n", + "Value: ", round(value, 2), "\n", + "Ref ", ifelse(!is.na(ref_period), + paste("Period:", ref_period), + paste("Scenario:", ref_scenario) + ), "\n", + "Ref Value: ", round(ref_value, 2), "\n", + "Rel Deviation: ", round(check_value, 2)*100, "% \n", "Thresholds: \n", "Min: ", min_yel*100, "% / ", min_red*100,"% \n", "Max: ", max_yel*100, "% / ", max_red*100, "%" @@ -49,9 +53,9 @@ appendTooltips <- function(df) { df[df$category == "scenario" & df$metric == "difference", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Value: ", round(value,2), "\n", - "Ref Value: ", round(ref_value,2), "\n", - "Difference: ", round(check_value,2), "\n", + "Value: ", round(value, 2), "\n", + "Ref Value: ", round(ref_value, 2), "\n", + "Difference: ", round(check_value, 2), "\n", "Thresholds: \n", "Min: ", min_yel, " / ", min_red,"\n", "Max: ", max_yel, " / ", max_red @@ -63,7 +67,7 @@ appendTooltips <- function(df) { df[df$category == "scenario" & df$metric == "absolute", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Value: ", round(value,2), "\n", + "Value: ", round(value, 2), "\n", "Thresholds: \n", "Min: ", min_yel, " / ", min_red,"\n", "Max: ", max_yel, " / ", max_red diff --git a/R/combineData.R b/R/combineData.R index a90227b..ed209e8 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -16,6 +16,7 @@ combineData <- function(data, hist, cfg_row) { # create filters # check whether regions, periods, scenarios are specified + # TODO: add model reg <- if (is.na(c$region)) all_reg else strsplit(c$region, split = ", |,")[[1]] sce <- if (is.na(c$scenario)) all_sce else @@ -92,21 +93,34 @@ combineData <- function(data, hist, cfg_row) { # get reference values for these metrics } else if (c$metric %in% c("relative", "difference")) { - # TODO: only works for ref_period, add support for model/scenario as ref # TODO: support choosing ref_period AND model/scenario? - ref <- data %>% - filter(variable == c$variable, - region %in% reg, - period == c$ref_period, - scenario %in% sce) %>% - mutate(ref_value = value, ref_period = period) %>% - select(-c(period, value)) - df <- merge(d, ref) - # add columns which are not used in this category, fill NA - df <- df %>% - mutate(ref_model = NA, - ref_scenario = NA) + # if a reference period should be used, same scenario, same model + if (!is.na(c$ref_period)) { + ref <- data %>% + filter(variable == c$variable, + region %in% reg, + period == c$ref_period, + scenario %in% sce) %>% + mutate(ref_value = value, ref_period = period) %>% + select(-c(period, value)) %>% + # add columns which are not used in this category, fill NA + mutate(ref_model = NA, ref_scenario = NA) + + # if a reference scenario should be used, same period, same model + } else if (!is.na(c$ref_scenario)) { + ref <- data %>% + filter(variable == c$variable, + region %in% reg, + period %in% per, + scenario == c$ref_scenario) %>% + mutate(ref_value = value, ref_scenario = scenario) %>% + select(-c(scenario, value)) %>% + # add columns which are not used in this category, fill NA + mutate(ref_model = NA, ref_period = NA) + } + + df <- merge(d, ref) } else { # TODO: have this warning here or earlier when cleaning config? diff --git a/R/importFunctions.R b/R/importFunctions.R index 93baedc..a9e2b25 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -1,8 +1,8 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -# scenarioData: one or multiple paths to .mif or .csv file(s) containing +# scenarioPath: one or multiple paths to .mif or .csv file(s) containing # scenario data in IAM format -importData <- function(scenarioPath) { +importScenarioData <- function(scenarioPath) { # for dev purposes use cache if (file.exists("mif.rds")) { cat("loading scenario data from cache\n") @@ -48,6 +48,7 @@ getConfig <- function(configName) { # TODO: check if config is available, otherwise is it a local file? if yes load that one path <- system.file(paste0("config/", configName), package = "piamValidation") cfg <- read.csv2(path, na.strings = "") + cat(paste0("loading config file: ", configName, "\n")) return(cfg) } diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 565b455..311d8ee 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -4,7 +4,7 @@ #' @export validateScenarios <- function(scenarioPath, referencePath, configName) { - data <- importData(scenarioPath) + data <- importScenarioData(scenarioPath) hist <- importReferenceData(referencePath) @@ -17,7 +17,7 @@ validateScenarios <- function(scenarioPath, referencePath, configName) { df <- data.frame() for (i in 1:nrow(cfg)) { # TODO: hist should only be needed if category "historical" is in config - # validation generally should work without hist data + # validation generally should work without hist data df_row <- combineData(data, hist, cfg[i, ]) df <- rbind(df, df_row) } diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv index 6767678..2ff00c1 100644 --- a/inst/config/validationConfig.csv +++ b/inst/config/validationConfig.csv @@ -20,3 +20,4 @@ scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;; scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;; scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;; scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;; +scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index 2fd8679..f883115 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -41,9 +41,14 @@ mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), hmif <- paste0(path, "historical.mif") -cfg <- "validationConfig.csv" +config <- "validationConfig.csv" -df <- validateScenarios(mifs, hmif, cfg) + +data <- importScenarioData(mifs) +hist <- importReferenceData(hmif) +cfg <- getConfig(config) %>% cleanConfig() + +df <- validateScenarios(mifs, hmif, config) df <- appendTooltips(df) ``` From ef163427b8580606b088a73c57d559126a92f3ac Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Thu, 29 Feb 2024 16:56:41 +0100 Subject: [PATCH 07/18] vs wip --- DESCRIPTION | 1 + R/appendTooltips.R | 37 +++++++++++++++--------------- R/evaluateThresholds.R | 3 +++ R/validationHeatmap.R | 32 ++++++++++++++------------ inst/config/validationConfig.csv | 6 ++--- inst/markdown/REMINDvalidation.Rmd | 33 +++++++++++++++++++++----- 6 files changed, 69 insertions(+), 43 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 22d83c5..e497cbf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,6 +12,7 @@ Imports: dplyr (>= 1.1.1), ggplot2, ggthemes, + kableExtra, plotly, quitte (>= 0.3123.0), readxl, diff --git a/R/appendTooltips.R b/R/appendTooltips.R index 1b1367b..e9d3649 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -11,8 +11,8 @@ appendTooltips <- function(df) { "Value: ", round(value, 2), "\n", "Ref_Value: ", round(ref_value, 2), "\n", "Ref_Source: ", ref_model, "\n", - "Deviation:", round(check_value, 2)*100, "%\n", - "Thresholds: \n", + "Deviation:", round(check_value)*100, "%\n", + "Thresholds (yel/red): \n", paste0("Max: ", max_yel*100, "% / ", max_red*100, "%") ) ) @@ -25,7 +25,7 @@ appendTooltips <- function(df) { "Value: ", round(value, 2), "\n", "Ref_Value: ", round(ref_value, 2), "\n", "Ref_Source: ", ref_model, "\n", - "Thresholds: \n", + "Thresholds (yel/red): \n", "Max: ", max_yel, " / ", max_red ) ) @@ -41,12 +41,11 @@ appendTooltips <- function(df) { paste("Scenario:", ref_scenario) ), "\n", "Ref Value: ", round(ref_value, 2), "\n", - "Rel Deviation: ", round(check_value, 2)*100, "% \n", - "Thresholds: \n", + "Rel Deviation: ", round(check_value)*100, "% \n", + "Thresholds (yel/red): \n", "Min: ", min_yel*100, "% / ", min_red*100,"% \n", - "Max: ", max_yel*100, "% / ", max_red*100, "%" - ) - ) + "Max: ", max_yel*100, "% / ", max_red*100, "%") + ) # scenario - difference df[df$category == "scenario" & df$metric == "difference", ] <- @@ -56,7 +55,7 @@ appendTooltips <- function(df) { "Value: ", round(value, 2), "\n", "Ref Value: ", round(ref_value, 2), "\n", "Difference: ", round(check_value, 2), "\n", - "Thresholds: \n", + "Thresholds (yel/red): \n", "Min: ", min_yel, " / ", min_red,"\n", "Max: ", max_yel, " / ", max_red ) @@ -68,24 +67,24 @@ appendTooltips <- function(df) { mutate(text = paste0(region, "\n", period, "\n", "Value: ", round(value, 2), "\n", - "Thresholds: \n", + "Thresholds (yel/red): \n", "Min: ", min_yel, " / ", min_red,"\n", "Max: ", max_yel, " / ", max_red - ) - ) + ) + ) # scenario - growthrate - df[df$category == "scenario" & df$metric == "absolute", ] <- - df[df$category == "scenario" & df$metric == "absolute", ] %>% + df[df$category == "scenario" & df$metric == "growthrate", ] <- + df[df$category == "scenario" & df$metric == "growthrate", ] %>% mutate(text = paste0(region, "\n", period, "\n", - "Avg. growth/yr: ", round(check_value,2)*100, "% \n", - "Absolute value: ", round(value,2), " \n", - "Thresholds: \n", + "Avg. growth/yr: ", round(check_value)*100, "% \n", + "Absolute value: ", round(value, 2), " \n", + "Thresholds (yel/red): \n", "Min: ", min_yel*100, "% / ", min_red*100, "% \n", "Max: ", max_yel*100, "% / ", max_red*100, "%" - ) - ) + ) + ) return(df) } diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R index a15d807..3cd1bfd 100644 --- a/R/evaluateThresholds.R +++ b/R/evaluateThresholds.R @@ -92,5 +92,8 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { ) ) + # after evaluation, "Inf" can be removed again + if (cleanInf) df[df == "Inf" | df == "-Inf"] <- NA + return(df) } diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R index 5234eb6..b22eab6 100644 --- a/R/validationHeatmap.R +++ b/R/validationHeatmap.R @@ -15,17 +15,16 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { category == cat, metric == met) + # warn if no data is found for combination of var, cat and met if (nrow(d) == 0) { - df$cm <- paste(category, metric, sep = "-") - - stop(paste0( - "No data found for variable in this category and metric.\n - variable ", var ," is available for the following category-metric - combinations: ", unique(df[df$variable == var, "cm"] - ) + warning( + paste0( + "No data found for variable in this category and metric.\n + variable ", var ," is available for the following category-metric + combinations: ", unique(df[df$variable == var, "cm"]) + ) ) - ) } d$period <- as.character(d$period) @@ -42,10 +41,12 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { facet_grid(scenario~.) # make it beautiful # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2 - p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]")) - p <- p + theme_tufte(base_family="Helvetica") # creates warnings - p <- p + theme(axis.ticks=element_blank()) - p <- p + theme(axis.text=element_text(size=7)) + p <- p + labs(x = NULL, y = NULL, title = paste0(var, + " [", d$unit[1], "] - ", + cat, "/", met)) + p <- p + theme_tufte(base_family = "Helvetica") # creates warnings + p <- p + theme(axis.ticks = element_blank()) + p <- p + theme(axis.text = element_text(size = 7)) p <- p + coord_equal() p <- p + theme(legend.position = "none") @@ -56,12 +57,13 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { if("World" %in% d$region) { p <- p + geom_vline(xintercept = 1.5, linewidth = 0.8, color = "white") } - fig <- ggplotly(p, tooltip="text") + fig <- ggplotly(p, tooltip = "text") - # improve plotly layout, kinda works but very manual, - # TODO: can this be extended to a general useful function? + # improve plotly layout, kinda works but very manual + # TODO: can this be extended to a general, useful function? #fig <- fig %>% subplot(heights = 0.3) %>% # layout(title = list(y=0.64)) + if (interactive) { return(fig) } else { diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv index 2ff00c1..ecc030c 100644 --- a/inst/config/validationConfig.csv +++ b/inst/config/validationConfig.csv @@ -11,13 +11,13 @@ scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;; scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;; scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;; scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;; -scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;yes;;; +scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;no;;; scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;; -scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;yes;;; +scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;no;;; scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;; scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;; scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;; -scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;; +scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;no;2020;; scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;; scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;; scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index f883115..9e9088b 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -20,6 +20,7 @@ library(knitr) library(ggplot2) library(ggthemes) library(plotly) +library(kableExtra) knitr::opts_chunk$set( echo = FALSE, @@ -43,16 +44,32 @@ hmif <- paste0(path, "historical.mif") config <- "validationConfig.csv" - -data <- importScenarioData(mifs) -hist <- importReferenceData(hmif) -cfg <- getConfig(config) %>% cleanConfig() +# debugging +# data <- importScenarioData(mifs) +# hist <- importReferenceData(hmif) +# cfg <- getConfig(config) %>% cleanConfig() df <- validateScenarios(mifs, hmif, config) df <- appendTooltips(df) +``` +## Validation +### Summary +```{r} +# find "critical == yes" data points, that are red/yellow +colors <- c(green = "#008450", + yellow = "#EFB700", + red = "#B81D13", + grey = "#808080") +dt <- dplyr::count(df, critical, check) +kbl(dt) %>% + kable_styling(bootstrap_options = c("striped", "hover"), + full_width = F) %>% + column_spec(2, color = colors[dt$check], + background = colors[dt$check]) ``` -## Plots + +## Heatmaps by Category ### Historic - Relative Relative deviation to historical reference data. Absolute numbers, so no @@ -91,7 +108,11 @@ if (nrow(d) > 0) { ### Scenario - Relative Relative deviation to data point from either: -- scenario + +- period (same scenario/model) +- scenario (same period/model) +- model (same period/scenario - Not supported yet) + ```{r} c <- "scenario" m <- "relative" From d23993c9074af3628753ac81cafcc4cc37ab9a3b Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Fri, 1 Mar 2024 16:47:37 +0100 Subject: [PATCH 08/18] wip expand variables in cfg --- inst/config/validationConfig_COMMITED.csv | 5 + inst/markdown/COMMITEDvalidation.Rmd | 201 ++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 inst/config/validationConfig_COMMITED.csv create mode 100644 inst/markdown/COMMITEDvalidation.Rmd diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITED.csv new file mode 100644 index 0000000..0baabc8 --- /dev/null +++ b/inst/config/validationConfig_COMMITED.csv @@ -0,0 +1,5 @@ +category;metric;critical;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;ref_period;ref_model;ref_scenario;source/link to discussion +historic;relative;yes;;;PE|*;;;;;0.05;0.1;;;; +historic;relative;yes;;;Emi|*;;;;;0.05;0.1;;;; +historic;relative;yes;;;SE|*;;;;;0.05;0.1;;;; +historic;relative;yes;;;FE;;;;;0.05;0.1;;;; diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd new file mode 100644 index 0000000..14ccc13 --- /dev/null +++ b/inst/markdown/COMMITEDvalidation.Rmd @@ -0,0 +1,201 @@ +--- +title: "piamValidation: COMMITED" +output: + html_document: + toc: true + toc_float: true + code_folding: hide +params: + mif: "" + ref: "" + cfg: "" + warning: false + message: false + figWidth: 8 +--- + +```{r include=FALSE} +devtools::load_all(".") +library(knitr) +library(ggplot2) +library(ggthemes) +library(plotly) +library(kableExtra) + +knitr::opts_chunk$set( + echo = FALSE, + error = TRUE, + #fig.width = params$figWidth, + message = params$message, + warning = params$warning +) +``` + +## Import and Prepare Data + + +```{r} +# Data Preparation +path <- "C:/Users/pascalwe/Data/vs/" +mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), + paste0(path, "REMIND_generic_NPi.mif")) + +hmif <- paste0(path, "historical.mif") + +config <- "validationConfig_COMMITED.csv" + +cfg <- getConfig(config) +data <- importScenarioData(mifs) + + +# expandConfig <- function(cfg, data, firstLevelOnly = TRUE) +to_expand <- cfg[grepl("\\*", cfg$variable), ] +all_vars <- unique(data$variable) +firstLevelOnly <- TRUE + +cfg_new <- dplyr::anti_join(cfg, to_expand) +for (i in 1:nrow(to_expand)) { + var <- gsub("\\|\\*", "\\\\\\|", to_expand$variable[i]) + selected_vars <- all_vars[grepl(var, all_vars)] + + # + # all variables that don't have a "|" in the substring following the given part + if (firstLevelOnly == TRUE) { + selected_vars <- selected_vars[!grepl("\\|", gsub(var, "", selected_vars))] + } + + c <- cfg[i,] %>% + slice(rep(1, each = length(selected_vars))) + c$variable <- selected_vars + cfg_new <- rbind(cfg_new, c) +} + +cfg <- cfg_new + +cfg <- cleanConfig(cfg) + + +# combine data for each row of the config and bind together +df <- data.frame() +for (i in 1:nrow(cfg)) { + # TODO: hist should only be needed if category "historical" is in config + # validation generally should work without hist data + df_row <- combineData(data, hist, cfg[i, ]) + df <- rbind(df, df_row) +} + +df <- resolveDuplicates(df) + +df <- evaluateThresholds(df) + +df <- appendTooltips(df) + +``` + +## Heatmaps by Category + +### Historic - Relative +Relative deviation to historical reference data. Absolute numbers, so no +difference is made between being above or below the reference value. +```{r} +c <- "historic" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Historic - Difference +Absolute difference to historical reference data +```{r} +c <- "historic" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Relative +Relative deviation to data point from either: + +- period (same scenario/model) +- scenario (same period/model) +- model (same period/scenario - Not supported yet) + +```{r} +c <- "scenario" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Difference +```{r} +c <- "scenario" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Absolute +```{r} +c <- "scenario" +m <- "absolute" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + + +### Scenario - Growthrate +```{r} +c <- "scenario" +m <- "growthrate" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` From 0e1c675ca2465508e5bba9e27e41c2a76ddd9988 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 6 Mar 2024 17:11:30 +0100 Subject: [PATCH 09/18] expand variables, periods wip --- R/combineData.R | 12 ++-- R/importFunctions.R | 67 +++++++++++++++++++++-- R/validateScenarios.R | 16 ++++-- inst/config/validationConfig.csv | 46 ++++++++-------- inst/config/validationConfig_COMMITED.csv | 8 +-- inst/markdown/COMMITEDvalidation.Rmd | 40 +++----------- inst/markdown/REMINDvalidation.Rmd | 8 +-- 7 files changed, 117 insertions(+), 80 deletions(-) diff --git a/R/combineData.R b/R/combineData.R index ed209e8..1b795aa 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -2,7 +2,7 @@ # for one row of cfg: filter and merge relevant scenario data with cfg # results in one df that contains scenario data, reference data and thresholds -combineData <- function(data, hist, cfg_row) { +combineData <- function(data, cfg_row, ref_data = NULL) { # shorten as it will be used a lot c <- cfg_row @@ -33,7 +33,7 @@ combineData <- function(data, hist, cfg_row) { # filter scenario data for row in cfg d <- data %>% - filter(variable == c$variable, + dplyr::filter(variable == c$variable, region %in% reg, period %in% per, scenario %in% sce) @@ -52,7 +52,7 @@ combineData <- function(data, hist, cfg_row) { if (c$category == "historic") { # historic data for relevant variable and dimensions (all sources) h <- hist %>% - filter(variable == c$variable, + dplyr::filter(variable == c$variable, region %in% reg, period %in% per) %>% mutate(ref_value = value, ref_model = model) %>% @@ -62,7 +62,7 @@ combineData <- function(data, hist, cfg_row) { # in case one or more sources are specified, filter for them if (!is.na(c$ref_model)) { - h <- filter( + h <- dplyr::filter( h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]] ) } @@ -98,7 +98,7 @@ combineData <- function(data, hist, cfg_row) { # if a reference period should be used, same scenario, same model if (!is.na(c$ref_period)) { ref <- data %>% - filter(variable == c$variable, + dplyr::filter(variable == c$variable, region %in% reg, period == c$ref_period, scenario %in% sce) %>% @@ -110,7 +110,7 @@ combineData <- function(data, hist, cfg_row) { # if a reference scenario should be used, same period, same model } else if (!is.na(c$ref_scenario)) { ref <- data %>% - filter(variable == c$variable, + dplyr::filter(variable == c$variable, region %in% reg, period %in% per, scenario == c$ref_scenario) %>% diff --git a/R/importFunctions.R b/R/importFunctions.R index a9e2b25..5b60369 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -29,12 +29,12 @@ importReferenceData <- function(referencePath) { } else { cat("importing reference data from file\n") hist <- quitte::as.quitte(referencePath) - hist <- filter(hist, period >= 1990) + hist <- dplyr::filter(hist, period >= 1990) saveRDS(hist, "hist.rds") } # remove all historical data before 1990, (only historic?) - hist <- filter(hist, period >= 1990) + hist <- dplyr::filter(hist, period >= 1990) return(hist) } @@ -54,8 +54,6 @@ getConfig <- function(configName) { cleanConfig <- function(cfg) { - # TODO: insert functionality to expand a row of the config, that should be - # used for multiple variables, via "Var|*" # fill empty threshold columns with Infinity for easier evaluation cfg <- cfg %>% mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)), @@ -67,3 +65,64 @@ cleanConfig <- function(cfg) { # insert tests here to check if there are forbidden fields in config return(cfg) } + + +# replace period ranges using ":" with comma-separated list of years +expandPeriods <- function(cfg, data) { + per_expand_idx <- grep("\\:", cfg$period) + all_per <- unique(data$period) + + # iterate through rows with ":" + for (i in per_expand_idx) { + if (nchar(cfg[i, "period"]) != 9) { + stop("Invalid range detected. Make sure to enter years as YYYY:YYYY.\n") + } else { + # find scenario-years that are withing indicated range + start <- substr(cfg[i, "period"], 1, 4) + if (start < min(all_per)) { + warning("Selected period starts earlier than scenario data.\n") + } + stop <- substr(cfg[i, "period"], 6, 9) + if (stop > max(all_per)) { + warning("Selected period ends later than scenario data.\n") + } + selected_per <- all_per[all_per >= start & all_per <= stop] + + # overwrite ":" notation with list + cfg[i, "period"] <- paste0(selected_per, collapse = ", ") + } + } + return(cfg) +} + + +# takes config entries specifying a set of variables via "*" and expands it so +# that every variable corresponds to one row in cfg +# TODO: support "firstLevelOnly = TRUE" to not grab all sub-variables but just +# one level +expandVariables <- function(cfg, data) { + # create the expanded config, starting with the not-to-expand rows, + # then appending the rows with expanded variable names + var_expand <- cfg[grepl("\\*", cfg$variable), ] + cfg_new <- dplyr::anti_join(cfg, var_expand, by = colnames(cfg)) + + if (length(var_expand > 0)) { + all_vars <- unique(data$variable) + for (i in 1:nrow(var_expand)) { + # prepare strings for grepping by adding escape characters and "." + var <- gsub("\\*", "\\.\\*", var_expand$variable[i]) + var <- gsub("\\|", "\\\\|", var) + selected_vars <- all_vars[grepl(var, all_vars)] + cat(paste0(var_expand$variable[i], " was expanded into ", + length(selected_vars), " sub-variables.\n")) + + # take the original row for the current set of variables and repeat it + # once for each sub-variable, overwrite with sub-variable names + c <- cfg[i,] %>% + dplyr::slice(rep(1, each = length(selected_vars))) + c$variable <- selected_vars + cfg_new <- rbind(cfg_new, c) + } + } + return(cfg_new) +} diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 311d8ee..7b59354 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,16 +1,20 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% # bringing it all together +# keepNA #' @export -validateScenarios <- function(scenarioPath, referencePath, configName) { +validateScenarios <- function(scenarioPath, configName, referencePath = NULL, + keepNA = TRUE) { data <- importScenarioData(scenarioPath) hist <- importReferenceData(referencePath) - cfg <- getConfig(configName) - - cfg <- cleanConfig(cfg) + cfg <- configName %>% + getConfig() %>% + cleanConfig() %>% + expandPeriods(data) %>% + expandVariables(data) # combine data for each row of the config and bind together @@ -18,8 +22,10 @@ validateScenarios <- function(scenarioPath, referencePath, configName) { for (i in 1:nrow(cfg)) { # TODO: hist should only be needed if category "historical" is in config # validation generally should work without hist data - df_row <- combineData(data, hist, cfg[i, ]) + # TODO: optimize performance + df_row <- combineData(data, cfg[i, ], ref_data = hist) df <- rbind(df, df_row) + cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) } df <- resolveDuplicates(df) diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv index ecc030c..f72a115 100644 --- a/inst/config/validationConfig.csv +++ b/inst/config/validationConfig.csv @@ -1,23 +1,23 @@ -category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;critical;ref_period;ref_model;ref_scenario -historic;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;yes;;EDGAR6; -historic;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;yes;;EDGAR6; -historic;;;PE|Coal;;;;;;0.1;relative;yes;;BP, IEA; -historic;;;PE|Coal;World;;;;;0.2;relative;yes;;BP, IEA; -historic;;;PE|Oil;;;;;;2;difference;yes;;; -historic;;;PE|Oil;World;;;;;5;difference;yes;;; -historic;;Bal;FE|Electricity;;;;;;0.1;relative;yes;;; -historic;;Bal;FE|Electricity;World;;;;;0.2;relative;yes;;; -scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;; -scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;; -scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;; -scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;; -scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;no;;; -scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;; -scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;no;;; -scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;; -scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;; -scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;; -scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;no;2020;; -scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;; -scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;; -scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi +category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion +historic;relative;yes;Emi|CO2|Energy;;;World;;;;0.05;0.1;EDGAR6;;; +historic;relative;yes;Emi|CO2|Energy;;;DEU;;;;0.1;0.2;EDGAR6;;; +historic;relative;yes;PE|Coal;;;;;;;;0.1;BP, IEA;;; +historic;relative;yes;PE|Coal;;;World;;;;;0.2;BP, IEA;;; +historic;difference;yes;PE|Oil;;;;;;;;2;;;; +historic;difference;yes;PE|Oil;;;World;;;;;5;;;; +historic;relative;yes;FE|Electricity;;Bal;;;;;;0.1;;;; +historic;relative;yes;FE|Electricity;;Bal;World;;;;;0.2;;;; +scenario;absolute;yes;Price|Carbon;;;;2030;;;300;600;;;; +scenario;absolute;yes;Price|Carbon;;;;2050;;;;1000;;;; +scenario;absolute;yes;Price|Carbon;;;DEU;2030;50;100;400;800;;;; +scenario;absolute;yes;Price|Carbon;;;DEU;2050;100;150;600;1200;;;; +scenario;absolute;no;Price|Carbon;;;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;;;; +scenario;absolute;yes;Emi|GHG;;Bal;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;;;; +scenario;absolute;no;Emi|GHG;;Bal;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;;;; +scenario;absolute;yes;Temperature|Global Mean;;Bal;;2100;;;1.5;1.55;;;; +scenario;relative;yes;Emi|CO2;;;;2030;-0.5;-0.3;;;;;2020; +scenario;relative;yes;Emi|CO2;;;;2030;-0.6;-0.4;;;;;2015; +scenario;relative;no;FE;;;;2030, 2040;-0.4;-0.2;;;;;2020; +scenario;absolute;yes;Price|Secondary Energy|Electricity;;Bal;;;5.56;;;;;;; +scenario;growthrate;yes;New Cap|Electricity|Solar|PV;;;;;;;0.5;1;;;; +scenario;relative;yes;Price|Carbon;;Bal;;2030, 2040, 2050;1;;;;;NPi;; diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITED.csv index 0baabc8..46fcabe 100644 --- a/inst/config/validationConfig_COMMITED.csv +++ b/inst/config/validationConfig_COMMITED.csv @@ -1,5 +1,3 @@ -category;metric;critical;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;ref_period;ref_model;ref_scenario;source/link to discussion -historic;relative;yes;;;PE|*;;;;;0.05;0.1;;;; -historic;relative;yes;;;Emi|*;;;;;0.05;0.1;;;; -historic;relative;yes;;;SE|*;;;;;0.05;0.1;;;; -historic;relative;yes;;;FE;;;;;0.05;0.1;;;; +category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion +historic;relative;yes;Emi|CO2|*;;;;2010:2200;;;0.05;0.1;;;; +historic;relative;yes;FE;;;;2020:2050;;;0.05;0.1;;;; diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd index 14ccc13..abc4c5a 100644 --- a/inst/markdown/COMMITEDvalidation.Rmd +++ b/inst/markdown/COMMITEDvalidation.Rmd @@ -42,54 +42,32 @@ mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), hmif <- paste0(path, "historical.mif") -config <- "validationConfig_COMMITED.csv" +configName <- "validationConfig_COMMITED.csv" -cfg <- getConfig(config) -data <- importScenarioData(mifs) +data <- importScenarioData(scenarioPath) +hist <- importReferenceData(referencePath) -# expandConfig <- function(cfg, data, firstLevelOnly = TRUE) -to_expand <- cfg[grepl("\\*", cfg$variable), ] -all_vars <- unique(data$variable) -firstLevelOnly <- TRUE - -cfg_new <- dplyr::anti_join(cfg, to_expand) -for (i in 1:nrow(to_expand)) { - var <- gsub("\\|\\*", "\\\\\\|", to_expand$variable[i]) - selected_vars <- all_vars[grepl(var, all_vars)] - - # - # all variables that don't have a "|" in the substring following the given part - if (firstLevelOnly == TRUE) { - selected_vars <- selected_vars[!grepl("\\|", gsub(var, "", selected_vars))] - } - - c <- cfg[i,] %>% - slice(rep(1, each = length(selected_vars))) - c$variable <- selected_vars - cfg_new <- rbind(cfg_new, c) -} - -cfg <- cfg_new - -cfg <- cleanConfig(cfg) +cfg <- getConfig(configName) %>% + cleanConfig() +cfg <- expandPeriods(cfg, data) %>% + expandVariables(data) # combine data for each row of the config and bind together df <- data.frame() for (i in 1:nrow(cfg)) { # TODO: hist should only be needed if category "historical" is in config # validation generally should work without hist data - df_row <- combineData(data, hist, cfg[i, ]) + df_row <- combineData(data, cfg[i, ], ref_data = hist) df <- rbind(df, df_row) + cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) } df <- resolveDuplicates(df) df <- evaluateThresholds(df) -df <- appendTooltips(df) - ``` ## Heatmaps by Category diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index 9e9088b..f3c29b7 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -17,6 +17,7 @@ params: ```{r include=FALSE} devtools::load_all(".") library(knitr) +library(dplyr) library(ggplot2) library(ggthemes) library(plotly) @@ -44,12 +45,7 @@ hmif <- paste0(path, "historical.mif") config <- "validationConfig.csv" -# debugging -# data <- importScenarioData(mifs) -# hist <- importReferenceData(hmif) -# cfg <- getConfig(config) %>% cleanConfig() - -df <- validateScenarios(mifs, hmif, config) +df <- validateScenarios(mifs, config, referencePath = hmif) df <- appendTooltips(df) ``` ## Validation From c45bb47274ded28cb0b7b11345e8d50fef40f4c5 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Thu, 7 Mar 2024 16:43:09 +0100 Subject: [PATCH 10/18] bugfix combineData() --- DESCRIPTION | 1 - R/combineData.R | 57 ++++++++++--------- ...TED.csv => validationConfig_COMMITTED.csv} | 4 +- inst/markdown/COMMITEDvalidation.Rmd | 2 + inst/markdown/REMINDvalidation.Rmd | 11 ++-- 5 files changed, 40 insertions(+), 35 deletions(-) rename inst/config/{validationConfig_COMMITED.csv => validationConfig_COMMITTED.csv} (57%) diff --git a/DESCRIPTION b/DESCRIPTION index e497cbf..22d83c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,6 @@ Imports: dplyr (>= 1.1.1), ggplot2, ggthemes, - kableExtra, plotly, quitte (>= 0.3123.0), readxl, diff --git a/R/combineData.R b/R/combineData.R index 1b795aa..b6bdd88 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -11,7 +11,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) { all_reg <- unique(data$region) all_per <- unique(data$period) # not a factor, convert? all_per <- all_per[all_per <= 2100] - hist_per <- c(2005, 2010, 2015, 2020) + ref_per <- c(2005, 2010, 2015, 2020) all_sce <- unique(data$scenario) # create filters @@ -24,7 +24,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) { # empty "period" field means different years for historic or scenario category if (c$category == "historic") { - per <- if (is.na(c$period)) hist_per else + per <- if (is.na(c$period)) ref_per else strsplit(as.character(c$period), split = ", |,")[[1]] } else { per <- if (is.na(c$period)) all_per else @@ -33,7 +33,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) { # filter scenario data for row in cfg d <- data %>% - dplyr::filter(variable == c$variable, + filter(variable == c$variable, region %in% reg, period %in% per, scenario %in% sce) @@ -51,34 +51,39 @@ combineData <- function(data, cfg_row, ref_data = NULL) { # depending on category: filter and attach reference values if they are needed if (c$category == "historic") { # historic data for relevant variable and dimensions (all sources) - h <- hist %>% - dplyr::filter(variable == c$variable, + h <- ref_data %>% + filter(variable %in% c$variable, region %in% reg, period %in% per) %>% mutate(ref_value = value, ref_model = model) %>% select(-c("scenario", "unit", "value", "model")) - # TODO: insert test here if ref_model exists and has data to compare to + # TODO: test here if ref_model exists and has data to compare to + if (nrow(h) == 0) { + cat(paste0("No reference data for variable ", c$variable, " found.\n")) + df <- data.frame() + } else { - # in case one or more sources are specified, filter for them - if (!is.na(c$ref_model)) { - h <- dplyr::filter( - h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]] - ) - } + # in case one or more sources are specified, filter for them + if (!is.na(c$ref_model)) { + h <- filter( + h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]] + ) + } - # in case of multiple sources, calculate mean (using all available sources) - if (length(unique(h$ref_model)) > 1) { - h_mean <- group_by(h, period, region) %>% - summarise(ref_value = mean(ref_value, na.rm = TRUE)) - h <- mutate(h_mean, variable = c$variable, ref_model = "multiple") - } + # in case of multiple sources, calculate mean (using all available sources) + if (length(unique(h$ref_model)) > 1) { + h_mean <- group_by(h, period, region) %>% + summarise(ref_value = mean(ref_value, na.rm = TRUE)) + h <- mutate(h_mean, variable = c$variable, ref_model = "multiple") + } - # merge with historical data adds columns ref_value and ref_model - df <- merge(d, h) + # merge with historical data adds columns ref_value and ref_model + df <- merge(d, h) - # add columns which are not used in this category, fill NA - df <- df %>% mutate(ref_period = NA, ref_scenario = NA) + # add columns which are not used in this category, fill NA + df <- df %>% mutate(ref_period = NA, ref_scenario = NA) + } # filter and attach reference values if they are needed; scenario data } else if (c$category == "scenario") { @@ -98,19 +103,19 @@ combineData <- function(data, cfg_row, ref_data = NULL) { # if a reference period should be used, same scenario, same model if (!is.na(c$ref_period)) { ref <- data %>% - dplyr::filter(variable == c$variable, + filter(variable %in% c$variable, region %in% reg, - period == c$ref_period, + period %in% c$ref_period, scenario %in% sce) %>% mutate(ref_value = value, ref_period = period) %>% select(-c(period, value)) %>% # add columns which are not used in this category, fill NA mutate(ref_model = NA, ref_scenario = NA) - # if a reference scenario should be used, same period, same model + # if a reference scenario should be used, same period, same model } else if (!is.na(c$ref_scenario)) { ref <- data %>% - dplyr::filter(variable == c$variable, + filter(variable == c$variable, region %in% reg, period %in% per, scenario == c$ref_scenario) %>% diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITTED.csv similarity index 57% rename from inst/config/validationConfig_COMMITED.csv rename to inst/config/validationConfig_COMMITTED.csv index 46fcabe..c364404 100644 --- a/inst/config/validationConfig_COMMITED.csv +++ b/inst/config/validationConfig_COMMITTED.csv @@ -1,3 +1,3 @@ category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion -historic;relative;yes;Emi|CO2|*;;;;2010:2200;;;0.05;0.1;;;; -historic;relative;yes;FE;;;;2020:2050;;;0.05;0.1;;;; +historic;relative;yes;Emi|CO2|*;;;;2020;;;0.1;0.2;;;; +historic;relative;yes;FE|*;;;;2020;;;0.1;0.2;;;; diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd index abc4c5a..1faa928 100644 --- a/inst/markdown/COMMITEDvalidation.Rmd +++ b/inst/markdown/COMMITEDvalidation.Rmd @@ -68,6 +68,8 @@ df <- resolveDuplicates(df) df <- evaluateThresholds(df) +df <- appendTooltips(df) + ``` ## Heatmaps by Category diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index f3c29b7..7c03833 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -21,7 +21,6 @@ library(dplyr) library(ggplot2) library(ggthemes) library(plotly) -library(kableExtra) knitr::opts_chunk$set( echo = FALSE, @@ -58,11 +57,11 @@ colors <- c(green = "#008450", red = "#B81D13", grey = "#808080") dt <- dplyr::count(df, critical, check) -kbl(dt) %>% - kable_styling(bootstrap_options = c("striped", "hover"), - full_width = F) %>% - column_spec(2, color = colors[dt$check], - background = colors[dt$check]) +# kbl(dt) %>% +# kable_styling(bootstrap_options = c("striped", "hover"), +# full_width = F) %>% +# column_spec(2, color = colors[dt$check], +# background = colors[dt$check]) ``` ## Heatmaps by Category From 8c0c68df623ebcafad9598b4e59389e3a3be5166 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Fri, 8 Mar 2024 13:48:57 +0100 Subject: [PATCH 11/18] support model comparison --- R/appendTooltips.R | 7 +- R/combineData.R | 99 ++++++---- R/evaluateThresholds.R | 16 +- R/importFunctions.R | 3 + R/resolveDuplicates.R | 24 ++- R/validationHeatmap.R | 6 +- inst/config/validationConfig_NGFS.csv | 8 + ...validation.Rmd => COMMITTEDvalidation.Rmd} | 1 - inst/markdown/NGFSvalidation.Rmd | 175 ++++++++++++++++++ 9 files changed, 284 insertions(+), 55 deletions(-) create mode 100644 inst/config/validationConfig_NGFS.csv rename inst/markdown/{COMMITEDvalidation.Rmd => COMMITTEDvalidation.Rmd} (99%) create mode 100644 inst/markdown/NGFSvalidation.Rmd diff --git a/R/appendTooltips.R b/R/appendTooltips.R index e9d3649..974d4f9 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -35,13 +35,16 @@ appendTooltips <- function(df) { df[df$category == "scenario" & df$metric == "relative", ] %>% mutate(text = paste0(region, "\n", period, "\n", + ifelse(!is.na(ref_model), paste("Model:", model, "\n")), "Value: ", round(value, 2), "\n", "Ref ", ifelse(!is.na(ref_period), paste("Period:", ref_period), - paste("Scenario:", ref_scenario) + ifelse(!is.na(ref_scenario), + paste("Scenario:", ref_scenario), + paste("Model:", ref_model)) ), "\n", "Ref Value: ", round(ref_value, 2), "\n", - "Rel Deviation: ", round(check_value)*100, "% \n", + "Rel Deviation: ", round(check_value, 2)*100, "% \n", "Thresholds (yel/red): \n", "Min: ", min_yel*100, "% / ", min_red*100,"% \n", "Max: ", max_yel*100, "% / ", max_red*100, "%") diff --git a/R/combineData.R b/R/combineData.R index b6bdd88..2b5c8d0 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -8,19 +8,23 @@ combineData <- function(data, cfg_row, ref_data = NULL) { c <- cfg_row # full dimensions and important slices + all_mod <- unique(data$model) + all_sce <- unique(data$scenario) all_reg <- unique(data$region) all_per <- unique(data$period) # not a factor, convert? all_per <- all_per[all_per <= 2100] ref_per <- c(2005, 2010, 2015, 2020) - all_sce <- unique(data$scenario) - # create filters - # check whether regions, periods, scenarios are specified - # TODO: add model - reg <- if (is.na(c$region)) all_reg else - strsplit(c$region, split = ", |,")[[1]] + + # create filters #### + # check whether regions, periods, scenarios are specified, else use all + mod <- if (is.na(c$model)) all_mod else + strsplit(c$model, split = ", |, ")[[1]] sce <- if (is.na(c$scenario)) all_sce else strsplit(c$scenario, split = ", |, ")[[1]] + reg <- if (is.na(c$region)) all_reg else + strsplit(c$region, split = ", |,")[[1]] + # empty "period" field means different years for historic or scenario category if (c$category == "historic") { @@ -31,23 +35,26 @@ combineData <- function(data, cfg_row, ref_data = NULL) { strsplit(as.character(c$period), split = ", |,")[[1]] } - # filter scenario data for row in cfg + # apply filters #### + # filter scenario data according to each row in cfg d <- data %>% - filter(variable == c$variable, - region %in% reg, - period %in% per, - scenario %in% sce) + filter(variable %in% c$variable, # alternative approach for multiple variables could be used here + model %in% mod, + scenario %in% sce, + region %in% reg, + period %in% per) - # attach respective cfg information to data slice + # attach cfg information to data slice, which is independent of category d <- d %>% - mutate(min_red = c$min_red, # could be set to NA as it is not used - min_yel = c$min_yel, # could be set to NA as it is not used - max_yel = c$max_yel, - max_red = c$max_red, + mutate(min_red = c$min_red, + min_yel = c$min_yel, + max_yel = c$max_yel, + max_red = c$max_red, category = c$category, metric = c$metric, critical = c$critical) + # historic #### # depending on category: filter and attach reference values if they are needed if (c$category == "historic") { # historic data for relevant variable and dimensions (all sources) @@ -58,7 +65,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) { mutate(ref_value = value, ref_model = model) %>% select(-c("scenario", "unit", "value", "model")) - # TODO: test here if ref_model exists and has data to compare to + # test whether ref_model exists and has data to compare to if (nrow(h) == 0) { cat(paste0("No reference data for variable ", c$variable, " found.\n")) df <- data.frame() @@ -85,44 +92,64 @@ combineData <- function(data, cfg_row, ref_data = NULL) { df <- df %>% mutate(ref_period = NA, ref_scenario = NA) } - # filter and attach reference values if they are needed; scenario data + # scenario #### + # filter and attach reference values if they are needed; scenario data } else if (c$category == "scenario") { # no reference values needed for these metrics, fill NA if (c$metric %in% c("absolute", "growthrate")) { df <- d %>% - mutate(ref_value = NA, - ref_model = NA, - ref_period = NA, - ref_scenario = NA) + mutate(ref_value = NA, + ref_model = NA, + ref_scenario = NA, + ref_period = NA) + - # get reference values for these metrics + # get reference values for these metrics + # TODO: support choosing ref_period AND model/scenario? } else if (c$metric %in% c("relative", "difference")) { - # TODO: support choosing ref_period AND model/scenario? - # if a reference period should be used, same scenario, same model - if (!is.na(c$ref_period)) { + # if a reference model should be used, same scenario, same period + if (!is.na(c$model)) { ref <- data %>% filter(variable %in% c$variable, - region %in% reg, - period %in% c$ref_period, - scenario %in% sce) %>% - mutate(ref_value = value, ref_period = period) %>% - select(-c(period, value)) %>% + model %in% c$ref_model, # expects exactly one model for now + scenario %in% sce, + region %in% reg, + period %in% per) %>% + + mutate(ref_value = value, ref_model = model) %>% + select(-c(model, value)) %>% # add columns which are not used in this category, fill NA - mutate(ref_model = NA, ref_scenario = NA) + mutate(ref_scenario = NA, ref_period = NA) # if a reference scenario should be used, same period, same model } else if (!is.na(c$ref_scenario)) { ref <- data %>% - filter(variable == c$variable, - region %in% reg, - period %in% per, - scenario == c$ref_scenario) %>% + filter(variable %in% c$variable, + model %in% mod, + scenario %in% c$ref_scenario, # expects exactly one scenario + region %in% reg, + period %in% per) %>% + mutate(ref_value = value, ref_scenario = scenario) %>% select(-c(scenario, value)) %>% # add columns which are not used in this category, fill NA mutate(ref_model = NA, ref_period = NA) + + # if a reference period should be used, same scenario, same model + } else if (!is.na(c$ref_period)) { + ref <- data %>% + filter(variable %in% c$variable, + model %in% mod, + scenario %in% sce, + region %in% reg, + period %in% c$ref_period) %>% # expects exactly one period + + mutate(ref_value = value, ref_period = period) %>% + select(-c(period, value)) %>% + # add columns which are not used in this category, fill NA + mutate(ref_model = NA, ref_scenario = NA) } df <- merge(d, ref) diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R index 3cd1bfd..131b816 100644 --- a/R/evaluateThresholds.R +++ b/R/evaluateThresholds.R @@ -9,7 +9,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { # performance improvements) # calculate values that will be compared to thresholds - # historic - relative + # historic - relative #### his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>% mutate(check_value = ifelse( is.na(ref_value), @@ -19,12 +19,12 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { ) ) - # historic - difference + # historic - difference #### his_dif <- df[df$category == "historic" & df$metric == "difference", ] %>% # absolute difference to reference mutate(check_value = abs(value - ref_value)) - # scenario - relative + # scenario - relative #### sce_rel <- df[df$category == "scenario" & df$metric == "relative", ] %>% mutate(check_value = ifelse( is.na(ref_value), @@ -34,7 +34,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { ) ) - # scenario - difference + # scenario - difference #### sce_dif <- df[df$category == "scenario" & df$metric == "difference", ] %>% mutate(check_value = ifelse( is.na(ref_value), @@ -44,11 +44,11 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { ) ) - # scenario - absolute + # scenario - absolute #### sce_abs <- df[df$category == "scenario" & df$metric == "absolute", ] %>% mutate(check_value = value) - # scenario - growthrate + # scenario - growthrate #### # calculate average growth rate of the last 5 years between 2010 and 2060 # TODO: in case of a need to look further than 2060, split df and add # calculations for 10-year step periods @@ -73,7 +73,9 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { df <- do.call("rbind", list(his_rel, his_dif, sce_rel, sce_dif, sce_abs, sce_gro)) + # evaluation #### # perform comparison to thresholds for whole data.frame at once + # TODO: not as robust as previously thought. Partially fails if only max_red is given df <- df %>% mutate(check = ifelse(is.na(check_value), "grey", @@ -92,7 +94,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) { ) ) - # after evaluation, "Inf" can be removed again + # after evaluation, "Inf" can be removed if (cleanInf) df[df == "Inf" | df == "-Inf"] <- NA return(df) diff --git a/R/importFunctions.R b/R/importFunctions.R index 5b60369..bb91826 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -18,6 +18,9 @@ importScenarioData <- function(scenarioPath) { new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1)) data$region <- factor(data$region, levels = new_order) + # remove rows without values + data <- data[!is.na(data$value),] + return(data) } diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R index 0d46c63..e82202e 100644 --- a/R/resolveDuplicates.R +++ b/R/resolveDuplicates.R @@ -7,18 +7,28 @@ resolveDuplicates <- function(df) { # check for duplicates # these are the duplicates that are from the lower rows and should be kept - duplicates <- df[duplicated(df[c("model", "scenario", "variable", - "region", "period", "category", "metric")]), ] - - # here all instances of the data that was duplicated is removed - no_dupl <- dplyr::anti_join(df, duplicates, by = c("model", "scenario", "variable", - "region", "period", "category", "metric")) + duplicates <- df[duplicated(df[c("model", + "scenario", + "variable", + "region", + "period", + "category", + "metric")]), ] + + # here, all instances of the data that was duplicated are removed + no_dupl <- dplyr::anti_join(df, duplicates, by = c("model", + "scenario", + "variable", + "region", + "period", + "category", + "metric")) # reattach one instance to get a complete data set without duplicates df <- rbind(no_dupl, duplicates) # different approach: allows partial overwriting - # chosen to not use this anymore, less robust + # chosen to not use this anymore, less robust and unclear if more intuitive # these are missing the duplicated entries that were in the lower rows # no_dupl <- dplyr::anti_join(df, duplicates_all) diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R index b22eab6..58d854e 100644 --- a/R/validationHeatmap.R +++ b/R/validationHeatmap.R @@ -5,7 +5,7 @@ #' @export # takes the output of "validateScenarios()" and plots heatmaps per variable -validationHeatmap <- function(df, var, cat, met, interactive = T) { +validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels = T) { # possible extension: when giving multiple vars, plot as facets in same row @@ -38,7 +38,8 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) + geom_tile(color="white", linewidth=0.0) + scale_fill_manual(values = colors, breaks = colors) + - facet_grid(scenario~.) + facet_grid(scenario~model) + # make it beautiful # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2 p <- p + labs(x = NULL, y = NULL, title = paste0(var, @@ -47,6 +48,7 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) { p <- p + theme_tufte(base_family = "Helvetica") # creates warnings p <- p + theme(axis.ticks = element_blank()) p <- p + theme(axis.text = element_text(size = 7)) + p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) p <- p + coord_equal() p <- p + theme(legend.position = "none") diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv new file mode 100644 index 0000000..c346768 --- /dev/null +++ b/inst/config/validationConfig_NGFS.csv @@ -0,0 +1,8 @@ +category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion +scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;;2020;;;0.5;1;GCAM 6.0 NGFS;;; +scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;World;2020;;;0.3;0.5;GCAM 6.0 NGFS;;; +scenario;relative;yes;Final Energy;GCAM 6.0 NGFS;;;2030:2050;;;0.5;1;REMIND-MAgPIE 3.3-4.7;;; +scenario;absolute;yes;Emissions|CO2;;o_1p5c;;2050;;;0;100;;;; +scenario;absolute;yes;Emissions|CO2;;o_2c;;2050;;;1000;2000;;;; +scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_1p5c;;2100;;;1.5;1.55;;;; +scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_2c;;2100;;;2;2.1;;;; diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd similarity index 99% rename from inst/markdown/COMMITEDvalidation.Rmd rename to inst/markdown/COMMITTEDvalidation.Rmd index 1faa928..cba8706 100644 --- a/inst/markdown/COMMITEDvalidation.Rmd +++ b/inst/markdown/COMMITTEDvalidation.Rmd @@ -20,7 +20,6 @@ library(knitr) library(ggplot2) library(ggthemes) library(plotly) -library(kableExtra) knitr::opts_chunk$set( echo = FALSE, diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd new file mode 100644 index 0000000..630e441 --- /dev/null +++ b/inst/markdown/NGFSvalidation.Rmd @@ -0,0 +1,175 @@ +--- +title: "piamValidation: NGFS" +output: + html_document: + toc: true + toc_float: true + code_folding: hide +params: + mif: "" + ref: "" + cfg: "" + warning: false + message: false + figWidth: 8 +--- + +```{r include=FALSE} +devtools::load_all(".") +library(knitr) +library(ggplot2) +library(ggthemes) +library(plotly) + +knitr::opts_chunk$set( + echo = FALSE, + error = TRUE, + #fig.width = params$figWidth, + message = params$message, + warning = params$warning +) +``` + +## Import and Prepare Data + + +```{r} +# Data Preparation +path <- "C:/Users/pascalwe/Data/NGFS/phase5/" +snapshot <- "1709886115166-snapshot-24/snapshot_all_regions.csv" +configName <- "validationConfig_NGFS.csv" + +scenarioPath <- paste0(path, snapshot) +data <- importScenarioData(scenarioPath) + +cfg <- getConfig(configName) %>% + cleanConfig() + +cfg <- expandPeriods(cfg, data) %>% + expandVariables(data) + +# combine data for each row of the config and bind together +df <- data.frame() +for (i in 1:nrow(cfg)) { + # TODO: hist should only be needed if category "historical" is in config + # validation generally should work without hist data + df_row <- combineData(data, cfg[i, ]) + df <- rbind(df, df_row) + cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) +} + +df <- resolveDuplicates(df) + +df <- evaluateThresholds(df) + +df <- appendTooltips(df) + +``` + +## Heatmaps by Category + +### Historic - Relative +Relative deviation to historical reference data. Absolute numbers, so no +difference is made between being above or below the reference value. +```{r} +c <- "historic" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Historic - Difference +Absolute difference to historical reference data +```{r} +c <- "historic" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Relative +Relative deviation to data point from either: + +- period (same scenario/model) +- scenario (same period/model) +- model (same period/scenario - Not supported yet) + +```{r} +c <- "scenario" +m <- "relative" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Difference +```{r} +c <- "scenario" +m <- "difference" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + +### Scenario - Absolute +```{r} +c <- "scenario" +m <- "absolute" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` + + +### Scenario - Growthrate +```{r} +c <- "scenario" +m <- "growthrate" +d <- df[df$category == c & df$metric == m, ] + +if (nrow(d) > 0) { + vars <- unique(d$variable) + plot_list <- htmltools::tagList() + for (i in 1:length(vars)) { + plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m) + } + plot_list +} +``` From c1be9d19c2dea05b64f6215f755c375dd8f2122e Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Fri, 8 Mar 2024 18:01:10 +0100 Subject: [PATCH 12/18] exclude green checks, stress testing --- R/validationHeatmap.R | 2 +- inst/config/validationConfig_NGFS.csv | 17 ++++++++++------- inst/markdown/COMMITTEDvalidation.Rmd | 1 + inst/markdown/NGFSvalidation.Rmd | 9 +++++++++ inst/markdown/REMINDvalidation.Rmd | 1 + 5 files changed, 22 insertions(+), 8 deletions(-) diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R index 58d854e..1832909 100644 --- a/R/validationHeatmap.R +++ b/R/validationHeatmap.R @@ -38,7 +38,7 @@ validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) + geom_tile(color="white", linewidth=0.0) + scale_fill_manual(values = colors, breaks = colors) + - facet_grid(scenario~model) + facet_grid(model~scenario) # make it beautiful # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2 diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv index c346768..59cd23b 100644 --- a/inst/config/validationConfig_NGFS.csv +++ b/inst/config/validationConfig_NGFS.csv @@ -1,8 +1,11 @@ category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion -scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;;2020;;;0.5;1;GCAM 6.0 NGFS;;; -scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;World;2020;;;0.3;0.5;GCAM 6.0 NGFS;;; -scenario;relative;yes;Final Energy;GCAM 6.0 NGFS;;;2030:2050;;;0.5;1;REMIND-MAgPIE 3.3-4.7;;; -scenario;absolute;yes;Emissions|CO2;;o_1p5c;;2050;;;0;100;;;; -scenario;absolute;yes;Emissions|CO2;;o_2c;;2050;;;1000;2000;;;; -scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_1p5c;;2100;;;1.5;1.55;;;; -scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_2c;;2100;;;2;2.1;;;; +scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; +scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; +scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; +scenario;relative;yes;Primary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Primary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; +scenario;relative;yes;Carbon Sequestration|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Carbon Sequestration|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; diff --git a/inst/markdown/COMMITTEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd index cba8706..04802ee 100644 --- a/inst/markdown/COMMITTEDvalidation.Rmd +++ b/inst/markdown/COMMITTEDvalidation.Rmd @@ -1,5 +1,6 @@ --- title: "piamValidation: COMMITED" +date: "`r format(Sys.Date())`" output: html_document: toc: true diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd index 630e441..98bc2d8 100644 --- a/inst/markdown/NGFSvalidation.Rmd +++ b/inst/markdown/NGFSvalidation.Rmd @@ -1,5 +1,6 @@ --- title: "piamValidation: NGFS" +date: "`r format(Sys.Date())`" output: html_document: toc: true @@ -28,6 +29,10 @@ knitr::opts_chunk$set( message = params$message, warning = params$warning ) + +rmarkdown::render('my_markdown_report.Rmd', + output_file = paste('report.', Sys.Date(), + '.pdf', sep='')) ``` ## Import and Prepare Data @@ -64,6 +69,10 @@ df <- evaluateThresholds(df) df <- appendTooltips(df) +# remove all-green rows (all regions green) +df_noGreen <- df[df$check != "green", ] +df_noGreennoGrey <- df[df$check %in% c("red", "yellow"), ] +df <- df_noGreennoGrey ``` ## Heatmaps by Category diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index 7c03833..c66c955 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -1,5 +1,6 @@ --- title: "piamValidation: REMIND" +date: "`r format(Sys.Date())`" output: html_document: toc: true From e1180b435d11c4bbedcc66d3efded5004732d484 Mon Sep 17 00:00:00 2001 From: orichters Date: Fri, 15 Mar 2024 12:33:07 +0100 Subject: [PATCH 13/18] allow to match ** or * from config, add tests --- R/importFunctions.R | 26 +++++++++------- tests/testthat/test-dummy.R | 1 - tests/testthat/test-expandVariables.R | 43 +++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 11 deletions(-) delete mode 100644 tests/testthat/test-dummy.R create mode 100644 tests/testthat/test-expandVariables.R diff --git a/R/importFunctions.R b/R/importFunctions.R index bb91826..c31fab2 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -48,10 +48,10 @@ importReferenceData <- function(referencePath) { # - model, scenario, region, period # ... getConfig <- function(configName) { - # TODO: check if config is available, otherwise is it a local file? if yes load that one path <- system.file(paste0("config/", configName), package = "piamValidation") + if (! file.exists(path) && file.exists(configName)) path <- configName cfg <- read.csv2(path, na.strings = "") - cat(paste0("loading config file: ", configName, "\n")) + message("loading config file: ", configName, "\n") return(cfg) } @@ -101,8 +101,7 @@ expandPeriods <- function(cfg, data) { # takes config entries specifying a set of variables via "*" and expands it so # that every variable corresponds to one row in cfg -# TODO: support "firstLevelOnly = TRUE" to not grab all sub-variables but just -# one level +# * matches everything until the next |, while ** matches including | expandVariables <- function(cfg, data) { # create the expanded config, starting with the not-to-expand rows, # then appending the rows with expanded variable names @@ -111,13 +110,20 @@ expandVariables <- function(cfg, data) { if (length(var_expand > 0)) { all_vars <- unique(data$variable) - for (i in 1:nrow(var_expand)) { + for (i in seq(nrow(var_expand))) { # prepare strings for grepping by adding escape characters and "." - var <- gsub("\\*", "\\.\\*", var_expand$variable[i]) - var <- gsub("\\|", "\\\\|", var) - selected_vars <- all_vars[grepl(var, all_vars)] - cat(paste0(var_expand$variable[i], " was expanded into ", - length(selected_vars), " sub-variables.\n")) + vartoexpand <- var_expand$variable[i] + # escape | + vargrep <- gsub("|", "\\|", vartoexpand, fixed = TRUE) + # convert * into "everything except |" + vargrep <- gsub("*", "[^\\|]*", vargrep, fixed = TRUE) + # convert what was ** back to .* + vargrep <- gsub("[^\\|]*[^\\|]*", ".*", vargrep, fixed = TRUE) + # make sure you match the full variable, not just a part + vargrep <- paste0("^", vargrep, "$") + selected_vars <- all_vars[grepl(vargrep, all_vars)] + message(var_expand$variable[i], " was expanded into ", + length(selected_vars), " sub-variables.") # take the original row for the current set of variables and repeat it # once for each sub-variable, overwrite with sub-variable names diff --git a/tests/testthat/test-dummy.R b/tests/testthat/test-dummy.R deleted file mode 100644 index 909f17c..0000000 --- a/tests/testthat/test-dummy.R +++ /dev/null @@ -1 +0,0 @@ -skip("dummy test") diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R new file mode 100644 index 0000000..d65f1ec --- /dev/null +++ b/tests/testthat/test-expandVariables.R @@ -0,0 +1,43 @@ +test_that("expandVariables works", { + varinconfig <- function(data, varentry, expectedvars) { + configfile <- file.path(tempdir(), "config.csv") + configheader <- "category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;" + configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;") + writeLines(c(configheader, configstring), configfile) + cfg <- expandVariables(getConfig(configfile), data) + missing <- setdiff(expectedvars, cfg$variable) + if (length(missing) > 0) { + warning("For ", varentry, ", the following variables are expected but missing:\n", + paste0(missing, collapse = ", ")) + } + expect_true(length(missing) == 0) + toomuch <- setdiff(cfg$variable, expectedvars) + if (length(toomuch) > 0) { + warning("For ", varentry, ", the following variables were not expected but matched:\n", + paste0(toomuch, collapse = ", ")) + } + expect_true(length(toomuch) == 0) + } + + data <- list() + depth1 <- c("Emi|CO", "Emi|BC", "Emi|GHG") + depth2CO <- c("Emi|CO|Trans", "Emi|CO|Build", "Emi|CO|Indus") + depth2BC <- c("Emi|BC|Trans", "Emi|BC|Build", "Emi|BC|Indus") + depth3CO <- c("Emi|CO|Trans|Gas", "Emi|CO|Trans|Oil") + whatever <- c("Emi", "Final Energy", "CO|Trans") + data$variable <- c(depth1, depth2CO, depth2BC, depth3CO, whatever) + + # first argument is what is stated in the config, second what should be matched + varinconfig(data, "Final Energy", "Final Energy") + varinconfig(data, "Em*", "Emi") + varinconfig(data, "Emi", "Emi") + varinconfig(data, "Emi|*", depth1) + varinconfig(data, "Emi|*|*", c(depth2CO, depth2BC)) + varinconfig(data, "Emi|*|*|*", depth3CO) + varinconfig(data, "*|CO|*", depth2CO) + varinconfig(data, "CO|*", "CO|Trans") + varinconfig(data, "Final Energy|**", NULL) + varinconfig(data, "Emi|**", c(depth1, depth2CO, depth2BC, depth3CO)) + varinconfig(data, "Em**", c("Emi", depth1, depth2CO, depth2BC, depth3CO)) +}) + From 51b99b7d8233a5ed570074abf3c3092c38ae08fa Mon Sep 17 00:00:00 2001 From: orichters Date: Fri, 15 Mar 2024 16:38:29 +0100 Subject: [PATCH 14/18] support xlsx config files, fix TODO --- NAMESPACE | 3 +++ R/importFunctions.R | 17 +++++++++++------ tests/testthat/test-expandVariables.R | 6 +++--- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index deb06a6..9bb1797 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,3 +11,6 @@ importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(ggthemes,theme_tufte) importFrom(plotly,ggplotly) +importFrom(readxl,excel_sheets) +importFrom(readxl,read_excel) +importFrom(tibble,as_tibble) diff --git a/R/importFunctions.R b/R/importFunctions.R index c31fab2..0c5bb39 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -1,5 +1,6 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% - +#' @importFrom tibble as_tibble +#' @importFrom readxl read_excel excel_sheets # scenarioPath: one or multiple paths to .mif or .csv file(s) containing # scenario data in IAM format importScenarioData <- function(scenarioPath) { @@ -9,13 +10,12 @@ importScenarioData <- function(scenarioPath) { data <- readRDS("mif.rds") } else { cat("importing scenario data from file(s)\n") - data <- remind2::deletePlus(quitte::as.quitte(scenarioPath)) + data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE)) saveRDS(data, "mif.rds") } - # change ordering of factors, put last element ("World") first - # TODO: will not work if World is not last, put somewhere else? - new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1)) + # change ordering of factors, global elements first + new_order <- unique(intersect(c("World", "GLO", levels(data$region)), levels(data$region))) data$region <- factor(data$region, levels = new_order) # remove rows without values @@ -50,7 +50,12 @@ importReferenceData <- function(referencePath) { getConfig <- function(configName) { path <- system.file(paste0("config/", configName), package = "piamValidation") if (! file.exists(path) && file.exists(configName)) path <- configName - cfg <- read.csv2(path, na.strings = "") + if (grepl("\\.xlsx$", path)) { + cfg <- read_excel(path = path, sheet = if ("config" %in% excel_sheets(path)) "config" else 1) + cfg <- filter(cfg, ! grepl("^#", cfg[[1]])) + } else { + cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#")) + } message("loading config file: ", configName, "\n") return(cfg) } diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R index d65f1ec..bdbfac0 100644 --- a/tests/testthat/test-expandVariables.R +++ b/tests/testthat/test-expandVariables.R @@ -1,7 +1,8 @@ test_that("expandVariables works", { varinconfig <- function(data, varentry, expectedvars) { configfile <- file.path(tempdir(), "config.csv") - configheader <- "category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;" + configheader <- paste0("category;metric;critical;variable;model;scenario;region;period;", + "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;") configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;") writeLines(c(configheader, configstring), configfile) cfg <- expandVariables(getConfig(configfile), data) @@ -39,5 +40,4 @@ test_that("expandVariables works", { varinconfig(data, "Final Energy|**", NULL) varinconfig(data, "Emi|**", c(depth1, depth2CO, depth2BC, depth3CO)) varinconfig(data, "Em**", c("Emi", depth1, depth2CO, depth2BC, depth3CO)) -}) - +}) From aca34c1d50e921c56e9a0deb83d4530b144f2d1d Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 20 Mar 2024 11:15:36 +0100 Subject: [PATCH 15/18] add passValidation function --- NAMESPACE | 1 + R/outputFunctions.R | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 R/outputFunctions.R diff --git a/NAMESPACE b/NAMESPACE index 9bb1797..8498520 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export(validateScenarios) export(validationHeatmap) +export(validationPass) import(ggplot2) importFrom(dplyr,"%>%") importFrom(dplyr,filter) diff --git a/R/outputFunctions.R b/R/outputFunctions.R new file mode 100644 index 0000000..8c04e35 --- /dev/null +++ b/R/outputFunctions.R @@ -0,0 +1,18 @@ +#' @importFrom dplyr filter select mutate summarise group_by %>% +# yellowFail: if set to TRUE a yellow check result of a critical variable will +# lead to the scenario not passing as validated +#' @export +validationPass <- function(data, yellowFail = FALSE) { + + fail_color <- ifelse(yellowFail, c("red", "yellow"), "red") + + # see if any critical variables have failed per scenario and model + pass <- data %>% + filter(check %in% fail_color, critical == "yes") %>% + group_by(model, scenario) %>% + summarize(pass = n() == 0, + n_fail = n() + ) + + return(pass) +} From 87913bade0db3b2c782b86bcc180bc4ce744a0ad Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 20 Mar 2024 12:12:05 +0100 Subject: [PATCH 16/18] clean up --- .gitignore | 11 +++-- R/appendTooltips.R | 4 +- R/combineData.R | 2 +- R/evaluateThresholds.R | 10 ++--- R/importFunctions.R | 58 ++++++++++----------------- R/resolveDuplicates.R | 24 +---------- R/validateScenarios.R | 11 ++--- inst/config/validationConfig_NGFS.csv | 2 +- inst/markdown/COMMITTEDvalidation.Rmd | 27 +------------ inst/markdown/NGFSvalidation.Rmd | 33 +++------------ inst/markdown/REMINDvalidation.Rmd | 2 +- tests/testthat/test-expandVariables.R | 18 ++++++--- 12 files changed, 63 insertions(+), 139 deletions(-) diff --git a/.gitignore b/.gitignore index 958b5bf..4c015d7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -.rds -.html +*.Rproj.user +*.Rhistory +*.RData +*.Ruserdata +*.html diff --git a/R/appendTooltips.R b/R/appendTooltips.R index 974d4f9..6297143 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -35,7 +35,9 @@ appendTooltips <- function(df) { df[df$category == "scenario" & df$metric == "relative", ] %>% mutate(text = paste0(region, "\n", period, "\n", - ifelse(!is.na(ref_model), paste("Model:", model, "\n")), + ifelse(!is.na(ref_model), + paste("Model:", model, "\n"), + ""), "Value: ", round(value, 2), "\n", "Ref ", ifelse(!is.na(ref_period), paste("Period:", ref_period), diff --git a/R/combineData.R b/R/combineData.R index 2b5c8d0..2060a95 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -78,7 +78,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) { ) } - # in case of multiple sources, calculate mean (using all available sources) + # in case of multiple sources, use mean (of all available sources) if (length(unique(h$ref_model)) > 1) { h_mean <- group_by(h, period, region) %>% summarise(ref_value = mean(ref_value, na.rm = TRUE)) diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R index 131b816..5550c01 100644 --- a/R/evaluateThresholds.R +++ b/R/evaluateThresholds.R @@ -1,14 +1,12 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -# cleanInf = TRUE: choose to replace "Inf" and "-Inf" which were introduced -# for ease of calculations with "-" +# cleanInf = TRUE: replace "Inf" and "-Inf" which were introduced +# for ease of calculations with "-" evaluateThresholds <- function(df, cleanInf = TRUE) { - # perform evaluation for each category and metric separately - # (shortcuts are possible but will decrease readability of the code for minor - # performance improvements) + # first calculate values that will be compared to thresholds for each category + # ("check_value") and metric separately, then perform evaluation for all together - # calculate values that will be compared to thresholds # historic - relative #### his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>% mutate(check_value = ifelse( diff --git a/R/importFunctions.R b/R/importFunctions.R index 0c5bb39..bd89264 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -1,57 +1,44 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% #' @importFrom tibble as_tibble #' @importFrom readxl read_excel excel_sheets + # scenarioPath: one or multiple paths to .mif or .csv file(s) containing # scenario data in IAM format importScenarioData <- function(scenarioPath) { - # for dev purposes use cache - if (file.exists("mif.rds")) { - cat("loading scenario data from cache\n") - data <- readRDS("mif.rds") - } else { - cat("importing scenario data from file(s)\n") - data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE)) - saveRDS(data, "mif.rds") - } + data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE)) # change ordering of factors, global elements first - new_order <- unique(intersect(c("World", "GLO", levels(data$region)), levels(data$region))) + new_order <- unique(intersect(c("World", "GLO", + levels(data$region)), levels(data$region))) data$region <- factor(data$region, levels = new_order) # remove rows without values - data <- data[!is.na(data$value),] + data <- data[!is.na(data$value), ] return(data) } +# import historical or other reference data for comparison importReferenceData <- function(referencePath) { - # for dev purposes use cache - if (file.exists("hist.rds")) { - cat("loading reference data from cache\n") - hist <- readRDS("hist.rds") - } else { - cat("importing reference data from file\n") - hist <- quitte::as.quitte(referencePath) - hist <- dplyr::filter(hist, period >= 1990) - saveRDS(hist, "hist.rds") - } + ref <- quitte::as.quitte(referencePath) - # remove all historical data before 1990, (only historic?) - hist <- dplyr::filter(hist, period >= 1990) + # remove all historical data before 1990 + ref <- dplyr::filter(ref, period >= 1990) - return(hist) + return(ref) } -# required columns: -# - category: "historic" or "scenario" -# - can be empty, contain one element or multiple: -# - model, scenario, region, period -# ... +# see README for rules on how to fill the config getConfig <- function(configName) { path <- system.file(paste0("config/", configName), package = "piamValidation") if (! file.exists(path) && file.exists(configName)) path <- configName + + # config can be .xlsx or .csv, use "config" sheet in .xlsx if available if (grepl("\\.xlsx$", path)) { - cfg <- read_excel(path = path, sheet = if ("config" %in% excel_sheets(path)) "config" else 1) + cfg <- read_excel( + path = path, + sheet = if ("config" %in% excel_sheets(path)) "config" else 1 + ) cfg <- filter(cfg, ! grepl("^#", cfg[[1]])) } else { cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#")) @@ -60,17 +47,15 @@ getConfig <- function(configName) { return(cfg) } - -cleanConfig <- function(cfg) { - # fill empty threshold columns with Infinity for easier evaluation +# fill empty threshold columns with Infinity for easier evaluation +fillInf <- function(cfg) { cfg <- cfg %>% mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)), min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)), max_yel = as.numeric(ifelse(is.na(max_yel), Inf, max_yel)), max_red = as.numeric(ifelse(is.na(max_red), Inf, max_red)) - ) + ) - # insert tests here to check if there are forbidden fields in config return(cfg) } @@ -82,6 +67,7 @@ expandPeriods <- function(cfg, data) { # iterate through rows with ":" for (i in per_expand_idx) { + # check format and compatibility of data and ref periods if (nchar(cfg[i, "period"]) != 9) { stop("Invalid range detected. Make sure to enter years as YYYY:YYYY.\n") } else { @@ -118,7 +104,7 @@ expandVariables <- function(cfg, data) { for (i in seq(nrow(var_expand))) { # prepare strings for grepping by adding escape characters and "." vartoexpand <- var_expand$variable[i] - # escape | + # escape "|" vargrep <- gsub("|", "\\|", vartoexpand, fixed = TRUE) # convert * into "everything except |" vargrep <- gsub("*", "[^\\|]*", vargrep, fixed = TRUE) diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R index e82202e..85dd163 100644 --- a/R/resolveDuplicates.R +++ b/R/resolveDuplicates.R @@ -1,11 +1,10 @@ -#' @importFrom dplyr filter select mutate summarise group_by %>% - +# check for duplicates that are a result of config definition overlaps +# only the latest occurrence of a data point is kept (lowest row of config) resolveDuplicates <- function(df) { # TODO: needs to be double checked for edge-cases # might be easier or safer to handle categories separately - # check for duplicates # these are the duplicates that are from the lower rows and should be kept duplicates <- df[duplicated(df[c("model", "scenario", @@ -27,25 +26,6 @@ resolveDuplicates <- function(df) { # reattach one instance to get a complete data set without duplicates df <- rbind(no_dupl, duplicates) - # different approach: allows partial overwriting - # chosen to not use this anymore, less robust and unclear if more intuitive - - # these are missing the duplicated entries that were in the lower rows - # no_dupl <- dplyr::anti_join(df, duplicates_all) - - # overwrite "earlier" values using "later" ones - # BEWARE: requires specific country to come after "all regions" - # d1 <- merge(no_dupl, duplicates_all, by=c("variable", "region", "period", - # "scenario", "unit", "model", "value", - # "ref_model", "ref_value","ref_scenario", - # "ref_period", "category", "metric", "critical"), - # all.x = T) %>% - # mutate(min_red = ifelse(is.na(min_red.y), min_red.x, min_red.y)) %>% - # mutate(min_yel = ifelse(is.na(min_yel.y), min_yel.x, min_yel.y)) %>% - # mutate(max_yel = ifelse(is.na(max_yel.y), max_yel.x, max_yel.y)) %>% - # mutate(max_red = ifelse(is.na(max_red.y), max_red.x, max_red.y)) %>% - # select(-c("min_red.x", "min_yel.x", "max_yel.x", "max_red.x", - # "min_red.y", "min_yel.y", "max_yel.y", "max_red.y")) return(df) } diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 7b59354..96c3702 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,10 +1,8 @@ #' @importFrom dplyr filter select mutate summarise group_by %>% -# bringing it all together -# keepNA +# performs the validation checks from a config on a scenario dataset #' @export -validateScenarios <- function(scenarioPath, configName, referencePath = NULL, - keepNA = TRUE) { +validateScenarios <- function(scenarioPath, configName, referencePath = NULL) { data <- importScenarioData(scenarioPath) @@ -12,7 +10,7 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL, cfg <- configName %>% getConfig() %>% - cleanConfig() %>% + fillInf() %>% expandPeriods(data) %>% expandVariables(data) @@ -22,10 +20,9 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL, for (i in 1:nrow(cfg)) { # TODO: hist should only be needed if category "historical" is in config # validation generally should work without hist data - # TODO: optimize performance df_row <- combineData(data, cfg[i, ], ref_data = hist) df <- rbind(df, df_row) - cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) + cat(paste0("Combined config row ", i, " of ", nrow(cfg), "\n")) } df <- resolveDuplicates(df) diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv index 59cd23b..3ff24a3 100644 --- a/inst/config/validationConfig_NGFS.csv +++ b/inst/config/validationConfig_NGFS.csv @@ -1,7 +1,7 @@ category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; -scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; +scenario;relative;yes;Final Energy|*|Chemicals|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;; scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;; diff --git a/inst/markdown/COMMITTEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd index 04802ee..e75ecb3 100644 --- a/inst/markdown/COMMITTEDvalidation.Rmd +++ b/inst/markdown/COMMITTEDvalidation.Rmd @@ -34,7 +34,7 @@ knitr::opts_chunk$set( ## Import and Prepare Data -```{r} +```{r, message = FALSE} # Data Preparation path <- "C:/Users/pascalwe/Data/vs/" mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), @@ -44,30 +44,7 @@ hmif <- paste0(path, "historical.mif") configName <- "validationConfig_COMMITED.csv" -data <- importScenarioData(scenarioPath) - -hist <- importReferenceData(referencePath) - -cfg <- getConfig(configName) %>% - cleanConfig() - -cfg <- expandPeriods(cfg, data) %>% - expandVariables(data) - -# combine data for each row of the config and bind together -df <- data.frame() -for (i in 1:nrow(cfg)) { - # TODO: hist should only be needed if category "historical" is in config - # validation generally should work without hist data - df_row <- combineData(data, cfg[i, ], ref_data = hist) - df <- rbind(df, df_row) - cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) -} - -df <- resolveDuplicates(df) - -df <- evaluateThresholds(df) - +df <- validateScenarios(mifs, config, referencePath = hmif) df <- appendTooltips(df) ``` diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd index 98bc2d8..9e53ca3 100644 --- a/inst/markdown/NGFSvalidation.Rmd +++ b/inst/markdown/NGFSvalidation.Rmd @@ -30,49 +30,28 @@ knitr::opts_chunk$set( warning = params$warning ) -rmarkdown::render('my_markdown_report.Rmd', - output_file = paste('report.', Sys.Date(), - '.pdf', sep='')) +# rmarkdown::render('my_markdown_report.Rmd', +# output_file = paste('report.', Sys.Date(), +# '.pdf', sep='')) ``` ## Import and Prepare Data -```{r} +```{r, message = FALSE} # Data Preparation path <- "C:/Users/pascalwe/Data/NGFS/phase5/" snapshot <- "1709886115166-snapshot-24/snapshot_all_regions.csv" -configName <- "validationConfig_NGFS.csv" +config <- "validationConfig_NGFS.csv" scenarioPath <- paste0(path, snapshot) -data <- importScenarioData(scenarioPath) - -cfg <- getConfig(configName) %>% - cleanConfig() - -cfg <- expandPeriods(cfg, data) %>% - expandVariables(data) - -# combine data for each row of the config and bind together -df <- data.frame() -for (i in 1:nrow(cfg)) { - # TODO: hist should only be needed if category "historical" is in config - # validation generally should work without hist data - df_row <- combineData(data, cfg[i, ]) - df <- rbind(df, df_row) - cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n")) -} - -df <- resolveDuplicates(df) - -df <- evaluateThresholds(df) +df <- validateScenarios(scenarioPath, config) df <- appendTooltips(df) # remove all-green rows (all regions green) df_noGreen <- df[df$check != "green", ] df_noGreennoGrey <- df[df$check %in% c("red", "yellow"), ] -df <- df_noGreennoGrey ``` ## Heatmaps by Category diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd index c66c955..1a94d1d 100644 --- a/inst/markdown/REMINDvalidation.Rmd +++ b/inst/markdown/REMINDvalidation.Rmd @@ -35,7 +35,7 @@ knitr::opts_chunk$set( ## Import and Prepare Data -```{r} +```{r, message = FALSE} # Data Preparation path <- "C:/Users/pascalwe/Data/vs/" mifs <- c(paste0(path, "REMIND_generic_Bal.mif"), diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R index bdbfac0..71c85e6 100644 --- a/tests/testthat/test-expandVariables.R +++ b/tests/testthat/test-expandVariables.R @@ -1,20 +1,26 @@ test_that("expandVariables works", { varinconfig <- function(data, varentry, expectedvars) { configfile <- file.path(tempdir(), "config.csv") - configheader <- paste0("category;metric;critical;variable;model;scenario;region;period;", - "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;") - configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;") + configheader <- paste0( + "category;metric;critical;variable;model;scenario;region;period;", + "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;") + + configstring <- paste0("scenario;relative;yes;", + varentry, + ";m;s;r;2005;;;10;100;refmodel1;;;") writeLines(c(configheader, configstring), configfile) cfg <- expandVariables(getConfig(configfile), data) missing <- setdiff(expectedvars, cfg$variable) if (length(missing) > 0) { - warning("For ", varentry, ", the following variables are expected but missing:\n", + warning("For ", varentry, + ", the following variables are expected but missing:\n", paste0(missing, collapse = ", ")) } expect_true(length(missing) == 0) toomuch <- setdiff(cfg$variable, expectedvars) if (length(toomuch) > 0) { - warning("For ", varentry, ", the following variables were not expected but matched:\n", + warning("For ", varentry, + ", the following variables were not expected but matched:\n", paste0(toomuch, collapse = ", ")) } expect_true(length(toomuch) == 0) @@ -27,7 +33,7 @@ test_that("expandVariables works", { depth3CO <- c("Emi|CO|Trans|Gas", "Emi|CO|Trans|Oil") whatever <- c("Emi", "Final Energy", "CO|Trans") data$variable <- c(depth1, depth2CO, depth2BC, depth3CO, whatever) - + # first argument is what is stated in the config, second what should be matched varinconfig(data, "Final Energy", "Final Energy") varinconfig(data, "Em*", "Emi") From 603a6d1a4be957ca2d28a24c59f64cb19c5fa4ac Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 20 Mar 2024 17:04:50 +0100 Subject: [PATCH 17/18] document and clean up for release --- .Rbuildignore | 4 ++++ .buildlibrary | 9 ++++++--- .github/workflows/check.yaml | 5 +++++ .gitignore | 1 + .lintr | 2 ++ .pre-commit-config.yaml | 2 +- CITATION.cff | 6 ++++-- DESCRIPTION | 11 +++++++---- NAMESPACE | 2 +- R/appendTooltips.R | 5 +++-- R/combineData.R | 2 +- R/importFunctions.R | 7 ++++--- R/outputFunctions.R | 18 ++++++++++-------- R/piamValidation-package.R | 7 +++++++ R/validateScenarios.R | 13 +++++++++---- R/validationHeatmap.R | 20 +++++++++++++++----- R/zzz.R | 3 --- README.md | 15 ++++----------- codecov.yml | 18 ++++++++++++++++++ man/piamValidation-package.Rd | 26 ++++++++++++++++++++++++++ man/validateScenarios.Rd | 19 +++++++++++++++++++ man/validationHeatmap.Rd | 25 +++++++++++++++++++++++++ man/validationPass.Rd | 17 +++++++++++++++++ tests/.lintr | 2 ++ 24 files changed, 191 insertions(+), 48 deletions(-) create mode 100644 .lintr create mode 100644 R/piamValidation-package.R delete mode 100644 R/zzz.R create mode 100644 codecov.yml create mode 100644 man/piamValidation-package.Rd create mode 100644 man/validateScenarios.Rd create mode 100644 man/validationHeatmap.Rd create mode 100644 man/validationPass.Rd create mode 100644 tests/.lintr diff --git a/.Rbuildignore b/.Rbuildignore index 298f71d..a318840 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,7 @@ ^.*CITATION.cff$ ^.*\.Rproj$ ^\.Rproj\.user$ +^.lintr$ +^tests/.lintr$ +^codecov\.yml$ +^\.github$ diff --git a/.buildlibrary b/.buildlibrary index 3d84775..ed81782 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,7 +1,10 @@ -ValidationKey: '39524' +ValidationKey: '198020' AutocreateReadme: yes AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' - 'Warning: namespace ''.*'' is not available and has been replaced' -AcceptedNotes: ~ -allowLinterWarnings: no +AcceptedNotes: +- no visible binding for global variable +- 'Found the following hidden files and directories:' +allowLinterWarnings: yes +enforceVersionUpdate: no diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index 870f216..46f518a 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -49,6 +49,11 @@ jobs: shell: Rscript {0} run: lucode2:::validkey(stopIfInvalid = TRUE) + - name: Verify that lucode2::buildLibrary was successful + if: github.event_name == 'pull_request' + shell: Rscript {0} + run: lucode2:::isVersionUpdated() + - name: Checks shell: Rscript {0} run: | diff --git a/.gitignore b/.gitignore index 4c015d7..bcf5ae9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ *.RData *.Ruserdata *.html +.Rproj.user diff --git a/.lintr b/.lintr new file mode 100644 index 0000000..c02af8f --- /dev/null +++ b/.lintr @@ -0,0 +1,2 @@ +linters: lucode2::lintrRules() +encoding: "UTF-8" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2f13466..243f46a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: mixed-line-ending - repo: https://github.com/lorenzwalthert/precommit - rev: v0.3.2.9025 + rev: v0.4.0 hooks: - id: parsable-R - id: deps-in-desc diff --git a/CITATION.cff b/CITATION.cff index 5a9e975..c741838 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,14 +2,16 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. type: software title: 'piamValidation: Validation Tools for PIK-PIAM' -version: 0.0.2 -date-released: '2024-02-09' +version: 0.1.0 +date-released: '2024-03-20' abstract: The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. authors: - family-names: Weigmann given-names: Pascal email: pascal.weigmann@pik-potsdam.de +- family-names: Richters + given-names: Oliver license: LGPL-3.0 repository-code: https://github.com/pik-piam/piamValidation diff --git a/DESCRIPTION b/DESCRIPTION index 22d83c5..5f29e6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,11 @@ Type: Package Package: piamValidation Title: Validation Tools for PIK-PIAM -Version: 0.0.2 -Date: 2024-02-09 +Version: 0.1.0 +Date: 2024-03-20 Authors@R: - person("Pascal", "Weigmann", , "pascal.weigmann@pik-potsdam.de", role = c("aut", "cre")) + c(person("Pascal", "Weigmann",, "pascal.weigmann@pik-potsdam.de", role = c("aut", "cre")), + person("Oliver", "Richters",, role = "aut")) Description: The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. License: LGPL-3 URL: https://github.com/pik-piam/piamValidation @@ -15,9 +16,11 @@ Imports: plotly, quitte (>= 0.3123.0), readxl, + tibble, tidyr Suggests: - testthat + testthat, + remind2 Encoding: UTF-8 RoxygenNote: 7.3.1 Depends: diff --git a/NAMESPACE b/NAMESPACE index 8498520..ff53516 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,4 +14,4 @@ importFrom(ggthemes,theme_tufte) importFrom(plotly,ggplotly) importFrom(readxl,excel_sheets) importFrom(readxl,read_excel) -importFrom(tibble,as_tibble) +importFrom(utils,read.csv2) diff --git a/R/appendTooltips.R b/R/appendTooltips.R index 6297143..4426552 100644 --- a/R/appendTooltips.R +++ b/R/appendTooltips.R @@ -2,12 +2,13 @@ appendTooltips <- function(df) { df$text <- NA + region <- period <- NULL # historic - relative df[df$category == "historic" & df$metric == "relative", ] <- df[df$category == "historic" & df$metric == "relative", ] %>% - mutate(text = paste0(region, "\n", - period, "\n", + mutate(text = paste0(.data$region, "\n", + .data$period, "\n", "Value: ", round(value, 2), "\n", "Ref_Value: ", round(ref_value, 2), "\n", "Ref_Source: ", ref_model, "\n", diff --git a/R/combineData.R b/R/combineData.R index 2060a95..468d3d9 100644 --- a/R/combineData.R +++ b/R/combineData.R @@ -2,7 +2,7 @@ # for one row of cfg: filter and merge relevant scenario data with cfg # results in one df that contains scenario data, reference data and thresholds -combineData <- function(data, cfg_row, ref_data = NULL) { +combineData <- function(data, cfgRow, refData = NULL) { # shorten as it will be used a lot c <- cfg_row diff --git a/R/importFunctions.R b/R/importFunctions.R index bd89264..ffe0d82 100644 --- a/R/importFunctions.R +++ b/R/importFunctions.R @@ -1,6 +1,6 @@ -#' @importFrom dplyr filter select mutate summarise group_by %>% -#' @importFrom tibble as_tibble +#' @importFrom dplyr filter select mutate %>% #' @importFrom readxl read_excel excel_sheets +#' @importFrom utils read.csv2 # scenarioPath: one or multiple paths to .mif or .csv file(s) containing # scenario data in IAM format @@ -41,7 +41,8 @@ getConfig <- function(configName) { ) cfg <- filter(cfg, ! grepl("^#", cfg[[1]])) } else { - cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#")) + cfg <- tibble::as_tibble( + read.csv2(path, na.strings = "", comment.char = "#")) } message("loading config file: ", configName, "\n") return(cfg) diff --git a/R/outputFunctions.R b/R/outputFunctions.R index 8c04e35..5dc5b31 100644 --- a/R/outputFunctions.R +++ b/R/outputFunctions.R @@ -1,6 +1,10 @@ -#' @importFrom dplyr filter select mutate summarise group_by %>% -# yellowFail: if set to TRUE a yellow check result of a critical variable will -# lead to the scenario not passing as validated +#' returns information on whether scenarios passed critical validation checks +#' +#' @param data data.frame as returned from ``validateScenarios()`` +#' @param yellowFail if set to TRUE a yellow check result of a critical +#' variable will lead to the scenario not passing as validated +#' +#' @importFrom dplyr %>% #' @export validationPass <- function(data, yellowFail = FALSE) { @@ -8,11 +12,9 @@ validationPass <- function(data, yellowFail = FALSE) { # see if any critical variables have failed per scenario and model pass <- data %>% - filter(check %in% fail_color, critical == "yes") %>% - group_by(model, scenario) %>% - summarize(pass = n() == 0, - n_fail = n() - ) + dplyr::filter(check %in% fail_color, critical == "yes") %>% + dplyr::group_by(model, scenario) %>% + dplyr::summarize(pass = dplyr::n() == 0, n_fail = dplyr::n()) return(pass) } diff --git a/R/piamValidation-package.R b/R/piamValidation-package.R new file mode 100644 index 0000000..05db633 --- /dev/null +++ b/R/piamValidation-package.R @@ -0,0 +1,7 @@ +#' Validation Tools for PIK-PIAM +#' +#' The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. +#' +#' @name piamValidation-package +#' @aliases piamValidation-package piamValidation +"_PACKAGE" diff --git a/R/validateScenarios.R b/R/validateScenarios.R index 96c3702..29f89f3 100644 --- a/R/validateScenarios.R +++ b/R/validateScenarios.R @@ -1,6 +1,11 @@ -#' @importFrom dplyr filter select mutate summarise group_by %>% - -# performs the validation checks from a config on a scenario dataset +#' performs the validation checks from a config on a scenario dataset +#' +#' @param scenarioPath one or multiple path(s) to scenario data in .mif or .csv +#' format +#' @param configName select config from inst/config +#' @param referencePath in case of historic comparison, choose path to ref data +#' +#' @importFrom dplyr filter select mutate group_by %>% #' @export validateScenarios <- function(scenarioPath, configName, referencePath = NULL) { @@ -20,7 +25,7 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL) { for (i in 1:nrow(cfg)) { # TODO: hist should only be needed if category "historical" is in config # validation generally should work without hist data - df_row <- combineData(data, cfg[i, ], ref_data = hist) + df_row <- combineData(data, cfg[i, ], refData = hist) df <- rbind(df, df_row) cat(paste0("Combined config row ", i, " of ", nrow(cfg), "\n")) } diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R index 1832909..6007063 100644 --- a/R/validationHeatmap.R +++ b/R/validationHeatmap.R @@ -1,28 +1,38 @@ +#' takes the output of "validateScenarios()" and plots heatmaps per variable +#' +#' @param data data.frame as returned by ``validateScenarios()`` +#' @param var variable to be plotted +#' @param cat choose category from "historical" or "scenario" +#' @param met choose metric from "relative", "difference", "absolute" or +#' "growthrate" +#' @param interactive return plots as interactive plotly plots by default +#' @param compareModels if TRUE, plots compare models instead of scenarios +#' #' @importFrom dplyr filter select mutate %>% #' @import ggplot2 #' @importFrom ggthemes theme_tufte #' @importFrom plotly ggplotly #' @export -# takes the output of "validateScenarios()" and plots heatmaps per variable -validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels = T) { +validationHeatmap <- function(data, var, cat, met, + interactive = T, compareModels = T) { # possible extension: when giving multiple vars, plot as facets in same row # prepare data slice - d <- df %>% + d <- data %>% filter(variable == var, category == cat, metric == met) # warn if no data is found for combination of var, cat and met if (nrow(d) == 0) { - df$cm <- paste(category, metric, sep = "-") + data$cm <- paste(category, metric, sep = "-") warning( paste0( "No data found for variable in this category and metric.\n variable ", var ," is available for the following category-metric - combinations: ", unique(df[df$variable == var, "cm"]) + combinations: ", unique(data[data$variable == var, "cm"]) ) ) } diff --git a/R/zzz.R b/R/zzz.R deleted file mode 100644 index 6f1da16..0000000 --- a/R/zzz.R +++ /dev/null @@ -1,3 +0,0 @@ -.onAttach <- function(libname, pkgname) { - packageStartupMessage("Welcome to piamValidation") -} diff --git a/README.md b/README.md index a902907..08b90be 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Validation Tools for PIK-PIAM -R package **piamValidation**, version **0.0.2** +R package **piamValidation**, version **0.1.0** [![CRAN status](https://www.r-pkg.org/badges/version/piamValidation)](https://cran.r-project.org/package=piamValidation) [![R build status](https://github.com/pik-piam/piamValidation/workflows/check/badge.svg)](https://github.com/pik-piam/piamValidation/actions) [![codecov](https://codecov.io/gh/pik-piam/piamValidation/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/piamValidation) @@ -9,13 +9,6 @@ R package **piamValidation**, version **0.0.2** The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. -## How to use - -The following categories/metrics require allow the following columns in the cfg: - - - - ## Installation For installation of the most recent package version an additional repository has to be added in R: @@ -45,16 +38,16 @@ In case of questions / problems please contact Pascal Weigmann . +Weigmann P, Richters O (2024). _piamValidation: Validation Tools for PIK-PIAM_. R package version 0.1.0, . A BibTeX entry for LaTeX users is ```latex @Manual{, title = {piamValidation: Validation Tools for PIK-PIAM}, - author = {Pascal Weigmann}, + author = {Pascal Weigmann and Oliver Richters}, year = {2024}, - note = {R package version 0.0.2}, + note = {R package version 0.1.0}, url = {https://github.com/pik-piam/piamValidation}, } ``` diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..cf6328e --- /dev/null +++ b/codecov.yml @@ -0,0 +1,18 @@ +comment: true +github_checks: + annotations: false +coverage: + precision: 2 + round: down + range: "0...95" + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true diff --git a/man/piamValidation-package.Rd b/man/piamValidation-package.Rd new file mode 100644 index 0000000..47bb351 --- /dev/null +++ b/man/piamValidation-package.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/piamValidation-package.R +\docType{package} +\name{piamValidation-package} +\alias{piamValidation-package} +\alias{piamValidation} +\title{Validation Tools for PIK-PIAM} +\description{ +The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment. +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/pik-piam/piamValidation} +} + +} +\author{ +\strong{Maintainer}: Pascal Weigmann \email{pascal.weigmann@pik-potsdam.de} + +Authors: +\itemize{ + \item Oliver Richters +} + +} diff --git a/man/validateScenarios.Rd b/man/validateScenarios.Rd new file mode 100644 index 0000000..c768758 --- /dev/null +++ b/man/validateScenarios.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validateScenarios.R +\name{validateScenarios} +\alias{validateScenarios} +\title{performs the validation checks from a config on a scenario dataset} +\usage{ +validateScenarios(scenarioPath, configName, referencePath = NULL) +} +\arguments{ +\item{scenarioPath}{one or multiple path(s) to scenario data in .mif or .csv +format} + +\item{configName}{select config from inst/config} + +\item{referencePath}{in case of historic comparison, choose path to ref data} +} +\description{ +performs the validation checks from a config on a scenario dataset +} diff --git a/man/validationHeatmap.Rd b/man/validationHeatmap.Rd new file mode 100644 index 0000000..e74794a --- /dev/null +++ b/man/validationHeatmap.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/validationHeatmap.R +\name{validationHeatmap} +\alias{validationHeatmap} +\title{takes the output of "validateScenarios()" and plots heatmaps per variable} +\usage{ +validationHeatmap(data, var, cat, met, interactive = T, compareModels = T) +} +\arguments{ +\item{data}{data.frame as returned by ``validateScenarios()``} + +\item{var}{variable to be plotted} + +\item{cat}{choose category from "historical" or "scenario"} + +\item{met}{choose metric from "relative", "difference", "absolute" or +"growthrate"} + +\item{interactive}{return plots as interactive plotly plots by default} + +\item{compareModels}{if TRUE, plots compare models instead of scenarios} +} +\description{ +takes the output of "validateScenarios()" and plots heatmaps per variable +} diff --git a/man/validationPass.Rd b/man/validationPass.Rd new file mode 100644 index 0000000..4d969c1 --- /dev/null +++ b/man/validationPass.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/outputFunctions.R +\name{validationPass} +\alias{validationPass} +\title{returns information on whether scenarios passed critical validation checks} +\usage{ +validationPass(data, yellowFail = FALSE) +} +\arguments{ +\item{data}{data.frame as returned from ``validateScenarios()``} + +\item{yellowFail}{if set to TRUE a yellow check result of a critical +variable will lead to the scenario not passing as validated} +} +\description{ +returns information on whether scenarios passed critical validation checks +} diff --git a/tests/.lintr b/tests/.lintr new file mode 100644 index 0000000..165993b --- /dev/null +++ b/tests/.lintr @@ -0,0 +1,2 @@ +linters: lucode2::lintrRules(allowUndesirable = TRUE) +encoding: "UTF-8" From 77f2f296d2951682f2b2cfb064811f7d90385ee7 Mon Sep 17 00:00:00 2001 From: Pascal Weigmann Date: Wed, 20 Mar 2024 17:12:40 +0100 Subject: [PATCH 18/18] add missing dependencies --- DESCRIPTION | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 5f29e6f..f37af56 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,9 +10,12 @@ Description: The piamValidation package provides validation tools for the Potsda License: LGPL-3 URL: https://github.com/pik-piam/piamValidation Imports: + devtools, dplyr (>= 1.1.1), ggplot2, ggthemes, + htmltools, + knitr, plotly, quitte (>= 0.3123.0), readxl,