From b8895ae79712a78ec076606847da254d7233f1b7 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Thu, 22 Feb 2024 17:45:21 +0100
Subject: [PATCH 01/18] wip

---
 .Rbuildignore                    |   2 +
 .gitignore                       |   4 +
 DESCRIPTION                      |  10 ++
 R/plotHeatmaps.R                 |  93 ++++++++++++++++
 R/sysdata.rda                    | Bin 0 -> 107 bytes
 R/validateScenarios.R            | 181 +++++++++++++++++++++++++++++++
 R/zzz.R                          |   3 +
 README.md                        |   7 ++
 inst/config/validationConfig.csv |  21 ++++
 piamValidation.Rproj             |  20 ++++
 10 files changed, 341 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 R/plotHeatmaps.R
 create mode 100644 R/sysdata.rda
 create mode 100644 R/validateScenarios.R
 create mode 100644 R/zzz.R
 create mode 100644 inst/config/validationConfig.csv
 create mode 100644 piamValidation.Rproj

diff --git a/.Rbuildignore b/.Rbuildignore
index 4c1e99a..298f71d 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -3,3 +3,5 @@
 ^Makefile$
 ^workflow$
 ^.*CITATION.cff$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b6a065
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/DESCRIPTION b/DESCRIPTION
index 5641a1b..22d83c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,7 +8,17 @@ Authors@R:
 Description: The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
 License: LGPL-3
 URL: https://github.com/pik-piam/piamValidation
+Imports:
+    dplyr (>= 1.1.1),
+    ggplot2,
+    ggthemes,
+    plotly,
+    quitte (>= 0.3123.0),
+    readxl,
+    tidyr
 Suggests:
     testthat
 Encoding: UTF-8
 RoxygenNote: 7.3.1
+Depends: 
+    R (>= 2.10)
diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R
new file mode 100644
index 0000000..0ab5416
--- /dev/null
+++ b/R/plotHeatmaps.R
@@ -0,0 +1,93 @@
+
+
+validationHeatmap <- function(d, var, cat, metric, interactive = T) {
+
+  # possible extension: when giving multiple vars, plot as facets in same row
+
+  d <- filter(d, variable == var)
+
+  d$period <- as.character(d$period)
+
+  # text for tooltip, category 1
+  if (cat == 1) {
+    d <- d %>%
+      mutate(text = paste0(region, "\n",
+                           period, "\n",
+                           "Value: ", round(value,2), "\n",
+                           "Ref_Value: ", round(ref_value,2), "\n",
+                           "Ref_Source: ", ref_model, "\n",
+                           "Thresholds: \n",
+                           ifelse(metric == "relative",
+                                  paste0("Max: ", max_yel*100, "% / ", max_red*100, "%"),
+                                  paste0("Max: ", max_yel, " / ", max_red))
+      )
+      )
+  }
+
+  # text for tooltip, category 2
+  if (cat == 2) {
+    d <- d %>%
+      mutate(text = paste0(region, "\n",
+                           period, "\n",
+                           "Value: ", round(value,2), "\n",
+                           ifelse(metric == "relative",
+                                  paste0("Ref Value: ", round(ref_value,2), "\n",
+                                         "Rel Deviation: ", round(check_value,2)*100, "% \n",
+                                         "Thresholds: \n",
+                                         "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
+                                         "Max: ", max_yel*100, "% / ", max_red*100, "%"),
+                                  # for metric == "absolute"
+                                  paste0("Thresholds: \n",
+                                         "Min: ", min_yel, " / ", min_red,"\n",
+                                         "Max: ", max_yel, " / ", max_red)
+                           )
+      )
+      )
+  }
+
+  if (cat == 3) {
+    d <- d %>%
+      mutate(text = paste0(region, "\n",
+                           period, "\n",
+                           "Avg. growth/yr: ", round(check_value,2)*100, "% \n",
+                           "Absolute value: ", round(value,2), " \n",
+                           "Thresholds: \n",
+                           paste0("Min: ", min_yel*100, "% / ", min_red*100, "% \n",
+                                  "Max: ", max_yel*100, "% / ", max_red*100, "%")
+      )
+      )
+  }
+
+  # classic ggplot, with text in aes
+  p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) +
+    geom_tile(color="white", linewidth=0.0) +
+    scale_fill_manual(values = colors, breaks = colors) +
+    facet_grid(scenario~.)
+  # make it beautiful
+  # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2/
+  p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]"))
+  p <- p + theme_tufte(base_family="Helvetica")
+  p <- p + theme(axis.ticks=element_blank())
+  p <- p + theme(axis.text=element_text(size=7))
+  p <- p + coord_equal()
+  p <- p + theme(legend.position = "none")
+
+
+  # p + theme(panel.spacing = unit(2, "lines"))
+
+  # not great, only works with "World" being the first region
+  if("World" %in% d$region) {
+    p <- p + geom_vline(xintercept = 1.5, linewidth = 0.8, color = "white")
+  }
+  fig <- ggplotly(p, tooltip="text")
+
+  # improve plotly layout, kinda works but very manual
+  #fig <- fig %>% subplot(heights = 0.3) %>%
+  #   layout(title = list(y=0.64))
+  if (interactive) {
+    return(fig)
+  } else {
+    return(p)
+  }
+
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000000000000000000000000000000000000..26ce3b89e068b8251b177ee456e6eadf44977611
GIT binary patch
literal 107
zcmV-x0F?hiT4*^jL0KkKS#gl0Apigye}McjL;wH*5C9AT5I`@djQ{`vAOKV~O&E*>
z(WXPupwLkLP&5xn05LR}UrHb~g=+$YQ0yH-XC_UbsDH4F=e-l>w+%rMG8oZUz)78Q
NF64@Ep&;WSNJ8#sB|QKD

literal 0
HcmV?d00001

diff --git a/R/validateScenarios.R b/R/validateScenarios.R
new file mode 100644
index 0000000..7fd1416
--- /dev/null
+++ b/R/validateScenarios.R
@@ -0,0 +1,181 @@
+# import data
+# supported formats:
+# - .mif
+# -
+
+
+# dev helpers
+#configName <- "validationConfig.csv"
+#usethis::use_data(configName, internal = T)
+
+importData <- function() {
+  # for dev purposes
+  if (file.exists("mif.rds")) {
+    data <- readRDS("mif.rds")
+  } else {
+    path <- "C:/Users/pascalwe/Data/vs/"
+    data <- remind2::deletePlus(quitte::read.quitte(
+      c(paste0(path, "REMIND_generic_Bal.mif"),
+        paste0(path, "REMIND_generic_NPi.mif"))
+        )
+      )
+    saveRDS(data, "mif.rds")
+  }
+
+  # change ordering of factors, put last element ("World") first
+  # TODO: will not work if World is not last
+  new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1))
+  data$region <- factor(data$region, levels = new_order)
+
+  return(data)
+}
+
+importReferenceData <- function() {
+  # for dev purposes
+  if (file.exists("hist.rds")) {
+    hist <- readRDS("hist.rds")
+  } else {
+    hist <- quitte::read.quitte(paste0(path, "historical.mif"))
+    hist <- filter(hist, period >= 1990)
+    saveRDS(hist, "hist.rds")
+  }
+
+  # remove all historical data before 1990
+  hist <- filter(hist, period >= 1990)
+
+  return(hist)
+}
+
+# required columns:
+# - category: "historic" or "scenario"
+# - can be empty, contain one element or multiple:
+#   - model, scenario, region, period
+importConfig <- function(configName) {
+  path <- system.file(paste0("config/", configName) , package = "piamValidation")
+  cfg <- read.csv2(path, na.strings = "")
+  return(cfg)
+}
+
+
+cleanConfig <- function(cfg) {
+  # fill empty threshold columns with Infinity for easier evaluation
+  cfg <- cfg %>%
+    mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)),
+           min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)),
+           max_yel = as.numeric(ifelse(is.na(max_yel),  Inf, max_yel)),
+           max_red = as.numeric(ifelse(is.na(max_red),  Inf, max_red))
+           )
+
+  # insert tests here to check if there are forbidden fields in config
+  return(cfg)
+}
+
+
+combineData <- function(data, hist, cfg) {
+
+  # dimensions of data
+  by <- c("variable", "region", "period", "scenario")
+
+  # full dimensions and important slices
+  all_reg <- unique(data$region)
+  all_per <- unique(data$period)  # not a factor, convert?
+  all_per <- all_per[all_per <= 2100]
+  hist_per <- c(2005, 2010, 2015, 2020)
+  all_sce <- unique(data$scenario)
+
+  # revert the sorting of factors for better plots
+  # data$region <- factor(data$region, levels = rev(levels(data$region)))
+
+  # define colors here or later
+  colors <- c(green = "#008450", yellow = "#EFB700", red = "#B81D13", grey = "#808080")
+
+  ### here, category specifics start
+
+  # category 1 data objects
+  d1 <- data.frame()
+  c1 <- cfg[cfg$category == 1, ]
+
+  # assemble data by row of cfg
+  for (i in 1:nrow(c1)) {
+    # create filters
+    # check whether regions, periods, scenarios are specified
+    # TODO: extend to support excluding elements, multiple elements
+    reg <- if (is.na(c1[i, "region"]))   all_reg else strsplit(c1[i, "region"], split = ", |,")[[1]]
+    per <- if (is.na(c1[i, "period"]))  hist_per else strsplit(as.character(c1[i, "period"]), split = ", |,")[[1]]
+    sce <- if (is.na(c1[i, "scenario"])) all_sce else strsplit(c1[i, "scenario"], split = ", |, ")[[1]]
+
+    # filter data for each row in cfg
+    # REMIND data for line i
+    d <- data %>%
+      filter(variable == c1[i, "variable"],
+             region %in% reg,
+             period %in% per,
+             scenario %in% sce)
+    # here?: skip if empty
+
+    # historical data for line i
+    h <- hist %>%
+      filter(variable == c1[i, "variable"],
+             region %in% reg,
+             period %in% per) %>%
+      mutate(ref_value = value, ref_model = model) %>%
+      select(-c("scenario", "unit", "value", "model"))
+    # remove NA here?
+
+    # in case one or more sources are specified, filter for them
+    if (!is.na(c1[i, "ref_model"])) {
+      h <- filter(h, ref_model %in% strsplit(c1[i, "ref_model"], split = ", |,")[[1]])
+    }
+
+    # in case of multiple sources, calculate mean (using all available sources)
+    if (length(unique(h$ref_model)) > 1) {
+      h_mean <- group_by(h, period, region) %>%
+        summarise(ref_value = mean(ref_value, na.rm = TRUE))
+      h <- mutate(h_mean, variable = c1[i, "variable"], ref_model = "multiple")
+    }
+
+    # Merge REMIND, historical and config data
+    # (works as long as we only have one set of thresholds per cfg row)
+    d <- merge(d, h) %>%
+      mutate(min_red = c1[i, ]$min_red,
+             min_yel = c1[i, ]$min_yel,
+             max_yel = c1[i, ]$max_yel,
+             max_red = c1[i, ]$max_red,
+             metric  = c1[i, ]$metric)
+
+    d1 <- rbind(d1, d)
+  }
+
+  return(df)
+}
+
+
+evaluateThreshold <- function(df) {
+
+  return(df)
+}
+
+
+
+# bringing it all together
+validateScenario <- function() {
+
+  data <- importData()
+
+  hist <- importReferenceData()
+
+  cfg <- importConfig(configName)
+
+  cfg <- cleanConfig()
+
+  df <- combineData(data, cfg)
+
+  evaluateThreshold(df)
+
+  df <- createTooltip(df)
+
+  return(df)
+}
+
+
+
diff --git a/R/zzz.R b/R/zzz.R
new file mode 100644
index 0000000..6f1da16
--- /dev/null
+++ b/R/zzz.R
@@ -0,0 +1,3 @@
+.onAttach <- function(libname, pkgname) {
+  packageStartupMessage("Welcome to piamValidation")
+}
diff --git a/README.md b/README.md
index 1585caf..a902907 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,13 @@ R package **piamValidation**, version **0.0.2**
 The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
 
 
+## How to use
+
+The following categories/metrics require allow the following columns in the cfg:
+
+
+
+
 ## Installation
 
 For installation of the most recent package version an additional repository has to be added in R:
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
new file mode 100644
index 0000000..36eb4e3
--- /dev/null
+++ b/inst/config/validationConfig.csv
@@ -0,0 +1,21 @@
+category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;ref_period;ref_model;ref_scenario
+1;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;;EDGAR6;
+1;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;;EDGAR6;
+1;;;PE|Coal;;;;;;0.1;relative;;BP, IEA;
+1;;;PE|Coal;World;;;;;0.2;relative;;BP, IEA;
+1;;;PE|Oil;;;;;;2;absolute;;;
+1;;;PE|Oil;World;;;;;5;absolute;;;
+1;;Bal;FE|Electricity;;;;;;0.1;relative;;;
+1;;Bal;FE|Electricity;World;;;;;0.2;relative;;;
+2;;;Price|Carbon;;2030;;;300;600;absolute;;;
+2;;;Price|Carbon;;2050;;;;1000;absolute;;;
+2;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;;;
+2;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;;;
+2;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;;;
+2;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;;;
+2;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;;;
+2;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;;;
+2;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;2020;;
+2;;;FE;;2030, 2040;-0.4;-0.2;;;relative;2020;;
+2;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;;;
+3;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;;;
diff --git a/piamValidation.Rproj b/piamValidation.Rproj
new file mode 100644
index 0000000..497f8bf
--- /dev/null
+++ b/piamValidation.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source

From 5cd7780b800a3a16beb66a098a762b14516db4d5 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Fri, 23 Feb 2024 16:56:47 +0100
Subject: [PATCH 02/18] combineData, resolveDuplicates, wip

---
 NAMESPACE                        |   6 ++
 R/combineData.R                  | 121 +++++++++++++++++++++++++++++++
 R/piamValidation-package.R       |   6 --
 R/plotHeatmaps.R                 |  26 ++++---
 R/resolveDuplicates.R            |  40 ++++++++++
 R/validateScenarios.R            | 102 +++++---------------------
 inst/config/validationConfig.csv |  43 +++++------
 man/piamValidation-package.Rd    |  22 ------
 8 files changed, 223 insertions(+), 143 deletions(-)
 create mode 100644 R/combineData.R
 delete mode 100644 R/piamValidation-package.R
 create mode 100644 R/resolveDuplicates.R
 delete mode 100644 man/piamValidation-package.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6ae9268..00dd4dd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,2 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
+importFrom(dplyr,"%>%")
+importFrom(dplyr,filter)
+importFrom(dplyr,group_by)
+importFrom(dplyr,mutate)
+importFrom(dplyr,select)
+importFrom(dplyr,summarise)
diff --git a/R/combineData.R b/R/combineData.R
new file mode 100644
index 0000000..e4419b4
--- /dev/null
+++ b/R/combineData.R
@@ -0,0 +1,121 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+
+# for one row of cfg: filter and merge relevant scenario data with cfg
+combineData <- function(data, hist, cfg_row) {
+
+  # shorten as it will be used a lot
+  c <- cfg_row
+
+  # full dimensions and important slices
+  all_reg <- unique(data$region)
+  all_per <- unique(data$period)  # not a factor, convert?
+  all_per <- all_per[all_per <= 2100]
+  hist_per <- c(2005, 2010, 2015, 2020)
+  all_sce <- unique(data$scenario)
+
+  # create filters
+  # check whether regions, periods, scenarios are specified
+  reg <- if (is.na(c$region))   all_reg else
+    strsplit(c$region, split = ", |,")[[1]]
+  sce <- if (is.na(c$scenario)) all_sce else
+    strsplit(c$scenario, split = ", |, ")[[1]]
+
+  # empty "period" field means different years for historic or scenario category
+  if (c$category == "historic") {
+    per <- if (is.na(c$period))  hist_per else
+      strsplit(as.character(c$period), split = ", |,")[[1]]
+  } else {
+    per <- if (is.na(c$period))  all_per else
+      strsplit(as.character(c$period), split = ", |,")[[1]]
+  }
+
+  # filter scenario data for row in cfg
+  d <- data %>%
+    filter(variable == c$variable,
+           region %in% reg,
+           period %in% per,
+           scenario %in% sce)
+
+  # attach respective cfg information to data slice
+  d <- d %>%
+    mutate(min_red = c$min_red,  # could be set to NA as it is not used
+           min_yel = c$min_yel,  # could be set to NA as it is not used
+           max_yel = c$max_yel,
+           max_red = c$max_red,
+           category = c$category,
+           metric   = c$metric,
+           critical = c$critical)
+
+  # depending on category: filter and attach reference values if they are needed
+  if (c$category == "historic") {
+    # historic data for relevant variable and dimensions (all sources)
+    h <- hist %>%
+      filter(variable == c$variable,
+             region %in% reg,
+             period %in% per) %>%
+      mutate(ref_value = value, ref_model = model) %>%
+      select(-c("scenario", "unit", "value", "model"))
+
+    # TODO: insert test here if ref_model exists and has data to compare to
+
+    # in case one or more sources are specified, filter for them
+    if (!is.na(c$ref_model)) {
+      h <- filter(
+        h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]]
+      )
+    }
+
+    # in case of multiple sources, calculate mean (using all available sources)
+    if (length(unique(h$ref_model)) > 1) {
+      h_mean <- group_by(h, period, region) %>%
+        summarise(ref_value = mean(ref_value, na.rm = TRUE))
+      h <- mutate(h_mean, variable = c$variable, ref_model = "multiple")
+    }
+
+    # merge with historical data adds columns ref_value and ref_model
+    df <- merge(d, h)
+
+    # add columns which are not used in this category, fill NA
+    df <- df %>% mutate(ref_period = NA, ref_scenario = NA)
+
+    # filter and attach reference values if they are needed; scenario data
+  } else if (c$category == "scenario") {
+
+    # no reference values needed for these metrics, fill NA
+    if (c$metric %in% c("absolute", "growthrate")) {
+      df <- d %>%
+        mutate(ref_value = NA,
+               ref_model = NA,
+               ref_period = NA,
+               ref_scenario = NA)
+
+      # get reference values for these metrics
+    } else if (c$metric %in% c("relative", "difference")) {
+      # TODO: only works for ref_period, add support for model/scenario as ref
+      # TODO: support choosing ref_period AND model/scenario
+      ref <- data %>%
+        filter(variable == c$variable,
+               region %in% reg,
+               period == c$ref_period,
+               scenario %in% sce) %>%
+        mutate(ref_value = value, ref_period = period) %>%
+        select(-c(period, value))
+      df <- merge(d, ref)
+
+      # add columns which are not used in this category, fill NA
+      df <- df %>%
+        mutate(ref_model = NA,
+               ref_scenario = NA)
+
+    } else {
+      # TODO: have this warning here or earlier when cleaning config?
+      warning("'metric' for category 'scenario' must be either 'absolute',
+              'relative', 'difference' or 'growthrate'.")
+    }
+
+  } else {
+    warning("'category' must be either 'historic' or 'scenario'.")
+  }
+
+  return(df)
+}
diff --git a/R/piamValidation-package.R b/R/piamValidation-package.R
deleted file mode 100644
index a65cf64..0000000
--- a/R/piamValidation-package.R
+++ /dev/null
@@ -1,6 +0,0 @@
-#' @keywords internal
-"_PACKAGE"
-
-## usethis namespace: start
-## usethis namespace: end
-NULL
diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R
index 0ab5416..6e22c2a 100644
--- a/R/plotHeatmaps.R
+++ b/R/plotHeatmaps.R
@@ -1,13 +1,5 @@
-
-
-validationHeatmap <- function(d, var, cat, metric, interactive = T) {
-
-  # possible extension: when giving multiple vars, plot as facets in same row
-
-  d <- filter(d, variable == var)
-
-  d$period <- as.character(d$period)
-
+# construct tooltips for interactive plots
+appendTooltip <- function(df) {
   # text for tooltip, category 1
   if (cat == 1) {
     d <- d %>%
@@ -58,6 +50,20 @@ validationHeatmap <- function(d, var, cat, metric, interactive = T) {
       )
   }
 
+  return(df)
+}
+
+
+validationHeatmap <- function(d, var, cat, metric, interactive = T) {
+
+  # possible extension: when giving multiple vars, plot as facets in same row
+
+  d <- filter(d, variable == var)
+
+  d$period <- as.character(d$period)
+
+
+
   # classic ggplot, with text in aes
   p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) +
     geom_tile(color="white", linewidth=0.0) +
diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R
new file mode 100644
index 0000000..260452e
--- /dev/null
+++ b/R/resolveDuplicates.R
@@ -0,0 +1,40 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+
+resolveDuplicates <- function(df) {
+
+  # TODO: needs to be double checked for edge-cases
+  #       might be easier or safer to handle categories separately
+
+  # check for duplicates
+  # these are the duplicates that are from the lower rows and should be kept
+  duplicates <- df[duplicated(df[c("model", "scenario", "variable",
+                                       "region", "period", "category", "metric")]), ]
+
+  # here all instances of the data that was duplicated is removed
+  no_dupl <- dplyr::anti_join(df, duplicates_all, by = c("model", "scenario", "variable",
+                                                         "region", "period", "category", "metric"))
+
+  df <- rbind(no_dupl, duplicates)
+
+  # different approach: allows partial overwriting
+  # chosen to not use this anymore, less robust
+
+  # these are missing the duplicated entries that were in the lower rows
+  # no_dupl <- dplyr::anti_join(df, duplicates_all)
+
+  # overwrite "earlier" values using "later" ones
+  # BEWARE: requires specific country to come after "all regions"
+  # d1 <- merge(no_dupl, duplicates_all, by=c("variable", "region", "period",
+  #                                           "scenario", "unit", "model", "value",
+  #                                           "ref_model", "ref_value","ref_scenario",
+  #                                           "ref_period", "category", "metric", "critical"),
+  #             all.x = T) %>%
+  #   mutate(min_red = ifelse(is.na(min_red.y), min_red.x, min_red.y)) %>%
+  #   mutate(min_yel = ifelse(is.na(min_yel.y), min_yel.x, min_yel.y)) %>%
+  #   mutate(max_yel = ifelse(is.na(max_yel.y), max_yel.x, max_yel.y)) %>%
+  #   mutate(max_red = ifelse(is.na(max_red.y), max_red.x, max_red.y)) %>%
+  #   select(-c("min_red.x", "min_yel.x", "max_yel.x", "max_red.x",
+  #             "min_red.y", "min_yel.y", "max_yel.y", "max_red.y"))
+
+  return(df)
+}
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 7fd1416..14bc6eb 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,3 +1,5 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+
 # import data
 # supported formats:
 # - .mif
@@ -8,6 +10,8 @@
 #configName <- "validationConfig.csv"
 #usethis::use_data(configName, internal = T)
 
+
+
 importData <- function() {
   # for dev purposes
   if (file.exists("mif.rds")) {
@@ -50,7 +54,8 @@ importReferenceData <- function() {
 # - category: "historic" or "scenario"
 # - can be empty, contain one element or multiple:
 #   - model, scenario, region, period
-importConfig <- function(configName) {
+# ...
+loadConfig <- function(configName) {
   path <- system.file(paste0("config/", configName) , package = "piamValidation")
   cfg <- read.csv2(path, na.strings = "")
   return(cfg)
@@ -71,85 +76,6 @@ cleanConfig <- function(cfg) {
 }
 
 
-combineData <- function(data, hist, cfg) {
-
-  # dimensions of data
-  by <- c("variable", "region", "period", "scenario")
-
-  # full dimensions and important slices
-  all_reg <- unique(data$region)
-  all_per <- unique(data$period)  # not a factor, convert?
-  all_per <- all_per[all_per <= 2100]
-  hist_per <- c(2005, 2010, 2015, 2020)
-  all_sce <- unique(data$scenario)
-
-  # revert the sorting of factors for better plots
-  # data$region <- factor(data$region, levels = rev(levels(data$region)))
-
-  # define colors here or later
-  colors <- c(green = "#008450", yellow = "#EFB700", red = "#B81D13", grey = "#808080")
-
-  ### here, category specifics start
-
-  # category 1 data objects
-  d1 <- data.frame()
-  c1 <- cfg[cfg$category == 1, ]
-
-  # assemble data by row of cfg
-  for (i in 1:nrow(c1)) {
-    # create filters
-    # check whether regions, periods, scenarios are specified
-    # TODO: extend to support excluding elements, multiple elements
-    reg <- if (is.na(c1[i, "region"]))   all_reg else strsplit(c1[i, "region"], split = ", |,")[[1]]
-    per <- if (is.na(c1[i, "period"]))  hist_per else strsplit(as.character(c1[i, "period"]), split = ", |,")[[1]]
-    sce <- if (is.na(c1[i, "scenario"])) all_sce else strsplit(c1[i, "scenario"], split = ", |, ")[[1]]
-
-    # filter data for each row in cfg
-    # REMIND data for line i
-    d <- data %>%
-      filter(variable == c1[i, "variable"],
-             region %in% reg,
-             period %in% per,
-             scenario %in% sce)
-    # here?: skip if empty
-
-    # historical data for line i
-    h <- hist %>%
-      filter(variable == c1[i, "variable"],
-             region %in% reg,
-             period %in% per) %>%
-      mutate(ref_value = value, ref_model = model) %>%
-      select(-c("scenario", "unit", "value", "model"))
-    # remove NA here?
-
-    # in case one or more sources are specified, filter for them
-    if (!is.na(c1[i, "ref_model"])) {
-      h <- filter(h, ref_model %in% strsplit(c1[i, "ref_model"], split = ", |,")[[1]])
-    }
-
-    # in case of multiple sources, calculate mean (using all available sources)
-    if (length(unique(h$ref_model)) > 1) {
-      h_mean <- group_by(h, period, region) %>%
-        summarise(ref_value = mean(ref_value, na.rm = TRUE))
-      h <- mutate(h_mean, variable = c1[i, "variable"], ref_model = "multiple")
-    }
-
-    # Merge REMIND, historical and config data
-    # (works as long as we only have one set of thresholds per cfg row)
-    d <- merge(d, h) %>%
-      mutate(min_red = c1[i, ]$min_red,
-             min_yel = c1[i, ]$min_yel,
-             max_yel = c1[i, ]$max_yel,
-             max_red = c1[i, ]$max_red,
-             metric  = c1[i, ]$metric)
-
-    d1 <- rbind(d1, d)
-  }
-
-  return(df)
-}
-
-
 evaluateThreshold <- function(df) {
 
   return(df)
@@ -158,19 +84,27 @@ evaluateThreshold <- function(df) {
 
 
 # bringing it all together
+# TODO: introduce file path and name as argument
 validateScenario <- function() {
 
   data <- importData()
 
   hist <- importReferenceData()
 
-  cfg <- importConfig(configName)
+  cfg <- loadConfig(configName)
 
-  cfg <- cleanConfig()
+  cfg <- cleanConfig(cfg)
 
-  df <- combineData(data, cfg)
 
-  evaluateThreshold(df)
+  # combine data for each row of the config and bind together
+  df <- data.frame()
+  for (i in 1:nrow(cfg)) {
+    # TODO: hist should be optional, validation should work without "historical"
+    d <- combineData(data, hist, cfg[i, ])
+    df <- rbind(df, d)
+  }
+
+  df <- evaluateThreshold(df)
 
   df <- createTooltip(df)
 
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
index 36eb4e3..e88860f 100644
--- a/inst/config/validationConfig.csv
+++ b/inst/config/validationConfig.csv
@@ -1,21 +1,22 @@
-category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;ref_period;ref_model;ref_scenario
-1;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;;EDGAR6;
-1;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;;EDGAR6;
-1;;;PE|Coal;;;;;;0.1;relative;;BP, IEA;
-1;;;PE|Coal;World;;;;;0.2;relative;;BP, IEA;
-1;;;PE|Oil;;;;;;2;absolute;;;
-1;;;PE|Oil;World;;;;;5;absolute;;;
-1;;Bal;FE|Electricity;;;;;;0.1;relative;;;
-1;;Bal;FE|Electricity;World;;;;;0.2;relative;;;
-2;;;Price|Carbon;;2030;;;300;600;absolute;;;
-2;;;Price|Carbon;;2050;;;;1000;absolute;;;
-2;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;;;
-2;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;;;
-2;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;;;
-2;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;;;
-2;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;;;
-2;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;;;
-2;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;2020;;
-2;;;FE;;2030, 2040;-0.4;-0.2;;;relative;2020;;
-2;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;;;
-3;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;;;
+category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;critical;ref_period;ref_model;ref_scenario
+historic;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;yes;;EDGAR6;
+historic;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;yes;;EDGAR6;
+historic;;;PE|Coal;;;;;;0.1;relative;yes;;BP, IEA;
+historic;;;PE|Coal;World;;;;;0.2;relative;yes;;BP, IEA;
+historic;;;PE|Oil;;;;;;2;difference;yes;;;
+historic;;;PE|Oil;World;;;;;5;difference;yes;;;
+historic;;Bal;FE|Electricity;;;;;;0.1;relative;yes;;;
+historic;;Bal;FE|Electricity;World;;;;;0.2;relative;yes;;;
+scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;;
+scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;;
+scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;;
+scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;;
+scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;yes;;;
+scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;;
+scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;yes;;;
+scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;;
+scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;;
+scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;;
+scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;;
+scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;;
+scenario;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;yes;;;
diff --git a/man/piamValidation-package.Rd b/man/piamValidation-package.Rd
deleted file mode 100644
index 962bd73..0000000
--- a/man/piamValidation-package.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/piamValidation-package.R
-\docType{package}
-\name{piamValidation-package}
-\alias{piamValidation}
-\alias{piamValidation-package}
-\title{piamValidation: Validation Tools for PIK-PIAM}
-\description{
-The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
-}
-\seealso{
-Useful links:
-\itemize{
-  \item \url{https://github.com/pik-piam/piamValidation}
-}
-
-}
-\author{
-\strong{Maintainer}: Pascal Weigmann \email{pascal.weigmann@pik-potsdam.de}
-
-}
-\keyword{internal}

From 8df0e19ea1df751e639ceec7d36a7f38615cf443 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Tue, 27 Feb 2024 17:56:08 +0100
Subject: [PATCH 03/18] vs wip

---
 NAMESPACE                        |  3 +
 R/appendTooltips.R               | 86 +++++++++++++++++++++++++++++
 R/combineData.R                  |  1 +
 R/evaluateThresholds.R           | 94 ++++++++++++++++++++++++++++++++
 R/plotHeatmaps.R                 | 87 ++++++++++-------------------
 R/resolveDuplicates.R            |  3 +-
 R/validateScenarios.R            | 15 +++--
 inst/config/validationConfig.csv |  2 +-
 8 files changed, 225 insertions(+), 66 deletions(-)
 create mode 100644 R/appendTooltips.R
 create mode 100644 R/evaluateThresholds.R

diff --git a/NAMESPACE b/NAMESPACE
index 00dd4dd..502b1b0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+import(ggplot2)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
 importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
+importFrom(ggthemes,theme_tufte)
+importFrom(plotly,ggplotly)
diff --git a/R/appendTooltips.R b/R/appendTooltips.R
new file mode 100644
index 0000000..9470b2a
--- /dev/null
+++ b/R/appendTooltips.R
@@ -0,0 +1,86 @@
+# construct tooltips for interactive plots
+appendTooltips <- function(df) {
+
+  df$text <- NA
+
+  # historic - relative
+  df[df$category == "historic" & df$metric == "relative", ] <-
+    df[df$category == "historic" & df$metric == "relative", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Value: ", round(value,2), "\n",
+                         "Ref_Value: ", round(ref_value,2), "\n",
+                         "Ref_Source: ", ref_model, "\n",
+                         "Thresholds: \n",
+                         paste0("Max: ", max_yel*100, "% / ", max_red*100, "%")
+    )
+    )
+
+  # historic - difference
+  df[df$category == "historic" & df$metric == "difference", ] <-
+    df[df$category == "historic" & df$metric == "difference", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Value: ", round(value,2), "\n",
+                         "Ref_Value: ", round(ref_value,2), "\n",
+                         "Ref_Source: ", ref_model, "\n",
+                         "Thresholds: \n",
+                         "Max: ", max_yel, " / ", max_red
+    )
+    )
+
+  # scenario - relative
+  df[df$category == "scenario" & df$metric == "relative", ] <-
+    df[df$category == "scenario" & df$metric == "relative", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Value: ", round(value,2), "\n",
+                         "Ref Value: ", round(ref_value,2), "\n",
+                         "Rel Deviation: ", round(check_value,2)*100, "% \n",
+                         "Thresholds: \n",
+                         "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
+                         "Max: ", max_yel*100, "% / ", max_red*100, "%"
+    )
+    )
+
+  # scenario - difference
+  df[df$category == "scenario" & df$metric == "difference", ] <-
+    df[df$category == "scenario" & df$metric == "difference", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Value: ", round(value,2), "\n",
+                         "Ref Value: ", round(ref_value,2), "\n",
+                         "Difference: ", round(check_value,2), "\n",
+                         "Thresholds: \n",
+                         "Min: ", min_yel, " / ", min_red,"\n",
+                         "Max: ", max_yel, " / ", max_red
+    )
+    )
+
+  # scenario - absolute
+  df[df$category == "scenario" & df$metric == "absolute", ] <-
+    df[df$category == "scenario" & df$metric == "absolute", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Value: ", round(value,2), "\n",
+                         "Thresholds: \n",
+                         "Min: ", min_yel, " / ", min_red,"\n",
+                         "Max: ", max_yel, " / ", max_red
+    )
+    )
+
+  # scenario - growthrate
+  df[df$category == "scenario" & df$metric == "absolute", ] <-
+    df[df$category == "scenario" & df$metric == "absolute", ] %>%
+    mutate(text = paste0(region, "\n",
+                         period, "\n",
+                         "Avg. growth/yr: ", round(check_value,2)*100, "% \n",
+                         "Absolute value: ", round(value,2), " \n",
+                         "Thresholds: \n",
+                         "Min: ", min_yel*100, "% / ", min_red*100, "% \n",
+                         "Max: ", max_yel*100, "% / ", max_red*100, "%"
+    )
+    )
+
+  return(df)
+}
diff --git a/R/combineData.R b/R/combineData.R
index e4419b4..7b8e26c 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -1,6 +1,7 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
 # for one row of cfg: filter and merge relevant scenario data with cfg
+# results in one df that contains scenario data, reference data and thresholds
 combineData <- function(data, hist, cfg_row) {
 
   # shorten as it will be used a lot
diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R
new file mode 100644
index 0000000..125992c
--- /dev/null
+++ b/R/evaluateThresholds.R
@@ -0,0 +1,94 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+
+evaluateThresholds <- function(df) {
+
+  # perform evaluation for each category and metric separately
+  # (shortcuts are possible but will decrease readability of the code for minor
+  # performance improvements)
+
+  # calculate values that will be compared to thresholds
+  # historic - relative
+  his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>%
+    mutate(check_value = ifelse(
+      is.na(ref_value),
+      NA,
+      # absolute relative deviation above/below reference
+      abs((value - ref_value) / ref_value)
+      )
+    )
+
+  # historic - difference
+  his_dif <- df[df$category == "historic" & df$metric == "difference", ] %>%
+    # absolute difference to reference
+    mutate(check_value = abs(value - ref_value))
+
+  # scenario - relative
+  sce_rel <- df[df$category == "scenario" & df$metric == "relative", ] %>%
+    mutate(check_value = ifelse(
+      is.na(ref_value),
+      NA,
+      # relative deviation above/below reference
+      (value - ref_value)/ref_value
+      )
+    )
+
+  # scenario - difference
+  sce_dif <- df[df$category == "scenario" & df$metric == "difference", ] %>%
+    mutate(check_value = ifelse(
+      is.na(ref_value),
+      NA,
+      # difference to  reference
+      value - ref_value
+      )
+    )
+
+  # scenario - absolute
+  sce_abs <- df[df$category == "scenario" & df$metric == "absolute", ] %>%
+    mutate(check_value = value)
+
+  # scenario - growthrate
+  # calculate average growth rate of the last 5 years between 2010 and 2060
+  # TODO: in case of a need to look further than 2060, split df and add
+  # calculations for 10-year step periods
+  sce_gro <- filter(df[df$category == "scenario" & df$metric == "growthrate", ],
+                    period <= 2060 & period >= 2010)
+
+  # add a column with the value 5 years ago for later calculations
+  tmp <- sce_gro
+  sce_gro$period <- sce_gro$period + 5
+  sce_gro <- sce_gro %>%
+    mutate(value_5y_ago = value) %>%
+    select(-value) %>%
+    merge(tmp)
+
+  # calculate the yearly average growth rate
+  sce_gro <- sce_gro %>%
+    mutate(check_value = (value/value_5y_ago)^(1/5) - 1) %>%
+    select(-value_5y_ago)
+
+
+  # reassemble data.frame
+  df <- do.call("rbind",
+                list(his_rel, his_dif, sce_rel, sce_dif, sce_abs, sce_gro))
+
+  # perform comparison to thresholds for whole data.frame at once
+  df <- df %>%
+    mutate(check = ifelse(is.na(check_value),
+                        "grey",
+                        ifelse(
+                          # first check whether red threshold is violated...
+                          check_value > max_red | check_value < min_red,
+                          "red",
+                          # otherwise check if yellow threshold is violated...
+                          ifelse(
+                            check_value > max_yel | check_value < min_yel,
+                            "yellow",
+                            # ... else green
+                            "green"
+                            )
+                          )
+                        )
+           )
+
+  return(df)
+}
diff --git a/R/plotHeatmaps.R b/R/plotHeatmaps.R
index 6e22c2a..5284642 100644
--- a/R/plotHeatmaps.R
+++ b/R/plotHeatmaps.R
@@ -1,67 +1,38 @@
-# construct tooltips for interactive plots
-appendTooltip <- function(df) {
-  # text for tooltip, category 1
-  if (cat == 1) {
-    d <- d %>%
-      mutate(text = paste0(region, "\n",
-                           period, "\n",
-                           "Value: ", round(value,2), "\n",
-                           "Ref_Value: ", round(ref_value,2), "\n",
-                           "Ref_Source: ", ref_model, "\n",
-                           "Thresholds: \n",
-                           ifelse(metric == "relative",
-                                  paste0("Max: ", max_yel*100, "% / ", max_red*100, "%"),
-                                  paste0("Max: ", max_yel, " / ", max_red))
-      )
-      )
-  }
-
-  # text for tooltip, category 2
-  if (cat == 2) {
-    d <- d %>%
-      mutate(text = paste0(region, "\n",
-                           period, "\n",
-                           "Value: ", round(value,2), "\n",
-                           ifelse(metric == "relative",
-                                  paste0("Ref Value: ", round(ref_value,2), "\n",
-                                         "Rel Deviation: ", round(check_value,2)*100, "% \n",
-                                         "Thresholds: \n",
-                                         "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
-                                         "Max: ", max_yel*100, "% / ", max_red*100, "%"),
-                                  # for metric == "absolute"
-                                  paste0("Thresholds: \n",
-                                         "Min: ", min_yel, " / ", min_red,"\n",
-                                         "Max: ", max_yel, " / ", max_red)
-                           )
-      )
-      )
-  }
+#' @importFrom dplyr filter select mutate %>%
+#' @import ggplot2
+#' @importFrom ggthemes theme_tufte
+#' @importFrom plotly ggplotly
 
-  if (cat == 3) {
-    d <- d %>%
-      mutate(text = paste0(region, "\n",
-                           period, "\n",
-                           "Avg. growth/yr: ", round(check_value,2)*100, "% \n",
-                           "Absolute value: ", round(value,2), " \n",
-                           "Thresholds: \n",
-                           paste0("Min: ", min_yel*100, "% / ", min_red*100, "% \n",
-                                  "Max: ", max_yel*100, "% / ", max_red*100, "%")
-      )
-      )
-  }
+# takes the output of "validateScenarios()" and plots heatmaps per variable
+validationHeatmap <- function(df, var, cat, met, interactive = T) {
 
-  return(df)
-}
+  # possible extension: when giving multiple vars, plot as facets in same row
 
+  # prepare data slice
+  d <- df %>%
+    filter(variable == var,
+           category == cat,
+           metric == met)
 
-validationHeatmap <- function(d, var, cat, metric, interactive = T) {
+  if (nrow(d) == 0) {
 
-  # possible extension: when giving multiple vars, plot as facets in same row
+    df <- df %>% mutate(cm = paste(category, metric, sep = "-"))
+    unique(df[df$variable == "PE|Coal", "cm"])
 
-  d <- filter(d, variable == var)
+    stop(paste0(
+    "No data found for variable in this category and metric.\n
+    variable ", var ," is available for the following category-metric
+    combinations: ", unique(df[df$variable == "PE|Coal", "cm"]
+                )
+      )
+    )
+  }
 
   d$period <- as.character(d$period)
-
+  colors <- c(green  = "#008450",
+              yellow = "#EFB700",
+              red    = "#B81D13",
+              grey   = "#808080")
 
 
   # classic ggplot, with text in aes
@@ -70,9 +41,9 @@ validationHeatmap <- function(d, var, cat, metric, interactive = T) {
     scale_fill_manual(values = colors, breaks = colors) +
     facet_grid(scenario~.)
   # make it beautiful
-  # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2/
+  # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2
   p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]"))
-  p <- p + theme_tufte(base_family="Helvetica")
+  p <- p + theme_tufte(base_family="Helvetica")  # creates warnings
   p <- p + theme(axis.ticks=element_blank())
   p <- p + theme(axis.text=element_text(size=7))
   p <- p + coord_equal()
diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R
index 260452e..0d46c63 100644
--- a/R/resolveDuplicates.R
+++ b/R/resolveDuplicates.R
@@ -11,9 +11,10 @@ resolveDuplicates <- function(df) {
                                        "region", "period", "category", "metric")]), ]
 
   # here all instances of the data that was duplicated is removed
-  no_dupl <- dplyr::anti_join(df, duplicates_all, by = c("model", "scenario", "variable",
+  no_dupl <- dplyr::anti_join(df, duplicates, by = c("model", "scenario", "variable",
                                                          "region", "period", "category", "metric"))
 
+  # reattach one instance to get a complete data set without duplicates
   df <- rbind(no_dupl, duplicates)
 
   # different approach: allows partial overwriting
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 14bc6eb..f72b822 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -85,7 +85,7 @@ evaluateThreshold <- function(df) {
 
 # bringing it all together
 # TODO: introduce file path and name as argument
-validateScenario <- function() {
+validateScenarios <- function() {
 
   data <- importData()
 
@@ -99,14 +99,17 @@ validateScenario <- function() {
   # combine data for each row of the config and bind together
   df <- data.frame()
   for (i in 1:nrow(cfg)) {
-    # TODO: hist should be optional, validation should work without "historical"
-    d <- combineData(data, hist, cfg[i, ])
-    df <- rbind(df, d)
+    # TODO: hist should only be needed if category "historical" is in config
+    # validation generally should work without hist data
+    df_row <- combineData(data, hist, cfg[i, ])
+    df <- rbind(df, df_row)
   }
 
-  df <- evaluateThreshold(df)
+  df <- resolveDuplicates(df)
 
-  df <- createTooltip(df)
+  df <- evaluateThresholds(df)
+
+  df <- appendTooltips(df)
 
   return(df)
 }
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
index e88860f..6767678 100644
--- a/inst/config/validationConfig.csv
+++ b/inst/config/validationConfig.csv
@@ -19,4 +19,4 @@ scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;;
 scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;;
 scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;;
 scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;;
-scenario;;;Cap|Electricity|Solar|PV;;;;;0.5;1;relative;yes;;;
+scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;;

From 9c06a01c62ec19ec7c6bcd5bd86f23da6234348a Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 28 Feb 2024 16:35:45 +0100
Subject: [PATCH 04/18] vs wip

---
 NAMESPACE                                 |   2 +
 R/appendTooltips.R                        |   5 +-
 R/evaluateThresholds.R                    |   4 +-
 R/validateScenarios.R                     |  30 ++---
 R/{plotHeatmaps.R => validationHeatmap.R} |   9 +-
 inst/markdown/REMINDvalidation.Rmd        | 139 ++++++++++++++++++++++
 6 files changed, 160 insertions(+), 29 deletions(-)
 rename R/{plotHeatmaps.R => validationHeatmap.R} (89%)
 create mode 100644 inst/markdown/REMINDvalidation.Rmd

diff --git a/NAMESPACE b/NAMESPACE
index 502b1b0..deb06a6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+export(validateScenarios)
+export(validationHeatmap)
 import(ggplot2)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,filter)
diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index 9470b2a..cafc984 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -8,9 +8,10 @@ appendTooltips <- function(df) {
     df[df$category == "historic" & df$metric == "relative", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Value: ", round(value,2), "\n",
-                         "Ref_Value: ", round(ref_value,2), "\n",
+                         "Value: ", round(value, 2), "\n",
+                         "Ref_Value: ", round(ref_value, 2), "\n",
                          "Ref_Source: ", ref_model, "\n",
+                         "Deviation:", round(check_value, 2)*100, "%\n",
                          "Thresholds: \n",
                          paste0("Max: ", max_yel*100, "% / ", max_red*100, "%")
     )
diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R
index 125992c..a15d807 100644
--- a/R/evaluateThresholds.R
+++ b/R/evaluateThresholds.R
@@ -1,6 +1,8 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-evaluateThresholds <- function(df) {
+# cleanInf = TRUE: choose to replace "Inf" and "-Inf" which were introduced
+# for ease of calculations with "-"
+evaluateThresholds <- function(df, cleanInf = TRUE) {
 
   # perform evaluation for each category and metric separately
   # (shortcuts are possible but will decrease readability of the code for minor
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index f72b822..993398f 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,22 +1,12 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-# import data
-# supported formats:
-# - .mif
-# -
-
-
-# dev helpers
-#configName <- "validationConfig.csv"
-#usethis::use_data(configName, internal = T)
-
-
-
 importData <- function() {
   # for dev purposes
   if (file.exists("mif.rds")) {
+    cat("loading scenario data from cache\n")
     data <- readRDS("mif.rds")
   } else {
+    cat("importing scenario data from file(s)\n")
     path <- "C:/Users/pascalwe/Data/vs/"
     data <- remind2::deletePlus(quitte::read.quitte(
       c(paste0(path, "REMIND_generic_Bal.mif"),
@@ -37,8 +27,11 @@ importData <- function() {
 importReferenceData <- function() {
   # for dev purposes
   if (file.exists("hist.rds")) {
+    cat("loading reference data from cache\n")
     hist <- readRDS("hist.rds")
   } else {
+    cat("importing reference data from file\n")
+    path <- "C:/Users/pascalwe/Data/vs/"
     hist <- quitte::read.quitte(paste0(path, "historical.mif"))
     hist <- filter(hist, period >= 1990)
     saveRDS(hist, "hist.rds")
@@ -56,7 +49,7 @@ importReferenceData <- function() {
 #   - model, scenario, region, period
 # ...
 loadConfig <- function(configName) {
-  path <- system.file(paste0("config/", configName) , package = "piamValidation")
+  path <- system.file(paste0("config/", configName), package = "piamValidation")
   cfg <- read.csv2(path, na.strings = "")
   return(cfg)
 }
@@ -75,16 +68,9 @@ cleanConfig <- function(cfg) {
   return(cfg)
 }
 
-
-evaluateThreshold <- function(df) {
-
-  return(df)
-}
-
-
-
 # bringing it all together
-# TODO: introduce file path and name as argument
+# TODO: introduce file paths and names as arguments
+#' @export
 validateScenarios <- function() {
 
   data <- importData()
diff --git a/R/plotHeatmaps.R b/R/validationHeatmap.R
similarity index 89%
rename from R/plotHeatmaps.R
rename to R/validationHeatmap.R
index 5284642..5234eb6 100644
--- a/R/plotHeatmaps.R
+++ b/R/validationHeatmap.R
@@ -2,6 +2,7 @@
 #' @import ggplot2
 #' @importFrom ggthemes theme_tufte
 #' @importFrom plotly ggplotly
+#' @export
 
 # takes the output of "validateScenarios()" and plots heatmaps per variable
 validationHeatmap <- function(df, var, cat, met, interactive = T) {
@@ -16,13 +17,12 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
 
   if (nrow(d) == 0) {
 
-    df <- df %>% mutate(cm = paste(category, metric, sep = "-"))
-    unique(df[df$variable == "PE|Coal", "cm"])
+    df$cm <- paste(category, metric, sep = "-")
 
     stop(paste0(
     "No data found for variable in this category and metric.\n
     variable ", var ," is available for the following category-metric
-    combinations: ", unique(df[df$variable == "PE|Coal", "cm"]
+    combinations: ", unique(df[df$variable == var, "cm"]
                 )
       )
     )
@@ -58,7 +58,8 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
   }
   fig <- ggplotly(p, tooltip="text")
 
-  # improve plotly layout, kinda works but very manual
+  # improve plotly layout, kinda works but very manual,
+  # TODO: can this be extended to a general useful function?
   #fig <- fig %>% subplot(heights = 0.3) %>%
   #   layout(title = list(y=0.64))
   if (interactive) {
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
new file mode 100644
index 0000000..6d42e19
--- /dev/null
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -0,0 +1,139 @@
+---
+title: "piamValidation: REMIND"
+output:
+  html_document:
+    toc: true
+    toc_float: true
+    code_folding: hide
+params:
+  gdx: "fulldata.gdx"
+  warning: false
+  message: false
+  figWidth: 8
+---
+
+```{r include=FALSE}
+devtools::load_all(".")
+library(knitr)
+library(ggplot2)
+library(ggthemes)
+library(plotly)
+
+knitr::opts_chunk$set(
+  echo = FALSE,
+  error = TRUE,
+  #fig.width = params$figWidth,
+  message = params$message,
+  warning = params$warning
+)
+```
+
+## Import and Prepare Data
+
+
+```{r}
+# Data Preparation
+df <- validateScenarios()
+
+```
+## Plots
+
+### Historic - Relative
+Relative deviation to historical reference data. Absolute numbers, so no 
+difference is made between being above or below the reference value.
+```{r}
+c <- "historic"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Historic - Difference
+Difference to historical reference data
+```{r}
+c <- "historic"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Relative
+```{r}
+c <- "scenario"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Difference
+```{r}
+c <- "scenario"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Absolute
+```{r}
+c <- "scenario"
+m <- "absolute"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+
+### Scenario - Growthrate
+```{r}
+c <- "scenario"
+m <- "growthrate"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```

From 2e017660ca2f7a5d2b9d855dfb9c23927cac7858 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Thu, 29 Feb 2024 11:18:44 +0100
Subject: [PATCH 05/18] improve file handling

---
 .gitignore                         |   2 +
 R/combineData.R                    |   2 +-
 R/importFunctions.R                |  68 +++++++++++++++++++++++++
 R/sysdata.rda                      | Bin 107 -> 0 bytes
 R/validateScenarios.R              |  79 ++---------------------------
 inst/markdown/REMINDvalidation.Rmd |  19 +++++--
 6 files changed, 91 insertions(+), 79 deletions(-)
 create mode 100644 R/importFunctions.R
 delete mode 100644 R/sysdata.rda

diff --git a/.gitignore b/.gitignore
index 5b6a065..958b5bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 .Rhistory
 .RData
 .Ruserdata
+.rds
+.html
diff --git a/R/combineData.R b/R/combineData.R
index 7b8e26c..a90227b 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -93,7 +93,7 @@ combineData <- function(data, hist, cfg_row) {
       # get reference values for these metrics
     } else if (c$metric %in% c("relative", "difference")) {
       # TODO: only works for ref_period, add support for model/scenario as ref
-      # TODO: support choosing ref_period AND model/scenario
+      # TODO: support choosing ref_period AND model/scenario?
       ref <- data %>%
         filter(variable == c$variable,
                region %in% reg,
diff --git a/R/importFunctions.R b/R/importFunctions.R
new file mode 100644
index 0000000..93baedc
--- /dev/null
+++ b/R/importFunctions.R
@@ -0,0 +1,68 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+
+# scenarioData: one or multiple paths to .mif or .csv file(s) containing
+#               scenario data in IAM format
+importData <- function(scenarioPath) {
+  # for dev purposes use cache
+  if (file.exists("mif.rds")) {
+    cat("loading scenario data from cache\n")
+    data <- readRDS("mif.rds")
+  } else {
+    cat("importing scenario data from file(s)\n")
+    data <- remind2::deletePlus(quitte::as.quitte(scenarioPath))
+    saveRDS(data, "mif.rds")
+  }
+
+  # change ordering of factors, put last element ("World") first
+  # TODO: will not work if World is not last, put somewhere else?
+  new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1))
+  data$region <- factor(data$region, levels = new_order)
+
+  return(data)
+}
+
+importReferenceData <- function(referencePath) {
+  # for dev purposes use cache
+  if (file.exists("hist.rds")) {
+    cat("loading reference data from cache\n")
+    hist <- readRDS("hist.rds")
+  } else {
+    cat("importing reference data from file\n")
+    hist <- quitte::as.quitte(referencePath)
+    hist <- filter(hist, period >= 1990)
+    saveRDS(hist, "hist.rds")
+  }
+
+  # remove all historical data before 1990, (only historic?)
+  hist <- filter(hist, period >= 1990)
+
+  return(hist)
+}
+
+# required columns:
+# - category: "historic" or "scenario"
+# - can be empty, contain one element or multiple:
+#   - model, scenario, region, period
+# ...
+getConfig <- function(configName) {
+  # TODO: check if config is available, otherwise is it a local file? if yes load that one
+  path <- system.file(paste0("config/", configName), package = "piamValidation")
+  cfg <- read.csv2(path, na.strings = "")
+  return(cfg)
+}
+
+
+cleanConfig <- function(cfg) {
+  # TODO: insert functionality to expand a row of the config, that should be
+  #       used for multiple variables, via "Var|*"
+  # fill empty threshold columns with Infinity for easier evaluation
+  cfg <- cfg %>%
+    mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)),
+           min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)),
+           max_yel = as.numeric(ifelse(is.na(max_yel),  Inf, max_yel)),
+           max_red = as.numeric(ifelse(is.na(max_red),  Inf, max_red))
+    )
+
+  # insert tests here to check if there are forbidden fields in config
+  return(cfg)
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
deleted file mode 100644
index 26ce3b89e068b8251b177ee456e6eadf44977611..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 107
zcmV-x0F?hiT4*^jL0KkKS#gl0Apigye}McjL;wH*5C9AT5I`@djQ{`vAOKV~O&E*>
z(WXPupwLkLP&5xn05LR}UrHb~g=+$YQ0yH-XC_UbsDH4F=e-l>w+%rMG8oZUz)78Q
NF64@Ep&;WSNJ8#sB|QKD

diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 993398f..565b455 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,83 +1,14 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-importData <- function() {
-  # for dev purposes
-  if (file.exists("mif.rds")) {
-    cat("loading scenario data from cache\n")
-    data <- readRDS("mif.rds")
-  } else {
-    cat("importing scenario data from file(s)\n")
-    path <- "C:/Users/pascalwe/Data/vs/"
-    data <- remind2::deletePlus(quitte::read.quitte(
-      c(paste0(path, "REMIND_generic_Bal.mif"),
-        paste0(path, "REMIND_generic_NPi.mif"))
-        )
-      )
-    saveRDS(data, "mif.rds")
-  }
-
-  # change ordering of factors, put last element ("World") first
-  # TODO: will not work if World is not last
-  new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1))
-  data$region <- factor(data$region, levels = new_order)
-
-  return(data)
-}
-
-importReferenceData <- function() {
-  # for dev purposes
-  if (file.exists("hist.rds")) {
-    cat("loading reference data from cache\n")
-    hist <- readRDS("hist.rds")
-  } else {
-    cat("importing reference data from file\n")
-    path <- "C:/Users/pascalwe/Data/vs/"
-    hist <- quitte::read.quitte(paste0(path, "historical.mif"))
-    hist <- filter(hist, period >= 1990)
-    saveRDS(hist, "hist.rds")
-  }
-
-  # remove all historical data before 1990
-  hist <- filter(hist, period >= 1990)
-
-  return(hist)
-}
-
-# required columns:
-# - category: "historic" or "scenario"
-# - can be empty, contain one element or multiple:
-#   - model, scenario, region, period
-# ...
-loadConfig <- function(configName) {
-  path <- system.file(paste0("config/", configName), package = "piamValidation")
-  cfg <- read.csv2(path, na.strings = "")
-  return(cfg)
-}
-
-
-cleanConfig <- function(cfg) {
-  # fill empty threshold columns with Infinity for easier evaluation
-  cfg <- cfg %>%
-    mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)),
-           min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)),
-           max_yel = as.numeric(ifelse(is.na(max_yel),  Inf, max_yel)),
-           max_red = as.numeric(ifelse(is.na(max_red),  Inf, max_red))
-           )
-
-  # insert tests here to check if there are forbidden fields in config
-  return(cfg)
-}
-
 # bringing it all together
-# TODO: introduce file paths and names as arguments
 #' @export
-validateScenarios <- function() {
+validateScenarios <- function(scenarioPath, referencePath, configName) {
 
-  data <- importData()
+  data <- importData(scenarioPath)
 
-  hist <- importReferenceData()
+  hist <- importReferenceData(referencePath)
 
-  cfg <- loadConfig(configName)
+  cfg <- getConfig(configName)
 
   cfg <- cleanConfig(cfg)
 
@@ -95,8 +26,6 @@ validateScenarios <- function() {
 
   df <- evaluateThresholds(df)
 
-  df <- appendTooltips(df)
-
   return(df)
 }
 
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index 6d42e19..2fd8679 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -6,7 +6,9 @@ output:
     toc_float: true
     code_folding: hide
 params:
-  gdx: "fulldata.gdx"
+  mif: ""
+  ref: ""
+  cfg: ""
   warning: false
   message: false
   figWidth: 8
@@ -33,7 +35,16 @@ knitr::opts_chunk$set(
 
 ```{r}
 # Data Preparation
-df <- validateScenarios()
+path <- "C:/Users/pascalwe/Data/vs/"
+mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
+          paste0(path, "REMIND_generic_NPi.mif"))
+
+hmif <- paste0(path, "historical.mif")
+
+cfg <- "validationConfig.csv"
+
+df <- validateScenarios(mifs, hmif, cfg)
+df <- appendTooltips(df)
 
 ```
 ## Plots
@@ -57,7 +68,7 @@ if (nrow(d) > 0) {
 ```
 
 ### Historic - Difference
-Difference to historical reference data
+Absolute difference to historical reference data
 ```{r}
 c <- "historic"
 m <- "difference"
@@ -74,6 +85,8 @@ if (nrow(d) > 0) {
 ```
 
 ### Scenario - Relative
+Relative deviation to data point from either:
+- scenario
 ```{r}
 c <- "scenario"
 m <- "relative"

From 50e0f9cb8471f883f3acbeabed22462fdab5d47c Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Thu, 29 Feb 2024 12:06:44 +0100
Subject: [PATCH 06/18] support reference scenarios

---
 R/appendTooltips.R                 | 22 +++++++++-------
 R/combineData.R                    | 40 ++++++++++++++++++++----------
 R/importFunctions.R                |  5 ++--
 R/validateScenarios.R              |  4 +--
 inst/config/validationConfig.csv   |  1 +
 inst/markdown/REMINDvalidation.Rmd |  9 +++++--
 6 files changed, 53 insertions(+), 28 deletions(-)

diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index cafc984..1b1367b 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -22,8 +22,8 @@ appendTooltips <- function(df) {
     df[df$category == "historic" & df$metric == "difference", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Value: ", round(value,2), "\n",
-                         "Ref_Value: ", round(ref_value,2), "\n",
+                         "Value: ", round(value, 2), "\n",
+                         "Ref_Value: ", round(ref_value, 2), "\n",
                          "Ref_Source: ", ref_model, "\n",
                          "Thresholds: \n",
                          "Max: ", max_yel, " / ", max_red
@@ -35,9 +35,13 @@ appendTooltips <- function(df) {
     df[df$category == "scenario" & df$metric == "relative", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Value: ", round(value,2), "\n",
-                         "Ref Value: ", round(ref_value,2), "\n",
-                         "Rel Deviation: ", round(check_value,2)*100, "% \n",
+                         "Value: ", round(value, 2), "\n",
+                         "Ref ", ifelse(!is.na(ref_period),
+                                               paste("Period:", ref_period),
+                                               paste("Scenario:", ref_scenario)
+                                               ), "\n",
+                         "Ref Value: ", round(ref_value, 2), "\n",
+                         "Rel Deviation: ", round(check_value, 2)*100, "% \n",
                          "Thresholds: \n",
                          "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
                          "Max: ", max_yel*100, "% / ", max_red*100, "%"
@@ -49,9 +53,9 @@ appendTooltips <- function(df) {
     df[df$category == "scenario" & df$metric == "difference", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Value: ", round(value,2), "\n",
-                         "Ref Value: ", round(ref_value,2), "\n",
-                         "Difference: ", round(check_value,2), "\n",
+                         "Value: ", round(value, 2), "\n",
+                         "Ref Value: ", round(ref_value, 2), "\n",
+                         "Difference: ", round(check_value, 2), "\n",
                          "Thresholds: \n",
                          "Min: ", min_yel, " / ", min_red,"\n",
                          "Max: ", max_yel, " / ", max_red
@@ -63,7 +67,7 @@ appendTooltips <- function(df) {
     df[df$category == "scenario" & df$metric == "absolute", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Value: ", round(value,2), "\n",
+                         "Value: ", round(value, 2), "\n",
                          "Thresholds: \n",
                          "Min: ", min_yel, " / ", min_red,"\n",
                          "Max: ", max_yel, " / ", max_red
diff --git a/R/combineData.R b/R/combineData.R
index a90227b..ed209e8 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -16,6 +16,7 @@ combineData <- function(data, hist, cfg_row) {
 
   # create filters
   # check whether regions, periods, scenarios are specified
+  # TODO: add model
   reg <- if (is.na(c$region))   all_reg else
     strsplit(c$region, split = ", |,")[[1]]
   sce <- if (is.na(c$scenario)) all_sce else
@@ -92,21 +93,34 @@ combineData <- function(data, hist, cfg_row) {
 
       # get reference values for these metrics
     } else if (c$metric %in% c("relative", "difference")) {
-      # TODO: only works for ref_period, add support for model/scenario as ref
       # TODO: support choosing ref_period AND model/scenario?
-      ref <- data %>%
-        filter(variable == c$variable,
-               region %in% reg,
-               period == c$ref_period,
-               scenario %in% sce) %>%
-        mutate(ref_value = value, ref_period = period) %>%
-        select(-c(period, value))
-      df <- merge(d, ref)
 
-      # add columns which are not used in this category, fill NA
-      df <- df %>%
-        mutate(ref_model = NA,
-               ref_scenario = NA)
+      # if a reference period should be used, same scenario, same model
+      if (!is.na(c$ref_period)) {
+        ref <- data %>%
+          filter(variable == c$variable,
+                 region %in% reg,
+                 period == c$ref_period,
+                 scenario %in% sce) %>%
+          mutate(ref_value = value, ref_period = period) %>%
+          select(-c(period, value)) %>%
+          # add columns which are not used in this category, fill NA
+          mutate(ref_model = NA, ref_scenario = NA)
+
+        # if a reference scenario should be used, same period, same model
+      } else if (!is.na(c$ref_scenario)) {
+        ref <- data %>%
+          filter(variable == c$variable,
+                 region %in% reg,
+                 period %in% per,
+                 scenario == c$ref_scenario) %>%
+          mutate(ref_value = value, ref_scenario = scenario) %>%
+          select(-c(scenario, value)) %>%
+          # add columns which are not used in this category, fill NA
+          mutate(ref_model = NA, ref_period = NA)
+      }
+
+      df <- merge(d, ref)
 
     } else {
       # TODO: have this warning here or earlier when cleaning config?
diff --git a/R/importFunctions.R b/R/importFunctions.R
index 93baedc..a9e2b25 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -1,8 +1,8 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-# scenarioData: one or multiple paths to .mif or .csv file(s) containing
+# scenarioPath: one or multiple paths to .mif or .csv file(s) containing
 #               scenario data in IAM format
-importData <- function(scenarioPath) {
+importScenarioData <- function(scenarioPath) {
   # for dev purposes use cache
   if (file.exists("mif.rds")) {
     cat("loading scenario data from cache\n")
@@ -48,6 +48,7 @@ getConfig <- function(configName) {
   # TODO: check if config is available, otherwise is it a local file? if yes load that one
   path <- system.file(paste0("config/", configName), package = "piamValidation")
   cfg <- read.csv2(path, na.strings = "")
+  cat(paste0("loading config file: ", configName, "\n"))
   return(cfg)
 }
 
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 565b455..311d8ee 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -4,7 +4,7 @@
 #' @export
 validateScenarios <- function(scenarioPath, referencePath, configName) {
 
-  data <- importData(scenarioPath)
+  data <- importScenarioData(scenarioPath)
 
   hist <- importReferenceData(referencePath)
 
@@ -17,7 +17,7 @@ validateScenarios <- function(scenarioPath, referencePath, configName) {
   df <- data.frame()
   for (i in 1:nrow(cfg)) {
     # TODO: hist should only be needed if category "historical" is in config
-    # validation generally should work without hist data
+    #       validation generally should work without hist data
     df_row <- combineData(data, hist, cfg[i, ])
     df <- rbind(df, df_row)
   }
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
index 6767678..2ff00c1 100644
--- a/inst/config/validationConfig.csv
+++ b/inst/config/validationConfig.csv
@@ -20,3 +20,4 @@ scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;;
 scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;;
 scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;;
 scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;;
+scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index 2fd8679..f883115 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -41,9 +41,14 @@ mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
 
 hmif <- paste0(path, "historical.mif")
 
-cfg <- "validationConfig.csv"
+config <- "validationConfig.csv"
 
-df <- validateScenarios(mifs, hmif, cfg)
+
+data <- importScenarioData(mifs)
+hist <- importReferenceData(hmif)
+cfg <- getConfig(config) %>% cleanConfig()
+
+df <- validateScenarios(mifs, hmif, config)
 df <- appendTooltips(df)
 
 ```

From ef163427b8580606b088a73c57d559126a92f3ac Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Thu, 29 Feb 2024 16:56:41 +0100
Subject: [PATCH 07/18] vs wip

---
 DESCRIPTION                        |  1 +
 R/appendTooltips.R                 | 37 +++++++++++++++---------------
 R/evaluateThresholds.R             |  3 +++
 R/validationHeatmap.R              | 32 ++++++++++++++------------
 inst/config/validationConfig.csv   |  6 ++---
 inst/markdown/REMINDvalidation.Rmd | 33 +++++++++++++++++++++-----
 6 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 22d83c5..e497cbf 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,6 +12,7 @@ Imports:
     dplyr (>= 1.1.1),
     ggplot2,
     ggthemes,
+    kableExtra,
     plotly,
     quitte (>= 0.3123.0),
     readxl,
diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index 1b1367b..e9d3649 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -11,8 +11,8 @@ appendTooltips <- function(df) {
                          "Value: ", round(value, 2), "\n",
                          "Ref_Value: ", round(ref_value, 2), "\n",
                          "Ref_Source: ", ref_model, "\n",
-                         "Deviation:", round(check_value, 2)*100, "%\n",
-                         "Thresholds: \n",
+                         "Deviation:", round(check_value)*100, "%\n",
+                         "Thresholds (yel/red): \n",
                          paste0("Max: ", max_yel*100, "% / ", max_red*100, "%")
     )
     )
@@ -25,7 +25,7 @@ appendTooltips <- function(df) {
                          "Value: ", round(value, 2), "\n",
                          "Ref_Value: ", round(ref_value, 2), "\n",
                          "Ref_Source: ", ref_model, "\n",
-                         "Thresholds: \n",
+                         "Thresholds (yel/red): \n",
                          "Max: ", max_yel, " / ", max_red
     )
     )
@@ -41,12 +41,11 @@ appendTooltips <- function(df) {
                                                paste("Scenario:", ref_scenario)
                                                ), "\n",
                          "Ref Value: ", round(ref_value, 2), "\n",
-                         "Rel Deviation: ", round(check_value, 2)*100, "% \n",
-                         "Thresholds: \n",
+                         "Rel Deviation: ", round(check_value)*100, "% \n",
+                         "Thresholds (yel/red): \n",
                          "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
-                         "Max: ", max_yel*100, "% / ", max_red*100, "%"
-    )
-    )
+                         "Max: ", max_yel*100, "% / ", max_red*100, "%")
+           )
 
   # scenario - difference
   df[df$category == "scenario" & df$metric == "difference", ] <-
@@ -56,7 +55,7 @@ appendTooltips <- function(df) {
                          "Value: ", round(value, 2), "\n",
                          "Ref Value: ", round(ref_value, 2), "\n",
                          "Difference: ", round(check_value, 2), "\n",
-                         "Thresholds: \n",
+                         "Thresholds (yel/red): \n",
                          "Min: ", min_yel, " / ", min_red,"\n",
                          "Max: ", max_yel, " / ", max_red
     )
@@ -68,24 +67,24 @@ appendTooltips <- function(df) {
     mutate(text = paste0(region, "\n",
                          period, "\n",
                          "Value: ", round(value, 2), "\n",
-                         "Thresholds: \n",
+                         "Thresholds (yel/red): \n",
                          "Min: ", min_yel, " / ", min_red,"\n",
                          "Max: ", max_yel, " / ", max_red
-    )
-    )
+                         )
+           )
 
   # scenario - growthrate
-  df[df$category == "scenario" & df$metric == "absolute", ] <-
-    df[df$category == "scenario" & df$metric == "absolute", ] %>%
+  df[df$category == "scenario" & df$metric == "growthrate", ] <-
+    df[df$category == "scenario" & df$metric == "growthrate", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         "Avg. growth/yr: ", round(check_value,2)*100, "% \n",
-                         "Absolute value: ", round(value,2), " \n",
-                         "Thresholds: \n",
+                         "Avg. growth/yr: ", round(check_value)*100, "% \n",
+                         "Absolute value: ", round(value, 2), " \n",
+                         "Thresholds (yel/red): \n",
                          "Min: ", min_yel*100, "% / ", min_red*100, "% \n",
                          "Max: ", max_yel*100, "% / ", max_red*100, "%"
-    )
-    )
+                         )
+           )
 
   return(df)
 }
diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R
index a15d807..3cd1bfd 100644
--- a/R/evaluateThresholds.R
+++ b/R/evaluateThresholds.R
@@ -92,5 +92,8 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
                         )
            )
 
+  # after evaluation, "Inf" can be removed again
+  if (cleanInf) df[df == "Inf" | df == "-Inf"] <- NA
+
   return(df)
 }
diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R
index 5234eb6..b22eab6 100644
--- a/R/validationHeatmap.R
+++ b/R/validationHeatmap.R
@@ -15,17 +15,16 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
            category == cat,
            metric == met)
 
+  # warn if no data is found for combination of var, cat and met
   if (nrow(d) == 0) {
-
     df$cm <- paste(category, metric, sep = "-")
-
-    stop(paste0(
-    "No data found for variable in this category and metric.\n
-    variable ", var ," is available for the following category-metric
-    combinations: ", unique(df[df$variable == var, "cm"]
-                )
+    warning(
+      paste0(
+        "No data found for variable in this category and metric.\n
+        variable ", var ," is available for the following category-metric
+        combinations: ", unique(df[df$variable == var, "cm"])
+        )
       )
-    )
   }
 
   d$period <- as.character(d$period)
@@ -42,10 +41,12 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
     facet_grid(scenario~.)
   # make it beautiful
   # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2
-  p <- p + labs(x=NULL, y=NULL, title=paste0(var, " [", d$unit[1], "]"))
-  p <- p + theme_tufte(base_family="Helvetica")  # creates warnings
-  p <- p + theme(axis.ticks=element_blank())
-  p <- p + theme(axis.text=element_text(size=7))
+  p <- p + labs(x = NULL, y = NULL, title = paste0(var,
+                                                   " [", d$unit[1], "] - ",
+                                                   cat, "/", met))
+  p <- p + theme_tufte(base_family = "Helvetica")  # creates warnings
+  p <- p + theme(axis.ticks = element_blank())
+  p <- p + theme(axis.text = element_text(size = 7))
   p <- p + coord_equal()
   p <- p + theme(legend.position = "none")
 
@@ -56,12 +57,13 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
   if("World" %in% d$region) {
     p <- p + geom_vline(xintercept = 1.5, linewidth = 0.8, color = "white")
   }
-  fig <- ggplotly(p, tooltip="text")
+  fig <- ggplotly(p, tooltip = "text")
 
-  # improve plotly layout, kinda works but very manual,
-  # TODO: can this be extended to a general useful function?
+  # improve plotly layout, kinda works but very manual
+  # TODO: can this be extended to a general, useful function?
   #fig <- fig %>% subplot(heights = 0.3) %>%
   #   layout(title = list(y=0.64))
+
   if (interactive) {
     return(fig)
   } else {
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
index 2ff00c1..ecc030c 100644
--- a/inst/config/validationConfig.csv
+++ b/inst/config/validationConfig.csv
@@ -11,13 +11,13 @@ scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;;
 scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;;
 scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;;
 scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;;
-scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;yes;;;
+scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;no;;;
 scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;;
-scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;yes;;;
+scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;no;;;
 scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;;
 scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;;
 scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;;
-scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;yes;2020;;
+scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;no;2020;;
 scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;;
 scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;;
 scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index f883115..9e9088b 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -20,6 +20,7 @@ library(knitr)
 library(ggplot2)
 library(ggthemes)
 library(plotly)
+library(kableExtra)
 
 knitr::opts_chunk$set(
   echo = FALSE,
@@ -43,16 +44,32 @@ hmif <- paste0(path, "historical.mif")
 
 config <- "validationConfig.csv"
 
-
-data <- importScenarioData(mifs)
-hist <- importReferenceData(hmif)
-cfg <- getConfig(config) %>% cleanConfig()
+# debugging
+# data <- importScenarioData(mifs)
+# hist <- importReferenceData(hmif)
+# cfg <- getConfig(config) %>% cleanConfig()
 
 df <- validateScenarios(mifs, hmif, config)
 df <- appendTooltips(df)
+```
+## Validation
 
+### Summary
+```{r}
+# find "critical == yes" data points, that are red/yellow
+colors <- c(green  = "#008450",
+            yellow = "#EFB700",
+            red    = "#B81D13",
+            grey   = "#808080")
+dt <- dplyr::count(df, critical, check)
+kbl(dt) %>%
+  kable_styling(bootstrap_options = c("striped", "hover"), 
+                full_width = F) %>%
+  column_spec(2, color = colors[dt$check],
+              background = colors[dt$check])
 ```
-## Plots
+
+## Heatmaps by Category
 
 ### Historic - Relative
 Relative deviation to historical reference data. Absolute numbers, so no 
@@ -91,7 +108,11 @@ if (nrow(d) > 0) {
 
 ### Scenario - Relative
 Relative deviation to data point from either:
-- scenario
+
+- period (same scenario/model)
+- scenario (same period/model)
+- model (same period/scenario - Not supported yet)
+
 ```{r}
 c <- "scenario"
 m <- "relative"

From d23993c9074af3628753ac81cafcc4cc37ab9a3b Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Fri, 1 Mar 2024 16:47:37 +0100
Subject: [PATCH 08/18] wip expand variables in cfg

---
 inst/config/validationConfig_COMMITED.csv |   5 +
 inst/markdown/COMMITEDvalidation.Rmd      | 201 ++++++++++++++++++++++
 2 files changed, 206 insertions(+)
 create mode 100644 inst/config/validationConfig_COMMITED.csv
 create mode 100644 inst/markdown/COMMITEDvalidation.Rmd

diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITED.csv
new file mode 100644
index 0000000..0baabc8
--- /dev/null
+++ b/inst/config/validationConfig_COMMITED.csv
@@ -0,0 +1,5 @@
+category;metric;critical;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;ref_period;ref_model;ref_scenario;source/link to discussion
+historic;relative;yes;;;PE|*;;;;;0.05;0.1;;;;
+historic;relative;yes;;;Emi|*;;;;;0.05;0.1;;;;
+historic;relative;yes;;;SE|*;;;;;0.05;0.1;;;;
+historic;relative;yes;;;FE;;;;;0.05;0.1;;;;
diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd
new file mode 100644
index 0000000..14ccc13
--- /dev/null
+++ b/inst/markdown/COMMITEDvalidation.Rmd
@@ -0,0 +1,201 @@
+---
+title: "piamValidation: COMMITED"
+output:
+  html_document:
+    toc: true
+    toc_float: true
+    code_folding: hide
+params:
+  mif: ""
+  ref: ""
+  cfg: ""
+  warning: false
+  message: false
+  figWidth: 8
+---
+
+```{r include=FALSE}
+devtools::load_all(".")
+library(knitr)
+library(ggplot2)
+library(ggthemes)
+library(plotly)
+library(kableExtra)
+
+knitr::opts_chunk$set(
+  echo = FALSE,
+  error = TRUE,
+  #fig.width = params$figWidth,
+  message = params$message,
+  warning = params$warning
+)
+```
+
+## Import and Prepare Data
+
+
+```{r}
+# Data Preparation
+path <- "C:/Users/pascalwe/Data/vs/"
+mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
+          paste0(path, "REMIND_generic_NPi.mif"))
+
+hmif <- paste0(path, "historical.mif")
+
+config <- "validationConfig_COMMITED.csv"
+
+cfg <- getConfig(config)
+data <- importScenarioData(mifs)
+
+
+# expandConfig <- function(cfg, data, firstLevelOnly = TRUE)
+to_expand <- cfg[grepl("\\*", cfg$variable), ]
+all_vars <- unique(data$variable)
+firstLevelOnly <- TRUE
+
+cfg_new <- dplyr::anti_join(cfg, to_expand)
+for (i in 1:nrow(to_expand)) {
+  var <- gsub("\\|\\*", "\\\\\\|", to_expand$variable[i])
+  selected_vars <- all_vars[grepl(var, all_vars)]
+
+  #
+  # all variables that don't have a "|" in the substring following the given part
+  if (firstLevelOnly == TRUE) {
+    selected_vars <- selected_vars[!grepl("\\|", gsub(var, "", selected_vars))]
+    }
+  
+  c <- cfg[i,] %>%
+    slice(rep(1, each = length(selected_vars)))
+  c$variable <- selected_vars
+  cfg_new <- rbind(cfg_new, c)
+}
+
+cfg <- cfg_new
+
+cfg <- cleanConfig(cfg)
+
+
+# combine data for each row of the config and bind together
+df <- data.frame()
+for (i in 1:nrow(cfg)) {
+  # TODO: hist should only be needed if category "historical" is in config
+  #       validation generally should work without hist data
+  df_row <- combineData(data, hist, cfg[i, ])
+  df <- rbind(df, df_row)
+}
+
+df <- resolveDuplicates(df)
+
+df <- evaluateThresholds(df)
+
+df <- appendTooltips(df)
+
+```
+
+## Heatmaps by Category
+
+### Historic - Relative
+Relative deviation to historical reference data. Absolute numbers, so no 
+difference is made between being above or below the reference value.
+```{r}
+c <- "historic"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Historic - Difference
+Absolute difference to historical reference data
+```{r}
+c <- "historic"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Relative
+Relative deviation to data point from either:
+
+- period (same scenario/model)
+- scenario (same period/model)
+- model (same period/scenario - Not supported yet)
+
+```{r}
+c <- "scenario"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Difference
+```{r}
+c <- "scenario"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Absolute
+```{r}
+c <- "scenario"
+m <- "absolute"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+
+### Scenario - Growthrate
+```{r}
+c <- "scenario"
+m <- "growthrate"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```

From 0e1c675ca2465508e5bba9e27e41c2a76ddd9988 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 6 Mar 2024 17:11:30 +0100
Subject: [PATCH 09/18] expand variables, periods wip

---
 R/combineData.R                           | 12 ++--
 R/importFunctions.R                       | 67 +++++++++++++++++++++--
 R/validateScenarios.R                     | 16 ++++--
 inst/config/validationConfig.csv          | 46 ++++++++--------
 inst/config/validationConfig_COMMITED.csv |  8 +--
 inst/markdown/COMMITEDvalidation.Rmd      | 40 +++-----------
 inst/markdown/REMINDvalidation.Rmd        |  8 +--
 7 files changed, 117 insertions(+), 80 deletions(-)

diff --git a/R/combineData.R b/R/combineData.R
index ed209e8..1b795aa 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -2,7 +2,7 @@
 
 # for one row of cfg: filter and merge relevant scenario data with cfg
 # results in one df that contains scenario data, reference data and thresholds
-combineData <- function(data, hist, cfg_row) {
+combineData <- function(data, cfg_row, ref_data = NULL) {
 
   # shorten as it will be used a lot
   c <- cfg_row
@@ -33,7 +33,7 @@ combineData <- function(data, hist, cfg_row) {
 
   # filter scenario data for row in cfg
   d <- data %>%
-    filter(variable == c$variable,
+    dplyr::filter(variable == c$variable,
            region %in% reg,
            period %in% per,
            scenario %in% sce)
@@ -52,7 +52,7 @@ combineData <- function(data, hist, cfg_row) {
   if (c$category == "historic") {
     # historic data for relevant variable and dimensions (all sources)
     h <- hist %>%
-      filter(variable == c$variable,
+      dplyr::filter(variable == c$variable,
              region %in% reg,
              period %in% per) %>%
       mutate(ref_value = value, ref_model = model) %>%
@@ -62,7 +62,7 @@ combineData <- function(data, hist, cfg_row) {
 
     # in case one or more sources are specified, filter for them
     if (!is.na(c$ref_model)) {
-      h <- filter(
+      h <- dplyr::filter(
         h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]]
       )
     }
@@ -98,7 +98,7 @@ combineData <- function(data, hist, cfg_row) {
       # if a reference period should be used, same scenario, same model
       if (!is.na(c$ref_period)) {
         ref <- data %>%
-          filter(variable == c$variable,
+          dplyr::filter(variable == c$variable,
                  region %in% reg,
                  period == c$ref_period,
                  scenario %in% sce) %>%
@@ -110,7 +110,7 @@ combineData <- function(data, hist, cfg_row) {
         # if a reference scenario should be used, same period, same model
       } else if (!is.na(c$ref_scenario)) {
         ref <- data %>%
-          filter(variable == c$variable,
+          dplyr::filter(variable == c$variable,
                  region %in% reg,
                  period %in% per,
                  scenario == c$ref_scenario) %>%
diff --git a/R/importFunctions.R b/R/importFunctions.R
index a9e2b25..5b60369 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -29,12 +29,12 @@ importReferenceData <- function(referencePath) {
   } else {
     cat("importing reference data from file\n")
     hist <- quitte::as.quitte(referencePath)
-    hist <- filter(hist, period >= 1990)
+    hist <- dplyr::filter(hist, period >= 1990)
     saveRDS(hist, "hist.rds")
   }
 
   # remove all historical data before 1990, (only historic?)
-  hist <- filter(hist, period >= 1990)
+  hist <- dplyr::filter(hist, period >= 1990)
 
   return(hist)
 }
@@ -54,8 +54,6 @@ getConfig <- function(configName) {
 
 
 cleanConfig <- function(cfg) {
-  # TODO: insert functionality to expand a row of the config, that should be
-  #       used for multiple variables, via "Var|*"
   # fill empty threshold columns with Infinity for easier evaluation
   cfg <- cfg %>%
     mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)),
@@ -67,3 +65,64 @@ cleanConfig <- function(cfg) {
   # insert tests here to check if there are forbidden fields in config
   return(cfg)
 }
+
+
+# replace period ranges using ":" with comma-separated list of years
+expandPeriods <- function(cfg, data) {
+  per_expand_idx <- grep("\\:", cfg$period)
+  all_per <- unique(data$period)
+
+  # iterate through rows with ":"
+  for (i in per_expand_idx) {
+    if (nchar(cfg[i, "period"]) != 9) {
+      stop("Invalid range detected. Make sure to enter years as YYYY:YYYY.\n")
+    } else {
+      # find scenario-years that are withing indicated range
+      start <- substr(cfg[i, "period"], 1, 4)
+      if (start < min(all_per)) {
+        warning("Selected period starts earlier than scenario data.\n")
+        }
+      stop <- substr(cfg[i, "period"], 6, 9)
+      if (stop > max(all_per)) {
+        warning("Selected period ends later than scenario data.\n")
+      }
+      selected_per <- all_per[all_per >= start & all_per <= stop]
+
+      # overwrite ":" notation with list
+      cfg[i, "period"] <- paste0(selected_per, collapse = ", ")
+      }
+  }
+  return(cfg)
+}
+
+
+# takes config entries specifying a set of variables via "*" and expands it so
+# that every variable corresponds to one row in cfg
+# TODO: support "firstLevelOnly = TRUE" to not grab all sub-variables but just
+#       one level
+expandVariables <- function(cfg, data) {
+  # create the expanded config, starting with the not-to-expand rows,
+  # then appending the rows with expanded variable names
+  var_expand <- cfg[grepl("\\*", cfg$variable), ]
+  cfg_new <- dplyr::anti_join(cfg, var_expand, by = colnames(cfg))
+
+  if (length(var_expand > 0)) {
+    all_vars <- unique(data$variable)
+    for (i in 1:nrow(var_expand)) {
+      # prepare strings for grepping by adding escape characters and "."
+      var <- gsub("\\*", "\\.\\*", var_expand$variable[i])
+      var <- gsub("\\|", "\\\\|", var)
+      selected_vars <- all_vars[grepl(var, all_vars)]
+      cat(paste0(var_expand$variable[i], " was expanded into ",
+                 length(selected_vars), " sub-variables.\n"))
+
+      # take the original row for the current set of variables and repeat it
+      # once for each sub-variable, overwrite with sub-variable names
+      c <- cfg[i,] %>%
+        dplyr::slice(rep(1, each = length(selected_vars)))
+      c$variable <- selected_vars
+      cfg_new <- rbind(cfg_new, c)
+    }
+  }
+  return(cfg_new)
+}
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 311d8ee..7b59354 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,16 +1,20 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
 # bringing it all together
+# keepNA
 #' @export
-validateScenarios <- function(scenarioPath, referencePath, configName) {
+validateScenarios <- function(scenarioPath, configName, referencePath = NULL,
+                              keepNA = TRUE) {
 
   data <- importScenarioData(scenarioPath)
 
   hist <- importReferenceData(referencePath)
 
-  cfg <- getConfig(configName)
-
-  cfg <- cleanConfig(cfg)
+  cfg <- configName %>%
+    getConfig() %>%
+    cleanConfig() %>%
+    expandPeriods(data) %>%
+    expandVariables(data)
 
 
   # combine data for each row of the config and bind together
@@ -18,8 +22,10 @@ validateScenarios <- function(scenarioPath, referencePath, configName) {
   for (i in 1:nrow(cfg)) {
     # TODO: hist should only be needed if category "historical" is in config
     #       validation generally should work without hist data
-    df_row <- combineData(data, hist, cfg[i, ])
+    # TODO: optimize performance
+    df_row <- combineData(data, cfg[i, ], ref_data = hist)
     df <- rbind(df, df_row)
+    cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
   }
 
   df <- resolveDuplicates(df)
diff --git a/inst/config/validationConfig.csv b/inst/config/validationConfig.csv
index ecc030c..f72a115 100644
--- a/inst/config/validationConfig.csv
+++ b/inst/config/validationConfig.csv
@@ -1,23 +1,23 @@
-category;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;metric;critical;ref_period;ref_model;ref_scenario
-historic;;;Emi|CO2|Energy;World;;;;0.05;0.1;relative;yes;;EDGAR6;
-historic;;;Emi|CO2|Energy;DEU;;;;0.1;0.2;relative;yes;;EDGAR6;
-historic;;;PE|Coal;;;;;;0.1;relative;yes;;BP, IEA;
-historic;;;PE|Coal;World;;;;;0.2;relative;yes;;BP, IEA;
-historic;;;PE|Oil;;;;;;2;difference;yes;;;
-historic;;;PE|Oil;World;;;;;5;difference;yes;;;
-historic;;Bal;FE|Electricity;;;;;;0.1;relative;yes;;;
-historic;;Bal;FE|Electricity;World;;;;;0.2;relative;yes;;;
-scenario;;;Price|Carbon;;2030;;;300;600;absolute;yes;;;
-scenario;;;Price|Carbon;;2050;;;;1000;absolute;yes;;;
-scenario;;;Price|Carbon;DEU;2030;50;100;400;800;absolute;yes;;;
-scenario;;;Price|Carbon;DEU;2050;100;150;600;1200;absolute;yes;;;
-scenario;;;Price|Carbon;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;absolute;no;;;
-scenario;;Bal;Emi|GHG;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;absolute;yes;;;
-scenario;;Bal;Emi|GHG;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;absolute;no;;;
-scenario;;Bal;Temperature|Global Mean;;2100;;;1.5;1.55;absolute;yes;;;
-scenario;;;Emi|CO2;;2030;-0.5;-0.3;;;relative;yes;2020;;
-scenario;;;Emi|CO2;;2030;-0.6;-0.4;;;relative;yes;2015;;
-scenario;;;FE;;2030, 2040;-0.4;-0.2;;;relative;no;2020;;
-scenario;;Bal;Price|Secondary Energy|Electricity;;;5.56;;;;absolute;yes;;;
-scenario;;;New Cap|Electricity|Solar|PV;;;;;0.5;1;growthrate;yes;;;
-scenario;;Bal;Price|Carbon;;2030, 2040, 2050;1;;;;relative;yes;;;NPi
+category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
+historic;relative;yes;Emi|CO2|Energy;;;World;;;;0.05;0.1;EDGAR6;;;
+historic;relative;yes;Emi|CO2|Energy;;;DEU;;;;0.1;0.2;EDGAR6;;;
+historic;relative;yes;PE|Coal;;;;;;;;0.1;BP, IEA;;;
+historic;relative;yes;PE|Coal;;;World;;;;;0.2;BP, IEA;;;
+historic;difference;yes;PE|Oil;;;;;;;;2;;;;
+historic;difference;yes;PE|Oil;;;World;;;;;5;;;;
+historic;relative;yes;FE|Electricity;;Bal;;;;;;0.1;;;;
+historic;relative;yes;FE|Electricity;;Bal;World;;;;;0.2;;;;
+scenario;absolute;yes;Price|Carbon;;;;2030;;;300;600;;;;
+scenario;absolute;yes;Price|Carbon;;;;2050;;;;1000;;;;
+scenario;absolute;yes;Price|Carbon;;;DEU;2030;50;100;400;800;;;;
+scenario;absolute;yes;Price|Carbon;;;DEU;2050;100;150;600;1200;;;;
+scenario;absolute;no;Price|Carbon;;;LAM, MEA, SSA, OAS, IND;2030, 2050;;;;200;;;;
+scenario;absolute;yes;Emi|GHG;;Bal;DEU, USA, JPN, REF, CHA, CAZ, EUR;2050;-2000;-1000;0;500;;;;
+scenario;absolute;no;Emi|GHG;;Bal;LAM, MEA, SSA, OAS, IND;2060;-2000;-1000;0;500;;;;
+scenario;absolute;yes;Temperature|Global Mean;;Bal;;2100;;;1.5;1.55;;;;
+scenario;relative;yes;Emi|CO2;;;;2030;-0.5;-0.3;;;;;2020;
+scenario;relative;yes;Emi|CO2;;;;2030;-0.6;-0.4;;;;;2015;
+scenario;relative;no;FE;;;;2030, 2040;-0.4;-0.2;;;;;2020;
+scenario;absolute;yes;Price|Secondary Energy|Electricity;;Bal;;;5.56;;;;;;;
+scenario;growthrate;yes;New Cap|Electricity|Solar|PV;;;;;;;0.5;1;;;;
+scenario;relative;yes;Price|Carbon;;Bal;;2030, 2040, 2050;1;;;;;NPi;;
diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITED.csv
index 0baabc8..46fcabe 100644
--- a/inst/config/validationConfig_COMMITED.csv
+++ b/inst/config/validationConfig_COMMITED.csv
@@ -1,5 +1,3 @@
-category;metric;critical;model;scenario;variable;region;period;min_red;min_yel;max_yel;max_red;ref_period;ref_model;ref_scenario;source/link to discussion
-historic;relative;yes;;;PE|*;;;;;0.05;0.1;;;;
-historic;relative;yes;;;Emi|*;;;;;0.05;0.1;;;;
-historic;relative;yes;;;SE|*;;;;;0.05;0.1;;;;
-historic;relative;yes;;;FE;;;;;0.05;0.1;;;;
+category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
+historic;relative;yes;Emi|CO2|*;;;;2010:2200;;;0.05;0.1;;;;
+historic;relative;yes;FE;;;;2020:2050;;;0.05;0.1;;;;
diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd
index 14ccc13..abc4c5a 100644
--- a/inst/markdown/COMMITEDvalidation.Rmd
+++ b/inst/markdown/COMMITEDvalidation.Rmd
@@ -42,54 +42,32 @@ mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
 
 hmif <- paste0(path, "historical.mif")
 
-config <- "validationConfig_COMMITED.csv"
+configName <- "validationConfig_COMMITED.csv"
 
-cfg <- getConfig(config)
-data <- importScenarioData(mifs)
+data <- importScenarioData(scenarioPath)
 
+hist <- importReferenceData(referencePath)
 
-# expandConfig <- function(cfg, data, firstLevelOnly = TRUE)
-to_expand <- cfg[grepl("\\*", cfg$variable), ]
-all_vars <- unique(data$variable)
-firstLevelOnly <- TRUE
-
-cfg_new <- dplyr::anti_join(cfg, to_expand)
-for (i in 1:nrow(to_expand)) {
-  var <- gsub("\\|\\*", "\\\\\\|", to_expand$variable[i])
-  selected_vars <- all_vars[grepl(var, all_vars)]
-
-  #
-  # all variables that don't have a "|" in the substring following the given part
-  if (firstLevelOnly == TRUE) {
-    selected_vars <- selected_vars[!grepl("\\|", gsub(var, "", selected_vars))]
-    }
-  
-  c <- cfg[i,] %>%
-    slice(rep(1, each = length(selected_vars)))
-  c$variable <- selected_vars
-  cfg_new <- rbind(cfg_new, c)
-}
-
-cfg <- cfg_new
-
-cfg <- cleanConfig(cfg)
+cfg <- getConfig(configName) %>%
+  cleanConfig()
 
+cfg <- expandPeriods(cfg, data) %>%
+  expandVariables(data)
 
 # combine data for each row of the config and bind together
 df <- data.frame()
 for (i in 1:nrow(cfg)) {
   # TODO: hist should only be needed if category "historical" is in config
   #       validation generally should work without hist data
-  df_row <- combineData(data, hist, cfg[i, ])
+  df_row <- combineData(data, cfg[i, ], ref_data = hist)
   df <- rbind(df, df_row)
+  cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
 }
 
 df <- resolveDuplicates(df)
 
 df <- evaluateThresholds(df)
 
-df <- appendTooltips(df)
-
 ```
 
 ## Heatmaps by Category
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index 9e9088b..f3c29b7 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -17,6 +17,7 @@ params:
 ```{r include=FALSE}
 devtools::load_all(".")
 library(knitr)
+library(dplyr)
 library(ggplot2)
 library(ggthemes)
 library(plotly)
@@ -44,12 +45,7 @@ hmif <- paste0(path, "historical.mif")
 
 config <- "validationConfig.csv"
 
-# debugging
-# data <- importScenarioData(mifs)
-# hist <- importReferenceData(hmif)
-# cfg <- getConfig(config) %>% cleanConfig()
-
-df <- validateScenarios(mifs, hmif, config)
+df <- validateScenarios(mifs, config, referencePath = hmif)
 df <- appendTooltips(df)
 ```
 ## Validation

From c45bb47274ded28cb0b7b11345e8d50fef40f4c5 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Thu, 7 Mar 2024 16:43:09 +0100
Subject: [PATCH 10/18] bugfix combineData()

---
 DESCRIPTION                                   |  1 -
 R/combineData.R                               | 57 ++++++++++---------
 ...TED.csv => validationConfig_COMMITTED.csv} |  4 +-
 inst/markdown/COMMITEDvalidation.Rmd          |  2 +
 inst/markdown/REMINDvalidation.Rmd            | 11 ++--
 5 files changed, 40 insertions(+), 35 deletions(-)
 rename inst/config/{validationConfig_COMMITED.csv => validationConfig_COMMITTED.csv} (57%)

diff --git a/DESCRIPTION b/DESCRIPTION
index e497cbf..22d83c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -12,7 +12,6 @@ Imports:
     dplyr (>= 1.1.1),
     ggplot2,
     ggthemes,
-    kableExtra,
     plotly,
     quitte (>= 0.3123.0),
     readxl,
diff --git a/R/combineData.R b/R/combineData.R
index 1b795aa..b6bdd88 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -11,7 +11,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
   all_reg <- unique(data$region)
   all_per <- unique(data$period)  # not a factor, convert?
   all_per <- all_per[all_per <= 2100]
-  hist_per <- c(2005, 2010, 2015, 2020)
+  ref_per <- c(2005, 2010, 2015, 2020)
   all_sce <- unique(data$scenario)
 
   # create filters
@@ -24,7 +24,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
 
   # empty "period" field means different years for historic or scenario category
   if (c$category == "historic") {
-    per <- if (is.na(c$period))  hist_per else
+    per <- if (is.na(c$period))  ref_per else
       strsplit(as.character(c$period), split = ", |,")[[1]]
   } else {
     per <- if (is.na(c$period))  all_per else
@@ -33,7 +33,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
 
   # filter scenario data for row in cfg
   d <- data %>%
-    dplyr::filter(variable == c$variable,
+    filter(variable == c$variable,
            region %in% reg,
            period %in% per,
            scenario %in% sce)
@@ -51,34 +51,39 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
   # depending on category: filter and attach reference values if they are needed
   if (c$category == "historic") {
     # historic data for relevant variable and dimensions (all sources)
-    h <- hist %>%
-      dplyr::filter(variable == c$variable,
+    h <- ref_data %>%
+      filter(variable %in% c$variable,
              region %in% reg,
              period %in% per) %>%
       mutate(ref_value = value, ref_model = model) %>%
       select(-c("scenario", "unit", "value", "model"))
 
-    # TODO: insert test here if ref_model exists and has data to compare to
+    # TODO: test here if ref_model exists and has data to compare to
+    if (nrow(h) == 0) {
+      cat(paste0("No reference data for variable ", c$variable, " found.\n"))
+      df <- data.frame()
+    } else {
 
-    # in case one or more sources are specified, filter for them
-    if (!is.na(c$ref_model)) {
-      h <- dplyr::filter(
-        h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]]
-      )
-    }
+      # in case one or more sources are specified, filter for them
+      if (!is.na(c$ref_model)) {
+        h <- filter(
+          h, ref_model %in% strsplit(c$ref_model, split = ", |,")[[1]]
+        )
+      }
 
-    # in case of multiple sources, calculate mean (using all available sources)
-    if (length(unique(h$ref_model)) > 1) {
-      h_mean <- group_by(h, period, region) %>%
-        summarise(ref_value = mean(ref_value, na.rm = TRUE))
-      h <- mutate(h_mean, variable = c$variable, ref_model = "multiple")
-    }
+      # in case of multiple sources, calculate mean (using all available sources)
+      if (length(unique(h$ref_model)) > 1) {
+        h_mean <- group_by(h, period, region) %>%
+          summarise(ref_value = mean(ref_value, na.rm = TRUE))
+        h <- mutate(h_mean, variable = c$variable, ref_model = "multiple")
+      }
 
-    # merge with historical data adds columns ref_value and ref_model
-    df <- merge(d, h)
+      # merge with historical data adds columns ref_value and ref_model
+      df <- merge(d, h)
 
-    # add columns which are not used in this category, fill NA
-    df <- df %>% mutate(ref_period = NA, ref_scenario = NA)
+      # add columns which are not used in this category, fill NA
+      df <- df %>% mutate(ref_period = NA, ref_scenario = NA)
+    }
 
     # filter and attach reference values if they are needed; scenario data
   } else if (c$category == "scenario") {
@@ -98,19 +103,19 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
       # if a reference period should be used, same scenario, same model
       if (!is.na(c$ref_period)) {
         ref <- data %>%
-          dplyr::filter(variable == c$variable,
+          filter(variable %in% c$variable,
                  region %in% reg,
-                 period == c$ref_period,
+                 period %in% c$ref_period,
                  scenario %in% sce) %>%
           mutate(ref_value = value, ref_period = period) %>%
           select(-c(period, value)) %>%
           # add columns which are not used in this category, fill NA
           mutate(ref_model = NA, ref_scenario = NA)
 
-        # if a reference scenario should be used, same period, same model
+      # if a reference scenario should be used, same period, same model
       } else if (!is.na(c$ref_scenario)) {
         ref <- data %>%
-          dplyr::filter(variable == c$variable,
+          filter(variable == c$variable,
                  region %in% reg,
                  period %in% per,
                  scenario == c$ref_scenario) %>%
diff --git a/inst/config/validationConfig_COMMITED.csv b/inst/config/validationConfig_COMMITTED.csv
similarity index 57%
rename from inst/config/validationConfig_COMMITED.csv
rename to inst/config/validationConfig_COMMITTED.csv
index 46fcabe..c364404 100644
--- a/inst/config/validationConfig_COMMITED.csv
+++ b/inst/config/validationConfig_COMMITTED.csv
@@ -1,3 +1,3 @@
 category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
-historic;relative;yes;Emi|CO2|*;;;;2010:2200;;;0.05;0.1;;;;
-historic;relative;yes;FE;;;;2020:2050;;;0.05;0.1;;;;
+historic;relative;yes;Emi|CO2|*;;;;2020;;;0.1;0.2;;;;
+historic;relative;yes;FE|*;;;;2020;;;0.1;0.2;;;;
diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITEDvalidation.Rmd
index abc4c5a..1faa928 100644
--- a/inst/markdown/COMMITEDvalidation.Rmd
+++ b/inst/markdown/COMMITEDvalidation.Rmd
@@ -68,6 +68,8 @@ df <- resolveDuplicates(df)
 
 df <- evaluateThresholds(df)
 
+df <- appendTooltips(df)
+
 ```
 
 ## Heatmaps by Category
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index f3c29b7..7c03833 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -21,7 +21,6 @@ library(dplyr)
 library(ggplot2)
 library(ggthemes)
 library(plotly)
-library(kableExtra)
 
 knitr::opts_chunk$set(
   echo = FALSE,
@@ -58,11 +57,11 @@ colors <- c(green  = "#008450",
             red    = "#B81D13",
             grey   = "#808080")
 dt <- dplyr::count(df, critical, check)
-kbl(dt) %>%
-  kable_styling(bootstrap_options = c("striped", "hover"), 
-                full_width = F) %>%
-  column_spec(2, color = colors[dt$check],
-              background = colors[dt$check])
+# kbl(dt) %>%
+#   kable_styling(bootstrap_options = c("striped", "hover"),
+#                 full_width = F) %>%
+#   column_spec(2, color = colors[dt$check],
+#               background = colors[dt$check])
 ```
 
 ## Heatmaps by Category

From 8c0c68df623ebcafad9598b4e59389e3a3be5166 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Fri, 8 Mar 2024 13:48:57 +0100
Subject: [PATCH 11/18] support model comparison

---
 R/appendTooltips.R                            |   7 +-
 R/combineData.R                               |  99 ++++++----
 R/evaluateThresholds.R                        |  16 +-
 R/importFunctions.R                           |   3 +
 R/resolveDuplicates.R                         |  24 ++-
 R/validationHeatmap.R                         |   6 +-
 inst/config/validationConfig_NGFS.csv         |   8 +
 ...validation.Rmd => COMMITTEDvalidation.Rmd} |   1 -
 inst/markdown/NGFSvalidation.Rmd              | 175 ++++++++++++++++++
 9 files changed, 284 insertions(+), 55 deletions(-)
 create mode 100644 inst/config/validationConfig_NGFS.csv
 rename inst/markdown/{COMMITEDvalidation.Rmd => COMMITTEDvalidation.Rmd} (99%)
 create mode 100644 inst/markdown/NGFSvalidation.Rmd

diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index e9d3649..974d4f9 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -35,13 +35,16 @@ appendTooltips <- function(df) {
     df[df$category == "scenario" & df$metric == "relative", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
+                         ifelse(!is.na(ref_model), paste("Model:", model, "\n")),
                          "Value: ", round(value, 2), "\n",
                          "Ref ", ifelse(!is.na(ref_period),
                                                paste("Period:", ref_period),
-                                               paste("Scenario:", ref_scenario)
+                                        ifelse(!is.na(ref_scenario),
+                                               paste("Scenario:", ref_scenario),
+                                               paste("Model:", ref_model))
                                                ), "\n",
                          "Ref Value: ", round(ref_value, 2), "\n",
-                         "Rel Deviation: ", round(check_value)*100, "% \n",
+                         "Rel Deviation: ", round(check_value, 2)*100, "% \n",
                          "Thresholds (yel/red): \n",
                          "Min: ", min_yel*100, "% / ", min_red*100,"% \n",
                          "Max: ", max_yel*100, "% / ", max_red*100, "%")
diff --git a/R/combineData.R b/R/combineData.R
index b6bdd88..2b5c8d0 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -8,19 +8,23 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
   c <- cfg_row
 
   # full dimensions and important slices
+  all_mod <- unique(data$model)
+  all_sce <- unique(data$scenario)
   all_reg <- unique(data$region)
   all_per <- unique(data$period)  # not a factor, convert?
   all_per <- all_per[all_per <= 2100]
   ref_per <- c(2005, 2010, 2015, 2020)
-  all_sce <- unique(data$scenario)
 
-  # create filters
-  # check whether regions, periods, scenarios are specified
-  # TODO: add model
-  reg <- if (is.na(c$region))   all_reg else
-    strsplit(c$region, split = ", |,")[[1]]
+
+  # create filters ####
+  # check whether regions, periods, scenarios are specified, else use all
+  mod <- if (is.na(c$model)) all_mod else
+    strsplit(c$model, split = ", |, ")[[1]]
   sce <- if (is.na(c$scenario)) all_sce else
     strsplit(c$scenario, split = ", |, ")[[1]]
+  reg <- if (is.na(c$region))   all_reg else
+    strsplit(c$region, split = ", |,")[[1]]
+
 
   # empty "period" field means different years for historic or scenario category
   if (c$category == "historic") {
@@ -31,23 +35,26 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
       strsplit(as.character(c$period), split = ", |,")[[1]]
   }
 
-  # filter scenario data for row in cfg
+  # apply filters ####
+  # filter scenario data according to each row in cfg
   d <- data %>%
-    filter(variable == c$variable,
-           region %in% reg,
-           period %in% per,
-           scenario %in% sce)
+    filter(variable %in% c$variable,  # alternative approach for multiple variables could be used here
+           model    %in% mod,
+           scenario %in% sce,
+           region   %in% reg,
+           period   %in% per)
 
-  # attach respective cfg information to data slice
+  # attach cfg information to data slice, which is independent of category
   d <- d %>%
-    mutate(min_red = c$min_red,  # could be set to NA as it is not used
-           min_yel = c$min_yel,  # could be set to NA as it is not used
-           max_yel = c$max_yel,
-           max_red = c$max_red,
+    mutate(min_red  = c$min_red,
+           min_yel  = c$min_yel,
+           max_yel  = c$max_yel,
+           max_red  = c$max_red,
            category = c$category,
            metric   = c$metric,
            critical = c$critical)
 
+  # historic ####
   # depending on category: filter and attach reference values if they are needed
   if (c$category == "historic") {
     # historic data for relevant variable and dimensions (all sources)
@@ -58,7 +65,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
       mutate(ref_value = value, ref_model = model) %>%
       select(-c("scenario", "unit", "value", "model"))
 
-    # TODO: test here if ref_model exists and has data to compare to
+    # test whether ref_model exists and has data to compare to
     if (nrow(h) == 0) {
       cat(paste0("No reference data for variable ", c$variable, " found.\n"))
       df <- data.frame()
@@ -85,44 +92,64 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
       df <- df %>% mutate(ref_period = NA, ref_scenario = NA)
     }
 
-    # filter and attach reference values if they are needed; scenario data
+  # scenario ####
+  # filter and attach reference values if they are needed; scenario data
   } else if (c$category == "scenario") {
 
     # no reference values needed for these metrics, fill NA
     if (c$metric %in% c("absolute", "growthrate")) {
       df <- d %>%
-        mutate(ref_value = NA,
-               ref_model = NA,
-               ref_period = NA,
-               ref_scenario = NA)
+        mutate(ref_value    = NA,
+               ref_model    = NA,
+               ref_scenario = NA,
+               ref_period   = NA)
+
 
-      # get reference values for these metrics
+    # get reference values for these metrics
+    # TODO: support choosing ref_period AND model/scenario?
     } else if (c$metric %in% c("relative", "difference")) {
-      # TODO: support choosing ref_period AND model/scenario?
 
-      # if a reference period should be used, same scenario, same model
-      if (!is.na(c$ref_period)) {
+      # if a reference model should be used, same scenario, same period
+      if (!is.na(c$model)) {
         ref <- data %>%
           filter(variable %in% c$variable,
-                 region %in% reg,
-                 period %in% c$ref_period,
-                 scenario %in% sce) %>%
-          mutate(ref_value = value, ref_period = period) %>%
-          select(-c(period, value)) %>%
+                 model    %in% c$ref_model, # expects exactly one model for now
+                 scenario %in% sce,
+                 region   %in% reg,
+                 period   %in% per) %>%
+
+          mutate(ref_value = value, ref_model = model) %>%
+          select(-c(model, value)) %>%
           # add columns which are not used in this category, fill NA
-          mutate(ref_model = NA, ref_scenario = NA)
+          mutate(ref_scenario = NA, ref_period = NA)
 
       # if a reference scenario should be used, same period, same model
       } else if (!is.na(c$ref_scenario)) {
         ref <- data %>%
-          filter(variable == c$variable,
-                 region %in% reg,
-                 period %in% per,
-                 scenario == c$ref_scenario) %>%
+          filter(variable %in% c$variable,
+                 model    %in% mod,
+                 scenario %in% c$ref_scenario,  # expects exactly one scenario
+                 region   %in% reg,
+                 period   %in% per) %>%
+
           mutate(ref_value = value, ref_scenario = scenario) %>%
           select(-c(scenario, value)) %>%
           # add columns which are not used in this category, fill NA
           mutate(ref_model = NA, ref_period = NA)
+
+      # if a reference period should be used, same scenario, same model
+      } else if (!is.na(c$ref_period)) {
+        ref <- data %>%
+          filter(variable %in% c$variable,
+                 model    %in% mod,
+                 scenario %in% sce,
+                 region   %in% reg,
+                 period   %in% c$ref_period) %>% # expects exactly one period
+
+          mutate(ref_value = value, ref_period = period) %>%
+          select(-c(period, value)) %>%
+          # add columns which are not used in this category, fill NA
+          mutate(ref_model = NA, ref_scenario = NA)
       }
 
       df <- merge(d, ref)
diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R
index 3cd1bfd..131b816 100644
--- a/R/evaluateThresholds.R
+++ b/R/evaluateThresholds.R
@@ -9,7 +9,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
   # performance improvements)
 
   # calculate values that will be compared to thresholds
-  # historic - relative
+  # historic - relative ####
   his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>%
     mutate(check_value = ifelse(
       is.na(ref_value),
@@ -19,12 +19,12 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
       )
     )
 
-  # historic - difference
+  # historic - difference ####
   his_dif <- df[df$category == "historic" & df$metric == "difference", ] %>%
     # absolute difference to reference
     mutate(check_value = abs(value - ref_value))
 
-  # scenario - relative
+  # scenario - relative ####
   sce_rel <- df[df$category == "scenario" & df$metric == "relative", ] %>%
     mutate(check_value = ifelse(
       is.na(ref_value),
@@ -34,7 +34,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
       )
     )
 
-  # scenario - difference
+  # scenario - difference ####
   sce_dif <- df[df$category == "scenario" & df$metric == "difference", ] %>%
     mutate(check_value = ifelse(
       is.na(ref_value),
@@ -44,11 +44,11 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
       )
     )
 
-  # scenario - absolute
+  # scenario - absolute ####
   sce_abs <- df[df$category == "scenario" & df$metric == "absolute", ] %>%
     mutate(check_value = value)
 
-  # scenario - growthrate
+  # scenario - growthrate ####
   # calculate average growth rate of the last 5 years between 2010 and 2060
   # TODO: in case of a need to look further than 2060, split df and add
   # calculations for 10-year step periods
@@ -73,7 +73,9 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
   df <- do.call("rbind",
                 list(his_rel, his_dif, sce_rel, sce_dif, sce_abs, sce_gro))
 
+  # evaluation ####
   # perform comparison to thresholds for whole data.frame at once
+  # TODO: not as robust as previously thought. Partially fails if only max_red is given
   df <- df %>%
     mutate(check = ifelse(is.na(check_value),
                         "grey",
@@ -92,7 +94,7 @@ evaluateThresholds <- function(df, cleanInf = TRUE) {
                         )
            )
 
-  # after evaluation, "Inf" can be removed again
+  # after evaluation, "Inf" can be removed
   if (cleanInf) df[df == "Inf" | df == "-Inf"] <- NA
 
   return(df)
diff --git a/R/importFunctions.R b/R/importFunctions.R
index 5b60369..bb91826 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -18,6 +18,9 @@ importScenarioData <- function(scenarioPath) {
   new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1))
   data$region <- factor(data$region, levels = new_order)
 
+  # remove rows without values
+  data <- data[!is.na(data$value),]
+
   return(data)
 }
 
diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R
index 0d46c63..e82202e 100644
--- a/R/resolveDuplicates.R
+++ b/R/resolveDuplicates.R
@@ -7,18 +7,28 @@ resolveDuplicates <- function(df) {
 
   # check for duplicates
   # these are the duplicates that are from the lower rows and should be kept
-  duplicates <- df[duplicated(df[c("model", "scenario", "variable",
-                                       "region", "period", "category", "metric")]), ]
-
-  # here all instances of the data that was duplicated is removed
-  no_dupl <- dplyr::anti_join(df, duplicates, by = c("model", "scenario", "variable",
-                                                         "region", "period", "category", "metric"))
+  duplicates <- df[duplicated(df[c("model",
+                                   "scenario",
+                                   "variable",
+                                   "region",
+                                   "period",
+                                   "category",
+                                   "metric")]), ]
+
+  # here, all instances of the data that was duplicated are removed
+  no_dupl <- dplyr::anti_join(df, duplicates, by = c("model",
+                                                     "scenario",
+                                                     "variable",
+                                                     "region",
+                                                     "period",
+                                                     "category",
+                                                     "metric"))
 
   # reattach one instance to get a complete data set without duplicates
   df <- rbind(no_dupl, duplicates)
 
   # different approach: allows partial overwriting
-  # chosen to not use this anymore, less robust
+  # chosen to not use this anymore, less robust and unclear if more intuitive
 
   # these are missing the duplicated entries that were in the lower rows
   # no_dupl <- dplyr::anti_join(df, duplicates_all)
diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R
index b22eab6..58d854e 100644
--- a/R/validationHeatmap.R
+++ b/R/validationHeatmap.R
@@ -5,7 +5,7 @@
 #' @export
 
 # takes the output of "validateScenarios()" and plots heatmaps per variable
-validationHeatmap <- function(df, var, cat, met, interactive = T) {
+validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels = T) {
 
   # possible extension: when giving multiple vars, plot as facets in same row
 
@@ -38,7 +38,8 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
   p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) +
     geom_tile(color="white", linewidth=0.0) +
     scale_fill_manual(values = colors, breaks = colors) +
-    facet_grid(scenario~.)
+    facet_grid(scenario~model)
+
   # make it beautiful
   # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2
   p <- p + labs(x = NULL, y = NULL, title = paste0(var,
@@ -47,6 +48,7 @@ validationHeatmap <- function(df, var, cat, met, interactive = T) {
   p <- p + theme_tufte(base_family = "Helvetica")  # creates warnings
   p <- p + theme(axis.ticks = element_blank())
   p <- p + theme(axis.text = element_text(size = 7))
+  p <- p + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
   p <- p + coord_equal()
   p <- p + theme(legend.position = "none")
 
diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv
new file mode 100644
index 0000000..c346768
--- /dev/null
+++ b/inst/config/validationConfig_NGFS.csv
@@ -0,0 +1,8 @@
+category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
+scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;;2020;;;0.5;1;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;World;2020;;;0.3;0.5;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Final Energy;GCAM 6.0 NGFS;;;2030:2050;;;0.5;1;REMIND-MAgPIE 3.3-4.7;;;
+scenario;absolute;yes;Emissions|CO2;;o_1p5c;;2050;;;0;100;;;;
+scenario;absolute;yes;Emissions|CO2;;o_2c;;2050;;;1000;2000;;;;
+scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_1p5c;;2100;;;1.5;1.55;;;;
+scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_2c;;2100;;;2;2.1;;;;
diff --git a/inst/markdown/COMMITEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd
similarity index 99%
rename from inst/markdown/COMMITEDvalidation.Rmd
rename to inst/markdown/COMMITTEDvalidation.Rmd
index 1faa928..cba8706 100644
--- a/inst/markdown/COMMITEDvalidation.Rmd
+++ b/inst/markdown/COMMITTEDvalidation.Rmd
@@ -20,7 +20,6 @@ library(knitr)
 library(ggplot2)
 library(ggthemes)
 library(plotly)
-library(kableExtra)
 
 knitr::opts_chunk$set(
   echo = FALSE,
diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd
new file mode 100644
index 0000000..630e441
--- /dev/null
+++ b/inst/markdown/NGFSvalidation.Rmd
@@ -0,0 +1,175 @@
+---
+title: "piamValidation: NGFS"
+output:
+  html_document:
+    toc: true
+    toc_float: true
+    code_folding: hide
+params:
+  mif: ""
+  ref: ""
+  cfg: ""
+  warning: false
+  message: false
+  figWidth: 8
+---
+
+```{r include=FALSE}
+devtools::load_all(".")
+library(knitr)
+library(ggplot2)
+library(ggthemes)
+library(plotly)
+
+knitr::opts_chunk$set(
+  echo = FALSE,
+  error = TRUE,
+  #fig.width = params$figWidth,
+  message = params$message,
+  warning = params$warning
+)
+```
+
+## Import and Prepare Data
+
+
+```{r}
+# Data Preparation
+path <- "C:/Users/pascalwe/Data/NGFS/phase5/"
+snapshot <- "1709886115166-snapshot-24/snapshot_all_regions.csv"
+configName <- "validationConfig_NGFS.csv"
+
+scenarioPath <- paste0(path, snapshot)
+data <- importScenarioData(scenarioPath)
+
+cfg <- getConfig(configName) %>%
+  cleanConfig()
+
+cfg <- expandPeriods(cfg, data) %>%
+  expandVariables(data)
+
+# combine data for each row of the config and bind together
+df <- data.frame()
+for (i in 1:nrow(cfg)) {
+  # TODO: hist should only be needed if category "historical" is in config
+  #       validation generally should work without hist data
+  df_row <- combineData(data, cfg[i, ])
+  df <- rbind(df, df_row)
+  cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
+}
+
+df <- resolveDuplicates(df)
+
+df <- evaluateThresholds(df)
+
+df <- appendTooltips(df)
+
+```
+
+## Heatmaps by Category
+
+### Historic - Relative
+Relative deviation to historical reference data. Absolute numbers, so no 
+difference is made between being above or below the reference value.
+```{r}
+c <- "historic"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Historic - Difference
+Absolute difference to historical reference data
+```{r}
+c <- "historic"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Relative
+Relative deviation to data point from either:
+
+- period (same scenario/model)
+- scenario (same period/model)
+- model (same period/scenario - Not supported yet)
+
+```{r}
+c <- "scenario"
+m <- "relative"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Difference
+```{r}
+c <- "scenario"
+m <- "difference"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+### Scenario - Absolute
+```{r}
+c <- "scenario"
+m <- "absolute"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```
+
+
+### Scenario - Growthrate
+```{r}
+c <- "scenario"
+m <- "growthrate"
+d <- df[df$category == c & df$metric == m, ]
+
+if (nrow(d) > 0) {
+  vars <- unique(d$variable)
+  plot_list <- htmltools::tagList()
+    for (i in 1:length(vars)) {
+      plot_list[[i]] <- validationHeatmap(d, vars[i], cat = c, met = m)
+    }
+  plot_list
+}
+```

From c1be9d19c2dea05b64f6215f755c375dd8f2122e Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Fri, 8 Mar 2024 18:01:10 +0100
Subject: [PATCH 12/18] exclude green checks, stress testing

---
 R/validationHeatmap.R                 |  2 +-
 inst/config/validationConfig_NGFS.csv | 17 ++++++++++-------
 inst/markdown/COMMITTEDvalidation.Rmd |  1 +
 inst/markdown/NGFSvalidation.Rmd      |  9 +++++++++
 inst/markdown/REMINDvalidation.Rmd    |  1 +
 5 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R
index 58d854e..1832909 100644
--- a/R/validationHeatmap.R
+++ b/R/validationHeatmap.R
@@ -38,7 +38,7 @@ validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels
   p <- ggplot(d, aes(x = region, y = period, fill=check, text=text)) +
     geom_tile(color="white", linewidth=0.0) +
     scale_fill_manual(values = colors, breaks = colors) +
-    facet_grid(scenario~model)
+    facet_grid(model~scenario)
 
   # make it beautiful
   # from https://www.r-bloggers.com/2016/02/making-faceted-heatmaps-with-ggplot2
diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv
index c346768..59cd23b 100644
--- a/inst/config/validationConfig_NGFS.csv
+++ b/inst/config/validationConfig_NGFS.csv
@@ -1,8 +1,11 @@
 category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
-scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;;2020;;;0.5;1;GCAM 6.0 NGFS;;;
-scenario;relative;yes;Emissions|CO2;REMIND-MAgPIE 3.3-4.7;;World;2020;;;0.3;0.5;GCAM 6.0 NGFS;;;
-scenario;relative;yes;Final Energy;GCAM 6.0 NGFS;;;2030:2050;;;0.5;1;REMIND-MAgPIE 3.3-4.7;;;
-scenario;absolute;yes;Emissions|CO2;;o_1p5c;;2050;;;0;100;;;;
-scenario;absolute;yes;Emissions|CO2;;o_2c;;2050;;;1000;2000;;;;
-scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_1p5c;;2100;;;1.5;1.55;;;;
-scenario;absolute;yes;AR6 climate diagnostics|Surface Temperature (GSAT)|MAGICCv7.5.3|50.0th Percentile;;o_2c;;2100;;;2;2.1;;;;
+scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Primary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Primary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Carbon Sequestration|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Carbon Sequestration|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
diff --git a/inst/markdown/COMMITTEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd
index cba8706..04802ee 100644
--- a/inst/markdown/COMMITTEDvalidation.Rmd
+++ b/inst/markdown/COMMITTEDvalidation.Rmd
@@ -1,5 +1,6 @@
 ---
 title: "piamValidation: COMMITED"
+date: "`r format(Sys.Date())`"
 output:
   html_document:
     toc: true
diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd
index 630e441..98bc2d8 100644
--- a/inst/markdown/NGFSvalidation.Rmd
+++ b/inst/markdown/NGFSvalidation.Rmd
@@ -1,5 +1,6 @@
 ---
 title: "piamValidation: NGFS"
+date: "`r format(Sys.Date())`"
 output:
   html_document:
     toc: true
@@ -28,6 +29,10 @@ knitr::opts_chunk$set(
   message = params$message,
   warning = params$warning
 )
+
+rmarkdown::render('my_markdown_report.Rmd',
+                   output_file = paste('report.', Sys.Date(), 
+                                       '.pdf', sep=''))
 ```
 
 ## Import and Prepare Data
@@ -64,6 +69,10 @@ df <- evaluateThresholds(df)
 
 df <- appendTooltips(df)
 
+# remove all-green rows (all regions green)
+df_noGreen <- df[df$check != "green", ]
+df_noGreennoGrey <- df[df$check %in% c("red", "yellow"), ]
+df <- df_noGreennoGrey
 ```
 
 ## Heatmaps by Category
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index 7c03833..c66c955 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -1,5 +1,6 @@
 ---
 title: "piamValidation: REMIND"
+date: "`r format(Sys.Date())`"
 output:
   html_document:
     toc: true

From e1180b435d11c4bbedcc66d3efded5004732d484 Mon Sep 17 00:00:00 2001
From: orichters <oliverr@pik-potsdam.de>
Date: Fri, 15 Mar 2024 12:33:07 +0100
Subject: [PATCH 13/18] allow to match ** or * from config, add tests

---
 R/importFunctions.R                   | 26 +++++++++-------
 tests/testthat/test-dummy.R           |  1 -
 tests/testthat/test-expandVariables.R | 43 +++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 11 deletions(-)
 delete mode 100644 tests/testthat/test-dummy.R
 create mode 100644 tests/testthat/test-expandVariables.R

diff --git a/R/importFunctions.R b/R/importFunctions.R
index bb91826..c31fab2 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -48,10 +48,10 @@ importReferenceData <- function(referencePath) {
 #   - model, scenario, region, period
 # ...
 getConfig <- function(configName) {
-  # TODO: check if config is available, otherwise is it a local file? if yes load that one
   path <- system.file(paste0("config/", configName), package = "piamValidation")
+  if (! file.exists(path) && file.exists(configName)) path <- configName
   cfg <- read.csv2(path, na.strings = "")
-  cat(paste0("loading config file: ", configName, "\n"))
+  message("loading config file: ", configName, "\n")
   return(cfg)
 }
 
@@ -101,8 +101,7 @@ expandPeriods <- function(cfg, data) {
 
 # takes config entries specifying a set of variables via "*" and expands it so
 # that every variable corresponds to one row in cfg
-# TODO: support "firstLevelOnly = TRUE" to not grab all sub-variables but just
-#       one level
+# * matches everything until the next |, while ** matches including |
 expandVariables <- function(cfg, data) {
   # create the expanded config, starting with the not-to-expand rows,
   # then appending the rows with expanded variable names
@@ -111,13 +110,20 @@ expandVariables <- function(cfg, data) {
 
   if (length(var_expand > 0)) {
     all_vars <- unique(data$variable)
-    for (i in 1:nrow(var_expand)) {
+    for (i in seq(nrow(var_expand))) {
       # prepare strings for grepping by adding escape characters and "."
-      var <- gsub("\\*", "\\.\\*", var_expand$variable[i])
-      var <- gsub("\\|", "\\\\|", var)
-      selected_vars <- all_vars[grepl(var, all_vars)]
-      cat(paste0(var_expand$variable[i], " was expanded into ",
-                 length(selected_vars), " sub-variables.\n"))
+      vartoexpand <- var_expand$variable[i]
+      # escape |
+      vargrep <- gsub("|", "\\|", vartoexpand, fixed = TRUE)
+      # convert * into "everything except |"
+      vargrep <- gsub("*", "[^\\|]*", vargrep, fixed = TRUE)
+      # convert what was ** back to .*
+      vargrep <- gsub("[^\\|]*[^\\|]*", ".*", vargrep, fixed = TRUE)
+      # make sure you match the full variable, not just a part
+      vargrep <- paste0("^", vargrep, "$")
+      selected_vars <- all_vars[grepl(vargrep, all_vars)]
+      message(var_expand$variable[i], " was expanded into ",
+              length(selected_vars), " sub-variables.")
 
       # take the original row for the current set of variables and repeat it
       # once for each sub-variable, overwrite with sub-variable names
diff --git a/tests/testthat/test-dummy.R b/tests/testthat/test-dummy.R
deleted file mode 100644
index 909f17c..0000000
--- a/tests/testthat/test-dummy.R
+++ /dev/null
@@ -1 +0,0 @@
-skip("dummy test")
diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R
new file mode 100644
index 0000000..d65f1ec
--- /dev/null
+++ b/tests/testthat/test-expandVariables.R
@@ -0,0 +1,43 @@
+test_that("expandVariables works", {
+  varinconfig <- function(data, varentry, expectedvars) {
+    configfile <- file.path(tempdir(), "config.csv")
+    configheader <- "category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;"
+    configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;")
+    writeLines(c(configheader, configstring), configfile)
+    cfg <- expandVariables(getConfig(configfile), data)
+    missing <- setdiff(expectedvars, cfg$variable)
+    if (length(missing) > 0) {
+      warning("For ", varentry, ", the following variables are expected but missing:\n",
+              paste0(missing, collapse = ", "))
+    }
+    expect_true(length(missing) == 0)
+    toomuch <- setdiff(cfg$variable, expectedvars)
+    if (length(toomuch) > 0) {
+      warning("For ", varentry, ", the following variables were not expected but matched:\n",
+              paste0(toomuch, collapse = ", "))
+    }
+    expect_true(length(toomuch) == 0)
+  }
+
+  data <- list()
+  depth1 <- c("Emi|CO", "Emi|BC", "Emi|GHG")
+  depth2CO <- c("Emi|CO|Trans", "Emi|CO|Build", "Emi|CO|Indus")
+  depth2BC <- c("Emi|BC|Trans", "Emi|BC|Build", "Emi|BC|Indus")
+  depth3CO <- c("Emi|CO|Trans|Gas", "Emi|CO|Trans|Oil")
+  whatever <- c("Emi", "Final Energy", "CO|Trans")
+  data$variable <- c(depth1, depth2CO, depth2BC, depth3CO, whatever)
+ 
+  # first argument is what is stated in the config, second what should be matched
+  varinconfig(data, "Final Energy", "Final Energy")
+  varinconfig(data, "Em*", "Emi")
+  varinconfig(data, "Emi", "Emi")
+  varinconfig(data, "Emi|*", depth1)
+  varinconfig(data, "Emi|*|*", c(depth2CO, depth2BC))
+  varinconfig(data, "Emi|*|*|*", depth3CO)
+  varinconfig(data, "*|CO|*", depth2CO)
+  varinconfig(data, "CO|*", "CO|Trans")
+  varinconfig(data, "Final Energy|**", NULL)
+  varinconfig(data, "Emi|**", c(depth1, depth2CO, depth2BC, depth3CO))
+  varinconfig(data, "Em**", c("Emi", depth1, depth2CO, depth2BC, depth3CO))
+})  
+

From 51b99b7d8233a5ed570074abf3c3092c38ae08fa Mon Sep 17 00:00:00 2001
From: orichters <oliverr@pik-potsdam.de>
Date: Fri, 15 Mar 2024 16:38:29 +0100
Subject: [PATCH 14/18] support xlsx config files, fix TODO

---
 NAMESPACE                             |  3 +++
 R/importFunctions.R                   | 17 +++++++++++------
 tests/testthat/test-expandVariables.R |  6 +++---
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index deb06a6..9bb1797 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,3 +11,6 @@ importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(ggthemes,theme_tufte)
 importFrom(plotly,ggplotly)
+importFrom(readxl,excel_sheets)
+importFrom(readxl,read_excel)
+importFrom(tibble,as_tibble)
diff --git a/R/importFunctions.R b/R/importFunctions.R
index c31fab2..0c5bb39 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -1,5 +1,6 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
-
+#' @importFrom tibble as_tibble
+#' @importFrom readxl read_excel excel_sheets
 # scenarioPath: one or multiple paths to .mif or .csv file(s) containing
 #               scenario data in IAM format
 importScenarioData <- function(scenarioPath) {
@@ -9,13 +10,12 @@ importScenarioData <- function(scenarioPath) {
     data <- readRDS("mif.rds")
   } else {
     cat("importing scenario data from file(s)\n")
-    data <- remind2::deletePlus(quitte::as.quitte(scenarioPath))
+    data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE))
     saveRDS(data, "mif.rds")
   }
 
-  # change ordering of factors, put last element ("World") first
-  # TODO: will not work if World is not last, put somewhere else?
-  new_order <- c(tail(levels(data$region), 1), head(levels(data$region), -1))
+  # change ordering of factors, global elements first
+  new_order <- unique(intersect(c("World", "GLO", levels(data$region)), levels(data$region)))
   data$region <- factor(data$region, levels = new_order)
 
   # remove rows without values
@@ -50,7 +50,12 @@ importReferenceData <- function(referencePath) {
 getConfig <- function(configName) {
   path <- system.file(paste0("config/", configName), package = "piamValidation")
   if (! file.exists(path) && file.exists(configName)) path <- configName
-  cfg <- read.csv2(path, na.strings = "")
+  if (grepl("\\.xlsx$", path)) {
+    cfg <- read_excel(path = path, sheet = if ("config" %in% excel_sheets(path)) "config" else 1)
+    cfg <- filter(cfg, ! grepl("^#", cfg[[1]]))
+  } else {
+    cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#"))
+  }
   message("loading config file: ", configName, "\n")
   return(cfg)
 }
diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R
index d65f1ec..bdbfac0 100644
--- a/tests/testthat/test-expandVariables.R
+++ b/tests/testthat/test-expandVariables.R
@@ -1,7 +1,8 @@
 test_that("expandVariables works", {
   varinconfig <- function(data, varentry, expectedvars) {
     configfile <- file.path(tempdir(), "config.csv")
-    configheader <- "category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;"
+    configheader <- paste0("category;metric;critical;variable;model;scenario;region;period;",
+                           "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;")
     configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;")
     writeLines(c(configheader, configstring), configfile)
     cfg <- expandVariables(getConfig(configfile), data)
@@ -39,5 +40,4 @@ test_that("expandVariables works", {
   varinconfig(data, "Final Energy|**", NULL)
   varinconfig(data, "Emi|**", c(depth1, depth2CO, depth2BC, depth3CO))
   varinconfig(data, "Em**", c("Emi", depth1, depth2CO, depth2BC, depth3CO))
-})  
-
+})

From aca34c1d50e921c56e9a0deb83d4530b144f2d1d Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 20 Mar 2024 11:15:36 +0100
Subject: [PATCH 15/18] add passValidation function

---
 NAMESPACE           |  1 +
 R/outputFunctions.R | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 R/outputFunctions.R

diff --git a/NAMESPACE b/NAMESPACE
index 9bb1797..8498520 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(validateScenarios)
 export(validationHeatmap)
+export(validationPass)
 import(ggplot2)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,filter)
diff --git a/R/outputFunctions.R b/R/outputFunctions.R
new file mode 100644
index 0000000..8c04e35
--- /dev/null
+++ b/R/outputFunctions.R
@@ -0,0 +1,18 @@
+#' @importFrom dplyr filter select mutate summarise group_by %>%
+# yellowFail: if set to TRUE a yellow check result of a critical variable will
+#             lead to the scenario not passing as validated
+#' @export
+validationPass <- function(data, yellowFail = FALSE) {
+
+  fail_color <- ifelse(yellowFail, c("red", "yellow"), "red")
+
+  # see if any critical variables have failed per scenario and model
+  pass <- data %>%
+    filter(check %in% fail_color, critical == "yes") %>%
+    group_by(model, scenario) %>%
+    summarize(pass = n() == 0,
+              n_fail = n()
+              )
+
+  return(pass)
+}

From 87913bade0db3b2c782b86bcc180bc4ce744a0ad Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 20 Mar 2024 12:12:05 +0100
Subject: [PATCH 16/18] clean up

---
 .gitignore                            | 11 +++--
 R/appendTooltips.R                    |  4 +-
 R/combineData.R                       |  2 +-
 R/evaluateThresholds.R                | 10 ++---
 R/importFunctions.R                   | 58 ++++++++++-----------------
 R/resolveDuplicates.R                 | 24 +----------
 R/validateScenarios.R                 | 11 ++---
 inst/config/validationConfig_NGFS.csv |  2 +-
 inst/markdown/COMMITTEDvalidation.Rmd | 27 +------------
 inst/markdown/NGFSvalidation.Rmd      | 33 +++------------
 inst/markdown/REMINDvalidation.Rmd    |  2 +-
 tests/testthat/test-expandVariables.R | 18 ++++++---
 12 files changed, 63 insertions(+), 139 deletions(-)

diff --git a/.gitignore b/.gitignore
index 958b5bf..4c015d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
-.Rproj.user
-.Rhistory
-.RData
-.Ruserdata
-.rds
-.html
+*.Rproj.user
+*.Rhistory
+*.RData
+*.Ruserdata
+*.html
diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index 974d4f9..6297143 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -35,7 +35,9 @@ appendTooltips <- function(df) {
     df[df$category == "scenario" & df$metric == "relative", ] %>%
     mutate(text = paste0(region, "\n",
                          period, "\n",
-                         ifelse(!is.na(ref_model), paste("Model:", model, "\n")),
+                         ifelse(!is.na(ref_model),
+                                paste("Model:", model, "\n"),
+                                ""),
                          "Value: ", round(value, 2), "\n",
                          "Ref ", ifelse(!is.na(ref_period),
                                                paste("Period:", ref_period),
diff --git a/R/combineData.R b/R/combineData.R
index 2b5c8d0..2060a95 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -78,7 +78,7 @@ combineData <- function(data, cfg_row, ref_data = NULL) {
         )
       }
 
-      # in case of multiple sources, calculate mean (using all available sources)
+      # in case of multiple sources, use mean (of all available sources)
       if (length(unique(h$ref_model)) > 1) {
         h_mean <- group_by(h, period, region) %>%
           summarise(ref_value = mean(ref_value, na.rm = TRUE))
diff --git a/R/evaluateThresholds.R b/R/evaluateThresholds.R
index 131b816..5550c01 100644
--- a/R/evaluateThresholds.R
+++ b/R/evaluateThresholds.R
@@ -1,14 +1,12 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-# cleanInf = TRUE: choose to replace "Inf" and "-Inf" which were introduced
-# for ease of calculations with "-"
+# cleanInf = TRUE: replace "Inf" and "-Inf" which were introduced
+#                  for ease of calculations with "-"
 evaluateThresholds <- function(df, cleanInf = TRUE) {
 
-  # perform evaluation for each category and metric separately
-  # (shortcuts are possible but will decrease readability of the code for minor
-  # performance improvements)
+  # first calculate values that will be compared to thresholds for each category
+  # ("check_value") and metric separately, then perform evaluation for all together
 
-  # calculate values that will be compared to thresholds
   # historic - relative ####
   his_rel <- df[df$category == "historic" & df$metric == "relative", ] %>%
     mutate(check_value = ifelse(
diff --git a/R/importFunctions.R b/R/importFunctions.R
index 0c5bb39..bd89264 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -1,57 +1,44 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 #' @importFrom tibble as_tibble
 #' @importFrom readxl read_excel excel_sheets
+
 # scenarioPath: one or multiple paths to .mif or .csv file(s) containing
 #               scenario data in IAM format
 importScenarioData <- function(scenarioPath) {
-  # for dev purposes use cache
-  if (file.exists("mif.rds")) {
-    cat("loading scenario data from cache\n")
-    data <- readRDS("mif.rds")
-  } else {
-    cat("importing scenario data from file(s)\n")
-    data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE))
-    saveRDS(data, "mif.rds")
-  }
+  data <- remind2::deletePlus(quitte::as.quitte(scenarioPath, na.rm = TRUE))
 
   # change ordering of factors, global elements first
-  new_order <- unique(intersect(c("World", "GLO", levels(data$region)), levels(data$region)))
+  new_order <- unique(intersect(c("World", "GLO",
+                                  levels(data$region)), levels(data$region)))
   data$region <- factor(data$region, levels = new_order)
 
   # remove rows without values
-  data <- data[!is.na(data$value),]
+  data <- data[!is.na(data$value), ]
 
   return(data)
 }
 
+# import historical or other reference data for comparison
 importReferenceData <- function(referencePath) {
-  # for dev purposes use cache
-  if (file.exists("hist.rds")) {
-    cat("loading reference data from cache\n")
-    hist <- readRDS("hist.rds")
-  } else {
-    cat("importing reference data from file\n")
-    hist <- quitte::as.quitte(referencePath)
-    hist <- dplyr::filter(hist, period >= 1990)
-    saveRDS(hist, "hist.rds")
-  }
+  ref <- quitte::as.quitte(referencePath)
 
-  # remove all historical data before 1990, (only historic?)
-  hist <- dplyr::filter(hist, period >= 1990)
+  # remove all historical data before 1990
+  ref <- dplyr::filter(ref, period >= 1990)
 
-  return(hist)
+  return(ref)
 }
 
-# required columns:
-# - category: "historic" or "scenario"
-# - can be empty, contain one element or multiple:
-#   - model, scenario, region, period
-# ...
+# see README for rules on how to fill the config
 getConfig <- function(configName) {
   path <- system.file(paste0("config/", configName), package = "piamValidation")
   if (! file.exists(path) && file.exists(configName)) path <- configName
+
+  # config can be .xlsx or .csv, use "config" sheet in .xlsx if available
   if (grepl("\\.xlsx$", path)) {
-    cfg <- read_excel(path = path, sheet = if ("config" %in% excel_sheets(path)) "config" else 1)
+    cfg <- read_excel(
+      path = path,
+      sheet = if ("config" %in% excel_sheets(path)) "config" else 1
+      )
     cfg <- filter(cfg, ! grepl("^#", cfg[[1]]))
   } else {
     cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#"))
@@ -60,17 +47,15 @@ getConfig <- function(configName) {
   return(cfg)
 }
 
-
-cleanConfig <- function(cfg) {
-  # fill empty threshold columns with Infinity for easier evaluation
+# fill empty threshold columns with Infinity for easier evaluation
+fillInf <- function(cfg) {
   cfg <- cfg %>%
     mutate(min_red = as.numeric(ifelse(is.na(min_red), -Inf, min_red)),
            min_yel = as.numeric(ifelse(is.na(min_yel), -Inf, min_yel)),
            max_yel = as.numeric(ifelse(is.na(max_yel),  Inf, max_yel)),
            max_red = as.numeric(ifelse(is.na(max_red),  Inf, max_red))
-    )
+           )
 
-  # insert tests here to check if there are forbidden fields in config
   return(cfg)
 }
 
@@ -82,6 +67,7 @@ expandPeriods <- function(cfg, data) {
 
   # iterate through rows with ":"
   for (i in per_expand_idx) {
+    # check format and compatibility of data and ref periods
     if (nchar(cfg[i, "period"]) != 9) {
       stop("Invalid range detected. Make sure to enter years as YYYY:YYYY.\n")
     } else {
@@ -118,7 +104,7 @@ expandVariables <- function(cfg, data) {
     for (i in seq(nrow(var_expand))) {
       # prepare strings for grepping by adding escape characters and "."
       vartoexpand <- var_expand$variable[i]
-      # escape |
+      # escape "|"
       vargrep <- gsub("|", "\\|", vartoexpand, fixed = TRUE)
       # convert * into "everything except |"
       vargrep <- gsub("*", "[^\\|]*", vargrep, fixed = TRUE)
diff --git a/R/resolveDuplicates.R b/R/resolveDuplicates.R
index e82202e..85dd163 100644
--- a/R/resolveDuplicates.R
+++ b/R/resolveDuplicates.R
@@ -1,11 +1,10 @@
-#' @importFrom dplyr filter select mutate summarise group_by %>%
-
+# check for duplicates that are a result of config definition overlaps
+# only the latest occurrence of a data point is kept (lowest row of config)
 resolveDuplicates <- function(df) {
 
   # TODO: needs to be double checked for edge-cases
   #       might be easier or safer to handle categories separately
 
-  # check for duplicates
   # these are the duplicates that are from the lower rows and should be kept
   duplicates <- df[duplicated(df[c("model",
                                    "scenario",
@@ -27,25 +26,6 @@ resolveDuplicates <- function(df) {
   # reattach one instance to get a complete data set without duplicates
   df <- rbind(no_dupl, duplicates)
 
-  # different approach: allows partial overwriting
-  # chosen to not use this anymore, less robust and unclear if more intuitive
-
-  # these are missing the duplicated entries that were in the lower rows
-  # no_dupl <- dplyr::anti_join(df, duplicates_all)
-
-  # overwrite "earlier" values using "later" ones
-  # BEWARE: requires specific country to come after "all regions"
-  # d1 <- merge(no_dupl, duplicates_all, by=c("variable", "region", "period",
-  #                                           "scenario", "unit", "model", "value",
-  #                                           "ref_model", "ref_value","ref_scenario",
-  #                                           "ref_period", "category", "metric", "critical"),
-  #             all.x = T) %>%
-  #   mutate(min_red = ifelse(is.na(min_red.y), min_red.x, min_red.y)) %>%
-  #   mutate(min_yel = ifelse(is.na(min_yel.y), min_yel.x, min_yel.y)) %>%
-  #   mutate(max_yel = ifelse(is.na(max_yel.y), max_yel.x, max_yel.y)) %>%
-  #   mutate(max_red = ifelse(is.na(max_red.y), max_red.x, max_red.y)) %>%
-  #   select(-c("min_red.x", "min_yel.x", "max_yel.x", "max_red.x",
-  #             "min_red.y", "min_yel.y", "max_yel.y", "max_red.y"))
 
   return(df)
 }
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 7b59354..96c3702 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,10 +1,8 @@
 #' @importFrom dplyr filter select mutate summarise group_by %>%
 
-# bringing it all together
-# keepNA
+# performs the validation checks from a config on a scenario dataset
 #' @export
-validateScenarios <- function(scenarioPath, configName, referencePath = NULL,
-                              keepNA = TRUE) {
+validateScenarios <- function(scenarioPath, configName, referencePath = NULL) {
 
   data <- importScenarioData(scenarioPath)
 
@@ -12,7 +10,7 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL,
 
   cfg <- configName %>%
     getConfig() %>%
-    cleanConfig() %>%
+    fillInf() %>%
     expandPeriods(data) %>%
     expandVariables(data)
 
@@ -22,10 +20,9 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL,
   for (i in 1:nrow(cfg)) {
     # TODO: hist should only be needed if category "historical" is in config
     #       validation generally should work without hist data
-    # TODO: optimize performance
     df_row <- combineData(data, cfg[i, ], ref_data = hist)
     df <- rbind(df, df_row)
-    cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
+    cat(paste0("Combined config row ", i, " of ", nrow(cfg), "\n"))
   }
 
   df <- resolveDuplicates(df)
diff --git a/inst/config/validationConfig_NGFS.csv b/inst/config/validationConfig_NGFS.csv
index 59cd23b..3ff24a3 100644
--- a/inst/config/validationConfig_NGFS.csv
+++ b/inst/config/validationConfig_NGFS.csv
@@ -1,7 +1,7 @@
 category;metric;critical;variable;model;scenario;region;period;min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;source/link to discussion
 scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
 scenario;relative;yes;Emissions|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
-scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
+scenario;relative;yes;Final Energy|*|Chemicals|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
 scenario;relative;yes;Final Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
 scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;;;;;50;100;GCAM 6.0 NGFS;;;
 scenario;relative;yes;Secondary Energy|*;REMIND-MAgPIE 3.3-4.7;;World;;;;30;50;GCAM 6.0 NGFS;;;
diff --git a/inst/markdown/COMMITTEDvalidation.Rmd b/inst/markdown/COMMITTEDvalidation.Rmd
index 04802ee..e75ecb3 100644
--- a/inst/markdown/COMMITTEDvalidation.Rmd
+++ b/inst/markdown/COMMITTEDvalidation.Rmd
@@ -34,7 +34,7 @@ knitr::opts_chunk$set(
 ## Import and Prepare Data
 
 
-```{r}
+```{r, message = FALSE}
 # Data Preparation
 path <- "C:/Users/pascalwe/Data/vs/"
 mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
@@ -44,30 +44,7 @@ hmif <- paste0(path, "historical.mif")
 
 configName <- "validationConfig_COMMITED.csv"
 
-data <- importScenarioData(scenarioPath)
-
-hist <- importReferenceData(referencePath)
-
-cfg <- getConfig(configName) %>%
-  cleanConfig()
-
-cfg <- expandPeriods(cfg, data) %>%
-  expandVariables(data)
-
-# combine data for each row of the config and bind together
-df <- data.frame()
-for (i in 1:nrow(cfg)) {
-  # TODO: hist should only be needed if category "historical" is in config
-  #       validation generally should work without hist data
-  df_row <- combineData(data, cfg[i, ], ref_data = hist)
-  df <- rbind(df, df_row)
-  cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
-}
-
-df <- resolveDuplicates(df)
-
-df <- evaluateThresholds(df)
-
+df <- validateScenarios(mifs, config, referencePath = hmif)
 df <- appendTooltips(df)
 
 ```
diff --git a/inst/markdown/NGFSvalidation.Rmd b/inst/markdown/NGFSvalidation.Rmd
index 98bc2d8..9e53ca3 100644
--- a/inst/markdown/NGFSvalidation.Rmd
+++ b/inst/markdown/NGFSvalidation.Rmd
@@ -30,49 +30,28 @@ knitr::opts_chunk$set(
   warning = params$warning
 )
 
-rmarkdown::render('my_markdown_report.Rmd',
-                   output_file = paste('report.', Sys.Date(), 
-                                       '.pdf', sep=''))
+# rmarkdown::render('my_markdown_report.Rmd',
+#                    output_file = paste('report.', Sys.Date(), 
+#                                        '.pdf', sep=''))
 ```
 
 ## Import and Prepare Data
 
 
-```{r}
+```{r, message = FALSE}
 # Data Preparation
 path <- "C:/Users/pascalwe/Data/NGFS/phase5/"
 snapshot <- "1709886115166-snapshot-24/snapshot_all_regions.csv"
-configName <- "validationConfig_NGFS.csv"
+config <- "validationConfig_NGFS.csv"
 
 scenarioPath <- paste0(path, snapshot)
-data <- importScenarioData(scenarioPath)
-
-cfg <- getConfig(configName) %>%
-  cleanConfig()
-
-cfg <- expandPeriods(cfg, data) %>%
-  expandVariables(data)
-
-# combine data for each row of the config and bind together
-df <- data.frame()
-for (i in 1:nrow(cfg)) {
-  # TODO: hist should only be needed if category "historical" is in config
-  #       validation generally should work without hist data
-  df_row <- combineData(data, cfg[i, ])
-  df <- rbind(df, df_row)
-  cat(paste0("Combined row ", i, " of ", nrow(cfg), "\n"))
-}
-
-df <- resolveDuplicates(df)
-
-df <- evaluateThresholds(df)
 
+df <- validateScenarios(scenarioPath, config)
 df <- appendTooltips(df)
 
 # remove all-green rows (all regions green)
 df_noGreen <- df[df$check != "green", ]
 df_noGreennoGrey <- df[df$check %in% c("red", "yellow"), ]
-df <- df_noGreennoGrey
 ```
 
 ## Heatmaps by Category
diff --git a/inst/markdown/REMINDvalidation.Rmd b/inst/markdown/REMINDvalidation.Rmd
index c66c955..1a94d1d 100644
--- a/inst/markdown/REMINDvalidation.Rmd
+++ b/inst/markdown/REMINDvalidation.Rmd
@@ -35,7 +35,7 @@ knitr::opts_chunk$set(
 ## Import and Prepare Data
 
 
-```{r}
+```{r, message = FALSE}
 # Data Preparation
 path <- "C:/Users/pascalwe/Data/vs/"
 mifs <- c(paste0(path, "REMIND_generic_Bal.mif"),
diff --git a/tests/testthat/test-expandVariables.R b/tests/testthat/test-expandVariables.R
index bdbfac0..71c85e6 100644
--- a/tests/testthat/test-expandVariables.R
+++ b/tests/testthat/test-expandVariables.R
@@ -1,20 +1,26 @@
 test_that("expandVariables works", {
   varinconfig <- function(data, varentry, expectedvars) {
     configfile <- file.path(tempdir(), "config.csv")
-    configheader <- paste0("category;metric;critical;variable;model;scenario;region;period;",
-                           "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;")
-    configstring <- paste0("scenario;relative;yes;", varentry, ";m;s;r;2005;;;10;100;refmodel1;;;")
+    configheader <- paste0(
+      "category;metric;critical;variable;model;scenario;region;period;",
+      "min_red;min_yel;max_yel;max_red;ref_model;ref_scenario;ref_period;")
+
+    configstring <- paste0("scenario;relative;yes;",
+                           varentry,
+                           ";m;s;r;2005;;;10;100;refmodel1;;;")
     writeLines(c(configheader, configstring), configfile)
     cfg <- expandVariables(getConfig(configfile), data)
     missing <- setdiff(expectedvars, cfg$variable)
     if (length(missing) > 0) {
-      warning("For ", varentry, ", the following variables are expected but missing:\n",
+      warning("For ", varentry,
+              ", the following variables are expected but missing:\n",
               paste0(missing, collapse = ", "))
     }
     expect_true(length(missing) == 0)
     toomuch <- setdiff(cfg$variable, expectedvars)
     if (length(toomuch) > 0) {
-      warning("For ", varentry, ", the following variables were not expected but matched:\n",
+      warning("For ", varentry,
+              ", the following variables were not expected but matched:\n",
               paste0(toomuch, collapse = ", "))
     }
     expect_true(length(toomuch) == 0)
@@ -27,7 +33,7 @@ test_that("expandVariables works", {
   depth3CO <- c("Emi|CO|Trans|Gas", "Emi|CO|Trans|Oil")
   whatever <- c("Emi", "Final Energy", "CO|Trans")
   data$variable <- c(depth1, depth2CO, depth2BC, depth3CO, whatever)
- 
+
   # first argument is what is stated in the config, second what should be matched
   varinconfig(data, "Final Energy", "Final Energy")
   varinconfig(data, "Em*", "Emi")

From 603a6d1a4be957ca2d28a24c59f64cb19c5fa4ac Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 20 Mar 2024 17:04:50 +0100
Subject: [PATCH 17/18] document and clean up for release

---
 .Rbuildignore                 |  4 ++++
 .buildlibrary                 |  9 ++++++---
 .github/workflows/check.yaml  |  5 +++++
 .gitignore                    |  1 +
 .lintr                        |  2 ++
 .pre-commit-config.yaml       |  2 +-
 CITATION.cff                  |  6 ++++--
 DESCRIPTION                   | 11 +++++++----
 NAMESPACE                     |  2 +-
 R/appendTooltips.R            |  5 +++--
 R/combineData.R               |  2 +-
 R/importFunctions.R           |  7 ++++---
 R/outputFunctions.R           | 18 ++++++++++--------
 R/piamValidation-package.R    |  7 +++++++
 R/validateScenarios.R         | 13 +++++++++----
 R/validationHeatmap.R         | 20 +++++++++++++++-----
 R/zzz.R                       |  3 ---
 README.md                     | 15 ++++-----------
 codecov.yml                   | 18 ++++++++++++++++++
 man/piamValidation-package.Rd | 26 ++++++++++++++++++++++++++
 man/validateScenarios.Rd      | 19 +++++++++++++++++++
 man/validationHeatmap.Rd      | 25 +++++++++++++++++++++++++
 man/validationPass.Rd         | 17 +++++++++++++++++
 tests/.lintr                  |  2 ++
 24 files changed, 191 insertions(+), 48 deletions(-)
 create mode 100644 .lintr
 create mode 100644 R/piamValidation-package.R
 delete mode 100644 R/zzz.R
 create mode 100644 codecov.yml
 create mode 100644 man/piamValidation-package.Rd
 create mode 100644 man/validateScenarios.Rd
 create mode 100644 man/validationHeatmap.Rd
 create mode 100644 man/validationPass.Rd
 create mode 100644 tests/.lintr

diff --git a/.Rbuildignore b/.Rbuildignore
index 298f71d..a318840 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -5,3 +5,7 @@
 ^.*CITATION.cff$
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^.lintr$
+^tests/.lintr$
+^codecov\.yml$
+^\.github$
diff --git a/.buildlibrary b/.buildlibrary
index 3d84775..ed81782 100644
--- a/.buildlibrary
+++ b/.buildlibrary
@@ -1,7 +1,10 @@
-ValidationKey: '39524'
+ValidationKey: '198020'
 AutocreateReadme: yes
 AcceptedWarnings:
 - 'Warning: package ''.*'' was built under R version'
 - 'Warning: namespace ''.*'' is not available and has been replaced'
-AcceptedNotes: ~
-allowLinterWarnings: no
+AcceptedNotes:
+- no visible binding for global variable
+- 'Found the following hidden files and directories:'
+allowLinterWarnings: yes
+enforceVersionUpdate: no
diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml
index 870f216..46f518a 100644
--- a/.github/workflows/check.yaml
+++ b/.github/workflows/check.yaml
@@ -49,6 +49,11 @@ jobs:
         shell: Rscript {0}
         run: lucode2:::validkey(stopIfInvalid = TRUE)
 
+      - name: Verify that lucode2::buildLibrary was successful
+        if: github.event_name == 'pull_request'
+        shell: Rscript {0}
+        run: lucode2:::isVersionUpdated()
+
       - name: Checks
         shell: Rscript {0}
         run: |
diff --git a/.gitignore b/.gitignore
index 4c015d7..bcf5ae9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@
 *.RData
 *.Ruserdata
 *.html
+.Rproj.user
diff --git a/.lintr b/.lintr
new file mode 100644
index 0000000..c02af8f
--- /dev/null
+++ b/.lintr
@@ -0,0 +1,2 @@
+linters: lucode2::lintrRules()
+encoding: "UTF-8"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2f13466..243f46a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
     -   id: mixed-line-ending
 
 -   repo: https://github.com/lorenzwalthert/precommit
-    rev: v0.3.2.9025
+    rev: v0.4.0
     hooks:
     -   id: parsable-R
     -   id: deps-in-desc
diff --git a/CITATION.cff b/CITATION.cff
index 5a9e975..c741838 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -2,14 +2,16 @@ cff-version: 1.2.0
 message: If you use this software, please cite it using the metadata from this file.
 type: software
 title: 'piamValidation: Validation Tools for PIK-PIAM'
-version: 0.0.2
-date-released: '2024-02-09'
+version: 0.1.0
+date-released: '2024-03-20'
 abstract: The piamValidation package provides validation tools for the Potsdam Integrated
   Assessment Modelling environment.
 authors:
 - family-names: Weigmann
   given-names: Pascal
   email: pascal.weigmann@pik-potsdam.de
+- family-names: Richters
+  given-names: Oliver
 license: LGPL-3.0
 repository-code: https://github.com/pik-piam/piamValidation
 
diff --git a/DESCRIPTION b/DESCRIPTION
index 22d83c5..5f29e6f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,10 +1,11 @@
 Type: Package
 Package: piamValidation
 Title: Validation Tools for PIK-PIAM
-Version: 0.0.2
-Date: 2024-02-09
+Version: 0.1.0
+Date: 2024-03-20
 Authors@R:
-    person("Pascal", "Weigmann", , "pascal.weigmann@pik-potsdam.de", role = c("aut", "cre"))
+    c(person("Pascal", "Weigmann",, "pascal.weigmann@pik-potsdam.de", role = c("aut", "cre")),
+      person("Oliver", "Richters",, role = "aut"))
 Description: The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
 License: LGPL-3
 URL: https://github.com/pik-piam/piamValidation
@@ -15,9 +16,11 @@ Imports:
     plotly,
     quitte (>= 0.3123.0),
     readxl,
+    tibble,
     tidyr
 Suggests:
-    testthat
+    testthat,
+    remind2
 Encoding: UTF-8
 RoxygenNote: 7.3.1
 Depends: 
diff --git a/NAMESPACE b/NAMESPACE
index 8498520..ff53516 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,4 +14,4 @@ importFrom(ggthemes,theme_tufte)
 importFrom(plotly,ggplotly)
 importFrom(readxl,excel_sheets)
 importFrom(readxl,read_excel)
-importFrom(tibble,as_tibble)
+importFrom(utils,read.csv2)
diff --git a/R/appendTooltips.R b/R/appendTooltips.R
index 6297143..4426552 100644
--- a/R/appendTooltips.R
+++ b/R/appendTooltips.R
@@ -2,12 +2,13 @@
 appendTooltips <- function(df) {
 
   df$text <- NA
+  region <- period <- NULL
 
   # historic - relative
   df[df$category == "historic" & df$metric == "relative", ] <-
     df[df$category == "historic" & df$metric == "relative", ] %>%
-    mutate(text = paste0(region, "\n",
-                         period, "\n",
+    mutate(text = paste0(.data$region, "\n",
+                         .data$period, "\n",
                          "Value: ", round(value, 2), "\n",
                          "Ref_Value: ", round(ref_value, 2), "\n",
                          "Ref_Source: ", ref_model, "\n",
diff --git a/R/combineData.R b/R/combineData.R
index 2060a95..468d3d9 100644
--- a/R/combineData.R
+++ b/R/combineData.R
@@ -2,7 +2,7 @@
 
 # for one row of cfg: filter and merge relevant scenario data with cfg
 # results in one df that contains scenario data, reference data and thresholds
-combineData <- function(data, cfg_row, ref_data = NULL) {
+combineData <- function(data, cfgRow, refData = NULL) {
 
   # shorten as it will be used a lot
   c <- cfg_row
diff --git a/R/importFunctions.R b/R/importFunctions.R
index bd89264..ffe0d82 100644
--- a/R/importFunctions.R
+++ b/R/importFunctions.R
@@ -1,6 +1,6 @@
-#' @importFrom dplyr filter select mutate summarise group_by %>%
-#' @importFrom tibble as_tibble
+#' @importFrom dplyr filter select mutate %>%
 #' @importFrom readxl read_excel excel_sheets
+#' @importFrom utils read.csv2
 
 # scenarioPath: one or multiple paths to .mif or .csv file(s) containing
 #               scenario data in IAM format
@@ -41,7 +41,8 @@ getConfig <- function(configName) {
       )
     cfg <- filter(cfg, ! grepl("^#", cfg[[1]]))
   } else {
-    cfg <- as_tibble(read.csv2(path, na.strings = "", comment.char = "#"))
+    cfg <- tibble::as_tibble(
+      read.csv2(path, na.strings = "", comment.char = "#"))
   }
   message("loading config file: ", configName, "\n")
   return(cfg)
diff --git a/R/outputFunctions.R b/R/outputFunctions.R
index 8c04e35..5dc5b31 100644
--- a/R/outputFunctions.R
+++ b/R/outputFunctions.R
@@ -1,6 +1,10 @@
-#' @importFrom dplyr filter select mutate summarise group_by %>%
-# yellowFail: if set to TRUE a yellow check result of a critical variable will
-#             lead to the scenario not passing as validated
+#' returns information on whether scenarios passed critical validation checks
+#'
+#' @param data data.frame as returned from ``validateScenarios()``
+#' @param yellowFail if set to TRUE a yellow check result of a critical
+#'        variable will lead to the scenario not passing as validated
+#'
+#' @importFrom dplyr %>%
 #' @export
 validationPass <- function(data, yellowFail = FALSE) {
 
@@ -8,11 +12,9 @@ validationPass <- function(data, yellowFail = FALSE) {
 
   # see if any critical variables have failed per scenario and model
   pass <- data %>%
-    filter(check %in% fail_color, critical == "yes") %>%
-    group_by(model, scenario) %>%
-    summarize(pass = n() == 0,
-              n_fail = n()
-              )
+    dplyr::filter(check %in% fail_color, critical == "yes") %>%
+    dplyr::group_by(model, scenario) %>%
+    dplyr::summarize(pass = dplyr::n() == 0, n_fail = dplyr::n())
 
   return(pass)
 }
diff --git a/R/piamValidation-package.R b/R/piamValidation-package.R
new file mode 100644
index 0000000..05db633
--- /dev/null
+++ b/R/piamValidation-package.R
@@ -0,0 +1,7 @@
+#' Validation Tools for PIK-PIAM
+#'
+#' The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
+#'
+#' @name piamValidation-package
+#' @aliases piamValidation-package piamValidation
+"_PACKAGE"
diff --git a/R/validateScenarios.R b/R/validateScenarios.R
index 96c3702..29f89f3 100644
--- a/R/validateScenarios.R
+++ b/R/validateScenarios.R
@@ -1,6 +1,11 @@
-#' @importFrom dplyr filter select mutate summarise group_by %>%
-
-# performs the validation checks from a config on a scenario dataset
+#' performs the validation checks from a config on a scenario dataset
+#'
+#' @param scenarioPath one or multiple path(s) to scenario data in .mif or .csv
+#'        format
+#' @param configName select config from inst/config
+#' @param referencePath in case of historic comparison, choose path to ref data
+#'
+#' @importFrom dplyr filter select mutate group_by %>%
 #' @export
 validateScenarios <- function(scenarioPath, configName, referencePath = NULL) {
 
@@ -20,7 +25,7 @@ validateScenarios <- function(scenarioPath, configName, referencePath = NULL) {
   for (i in 1:nrow(cfg)) {
     # TODO: hist should only be needed if category "historical" is in config
     #       validation generally should work without hist data
-    df_row <- combineData(data, cfg[i, ], ref_data = hist)
+    df_row <- combineData(data, cfg[i, ], refData = hist)
     df <- rbind(df, df_row)
     cat(paste0("Combined config row ", i, " of ", nrow(cfg), "\n"))
   }
diff --git a/R/validationHeatmap.R b/R/validationHeatmap.R
index 1832909..6007063 100644
--- a/R/validationHeatmap.R
+++ b/R/validationHeatmap.R
@@ -1,28 +1,38 @@
+#' takes the output of "validateScenarios()" and plots heatmaps per variable
+#'
+#' @param data data.frame as returned by ``validateScenarios()``
+#' @param var variable to be plotted
+#' @param cat choose category from "historical" or "scenario"
+#' @param met choose metric from "relative", "difference", "absolute" or
+#'        "growthrate"
+#' @param interactive return plots as interactive plotly plots by default
+#' @param compareModels if TRUE, plots compare models instead of scenarios
+#'
 #' @importFrom dplyr filter select mutate %>%
 #' @import ggplot2
 #' @importFrom ggthemes theme_tufte
 #' @importFrom plotly ggplotly
 #' @export
 
-# takes the output of "validateScenarios()" and plots heatmaps per variable
-validationHeatmap <- function(df, var, cat, met, interactive = T, compareModels = T) {
+validationHeatmap <- function(data, var, cat, met,
+                              interactive = T, compareModels = T) {
 
   # possible extension: when giving multiple vars, plot as facets in same row
 
   # prepare data slice
-  d <- df %>%
+  d <- data %>%
     filter(variable == var,
            category == cat,
            metric == met)
 
   # warn if no data is found for combination of var, cat and met
   if (nrow(d) == 0) {
-    df$cm <- paste(category, metric, sep = "-")
+    data$cm <- paste(category, metric, sep = "-")
     warning(
       paste0(
         "No data found for variable in this category and metric.\n
         variable ", var ," is available for the following category-metric
-        combinations: ", unique(df[df$variable == var, "cm"])
+        combinations: ", unique(data[data$variable == var, "cm"])
         )
       )
   }
diff --git a/R/zzz.R b/R/zzz.R
deleted file mode 100644
index 6f1da16..0000000
--- a/R/zzz.R
+++ /dev/null
@@ -1,3 +0,0 @@
-.onAttach <- function(libname, pkgname) {
-  packageStartupMessage("Welcome to piamValidation")
-}
diff --git a/README.md b/README.md
index a902907..08b90be 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Validation Tools for PIK-PIAM
 
-R package **piamValidation**, version **0.0.2**
+R package **piamValidation**, version **0.1.0**
 
 [![CRAN status](https://www.r-pkg.org/badges/version/piamValidation)](https://cran.r-project.org/package=piamValidation)  [![R build status](https://github.com/pik-piam/piamValidation/workflows/check/badge.svg)](https://github.com/pik-piam/piamValidation/actions) [![codecov](https://codecov.io/gh/pik-piam/piamValidation/branch/master/graph/badge.svg)](https://app.codecov.io/gh/pik-piam/piamValidation) 
 
@@ -9,13 +9,6 @@ R package **piamValidation**, version **0.0.2**
 The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
 
 
-## How to use
-
-The following categories/metrics require allow the following columns in the cfg:
-
-
-
-
 ## Installation
 
 For installation of the most recent package version an additional repository has to be added in R:
@@ -45,16 +38,16 @@ In case of questions / problems please contact Pascal Weigmann <pascal.weigmann@
 
 To cite package **piamValidation** in publications use:
 
-Weigmann P (2024). _piamValidation: Validation Tools for PIK-PIAM_. R package version 0.0.2, <URL: https://github.com/pik-piam/piamValidation>.
+Weigmann P, Richters O (2024). _piamValidation: Validation Tools for PIK-PIAM_. R package version 0.1.0, <URL: https://github.com/pik-piam/piamValidation>.
 
 A BibTeX entry for LaTeX users is
 
  ```latex
 @Manual{,
   title = {piamValidation: Validation Tools for PIK-PIAM},
-  author = {Pascal Weigmann},
+  author = {Pascal Weigmann and Oliver Richters},
   year = {2024},
-  note = {R package version 0.0.2},
+  note = {R package version 0.1.0},
   url = {https://github.com/pik-piam/piamValidation},
 }
 ```
diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..cf6328e
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,18 @@
+comment: true
+github_checks:
+  annotations: false
+coverage:
+  precision: 2
+  round: down
+  range: "0...95"
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
+    patch:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
diff --git a/man/piamValidation-package.Rd b/man/piamValidation-package.Rd
new file mode 100644
index 0000000..47bb351
--- /dev/null
+++ b/man/piamValidation-package.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/piamValidation-package.R
+\docType{package}
+\name{piamValidation-package}
+\alias{piamValidation-package}
+\alias{piamValidation}
+\title{Validation Tools for PIK-PIAM}
+\description{
+The piamValidation package provides validation tools for the Potsdam Integrated Assessment Modelling environment.
+}
+\seealso{
+Useful links:
+\itemize{
+  \item \url{https://github.com/pik-piam/piamValidation}
+}
+
+}
+\author{
+\strong{Maintainer}: Pascal Weigmann \email{pascal.weigmann@pik-potsdam.de}
+
+Authors:
+\itemize{
+  \item Oliver Richters
+}
+
+}
diff --git a/man/validateScenarios.Rd b/man/validateScenarios.Rd
new file mode 100644
index 0000000..c768758
--- /dev/null
+++ b/man/validateScenarios.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/validateScenarios.R
+\name{validateScenarios}
+\alias{validateScenarios}
+\title{performs the validation checks from a config on a scenario dataset}
+\usage{
+validateScenarios(scenarioPath, configName, referencePath = NULL)
+}
+\arguments{
+\item{scenarioPath}{one or multiple path(s) to scenario data in .mif or .csv
+format}
+
+\item{configName}{select config from inst/config}
+
+\item{referencePath}{in case of historic comparison, choose path to ref data}
+}
+\description{
+performs the validation checks from a config on a scenario dataset
+}
diff --git a/man/validationHeatmap.Rd b/man/validationHeatmap.Rd
new file mode 100644
index 0000000..e74794a
--- /dev/null
+++ b/man/validationHeatmap.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/validationHeatmap.R
+\name{validationHeatmap}
+\alias{validationHeatmap}
+\title{takes the output of "validateScenarios()" and plots heatmaps per variable}
+\usage{
+validationHeatmap(data, var, cat, met, interactive = T, compareModels = T)
+}
+\arguments{
+\item{data}{data.frame as returned by ``validateScenarios()``}
+
+\item{var}{variable to be plotted}
+
+\item{cat}{choose category from "historical" or "scenario"}
+
+\item{met}{choose metric from "relative", "difference", "absolute" or
+"growthrate"}
+
+\item{interactive}{return plots as interactive plotly plots by default}
+
+\item{compareModels}{if TRUE, plots compare models instead of scenarios}
+}
+\description{
+takes the output of "validateScenarios()" and plots heatmaps per variable
+}
diff --git a/man/validationPass.Rd b/man/validationPass.Rd
new file mode 100644
index 0000000..4d969c1
--- /dev/null
+++ b/man/validationPass.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/outputFunctions.R
+\name{validationPass}
+\alias{validationPass}
+\title{returns information on whether scenarios passed critical validation checks}
+\usage{
+validationPass(data, yellowFail = FALSE)
+}
+\arguments{
+\item{data}{data.frame as returned from ``validateScenarios()``}
+
+\item{yellowFail}{if set to TRUE a yellow check result of a critical
+variable will lead to the scenario not passing as validated}
+}
+\description{
+returns information on whether scenarios passed critical validation checks
+}
diff --git a/tests/.lintr b/tests/.lintr
new file mode 100644
index 0000000..165993b
--- /dev/null
+++ b/tests/.lintr
@@ -0,0 +1,2 @@
+linters: lucode2::lintrRules(allowUndesirable = TRUE)
+encoding: "UTF-8"

From 77f2f296d2951682f2b2cfb064811f7d90385ee7 Mon Sep 17 00:00:00 2001
From: Pascal Weigmann <pascal.weigmann@pik-potsdam.de>
Date: Wed, 20 Mar 2024 17:12:40 +0100
Subject: [PATCH 18/18] add missing dependencies

---
 DESCRIPTION | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5f29e6f..f37af56 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,9 +10,12 @@ Description: The piamValidation package provides validation tools for the Potsda
 License: LGPL-3
 URL: https://github.com/pik-piam/piamValidation
 Imports:
+    devtools,
     dplyr (>= 1.1.1),
     ggplot2,
     ggthemes,
+    htmltools,
+    knitr,
     plotly,
     quitte (>= 0.3123.0),
     readxl,