From 17a99a883a111816784c0fc3bfc1a7f97d0945cb Mon Sep 17 00:00:00 2001
From: Sebastian Fischer <sebf.fischer@gmail.com>
Date: Wed, 17 Jul 2024 15:22:21 +0200
Subject: [PATCH] delta method and refactor

---
 R/MeasureAbstractCi.R                   | 20 +++++++--
 man/mlr_measures_abstract_ci.Rd         |  7 ++-
 tests/testthat/helpers.R                | 57 +++++++++++++++----------
 tests/testthat/test_MeasureCIConZ.R     | 13 +-----
 tests/testthat/test_MeasureCIHoldout.R  | 12 +-----
 tests/testthat/test_MeasureCINaiveCV.R  | 30 ++-----------
 tests/testthat/test_MeasureCi.R         | 11 +----
 tests/testthat/test_MeasureCiCorT.R     | 12 +-----
 tests/testthat/test_MeasureCiNestedCV.R | 19 +++------
 9 files changed, 71 insertions(+), 110 deletions(-)

diff --git a/R/MeasureAbstractCi.R b/R/MeasureAbstractCi.R
index 2112ccd..95db1d3 100644
--- a/R/MeasureAbstractCi.R
+++ b/R/MeasureAbstractCi.R
@@ -25,10 +25,16 @@
 #'   The resample result.
 #' @section Inheriting:
 #' To define a new CI method, inherit from the abstract base class and implement the private method:
-#' `ci(tbl: data.table, rr: ResampleResult, param_vals: named `list()`) -> numeric(3)`
+#' `ci: function(tbl: data.table, rr: ResampleResult, param_vals: named `list()`) -> numeric(3)`
 #' Here, `tbl` contains the columns `loss`, `row_id` and `iteration`, which are the pointwise loss,
 #' the identifier of the observation and the resampling iteration.
 #' It should return a vector containing the `estimate`, `lower` and `upper` boundary in that order.
+#' In case the confidence interval is not of the form `(estimate, estimate - z * se, estimate + z * se)`
+#' it is also necessary to implement
+#' `trafo: function(ci: numeric(3), measure: Measure) -> numeric(3)`
+#' Which receives a confidence interval for a pointwise loss (e.g. squared-error) and transforms it according
+#' to the transformation `measure$trafo` (e.g. sqrt to go from mse to rmse).
+#' 
 #' @export
 MeasureAbstractCi = R6Class("MeasureAbstractCi",
   inherit = Measure,
@@ -97,13 +103,12 @@ MeasureAbstractCi = R6Class("MeasureAbstractCi",
         stopf("CI for Measure '%s' requires one of: %s", self$measure$id, paste0(self$resamplings, sep = ", "))
       }
 
-
       param_vals = self$param_set$get_values()
       tbl = rr$obs_loss(self$measure)
       names(tbl)[names(tbl) == self$measure$id] = "loss"
       ci = private$.ci(tbl, rr, param_vals)
       if (!is.null(self$measure$trafo)) {
-        ci = self$measure$trafo(ci)
+        ci = private$.trafo(ci)
       }
       if (param_vals$within_range) {
         ci = pmin(pmax(ci, self$range[1L]), self$range[2L])
@@ -112,6 +117,15 @@ MeasureAbstractCi = R6Class("MeasureAbstractCi",
     }
   ),
   private = list(
+    .trafo = function(ci) {
+      measure = self$measure
+      ci[[1]] = measure$trafo$fn(ci[[1]])
+      halfwidth = (ci[[3]] - ci[[1]])
+      multiplier = measure$trafo$deriv(ci[[1]])
+      est_t = measure$trafo$fn(ci[[1]])
+      ci_t = c(est_t, est_t - halfwidth * multiplier, est_t + halfwidth * multiplier)
+      set_names(ci_t, names(ci))
+    },
     .score = function(prediction, ...) {
       stopf("CI measures must be passed to $aggregate(), not $score()")
     },
diff --git a/man/mlr_measures_abstract_ci.Rd b/man/mlr_measures_abstract_ci.Rd
index f815dde..db285e7 100644
--- a/man/mlr_measures_abstract_ci.Rd
+++ b/man/mlr_measures_abstract_ci.Rd
@@ -28,10 +28,15 @@ This is initialized to \code{TRUE}.
 \section{Inheriting}{
 
 To define a new CI method, inherit from the abstract base class and implement the private method:
-\verb{ci(tbl: data.table, rr: ResampleResult, param_vals: named }list()\verb{) -> numeric(3)}
+\verb{ci: function(tbl: data.table, rr: ResampleResult, param_vals: named }list()\verb{) -> numeric(3)}
 Here, \code{tbl} contains the columns \code{loss}, \code{row_id} and \code{iteration}, which are the pointwise loss,
 the identifier of the observation and the resampling iteration.
 It should return a vector containing the \code{estimate}, \code{lower} and \code{upper} boundary in that order.
+In case the confidence interval is not of the form \verb{(estimate, estimate - z * se, estimate + z * se)}
+it is also necessary to implement
+\verb{trafo: function(ci: numeric(3), measure: Measure) -> numeric(3)}
+Which receives a confidence interval for a pointwise loss (e.g. squared-error) and transforms it according
+to the transformation \code{measure$trafo} (e.g. sqrt to go from mse to rmse).
 }
 
 \section{Super class}{
diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R
index d21cf50..3a2a8f2 100644
--- a/tests/testthat/helpers.R
+++ b/tests/testthat/helpers.R
@@ -1,27 +1,38 @@
-expect_ci_measure = function(m, rr, symmetric = TRUE) {
-  m = m$clone(deep = TRUE)
-  get("expect_measure", envir = .GlobalEnv)(m)
-  testthat::expect_s3_class(m, "MeasureAbstractCi")
-  testthat::expect_error(rr$score(m), "$aggregate", fixed = TRUE)
-  ci = rr$aggregate(m)
-  checkmate::expect_numeric(ci[[m$id]])
-  checkmate::expect_numeric(ci[[paste0(m$id, ".lower")]])
-  checkmate::expect_numeric(ci[[paste0(m$id, ".upper")]])
-  testthat::expect_true(ci[[m$id]] > ci[[paste0(m$id, ".lower")]])
-  testthat::expect_true(ci[[m$id]] < ci[[paste0(m$id, ".upper")]])
-  if (symmetric) {
-    d1 = ci[[m$id]] - ci[[paste0(m$id, ".lower")]]
-    d2 = ci[[paste0(m$id, ".upper")]] - ci[[m$id]]
-    testthat::expect_equal(d1, d2)
-  }
+expect_ci_measure = function(id, resampling, task = tsk("boston_housing"),
+  symmetric = TRUE, stratum = "chas", ...) {
+  check = function(m, rr) {
+    m = m$clone(deep = TRUE)
+    get("expect_measure", envir = .GlobalEnv)(m)
+    testthat::expect_s3_class(m, "MeasureAbstractCi")
+    testthat::expect_error(rr$score(m), "$aggregate", fixed = TRUE)
+    ci = rr$aggregate(m)
+    checkmate::expect_numeric(ci[[m$id]])
+    checkmate::expect_numeric(ci[[paste0(m$id, ".lower")]])
+    checkmate::expect_numeric(ci[[paste0(m$id, ".upper")]])
+    testthat::expect_true(ci[[m$id]] > ci[[paste0(m$id, ".lower")]])
+    testthat::expect_true(ci[[m$id]] < ci[[paste0(m$id, ".upper")]])
+    if (symmetric && ci[[2]] != m$range[[1L]] && ci[[3]] != m$range[2L]) {
+      d1 = ci[[m$id]] - ci[[paste0(m$id, ".lower")]]
+      d2 = ci[[paste0(m$id, ".upper")]] - ci[[m$id]]
+      testthat::expect_equal(d1, d2)
+    }
+
+    m$param_set$values$alpha = 0.05
+    ci1 = rr$aggregate(m)
+    m$param_set$values$alpha = 0.5
+    ci2 = rr$aggregate(m)
 
-  m$param_set$values$alpha = 0.05
-  ci1 = rr$aggregate(m)
-  m$param_set$values$alpha = 0.5
-  ci2 = rr$aggregate(m)
+    expect_equal(ci1[1L], ci2[1L])
+    expect_true(ci2[2L] >= ci1[2L])
+    expect_true(ci2[3L] <= ci1[3L])
+  }
+  rr = resample(task, lrn("regr.featureless"), resampling)
+  check(msr(id, measure = "regr.rmse", within_range = FALSE), rr)
+  check(msr(id, measure = "regr.mse", within_range = FALSE), rr)
 
-  expect_equal(ci1[1L], ci2[1L])
-  expect_true(ci2[2L] >= ci1[2L])
-  expect_true(ci2[3L] <= ci1[3L])
+  task$col_roles$stratum = "chas"
+  rr_strat = resample(task, lrn("regr.featureless"), resampling)
+  check(msr(id, measure = "regr.rmse", within_range = FALSE), rr)
+  check(msr(id, measure = "regr.mse", within_range = FALSE), rr)
 }
 
diff --git a/tests/testthat/test_MeasureCIConZ.R b/tests/testthat/test_MeasureCIConZ.R
index 72c95d7..a2b0336 100644
--- a/tests/testthat/test_MeasureCIConZ.R
+++ b/tests/testthat/test_MeasureCIConZ.R
@@ -1,15 +1,4 @@
 test_that("basic", {
   withr::local_seed(1)
-  mci = msr("ci.con_z", "regr.mae")
-  rr = resample(tsk("boston_housing"), lrn("regr.featureless"), rsmp("paired_subsampling", repeats_in = 5, repeats_out = 10))
-  expect_ci_measure(mci, rr)
-})
-
-test_that("stratification", {
-  withr::local_seed(1)
-  mci = msr("ci.con_z", "regr.mae")
-  task = tsk("boston_housing")
-  task$col_roles$stratum = "chas"
-  rr = resample(task, lrn("regr.featureless"), rsmp("paired_subsampling", repeats_in = 5, repeats_out = 10))
-  expect_ci_measure(mci, rr)
+  expect_ci_measure("ci.con_z", rsmp("paired_subsampling", repeats_in = 5, repeats_out = 10))
 })
diff --git a/tests/testthat/test_MeasureCIHoldout.R b/tests/testthat/test_MeasureCIHoldout.R
index e9da3c7..fa6fc71 100644
--- a/tests/testthat/test_MeasureCIHoldout.R
+++ b/tests/testthat/test_MeasureCIHoldout.R
@@ -1,13 +1,3 @@
 test_that("simple", {
-  mci = msr("ci.holdout", "regr.mse")
-  rr = resample(tsk("boston_housing"), lrn("regr.featureless"), rsmp("holdout"))
-  expect_ci_measure(mci, rr)
-})
-
-test_that("stratification", {
-  mci = msr("ci.holdout", "regr.mse")
-  task = tsk("boston_housing")
-  task$col_roles$stratum = "chas"
-  rr = resample(task, lrn("regr.featureless"), rsmp("holdout"))
-  expect_ci_measure(mci, rr)
+  expect_ci_measure("ci.holdout", rsmp("holdout"))
 })
diff --git a/tests/testthat/test_MeasureCINaiveCV.R b/tests/testthat/test_MeasureCINaiveCV.R
index f3e9941..34bbacc 100644
--- a/tests/testthat/test_MeasureCINaiveCV.R
+++ b/tests/testthat/test_MeasureCINaiveCV.R
@@ -1,28 +1,6 @@
 test_that("basic", {
-  task = tsk("mtcars")
-  learner = lrn("regr.featureless")
-
-  mci = msr("ci.naive_cv", "regr.mse", variance = "all-pairs")
-  rr = resample(task, learner, rsmp("cv"))
-  expect_ci_measure(mci, rr)
-
-  mci = msr("ci.naive_cv", "regr.mse", variance = "within-fold")
-  rr = resample(task, learner, rsmp("cv"))
-  expect_ci_measure(mci, rr)
-
-  mci = msr("ci.naive_cv", "regr.mse")
-  rr = resample(task, learner, rsmp("loo"))
-  expect_ci_measure(mci, rr)
-
-  mci = msr("ci.naive_cv", "regr.mse", variance = "within-fold")
-  rr = resample(task, learner, rsmp("loo"))
-  expect_error(rr$aggregate(mci), "LOO")
-})
-
-test_that("stratification", {
-  mci = msr("ci.naive_cv", "regr.mse")
-  task = tsk("boston_housing")
-  task$col_roles$stratum = "chas"
-  rr = resample(task, lrn("regr.featureless"), rsmp("cv"))
-  expect_ci_measure(mci, rr)
+  expect_ci_measure("ci.naive_cv", rsmp("cv"), variance = "all-pairs")
+  expect_ci_measure("ci.naive_cv", rsmp("cv"), variance = "within-fold")
+  expect_ci_measure("ci.naive_cv", rsmp("loo"), variance = "all-pairs")
+  expect_ci_measure("ci.naive_cv", rsmp("loo"), variance = "within-fold")
 })
diff --git a/tests/testthat/test_MeasureCi.R b/tests/testthat/test_MeasureCi.R
index 0674311..09db16b 100644
--- a/tests/testthat/test_MeasureCi.R
+++ b/tests/testthat/test_MeasureCi.R
@@ -3,14 +3,5 @@ test_that("basic", {
   ci1 = rr$aggregate(msr("ci", "classif.acc"))
   ci2 = rr$aggregate(msr("ci.holdout", "classif.acc"))
   expect_equal(ci1, ci2)
-  mci = msr("ci", "classif.acc")
-  expect_ci_measure(mci, rr)
-})
-
-test_that("obs_loss with trafo", {
-  withr::local_seed(1)
-  rr = resample(tsk("boston_housing"), lrn("regr.featureless"), rsmp("cv"))
-  ci = rr$aggregate(msr("ci.naive_cv", "regr.rmse"))
-  expect_ci_measure(msr("ci.naive_cv", "regr.rmse"), rr, symmetric = FALSE)
-  expect_ci_measure(msr("ci", "regr.rmse"), rr, symmetric = FALSE)
+  expect_ci_measure("ci", rsmp("holdout"))
 })
diff --git a/tests/testthat/test_MeasureCiCorT.R b/tests/testthat/test_MeasureCiCorT.R
index c14ea69..9a15314 100644
--- a/tests/testthat/test_MeasureCiCorT.R
+++ b/tests/testthat/test_MeasureCiCorT.R
@@ -1,13 +1,3 @@
 test_that("simple", {
-  mci = msr("ci.cor_t", "regr.mse")
-  rr = resample(tsk("boston_housing"), lrn("regr.featureless"), rsmp("subsampling", repeats = 10))
-  expect_ci_measure(mci, rr)
-})
-
-test_that("simple", {
-  mci = msr("ci.cor_t", "regr.mse")
-  task = tsk("boston_housing")
-  task$col_roles$stratum = "chas"
-  rr = resample(task, lrn("regr.featureless"), rsmp("subsampling", repeats = 10))
-  expect_ci_measure(mci, rr)
+  expect_ci_measure("ci.cor_t", rsmp("subsampling", repeats = 10L))
 })
diff --git a/tests/testthat/test_MeasureCiNestedCV.R b/tests/testthat/test_MeasureCiNestedCV.R
index 95e965c..6116f97 100644
--- a/tests/testthat/test_MeasureCiNestedCV.R
+++ b/tests/testthat/test_MeasureCiNestedCV.R
@@ -1,15 +1,8 @@
 test_that("basic", {
-  withr::local_seed(1)
-  mci = msr("ci.ncv", "classif.acc")
-  rr = resample(tsk("iris"), lrn("classif.featureless"), rsmp("nested_cv", repeats = 20, folds = 5))
-  expect_ci_measure(mci, rr)
-})
-
-test_that("stratification", {
-  withr::local_seed(1)
-  mci = msr("ci.ncv", "classif.acc")
-  task = tsk("iris")
-  task$col_roles$stratum = "Species"
-  rr = resample(task, lrn("classif.featureless"), rsmp("nested_cv", repeats = 20, folds = 5))
-  expect_ci_measure(mci, rr)
+  task = tsk("mtcars")$cbind(data.frame(chas = rep(c("a", "b"), times = 16)))
+  expect_ci_measure(
+    "ci.ncv",
+    rsmp("nested_cv", folds = 3L, repeats = 5L),
+    task = task
+  )
 })