diff --git a/DESCRIPTION b/DESCRIPTION index c79f5928..3f17652e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,6 +41,7 @@ Suggests: mlr3learners, mlr3pipelines, rpart, + fastVoteR, testthat (>= 3.0.0) Config/testthat/edition: 3 Config/testthat/parallel: true @@ -74,6 +75,7 @@ Collate: 'assertions.R' 'auto_fselector.R' 'bibentries.R' + 'embedded_ensemble_fselect.R' 'ensemble_fselect.R' 'extract_inner_fselect_archives.R' 'extract_inner_fselect_results.R' diff --git a/NAMESPACE b/NAMESPACE index 81cfe545..a0536eef 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,6 +36,7 @@ export(auto_fselector) export(callback_batch_fselect) export(clbk) export(clbks) +export(embedded_ensemble_fselect) export(ensemble_fselect) export(extract_inner_fselect_archives) export(extract_inner_fselect_results) diff --git a/NEWS.md b/NEWS.md index c456706a..46b078bc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # mlr3fselect (development version) +* Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects +* Add embedded ensemble feature selection `embedded_ensemble_fselect()` +* Refactor `ensemble_fselect()` and `EnsembleFSResult()` + # mlr3fselect 1.2.1 * compatibility: mlr3 0.22.0 diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 4b792738..fed296f2 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -5,7 +5,8 @@ #' @description #' The `EnsembleFSResult` stores the results of ensemble feature selection. #' It includes methods for evaluating the stability of the feature selection process and for ranking the selected features among others. -#' The function [ensemble_fselect()] returns an object of this class. +#' +#' Both functions [ensemble_fselect()] and [embedded_ensemble_fselect()] return an object of this class. #' #' @section S3 Methods: #' * `as.data.table.EnsembleFSResult(x, benchmark_result = TRUE)`\cr @@ -16,7 +17,7 @@ #' Whether to add the learner, task and resampling information from the benchmark result. #' #' @references -#' `r format_bib("das1999")` +#' `r format_bib("das1999", "meinshausen2010")` #' #' @export #' @examples @@ -27,7 +28,8 @@ #' learners = lrns(c("classif.rpart", "classif.featureless")), #' init_resampling = rsmp("subsampling", repeats = 2), #' inner_resampling = rsmp("cv", folds = 3), -#' measure = msr("classif.ce"), +#' inner_measure = msr("classif.ce"), +#' measure = msr("classif.acc"), #' terminator = trm("none") #' ) #' @@ -43,7 +45,16 @@ #' # returns a ranking of all features #' head(efsr$feature_ranking()) #' -#' # returns the empirical pareto front (nfeatures vs error) +#' # returns the empirical pareto front, i.e. n_features vs measure (error) +#' efsr$pareto_front() +#' +#' # returns the knee points (optimal trade-off between n_features and performance) +#' efsr$knee_points() +#' +#' # change to use the inner optimization measure +#' efsr$set_active_measure(which = "inner") +#' +#' # Pareto front is calculated on the inner measure #' efsr$pareto_front() #' } EnsembleFSResult = R6Class("EnsembleFSResult", @@ -62,26 +73,52 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' #' @param result ([data.table::data.table])\cr #' The result of the ensemble feature selection. - #' Column names should include `"resampling_iteration"`, `"learner_id"`, `"features"` - #' and `"n_features"`. + #' Mandatory column names should include `"resampling_iteration"`, `"learner_id"`, + #' `"features"` and `"n_features"`. + #' A column named as `{measure$id}` (scores on the test sets) must also be + #' always present. 
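For illustration, a minimal sketch of such a result table (hypothetical values; it assumes "classif.ce" is the id of both the outer and the inner measure, so the optional inner column is named "classif.ce_inner"):

library(data.table)

result = data.table(
  resampling_iteration = c(1L, 1L),
  learner_id = c("classif.rpart", "classif.featureless"),
  features = list(c("V1", "V2"), "V3"),
  n_features = c(2L, 1L),
  classif.ce = c(0.20, 0.45),       # mandatory: scores on the test sets
  classif.ce_inner = c(0.15, 0.40)  # optional: inner resampling scores on the train sets
)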
+ #' The column with the performance scores on the inner resampling of the train sets is not mandatory, + #' but note that it should be named as `{inner_measure$id}_inner` to distinguish from + #' the `{measure$id}`. #' @param features ([character()])\cr #' The vector of features of the task that was used in the ensemble feature #' selection. #' @param benchmark_result ([mlr3::BenchmarkResult])\cr #' The benchmark result object. - #' @param measure_id (`character(1)`)\cr - #' Column name of `"result"` that corresponds to the measure used. - #' @param minimize (`logical(1)`)\cr - #' If `TRUE` (default), lower values of the measure correspond to higher performance. - initialize = function(result, features, benchmark_result = NULL, measure_id, - minimize = TRUE) { + #' @param measure ([mlr3::Measure])\cr + #' The performance measure used to evaluate the learners on the test sets generated + #' during the ensemble feature selection process. + #' By default, this serves as the 'active' measure for the methods of this object. + #' The active measure can be updated using the `$set_active_measure()` method. + #' @param inner_measure ([mlr3::Measure])\cr + #' The performance measure used to optimize and evaluate the learners during the inner resampling process of the training sets, generated as part of the ensemble feature selection procedure. + initialize = function( + result, + features, + benchmark_result = NULL, + measure, + inner_measure = NULL + ) { assert_data_table(result) - private$.measure_id = assert_string(measure_id, null.ok = FALSE) - mandatory_columns = c("resampling_iteration", "learner_id", "features", "n_features") - assert_names(names(result), must.include = c(mandatory_columns, measure_id)) + private$.measure = assert_measure(measure) + private$.active_measure = "outer" + measure_ids = c(private$.measure$id) + if (!is.null(inner_measure)) { + private$.inner_measure = assert_measure(inner_measure) + # special end-fix required for inner measure + measure_ids = c(measure_ids, sprintf("%s_inner", private$.inner_measure$id)) + } + + # the non-NULL measure ids should be defined as columns in the dt result + mandatory_columns = c("resampling_iteration", "learner_id", "features", + "n_features", measure_ids) + assert_names(names(result), must.include = mandatory_columns) private$.result = result private$.features = assert_character(features, any.missing = FALSE, null.ok = FALSE) - private$.minimize = assert_logical(minimize, null.ok = FALSE) + + # check that all feature sets are subsets of the task features + assert_subset(unlist(result$features), private$.features) + self$benchmark_result = if (!is.null(benchmark_result)) assert_benchmark_result(benchmark_result) self$man = "mlr3fselect::ensemble_fs_result" @@ -99,7 +136,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' #' @param ... (ignored). print = function(...) { - catf(format(self)) + catf("%s with %s learners and %s initial resamplings", + format(self), self$n_learners, self$n_resamples) print(private$.result[, c("resampling_iteration", "learner_id", "n_features"), with = FALSE]) }, @@ -110,43 +148,102 @@ EnsembleFSResult = R6Class("EnsembleFSResult", }, #' @description - #' Calculates the feature ranking. + #' Use this function to change the active measure. + #' + #' @param which (`character(1)`)\cr + #' Which [measure][mlr3::Measure] from the ensemble feature selection result + #' to use in methods of this object. 
+ #' Should be either `"inner"` (optimization measure used in training sets) + #' or `"outer"` (measure used in test sets, default value). + set_active_measure = function(which = "outer") { + assert_choice(which, c("inner", "outer")) + + # check if `inner_measure` is an `mlr3::Measure` + if (which == "inner" && is.null(private$.inner_measure)) { + stop("No inner_measure was defined during initialization") + } + + private$.active_measure = which + }, + + #' @description + #' Calculates the feature ranking via [fastVoteR::rank_candidates()]. #' #' @details - #' The feature ranking process is built on the following framework: models act as voters, features act as candidates, and voters select certain candidates (features). + #' The feature ranking process is built on the following framework: models act as *voters*, features act as *candidates*, and voters select certain candidates (features). #' The primary objective is to compile these selections into a consensus ranked list of features, effectively forming a committee. - #' Currently, only `"approval_voting"` method is supported, which selects the candidates/features that have the highest approval score or selection frequency, i.e. appear the most often. + #' + #' For every feature a score is calculated, which depends on the `"method"` argument. + #' The higher the score, the higher the ranking of the feature. + #' Note that some methods output a feature ranking instead of a score per feature, so we always include **Borda's score**, which is method-agnostic, i.e. it can be used to compare the feature rankings across different methods. + #' + #' We shuffle the input candidates/features so that we enforce random tie-breaking. + #' Users should set the same `seed` for consistent comparison between the different feature ranking methods and for reproducibility. #' #' @param method (`character(1)`)\cr - #' The method to calculate the feature ranking. + #' The method to calculate the feature ranking. See [fastVoteR::rank_candidates()] + #' for a complete list of available methods. + #' Approval voting (`"av"`) is the default method. + #' @param use_weights (`logical(1)`)\cr + #' The default value (`TRUE`) uses weights equal to the performance scores + #' of each voter/model (or the inverse scores if the measure is minimized). + #' If `FALSE`, we treat all voters as equal and assign them all a weight equal to 1. + #' @param committee_size (`integer(1)`)\cr + #' Number of top selected features in the output ranking. + #' This parameter can be used to speed-up methods that build a committee sequentially + #' (`"seq_pav"`), by requesting only the top N selected candidates/features + #' and not the complete feature ranking. + #' @param shuffle_features (`logical(1)`)\cr + #' Whether to shuffle the task features randomly before computing the ranking. + #' Shuffling ensures consistent random tie-breaking across methods and prevents + #' deterministic biases when features with equal scores are encountered. + #' Default is `TRUE` and it's advised to set a seed before running this function. + #' Set to `FALSE` if deterministic ordering of features is preferred (same as + #' during initialization). 
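For illustration, a rough sketch of how these arguments can be combined, assuming `efsr` is an `EnsembleFSResult` like the one built in the class examples above (method names follow the documentation; "seq_pav" is one of the sequential committee methods mentioned above):

set.seed(42)  # fixes the random tie-breaking used when shuffle_features = TRUE
efsr$feature_ranking()                                        # default: approval voting ("av"), weighted by performance
efsr$feature_ranking(method = "seq_pav", committee_size = 5)  # only the top 5 features
efsr$feature_ranking(use_weights = FALSE, shuffle_features = FALSE)  # unweighted, deterministic feature order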
#' - #' @return A [data.table::data.table] listing all the features, ordered by decreasing inclusion probability scores (depending on the `method`) - feature_ranking = function(method = "approval_voting") { - assert_choice(method, choices = "approval_voting") - - # cached results - if (!is.null(private$.feature_ranking[[method]])) { - return(private$.feature_ranking[[method]]) + #' @return A [data.table::data.table] listing all the features, ordered by decreasing scores (depends on the `"method"`). Columns are as follows: + #' - `"feature"`: Feature names. + #' - `"score"`: Scores assigned to each feature based on the selected method (if applicable). + #' - `"norm_score"`: Normalized scores (if applicable), scaled to the range \eqn{[0,1]}, which can be loosely interpreted as **selection probabilities** (Meinshausen et al. (2010)). + #' - `"borda_score"`: Borda scores for method-agnostic comparison, ranging in \eqn{[0,1]}, where the top feature receives a score of 1 and the lowest-ranked feature receives a score of 0. + #' This column is always included so that feature ranking methods that output only rankings have also a feature-wise score. + #' + feature_ranking = function(method = "av", use_weights = TRUE, committee_size = NULL, shuffle_features = TRUE) { + requireNamespace("fastVoteR") + + # candidates => all features, voters => list of selected (best) features sets + candidates = private$.features + voters = private$.result$features + + # calculate weights + if (use_weights) { + # voter weights are the (inverse) scores + measure = self$measure # get active measure + measure_id = ifelse(private$.active_measure == "inner", + sprintf("%s_inner", measure$id), + measure$id) + + scores = private$.result[, get(measure_id)] + weights = if (measure$minimize) 1 / scores else scores + } else { + # all voters are equal + weights = rep(1, length(voters)) } - count_tbl = sort(table(unlist(private$.result$features)), decreasing = TRUE) - features_selected = names(count_tbl) - features_not_selected = setdiff(private$.features, features_selected) - - res_fs = data.table( - feature = features_selected, - inclusion_probability = as.vector(count_tbl) / nrow(private$.result) + # get consensus feature ranking + res = fastVoteR::rank_candidates( + voters = voters, + candidates = candidates, + weights = weights, + committee_size = committee_size, + method = method, + borda_score = TRUE, + shuffle_candidates = shuffle_features ) - res_fns = data.table( - feature = features_not_selected, - inclusion_probability = 0 - ) + setnames(res, "candidate", "feature") - res = rbindlist(list(res_fs, res_fns)) - - private$.feature_ranking[[method]] = res - private$.feature_ranking[[method]] + res }, #' @description @@ -222,8 +319,11 @@ EnsembleFSResult = R6Class("EnsembleFSResult", pareto_front = function(type = "empirical") { assert_choice(type, choices = c("empirical", "estimated")) result = private$.result - measure_id = private$.measure_id - minimize = private$.minimize + measure = self$measure # get active measure + measure_id = ifelse(private$.active_measure == "inner", + sprintf("%s_inner", measure$id), + measure$id) + minimize = measure$minimize # Keep only n_features and performance scores cols_to_keep = c("n_features", measure_id) @@ -261,6 +361,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult", # Transform the data (x => 1/x) n_features_inv = NULL pf[, n_features_inv := 1 / n_features] + # remove edge cases where no features were selected + pf = pf[n_features > 0] # Fit the linear model form = 
mlr3misc::formulate(lhs = measure_id, rhs = "n_features_inv") @@ -298,8 +400,11 @@ EnsembleFSResult = R6Class("EnsembleFSResult", knee_points = function(method = "NBI", type = "empirical") { assert_choice(method, choices = c("NBI")) assert_choice(type, choices = c("empirical", "estimated")) - measure_id = private$.measure_id - minimize = private$.minimize + measure = self$measure # get active measure + measure_id = ifelse(private$.active_measure == "inner", + sprintf("%s_inner", measure$id), + measure$id) + minimize = measure$minimize pf = if (type == "empirical") self$pareto_front() else self$pareto_front(type = "estimated") @@ -346,11 +451,36 @@ EnsembleFSResult = R6Class("EnsembleFSResult", uniqueN(private$.result$learner_id) }, - #' @field measure (`character(1)`)\cr - #' Returns the measure id used in the ensemble feature selection. + #' @field measure ([mlr3::Measure])\cr + #' Returns the 'active' measure that is used in methods of this object. measure = function(rhs) { assert_ro_binding(rhs) - private$.measure_id + + if (private$.active_measure == "outer") { + private$.measure + } else { + private$.inner_measure + } + }, + + #' @field active_measure (`character(1)`)\cr + #' Indicates the type of the active performance measure. + #' + #' During the ensemble feature selection process, the dataset is split into **multiple subsamples** (train/test splits) using an initial resampling scheme. + #' So, performance can be evaluated using one of two measures: + #' + #' - `"outer"`: measure used to evaluate the performance on the test sets. + #' - `"inner"`: measure used for optimization and to compute performance during inner resampling on the training sets. + active_measure = function(rhs) { + assert_ro_binding(rhs) + private$.active_measure + }, + + #' @field n_resamples (`character(1)`)\cr + #' Returns the number of times the task was initially resampled in the ensemble feature selection process. + n_resamples = function(rhs) { + assert_ro_binding(rhs) + uniqueN(self$result$resampling_iteration) } ), @@ -358,14 +488,14 @@ EnsembleFSResult = R6Class("EnsembleFSResult", .result = NULL, # with no R6 classes .stability_global = NULL, .stability_learner = NULL, - .feature_ranking = NULL, .features = NULL, - .measure_id = NULL, - .minimize = NULL + .measure = NULL, + .inner_measure = NULL, + .active_measure = NULL ) ) #' @export -as.data.table.EnsembleFSResult = function(x, ...) { +as.data.table.EnsembleFSResult = function(x, ...) 
{ x$result } diff --git a/R/bibentries.R b/R/bibentries.R index d3787ad6..65a36a82 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -9,7 +9,6 @@ bibentries = c( title = "ecr 2.0", booktitle = "Proceedings of the Genetic and Evolutionary Computation Conference Companion" ), - bergstra_2012 = bibentry("article", title = "Random Search for Hyper-Parameter Optimization", author = "James Bergstra and Yoshua Bengio", @@ -20,8 +19,7 @@ bibentries = c( pages = "281--305", url = "https://jmlr.csail.mit.edu/papers/v13/bergstra12a.html" ), - - thomas2017 = bibentry("article", + thomas2017 = bibentry("article", doi = "10.1155/2017/1421409", year = "2017", publisher = "Hindawi Limited", @@ -31,8 +29,7 @@ bibentries = c( title = "Probing for Sparse and Fast Variable Selection with Model-Based Boosting", journal = "Computational and Mathematical Methods in Medicine" ), - - wu2007 = bibentry("article", + wu2007 = bibentry("article", doi = "10.1198/016214506000000843", year = "2007", month = "3", @@ -44,8 +41,7 @@ bibentries = c( title = "Controlling Variable Selection by the Addition of Pseudovariables", journal = "Journal of the American Statistical Association" ), - - guyon2002 = bibentry("article", + guyon2002 = bibentry("article", title = "Gene Selection for Cancer Classification using Support Vector Machines", volume = "46", issn = "1573-0565", @@ -56,7 +52,6 @@ bibentries = c( author = "Isabelle Guyon and Jason Weston and Stephen Barnhill and Vladimir Vapnik", year = "2002" ), - kuhn2013 = bibentry("Inbook", author = "Kuhn, Max and Johnson, Kjell", chapter = "Over-Fitting and Model Tuning", @@ -67,7 +62,6 @@ bibentries = c( pages = "61--92", isbn = "978-1-4614-6849-3" ), - saeys2008 = bibentry("article", author = "Saeys, Yvan and Abeel, Thomas and Van De Peer, Yves", doi = "10.1007/978-3-540-87481-2_21", @@ -79,7 +73,6 @@ bibentries = c( volume = "5212 LNAI", year = "2008" ), - abeel2010 = bibentry("article", author = "Abeel, Thomas and Helleputte, Thibault and Van de Peer, Yves and Dupont, Pierre and Saeys, Yvan", doi = "10.1093/BIOINFORMATICS/BTP630", @@ -92,7 +85,6 @@ bibentries = c( volume = "26", year = "2010" ), - pes2020 = bibentry("article", author = "Pes, Barbara", doi = "10.1007/s00521-019-04082-3", @@ -106,7 +98,6 @@ bibentries = c( volume = "32", year = "2020" ), - das1999 = bibentry("article", author = "Das, I", issn = "09344373", @@ -118,5 +109,31 @@ bibentries = c( title = "On characterizing the 'knee' of the Pareto curve based on normal-boundary intersection", volume = "18", year = "1999" + ), + meinshausen2010 = bibentry("article", + author = "Meinshausen, Nicolai and Buhlmann, Peter", + doi = "10.1111/J.1467-9868.2010.00740.X", + eprint = "0809.2932", + issn = "1369-7412", + journal = "Journal of the Royal Statistical Society Series B: Statistical Methodology", + month = "sep", + number = "4", + pages = "417--473", + publisher = "Oxford Academic", + title = "Stability Selection", + volume = "72", + year = "2010" + ), + hedou2024 = bibentry("article", + author = "Hedou, Julien and Maric, Ivana and Bellan, Gregoire and Einhaus, Jakob and Gaudilliere, Dyani K. and Ladant, Francois Xavier and Verdonk, Franck and Stelzer, Ina A. and Feyaerts, Dorien and Tsai, Amy S. and Ganio, Edward A. and Sabayev, Maximilian and Gillard, Joshua and Amar, Jonas and Cambriel, Amelie and Oskotsky, Tomiko T. and Roldan, Alennie and Golob, Jonathan L. and Sirota, Marina and Bonham, Thomas A. and Sato, Masaki and Diop, Maigane and Durand, Xavier and Angst, Martin S. and Stevenson, David K. 
and Aghaeepour, Nima and Montanari, Andrea and Gaudilliere, Brice", #nolint + doi = "10.1038/s41587-023-02033-x", + issn = "1546-1696", + journal = "Nature Biotechnology 2024", + month = "jan", + pages = "1--13", + publisher = "Nature Publishing Group", + title = "Discovery of sparse, reliable omic biomarkers with Stabl", + url = "https://www.nature.com/articles/s41587-023-02033-x", + year = "2024" ) ) diff --git a/R/embedded_ensemble_fselect.R b/R/embedded_ensemble_fselect.R new file mode 100644 index 00000000..8852d760 --- /dev/null +++ b/R/embedded_ensemble_fselect.R @@ -0,0 +1,112 @@ +#' @title Embedded Ensemble Feature Selection +#' +#' @include CallbackBatchFSelect.R +#' +#' @description +#' Ensemble feature selection using multiple learners. +#' The ensemble feature selection method is designed to identify the most predictive features from a given dataset by leveraging multiple machine learning models and resampling techniques. +#' Returns an [EnsembleFSResult]. +#' +#' @details +#' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset (train/test splits). +#' This resampling process helps in generating diverse subsets of data for robust feature selection. +#' +#' For each subsample (train set) generated in the previous step, the method applies learners +#' that support **embedded feature selection**. +#' These learners are then scored on their ability to predict on the resampled +#' test sets, storing the selected features during training, for each +#' combination of subsample and learner. +#' +#' Results are stored in an [EnsembleFSResult]. +#' +#' @param learners (list of [mlr3::Learner])\cr +#' The learners to be used for feature selection. +#' All learners must have the `selected_features` property, i.e. implement +#' embedded feature selection (e.g. regularized models). +#' @param init_resampling ([mlr3::Resampling])\cr +#' The initial resampling strategy of the data, from which each train set +#' will be passed on to the learners and each test set will be used for +#' prediction. +#' Can only be [mlr3::ResamplingSubsampling] or [mlr3::ResamplingBootstrap]. +#' @param measure ([mlr3::Measure])\cr +#' The measure used to score each learner on the test sets generated by +#' `init_resampling`. +#' If `NULL`, default measure is used. +#' @param store_benchmark_result (`logical(1)`)\cr +#' Whether to store the benchmark result in [EnsembleFSResult] or not. +#' +#' @template param_task +#' +#' @returns an [EnsembleFSResult] object. 
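For illustration, a small sketch (not part of the patch) of how one might check that candidate learners qualify, i.e. that they implement embedded feature selection, using the two learners from the example below:

library(mlr3)

lrn("classif.rpart")$properties  # contains "selected_features"

# screen a list of candidate learner ids for embedded feature selection support
ids = c("classif.rpart", "classif.featureless")
ids[sapply(lrns(ids), function(l) "selected_features" %in% l$properties)]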
+#' +#' @source +#' `r format_bib("meinshausen2010", "hedou2024")` +#' @export +#' @examples +#' \donttest{ +#' eefsr = embedded_ensemble_fselect( +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' init_resampling = rsmp("subsampling", repeats = 5), +#' measure = msr("classif.ce") +#' ) +#' eefsr +#' } +embedded_ensemble_fselect = function( + task, + learners, + init_resampling, + measure, + store_benchmark_result = TRUE + ) { + assert_task(task) + assert_learners(as_learners(learners), task = task, properties = "selected_features") + assert_resampling(init_resampling) + assert_choice(class(init_resampling)[1], choices = c("ResamplingBootstrap", "ResamplingSubsampling")) + assert_measure(measure, task = task) + assert_flag(store_benchmark_result) + + init_resampling$instantiate(task) + + design = benchmark_grid( + tasks = task, + learners = learners, + resamplings = init_resampling + ) + + bmr = benchmark(design, store_models = TRUE) + + trained_learners = bmr$score()$learner + + # extract selected features + features = map(trained_learners, function(learner) { + learner$selected_features() + }) + + # extract n_features + n_features = map_int(features, length) + + # extract scores on the test sets + scores = bmr$score(measure) + + set(scores, j = "features", value = features) + set(scores, j = "n_features", value = n_features) + setnames(scores, "iteration", "resampling_iteration") + + # remove R6 objects + set(scores, j = "learner", value = NULL) + set(scores, j = "task", value = NULL) + set(scores, j = "resampling", value = NULL) + set(scores, j = "prediction_test", value = NULL) + set(scores, j = "task_id", value = NULL) + set(scores, j = "nr", value = NULL) + set(scores, j = "resampling_id", value = NULL) + set(scores, j = "uhash", value = NULL) + + EnsembleFSResult$new( + result = scores, + features = task$feature_names, + benchmark_result = if (store_benchmark_result) bmr, + measure = measure + ) +} diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index ed1aceb3..12be4636 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -1,39 +1,55 @@ -#' @title Ensemble Feature Selection +#' @title Wrapper-based Ensemble Feature Selection #' #' @include CallbackBatchFSelect.R #' #' @description #' Ensemble feature selection using multiple learners. -#' The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +#' The ensemble feature selection method is designed to identify the most predictive features from a given dataset by leveraging multiple machine learning models and resampling techniques. #' Returns an [EnsembleFSResult]. #' #' @details -#' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset. +#' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset (train/test splits). #' This resampling process helps in generating diverse subsets of data for robust feature selection. #' -#' For each subsample generated in the previous step, the method performs **wrapped-based feature selection** ([auto_fselector]) using each provided learner, the given inner resampling method, performance measure and optimization algorithm. -#' This process generates the best feature subset for each combination of subsample and learner. 
+#' For each subsample (train set) generated in the previous step, the method performs **wrapper-based feature selection** ([auto_fselector]) using each provided learner, the given inner resampling method, inner performance measure and optimization algorithm.
+#' This process generates 1) the best feature subset and 2) a final trained model using these best features, for each combination of subsample and learner.
+#' The final models are then scored on their ability to predict on the resampled test sets.
+#'
 #' Results are stored in an [EnsembleFSResult].
 #'
+#' The result object also includes the performance scores calculated during the inner resampling of the training sets, using models with the best feature subsets.
+#' These scores are stored in a column named `{inner_measure$id}_inner`.
+#'
+#' @section Note:
+#'
+#' The **active measure** of performance is the one applied to the test sets.
+#' This is preferred, as inner resampling scores on the training sets are likely to be overestimated when using the final models.
+#' Users can change the active measure by using the `set_active_measure()` method of the [EnsembleFSResult].
+#'
 #' @param learners (list of [mlr3::Learner])\cr
 #' The learners to be used for feature selection.
 #' @param init_resampling ([mlr3::Resampling])\cr
 #' The initial resampling strategy of the data, from which each train set
-#' will be passed on to the learners.
+#' will be passed on to the [auto_fselector] to optimize the learners and
+#' perform feature selection.
+#' Each test set will be used for prediction on the final models returned by [auto_fselector].
 #' Can only be [mlr3::ResamplingSubsampling] or [mlr3::ResamplingBootstrap].
 #' @param inner_resampling ([mlr3::Resampling])\cr
 #' The inner resampling strategy used by the [FSelector].
+#' @param inner_measure ([mlr3::Measure])\cr
+#' The inner optimization measure used by the [FSelector].
+#' @param measure ([mlr3::Measure])\cr
+#' Measure used to score each trained learner on the test sets generated by `init_resampling`.
 #' @param store_benchmark_result (`logical(1)`)\cr
 #' Whether to store the benchmark result in [EnsembleFSResult] or not.
 #' @param store_models (`logical(1)`)\cr
 #' Whether to store models in [auto_fselector] or not.
-#' @param callbacks (list of lists of [CallbackBatchFSelect])\cr
+#' @param callbacks (Named list of lists of [CallbackBatchFSelect])\cr
 #' Callbacks to be used for each learner.
-#' The lists must have the same length as the number of learners.
+#' The lists must be named by the learner ids.
 #'
 #' @template param_fselector
 #' @template param_task
-#' @template param_measure
 #' @template param_terminator
 #'
 #' @returns an [EnsembleFSResult] object.
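For illustration, a short usage sketch of this interface (it mirrors the example below and assumes the rpart learner is installed); note the separate inner and outer measures and the resulting "classif.ce_inner" column:

library(mlr3)
library(mlr3fselect)

efsr = ensemble_fselect(
  fselector = fs("random_search"),
  task = tsk("sonar"),
  learners = lrns(c("classif.rpart", "classif.featureless")),
  init_resampling = rsmp("subsampling", repeats = 2),
  inner_resampling = rsmp("cv", folds = 3),
  inner_measure = msr("classif.ce"),  # optimized on the inner resampling of each train set
  measure = msr("classif.acc"),       # scored on the outer test sets
  terminator = trm("evals", n_evals = 5)
)

# outer scores ("classif.acc") and inner scores ("classif.ce_inner") side by side
efsr$result[, .(learner_id, n_features, classif.acc, classif.ce_inner)]

# switch methods such as $pareto_front() to the inner optimization measure
efsr$set_active_measure(which = "inner")
efsr$pareto_front()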
@@ -49,7 +65,8 @@ #' learners = lrns(c("classif.rpart", "classif.featureless")), #' init_resampling = rsmp("subsampling", repeats = 2), #' inner_resampling = rsmp("cv", folds = 3), -#' measure = msr("classif.ce"), +#' inner_measure = msr("classif.ce"), +#' measure = msr("classif.acc"), #' terminator = trm("evals", n_evals = 10) #' ) #' efsr @@ -60,49 +77,43 @@ ensemble_fselect = function( learners, init_resampling, inner_resampling, + inner_measure, measure, terminator, callbacks = NULL, store_benchmark_result = TRUE, - store_models = TRUE + store_models = FALSE ) { assert_task(task) assert_learners(as_learners(learners), task = task) assert_resampling(init_resampling) assert_choice(class(init_resampling)[1], choices = c("ResamplingBootstrap", "ResamplingSubsampling")) - assert_list(callbacks, types = "list", len = length(learners), null.ok = TRUE) + assert_resampling(inner_resampling) + assert_measure(inner_measure, task = task) + assert_measure(measure, task = task) + callbacks = map(callbacks, function(callbacks) assert_callbacks(as_callbacks(callbacks))) + if (length(callbacks)) assert_names(names(callbacks), subset.of = map_chr(learners, "id")) assert_flag(store_benchmark_result) + assert_flag(store_models) # create auto_fselector for each learner - afss = imap(unname(learners), function(learner, i) { + afss = map(learners, function(learner) { auto_fselector( fselector = fselector, learner = learner, resampling = inner_resampling, - measure = measure, + measure = inner_measure, terminator = terminator, store_models = store_models, - callbacks = callbacks[[i]] - ) - }) - - init_resampling$instantiate(task) - grid = map_dtr(seq(init_resampling$iters), function(i) { - - # create task and resampling for each outer iteration - task_subset = task$clone()$filter(init_resampling$train_set(i)) - resampling = rsmp("insample")$instantiate(task_subset) - - data.table( - resampling_iteration = i, - learner_id = map_chr(learners, "id"), - learner = afss, - task = list(task_subset), - resampling = list(resampling) + callbacks = callbacks[[learner$id]] ) }) - design = grid[, c("learner", "task", "resampling"), with = FALSE] + design = benchmark_grid( + tasks = task, + learners = afss, + resamplings = init_resampling + ) bmr = benchmark(design, store_models = TRUE) @@ -118,31 +129,42 @@ ensemble_fselect = function( afs$fselect_result$n_features[[1]] }) - # extract scores - scores = map_dbl(afss, function(afs) { - afs$fselect_instance$archive$best()[, measure$id, with = FALSE][[1]] + # extract inner scores + inner_scores = map_dbl(afss, function(afs) { + afs$fselect_instance$archive$best()[, inner_measure$id, with = FALSE][[1]] }) - set(grid, j = "features", value = features) - set(grid, j = "n_features", value = n_features) - set(grid, j = measure$id, value = scores) + # extract scores on the test sets + scores = bmr$score(measure) + + set(scores, j = "features", value = features) + set(scores, j = "n_features", value = n_features) + set(scores, j = sprintf("%s_inner", inner_measure$id), value = inner_scores) + setnames(scores, "iteration", "resampling_iteration") + + # remove R6 objects + set(scores, j = "learner", value = NULL) + set(scores, j = "task", value = NULL) + set(scores, j = "resampling", value = NULL) + set(scores, j = "prediction_test", value = NULL) + set(scores, j = "task_id", value = NULL) + set(scores, j = "nr", value = NULL) + set(scores, j = "resampling_id", value = NULL) + set(scores, j = "uhash", value = NULL) # extract importance scores if RFE optimization was used if 
(class(fselector)[1] == "FSelectorBatchRFE") { imp_scores = map(afss, function(afs) { afs$fselect_result$importance[[1]] }) - set(grid, j = "importance", value = imp_scores) + set(scores, j = "importance", value = imp_scores) } - set(grid, j = "learner", value = NULL) - set(grid, j = "task", value = NULL) - set(grid, j = "resampling", value = NULL) EnsembleFSResult$new( - result = grid, + result = scores, features = task$feature_names, benchmark_result = if (store_benchmark_result) bmr, - measure_id = measure$id, - minimize = measure$minimize + measure = measure, + inner_measure = inner_measure ) } diff --git a/man/embedded_ensemble_fselect.Rd b/man/embedded_ensemble_fselect.Rd new file mode 100644 index 00000000..0f580583 --- /dev/null +++ b/man/embedded_ensemble_fselect.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/embedded_ensemble_fselect.R +\name{embedded_ensemble_fselect} +\alias{embedded_ensemble_fselect} +\title{Embedded Ensemble Feature Selection} +\source{ +Meinshausen, Nicolai, Buhlmann, Peter (2010). +\dQuote{Stability Selection.} +\emph{Journal of the Royal Statistical Society Series B: Statistical Methodology}, \bold{72}(4), 417--473. +ISSN 1369-7412, \doi{10.1111/J.1467-9868.2010.00740.X}, 0809.2932. + +Hedou, Julien, Maric, Ivana, Bellan, Gregoire, Einhaus, Jakob, Gaudilliere, K. D, Ladant, Xavier F, Verdonk, Franck, Stelzer, A. I, Feyaerts, Dorien, Tsai, S. A, Ganio, A. E, Sabayev, Maximilian, Gillard, Joshua, Amar, Jonas, Cambriel, Amelie, Oskotsky, T. T, Roldan, Alennie, Golob, L. J, Sirota, Marina, Bonham, A. T, Sato, Masaki, Diop, Maigane, Durand, Xavier, Angst, S. M, Stevenson, K. D, Aghaeepour, Nima, Montanari, Andrea, Gaudilliere, Brice (2024). +\dQuote{Discovery of sparse, reliable omic biomarkers with Stabl.} +\emph{Nature Biotechnology 2024}, 1--13. +ISSN 1546-1696, \doi{10.1038/s41587-023-02033-x}, \url{https://www.nature.com/articles/s41587-023-02033-x}. +} +\usage{ +embedded_ensemble_fselect( + task, + learners, + init_resampling, + measure, + store_benchmark_result = TRUE +) +} +\arguments{ +\item{task}{(\link[mlr3:Task]{mlr3::Task})\cr +Task to operate on.} + +\item{learners}{(list of \link[mlr3:Learner]{mlr3::Learner})\cr +The learners to be used for feature selection. +All learners must have the \code{selected_features} property, i.e. implement +embedded feature selection (e.g. regularized models).} + +\item{init_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr +The initial resampling strategy of the data, from which each train set +will be passed on to the learners and each test set will be used for +prediction. +Can only be \link[mlr3:mlr_resamplings_subsampling]{mlr3::ResamplingSubsampling} or \link[mlr3:mlr_resamplings_bootstrap]{mlr3::ResamplingBootstrap}.} + +\item{measure}{(\link[mlr3:Measure]{mlr3::Measure})\cr +The measure used to score each learner on the test sets generated by +\code{init_resampling}. +If \code{NULL}, default measure is used.} + +\item{store_benchmark_result}{(\code{logical(1)})\cr +Whether to store the benchmark result in \link{EnsembleFSResult} or not.} +} +\value{ +an \link{EnsembleFSResult} object. +} +\description{ +Ensemble feature selection using multiple learners. +The ensemble feature selection method is designed to identify the most predictive features from a given dataset by leveraging multiple machine learning models and resampling techniques. +Returns an \link{EnsembleFSResult}. 
+} +\details{ +The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset (train/test splits). +This resampling process helps in generating diverse subsets of data for robust feature selection. + +For each subsample (train set) generated in the previous step, the method applies learners +that support \strong{embedded feature selection}. +These learners are then scored on their ability to predict on the resampled +test sets, storing the selected features during training, for each +combination of subsample and learner. + +Results are stored in an \link{EnsembleFSResult}. +} +\examples{ +\donttest{ + eefsr = embedded_ensemble_fselect( + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 5), + measure = msr("classif.ce") + ) + eefsr +} +} diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index 56a0fe37..dd0239a3 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -7,7 +7,8 @@ \description{ The \code{EnsembleFSResult} stores the results of ensemble feature selection. It includes methods for evaluating the stability of the feature selection process and for ranking the selected features among others. -The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. + +Both functions \code{\link[=ensemble_fselect]{ensemble_fselect()}} and \code{\link[=embedded_ensemble_fselect]{embedded_ensemble_fselect()}} return an object of this class. } \section{S3 Methods}{ @@ -31,7 +32,8 @@ Whether to add the learner, task and resampling information from the benchmark r learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("none") ) @@ -47,7 +49,16 @@ Whether to add the learner, task and resampling information from the benchmark r # returns a ranking of all features head(efsr$feature_ranking()) - # returns the empirical pareto front (nfeatures vs error) + # returns the empirical pareto front, i.e. n_features vs measure (error) + efsr$pareto_front() + + # returns the knee points (optimal trade-off between n_features and performance) + efsr$knee_points() + + # change to use the inner optimization measure + efsr$set_active_measure(which = "inner") + + # Pareto front is calculated on the inner measure efsr$pareto_front() } } @@ -56,6 +67,11 @@ Das, I (1999). \dQuote{On characterizing the 'knee' of the Pareto curve based on normal-boundary intersection.} \emph{Structural Optimization}, \bold{18}(1-2), 107--115. ISSN 09344373. + +Meinshausen, Nicolai, Buhlmann, Peter (2010). +\dQuote{Stability Selection.} +\emph{Journal of the Royal Statistical Society Series B: Statistical Methodology}, \bold{72}(4), 417--473. +ISSN 1369-7412, \doi{10.1111/J.1467-9868.2010.00740.X}, 0809.2932. } \section{Public fields}{ \if{html}{\out{
}} @@ -77,8 +93,21 @@ Returns the result of the ensemble feature selection.} \item{\code{n_learners}}{(\code{numeric(1)})\cr Returns the number of learners used in the ensemble feature selection.} -\item{\code{measure}}{(\code{character(1)})\cr -Returns the measure id used in the ensemble feature selection.} +\item{\code{measure}}{(\link[mlr3:Measure]{mlr3::Measure})\cr +Returns the 'active' measure that is used in methods of this object.} + +\item{\code{active_measure}}{(\code{character(1)})\cr +Indicates the type of the active performance measure. + +During the ensemble feature selection process, the dataset is split into \strong{multiple subsamples} (train/test splits) using an initial resampling scheme. +So, performance can be evaluated using one of two measures: +\itemize{ +\item \code{"outer"}: measure used to evaluate the performance on the test sets. +\item \code{"inner"}: measure used for optimization and to compute performance during inner resampling on the training sets. +}} + +\item{\code{n_resamples}}{(\code{character(1)})\cr +Returns the number of times the task was initially resampled in the ensemble feature selection process.} } \if{html}{\out{
}} } @@ -89,6 +118,7 @@ Returns the measure id used in the ensemble feature selection.} \item \href{#method-EnsembleFSResult-format}{\code{EnsembleFSResult$format()}} \item \href{#method-EnsembleFSResult-print}{\code{EnsembleFSResult$print()}} \item \href{#method-EnsembleFSResult-help}{\code{EnsembleFSResult$help()}} +\item \href{#method-EnsembleFSResult-set_active_measure}{\code{EnsembleFSResult$set_active_measure()}} \item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}} \item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}} \item \href{#method-EnsembleFSResult-pareto_front}{\code{EnsembleFSResult$pareto_front()}} @@ -106,8 +136,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. result, features, benchmark_result = NULL, - measure_id, - minimize = TRUE + measure, + inner_measure = NULL )}\if{html}{\out{}} } @@ -116,8 +146,13 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \describe{ \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr The result of the ensemble feature selection. -Column names should include \code{"resampling_iteration"}, \code{"learner_id"}, \code{"features"} -and \code{"n_features"}.} +Mandatory column names should include \code{"resampling_iteration"}, \code{"learner_id"}, +\code{"features"} and \code{"n_features"}. +A column named as \code{{measure$id}} (scores on the test sets) must also be +always present. +The column with the performance scores on the inner resampling of the train sets is not mandatory, +but note that it should be named as \verb{\{inner_measure$id\}_inner} to distinguish from +the \code{{measure$id}}.} \item{\code{features}}{(\code{\link[=character]{character()}})\cr The vector of features of the task that was used in the ensemble feature @@ -126,11 +161,14 @@ selection.} \item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr The benchmark result object.} -\item{\code{measure_id}}{(\code{character(1)})\cr -Column name of \code{"result"} that corresponds to the measure used.} +\item{\code{measure}}{(\link[mlr3:Measure]{mlr3::Measure})\cr +The performance measure used to evaluate the learners on the test sets generated +during the ensemble feature selection process. +By default, this serves as the 'active' measure for the methods of this object. +The active measure can be updated using the \verb{$set_active_measure()} method.} -\item{\code{minimize}}{(\code{logical(1)})\cr -If \code{TRUE} (default), lower values of the measure correspond to higher performance.} +\item{\code{inner_measure}}{(\link[mlr3:Measure]{mlr3::Measure})\cr +The performance measure used to optimize and evaluate the learners during the inner resampling process of the training sets, generated as part of the ensemble feature selection procedure.} } \if{html}{\out{}} } @@ -178,32 +216,92 @@ Opens the corresponding help page referenced by field \verb{$man}. \if{html}{\out{
}}\preformatted{EnsembleFSResult$help()}\if{html}{\out{
}} } +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-set_active_measure}{}}} +\subsection{Method \code{set_active_measure()}}{ +Use this function to change the active measure. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{EnsembleFSResult$set_active_measure(which = "outer")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{which}}{(\code{character(1)})\cr +Which \link[mlr3:Measure]{measure} from the ensemble feature selection result +to use in methods of this object. +Should be either \code{"inner"} (optimization measure used in training sets) +or \code{"outer"} (measure used in test sets, default value).} +} +\if{html}{\out{
}} +} } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}} \subsection{Method \code{feature_ranking()}}{ -Calculates the feature ranking. +Calculates the feature ranking via \code{\link[fastVoteR:rank_candidates]{fastVoteR::rank_candidates()}}. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{EnsembleFSResult$feature_ranking(method = "approval_voting")}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{EnsembleFSResult$feature_ranking( + method = "av", + use_weights = TRUE, + committee_size = NULL, + shuffle_features = TRUE +)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{method}}{(\code{character(1)})\cr -The method to calculate the feature ranking.} +The method to calculate the feature ranking. See \code{\link[fastVoteR:rank_candidates]{fastVoteR::rank_candidates()}} +for a complete list of available methods. +Approval voting (\code{"av"}) is the default method.} + +\item{\code{use_weights}}{(\code{logical(1)})\cr +The default value (\code{TRUE}) uses weights equal to the performance scores +of each voter/model (or the inverse scores if the measure is minimized). +If \code{FALSE}, we treat all voters as equal and assign them all a weight equal to 1.} + +\item{\code{committee_size}}{(\code{integer(1)})\cr +Number of top selected features in the output ranking. +This parameter can be used to speed-up methods that build a committee sequentially +(\code{"seq_pav"}), by requesting only the top N selected candidates/features +and not the complete feature ranking.} + +\item{\code{shuffle_features}}{(\code{logical(1)})\cr +Whether to shuffle the task features randomly before computing the ranking. +Shuffling ensures consistent random tie-breaking across methods and prevents +deterministic biases when features with equal scores are encountered. +Default is \code{TRUE} and it's advised to set a seed before running this function. +Set to \code{FALSE} if deterministic ordering of features is preferred (same as +during initialization).} } \if{html}{\out{
}} } \subsection{Details}{ -The feature ranking process is built on the following framework: models act as voters, features act as candidates, and voters select certain candidates (features). +The feature ranking process is built on the following framework: models act as \emph{voters}, features act as \emph{candidates}, and voters select certain candidates (features). The primary objective is to compile these selections into a consensus ranked list of features, effectively forming a committee. -Currently, only \code{"approval_voting"} method is supported, which selects the candidates/features that have the highest approval score or selection frequency, i.e. appear the most often. + +For every feature a score is calculated, which depends on the \code{"method"} argument. +The higher the score, the higher the ranking of the feature. +Note that some methods output a feature ranking instead of a score per feature, so we always include \strong{Borda's score}, which is method-agnostic, i.e. it can be used to compare the feature rankings across different methods. + +We shuffle the input candidates/features so that we enforce random tie-breaking. +Users should set the same \code{seed} for consistent comparison between the different feature ranking methods and for reproducibility. } \subsection{Returns}{ -A \link[data.table:data.table]{data.table::data.table} listing all the features, ordered by decreasing inclusion probability scores (depending on the \code{method}) +A \link[data.table:data.table]{data.table::data.table} listing all the features, ordered by decreasing scores (depends on the \code{"method"}). Columns are as follows: +\itemize{ +\item \code{"feature"}: Feature names. +\item \code{"score"}: Scores assigned to each feature based on the selected method (if applicable). +\item \code{"norm_score"}: Normalized scores (if applicable), scaled to the range \eqn{[0,1]}, which can be loosely interpreted as \strong{selection probabilities} (Meinshausen et al. (2010)). +\item \code{"borda_score"}: Borda scores for method-agnostic comparison, ranging in \eqn{[0,1]}, where the top feature receives a score of 1 and the lowest-ranked feature receives a score of 0. +This column is always included so that feature ranking methods that output only rankings have also a feature-wise score. +} } } \if{html}{\out{
}} diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index c83a22e4..baa5ead5 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ensemble_fselect.R \name{ensemble_fselect} \alias{ensemble_fselect} -\title{Ensemble Feature Selection} +\title{Wrapper-based Ensemble Feature Selection} \source{ Saeys, Yvan, Abeel, Thomas, Van De Peer, Yves (2008). \dQuote{Robust feature selection using ensemble feature selection techniques.} @@ -26,11 +26,12 @@ ensemble_fselect( learners, init_resampling, inner_resampling, + inner_measure, measure, terminator, callbacks = NULL, store_benchmark_result = TRUE, - store_models = TRUE + store_models = FALSE ) } \arguments{ @@ -45,21 +46,26 @@ The learners to be used for feature selection.} \item{init_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr The initial resampling strategy of the data, from which each train set -will be passed on to the learners. +will be passed on to the \link{auto_fselector} to optimize the learners and +perform feature selection. +Each test set will be used for prediction on the final models returned by \link{auto_fselector}. Can only be \link[mlr3:mlr_resamplings_subsampling]{mlr3::ResamplingSubsampling} or \link[mlr3:mlr_resamplings_bootstrap]{mlr3::ResamplingBootstrap}.} \item{inner_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr The inner resampling strategy used by the \link{FSelector}.} +\item{inner_measure}{(\link[mlr3:Measure]{mlr3::Measure})\cr +The inner optimization measure used by the \link{FSelector}.} + \item{measure}{(\link[mlr3:Measure]{mlr3::Measure})\cr -Measure to optimize. If \code{NULL}, default measure is used.} +Measure used to score each trained learner on the test sets generated by \code{init_resampling}.} \item{terminator}{(\link[bbotk:Terminator]{bbotk::Terminator})\cr Stop criterion of the feature selection.} -\item{callbacks}{(list of lists of \link{CallbackBatchFSelect})\cr +\item{callbacks}{(Named list of lists of \link{CallbackBatchFSelect})\cr Callbacks to be used for each learner. -The lists must have the same length as the number of learners.} +The lists must be named by the learner ids.} \item{store_benchmark_result}{(\code{logical(1)})\cr Whether to store the benchmark result in \link{EnsembleFSResult} or not.} @@ -72,17 +78,30 @@ an \link{EnsembleFSResult} object. } \description{ Ensemble feature selection using multiple learners. -The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +The ensemble feature selection method is designed to identify the most predictive features from a given dataset by leveraging multiple machine learning models and resampling techniques. Returns an \link{EnsembleFSResult}. } \details{ -The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset. +The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset (train/test splits). This resampling process helps in generating diverse subsets of data for robust feature selection. -For each subsample generated in the previous step, the method performs \strong{wrapped-based feature selection} (\link{auto_fselector}) using each provided learner, the given inner resampling method, performance measure and optimization algorithm. 
-This process generates the best feature subset for each combination of subsample and learner. +For each subsample (train set) generated in the previous step, the method performs \strong{wrapped-based feature selection} (\link{auto_fselector}) using each provided learner, the given inner resampling method, inner performance measure and optimization algorithm. +This process generates 1) the best feature subset and 2) a final trained model using these best features, for each combination of subsample and learner. +The final models are then scored on their ability to predict on the resampled test sets. + Results are stored in an \link{EnsembleFSResult}. + +The result object also includes the performance scores calculated during the inner resampling of the training sets, using models with the best feature subsets. +These scores are stored in a column named \verb{\{measure_id\}_inner}. } +\section{Note}{ + + +The \strong{active measure} of performance is the one applied to the test sets. +This is preferred, as inner resampling scores on the training sets are likely to be overestimated when using the final models. +Users can change the active measure by using the \code{set_active_measure()} method of the \link{EnsembleFSResult}. +} + \examples{ \donttest{ efsr = ensemble_fselect( @@ -91,7 +110,8 @@ Results are stored in an \link{EnsembleFSResult}. learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("evals", n_evals = 10) ) efsr diff --git a/tests/testthat/test_embedded_ensemble_fselect.R b/tests/testthat/test_embedded_ensemble_fselect.R new file mode 100644 index 00000000..eb8b5e7b --- /dev/null +++ b/tests/testthat/test_embedded_ensemble_fselect.R @@ -0,0 +1,64 @@ +test_that("embedded efs works", { + task = tsk("sonar") + with_seed(42, { + efsr = embedded_ensemble_fselect( + task = task, + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 5), + measure = msr("classif.ce") + ) + }) + + expect_character(efsr$man) + expect_data_table(efsr$result, nrows = 10) + expect_list(efsr$result$features, any.missing = FALSE, len = 10) + expect_numeric(efsr$result$n_features, len = 10) + expect_numeric(efsr$result$classif.ce, len = 10) + expect_benchmark_result(efsr$benchmark_result) + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.ce") + expect_true(efsr$measure$minimize) # classification error + expect_equal(efsr$n_learners, 2) + expect_equal(efsr$n_resamples, 5) + + # stability + expect_number(efsr$stability(stability_measure = "jaccard", stability_args = list(impute.na = 0))) + expect_error(efsr$stability(stability_args = list(20)), "have names") + stability = efsr$stability(stability_measure = "jaccard", stability_args = list(impute.na = 0), global = FALSE) + expect_numeric(stability, len = 2) + expect_equal(names(stability), c("classif.rpart", "classif.featureless")) + + # pareto_front + pf = efsr$pareto_front() + expect_data_table(pf, nrows = 7) + expect_equal(names(pf), c("n_features", "classif.ce")) + pf_pred = efsr$pareto_front(type = "estimated") + expect_data_table(pf_pred, nrows = max(efsr$result$n_features)) + expect_equal(names(pf_pred), c("n_features", "classif.ce")) + + # knee_points + kps = efsr$knee_points() + expect_data_table(kps, nrows = 1) + expect_equal(names(kps), c("n_features", 
"classif.ce")) + kpse = efsr$knee_points(type = "estimated") + expect_data_table(kpse, nrows = 1) + expect_true(kps$n_features != kpse$n_features) + + # data.table conversion + tab = as.data.table(efsr) + expect_equal(names(tab), c("learner_id", "resampling_iteration", "classif.ce", + "features", "n_features", + "task", "learner", "resampling")) + + # cannot change to use inner_measure + expect_error(efsr$set_active_measure(which = "inner"), "No inner_measure was defined") + # changing to "outer" leaves us with the same measure + efsr$set_active_measure(which = "outer") + expect_equal(efsr$measure$id, "classif.ce") # classification error + + # default feature ranking + skip_if_not_installed("fastVoteR") + feature_ranking = efsr$feature_ranking() + expect_data_table(feature_ranking, nrows = length(task$feature_names)) + expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score")) +}) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 19bd1240..edc9d975 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,4 +1,4 @@ -test_that("ensemble feature selection works", { +test_that("efs works", { task = tsk("sonar") with_seed(42, { efsr = ensemble_fselect( @@ -7,6 +7,7 @@ test_that("ensemble feature selection works", { learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), + inner_measure = msr("classif.ce"), measure = msr("classif.ce"), terminator = trm("evals", n_evals = 5) ) @@ -17,25 +18,25 @@ test_that("ensemble feature selection works", { expect_list(efsr$result$features, any.missing = FALSE, len = 4) expect_numeric(efsr$result$n_features, len = 4) expect_numeric(efsr$result$classif.ce, len = 4) + expect_numeric(efsr$result$classif.ce_inner, len = 4) expect_benchmark_result(efsr$benchmark_result) - expect_equal(efsr$measure, "classif.ce") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.ce") + expect_true(efsr$measure$minimize) # classification error + expect_equal(efsr$active_measure, "outer") expect_equal(efsr$n_learners, 2) + expect_equal(efsr$n_resamples, 2) # stability expect_number(efsr$stability(stability_measure = "jaccard")) expect_error(efsr$stability(stability_args = list(20)), "have names") stability = efsr$stability(stability_measure = "jaccard", global = FALSE) expect_numeric(stability, len = 2) - expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) - - # feature ranking - feature_ranking = efsr$feature_ranking() - expect_data_table(feature_ranking, nrows = length(task$feature_names)) - expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + expect_equal(names(stability), c("classif.rpart.fselector", "classif.featureless.fselector")) # pareto_front pf = efsr$pareto_front() - expect_data_table(pf, nrows = 3) + expect_data_table(pf, nrows = 2) expect_equal(names(pf), c("n_features", "classif.ce")) pf_pred = efsr$pareto_front(type = "estimated") expect_data_table(pf_pred, nrows = max(efsr$result$n_features)) @@ -51,69 +52,49 @@ test_that("ensemble feature selection works", { # data.table conversion tab = as.data.table(efsr) - expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "task", "learner", "resampling")) -}) - -test_that("ensemble feature selection works without benchmark result", { - task = 
tsk("sonar") - with_seed(42, { - efsr = ensemble_fselect( - fselector = fs("random_search"), - task = task, - learners = lrns(c("classif.rpart", "classif.featureless")), - init_resampling = rsmp("subsampling", repeats = 2), - inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 3), - store_benchmark_result = FALSE - ) - }) - - expect_character(efsr$man) - expect_data_table(efsr$result, nrows = 4) - expect_list(efsr$result$features, any.missing = FALSE, len = 4) - expect_numeric(efsr$result$n_features, len = 4) - expect_numeric(efsr$result$classif.ce, len = 4) - expect_null(efsr$benchmark_result) - expect_equal(efsr$measure, "classif.ce") - expect_equal(efsr$n_learners, 2) - - # stability - expect_number(efsr$stability(stability_measure = "jaccard")) - stability = efsr$stability(stability_measure = "jaccard", global = FALSE) - expect_numeric(stability, len = 2) - expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) - - # feature ranking + expect_equal(names(tab), c("learner_id", "resampling_iteration", "classif.ce", + "features", "n_features", "classif.ce_inner", + "task", "learner", "resampling")) + # scores on train and test sets are different (even though same measure used) + assert_true(all(tab$classif.ce != tab$classif.ce_inner)) + + # change to use inner measure + expect_error(efsr$set_active_measure(which = "XYZ"), regexp = "Must be element") + efsr$set_active_measure(which = "inner") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.ce") # classification error also used for inner measure + expect_equal(efsr$active_measure, "inner") + pf_inner = efsr$pareto_front() + expect_data_table(pf_inner, nrows = 3) # pareto front has changed + expect_equal(names(pf_inner), c("n_features", "classif.ce_inner")) + kps_inner = efsr$knee_points() + expect_data_table(kps_inner, nrows = 1) + # inner id to distinguish from outer measure + expect_equal(names(kps_inner), c("n_features", "classif.ce_inner")) + # change to use outer measure again + efsr$set_active_measure(which = "outer") + expect_equal(efsr$active_measure, "outer") + pf_outer = efsr$pareto_front() + expect_equal(pf_outer, pf) # same measure, same pareto front + + # default feature ranking + skip_if_not_installed("fastVoteR") feature_ranking = efsr$feature_ranking() expect_data_table(feature_ranking, nrows = length(task$feature_names)) - expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) - - # pareto_front - pf = efsr$pareto_front() - expect_data_table(pf, nrows = 3) - expect_equal(names(pf), c("n_features", "classif.ce")) - - # knee_points - kps = efsr$knee_points() - expect_data_table(kps, nrows = 1) - expect_equal(names(kps), c("n_features", "classif.ce")) - - # data.table conversion - tab = as.data.table(efsr) - expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce")) + expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score")) }) -test_that("ensemble feature selection works with rfe", { +test_that("efs works with rfe", { task = tsk("sonar") with_seed(42, { efsr = ensemble_fselect( - fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + fselector = fs("rfe", subset_sizes = c(60, 20, 10, 5)), task = task, learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = 
msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("none") ) }) @@ -122,45 +103,69 @@ test_that("ensemble feature selection works with rfe", { expect_data_table(efsr$result, nrows = 4) expect_list(efsr$result$features, any.missing = FALSE, len = 4) expect_numeric(efsr$result$n_features, len = 4) - expect_numeric(efsr$result$classif.ce, len = 4) + expect_numeric(efsr$result$classif.ce_inner, len = 4) + expect_numeric(efsr$result$classif.acc, len = 4) expect_list(efsr$result$importance, any.missing = FALSE, len = 4) expect_benchmark_result(efsr$benchmark_result) - expect_equal(efsr$measure, "classif.ce") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.acc") + expect_false(efsr$measure$minimize) # accuracy + expect_equal(efsr$active_measure, "outer") expect_equal(efsr$n_learners, 2) + expect_equal(efsr$n_resamples, 2) + + # change active measure + efsr$set_active_measure(which = "inner") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.ce") # no `_inner` end-fix here + expect_true(efsr$measure$minimize) # classification error + expect_equal(efsr$active_measure, "inner") # stability expect_number(efsr$stability(stability_measure = "jaccard")) stability = efsr$stability(stability_measure = "jaccard", global = FALSE) expect_numeric(stability, len = 2) - expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) - - # feature ranking - feature_ranking = efsr$feature_ranking() - expect_data_table(feature_ranking, nrows = length(task$feature_names)) - expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + expect_equal(names(stability), c("classif.rpart.fselector", "classif.featureless.fselector")) # pareto_front pf = efsr$pareto_front() expect_data_table(pf, nrows = 4) - expect_equal(names(pf), c("n_features", "classif.ce")) + expect_equal(names(pf), c("n_features", "classif.ce_inner")) pf_pred = efsr$pareto_front(type = "estimated") expect_data_table(pf_pred, nrows = max(efsr$result$n_features)) - expect_equal(names(pf_pred), c("n_features", "classif.ce")) + expect_equal(names(pf_pred), c("n_features", "classif.ce_inner")) # knee_points kps = efsr$knee_points(type = "estimated") expect_data_table(kps, nrows = 1) - expect_equal(names(kps), c("n_features", "classif.ce")) + expect_equal(names(kps), c("n_features", "classif.ce_inner")) + + # change measure back to "outer" + efsr$set_active_measure(which = "outer") + expect_equal(efsr$active_measure, "outer") + pf_outer = efsr$pareto_front() # pareto front has used the accuracy measure + expect_equal(names(pf_outer), c("n_features", "classif.acc")) # data.table conversion tab = as.data.table(efsr) - expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "importance", "task", "learner", "resampling")) + expect_equal(names(tab), c("learner_id", "resampling_iteration", "classif.acc", + "features", "n_features", "classif.ce_inner", + "importance", "task", "learner", "resampling")) + + # default feature ranking + skip_if_not_installed("fastVoteR") + feature_ranking = efsr$feature_ranking() + expect_data_table(feature_ranking, nrows = length(task$feature_names)) + expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score")) }) test_that("EnsembleFSResult initialization", { result = data.table(a = 1, b = 3) - expect_error(EnsembleFSResult$new(result = result, features = LETTERS, measure_id = "a"), 
"missing elements") + # `result` doesn't have mandatory columns + expect_error(EnsembleFSResult$new(result = result, features = LETTERS, + measure = msr("classif.ce")), "is missing") + errors = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16) result = data.table( resampling_iteration = c(1, 1, 1, 2, 2, 2, 3, 3, 3), learner_id = rep(c("classif.xgboost", "classif.rpart", "classif.ranger"), 3), @@ -175,17 +180,43 @@ test_that("EnsembleFSResult initialization", { c("V2"), c("V4", "V12"), c("V6", "V15", "V19", "V7")), - classif.ce = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16) + classif.ce = errors, + classif.acc_inner = 1 - errors # inner measure has the `_inner` end-fix ) + # a feature set includes "V20" which is not included in input "features" + expect_error(EnsembleFSResult$new(result = result, features = paste0("V", 1:19), measure = msr("classif.ce")), "Must be a subset of") + # `inner_measure` is not a `Measure` + expect_error(EnsembleFSResult$new(result = result, features = paste0("V", 1:20), + measure = msr("classif.ce"), inner_measure = "measure")) + # `inner_measure` id is not a column name in the `result` + expect_error(EnsembleFSResult$new(result = result, features = paste0("V", 1:20), + measure = msr("classif.ce"), inner_measure = msr("classif.ce"))) + # both `inner_measure` and `measure` ids are missing from the `result`'s column names + expect_error(EnsembleFSResult$new(result = result, features = paste0("V", 1:20), + measure = msr("classif.acc"), inner_measure = msr("classif.ce"))) + # works without benchmark result object - efsr = EnsembleFSResult$new(result = result, features = paste0("V", 1:20), measure_id = "classif.ce") + efsr = EnsembleFSResult$new(result = result, features = paste0("V", 1:20), + measure = msr("classif.ce"), inner_measure = msr("classif.acc")) expect_class(efsr, "EnsembleFSResult") expect_equal(efsr$n_learners, 3) + expect_equal(efsr$n_resamples, 3) + expect_equal(efsr$.__enclos_env__$private$.active_measure, "outer") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.ce") + expect_true(efsr$measure$minimize) + expect_equal(efsr$active_measure, "outer") tab = as.data.table(efsr) expect_data_table(tab) - expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", - "n_features", "features", "classif.ce")) + expect_equal(names(tab), c("resampling_iteration", "learner_id", "n_features", + "features", "classif.ce", "classif.acc_inner")) + # change active measure + efsr$set_active_measure(which = "inner") + expect_equal(efsr$active_measure, "inner") + expect_measure(efsr$measure) + expect_equal(efsr$measure$id, "classif.acc") + expect_false(efsr$measure$minimize) }) test_that("different callbacks can be set", { @@ -196,17 +227,17 @@ test_that("different callbacks can be set", { ) efsr = ensemble_fselect( - # 4-5 evaluations on sonar - fselector = fs("rfe", n_features = 25, feature_fraction = 0.8), + fselector = fs("rfe", subset_sizes = c(60, 20, 10, 5)), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), + inner_measure = msr("classif.ce"), + measure = msr("classif.acc"), terminator = trm("none"), - callbacks = list(list(callback_test), list()) + callbacks = list("classif.rpart" = callback_test) ) expect_true(all(efsr$benchmark_result$score()$learner[[1]]$fselect_instance$archive$data$callback_active)) - 
+  expect_null(efsr$benchmark_result$score()$learner[[3]]$fselect_instance$archive$data$callback_active)
 })
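
A minimal usage sketch of the refactored API exercised by the tests above (a sketch only: learner, measure and terminator choices mirror the diff; it assumes the rpart and fastVoteR packages are installed, and the exact result shapes depend on the seed and data):

library(mlr3)
library(mlr3fselect)

efsr = ensemble_fselect(
  fselector = fs("rfe", subset_sizes = c(60, 20, 10, 5)),
  task = tsk("sonar"),
  learners = lrns(c("classif.rpart", "classif.featureless")),
  init_resampling = rsmp("subsampling", repeats = 2),
  inner_resampling = rsmp("cv", folds = 3),
  inner_measure = msr("classif.ce"), # optimized on the inner CV of the train sets
  measure = msr("classif.acc"),      # scored on the outer test sets (active by default)
  terminator = trm("none")
)

efsr$pareto_front()   # n_features vs classif.acc
efsr$knee_points()    # trade-off point(s) on that front

efsr$set_active_measure(which = "inner")
efsr$pareto_front()   # now n_features vs classif.ce_inner

efsr$feature_ranking() # feature, score, norm_score, borda_score (via fastVoteR)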