diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index 03a9123..92b3bab 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -2,6 +2,12 @@
 CHANGELOG
 =========

+-------------------------------------------------------------------------------
+March, 23, 2021 1.0.1
+-------------------------------------------------------------------------------
+
+- Add cross-validation (cv) capability to benchmark function.
+
 -------------------------------------------------------------------------------
 February, 1, 2021 1.0.0
 -------------------------------------------------------------------------------
diff --git a/README.md b/README.md
index 2b68d52..f677928 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ The library provides:

 * Automated task detection. No need to know what feature selection method works with what machine learning task

-* Benchmarking with multiple selectors
+* Benchmarking multiple selectors using cross-validation

 * Inspection of results and feature importance

@@ -91,7 +91,7 @@ selectors = {
 }

 # Benchmark
-score_df, selected_df, runtime_df = benchmark(selectors, data, label)
+score_df, selected_df, runtime_df = benchmark(selectors, data, label, cv=5)
 print(score_df, "\n\n", selected_df, "\n\n", runtime_df)

 # Get benchmark statistics by feature
@@ -125,6 +125,18 @@ plot_importance(df)

 Selective is available to install as `pip install selective`.

+## Source
+
+Alternatively, you can build a wheel package on your platform from scratch using the source code:
+
+```bash
+git clone https://github.com/fidelity/selective.git
+cd selective
+pip install setuptools wheel # if wheel is not installed
+python setup.py sdist bdist_wheel
+pip install dist/selective-X.X.X-py3-none-any.whl
+```
+
 ## Support

 Please submit bug reports and feature requests as [Issues](https://github.com/fidelity/selective/issues).
diff --git a/dist/selective-1.0.0-py3-none-any.whl b/dist/selective-1.0.0-py3-none-any.whl
deleted file mode 100644
index 9a653f5..0000000
Binary files a/dist/selective-1.0.0-py3-none-any.whl and /dev/null differ
diff --git a/feature/_version.py b/feature/_version.py
index 76221c0..6670214 100644
--- a/feature/_version.py
+++ b/feature/_version.py
@@ -2,4 +2,4 @@
 # Copyright FMR LLC
 # SPDX-License-Identifier: GNU GPLv3

-__version__ = "1.0.0"
+__version__ = "1.0.1"
diff --git a/feature/selector.py b/feature/selector.py
index 59510d6..2edf22b 100644
--- a/feature/selector.py
+++ b/feature/selector.py
@@ -22,6 +22,7 @@
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.ensemble import ExtraTreesClassifier, ExtraTreesRegressor
 from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
+from sklearn.model_selection import KFold
 from xgboost import XGBClassifier, XGBRegressor

 from feature.base import _BaseDispatcher, _BaseSupervisedSelector, _BaseUnsupervisedSelector
@@ -475,9 +476,11 @@ def benchmark(selectors: Dict[str, Union[SelectionMethod.Correlation,
                                          SelectionMethod.Variance]],
               data: pd.DataFrame,
               labels: Optional[pd.Series] = None,
+              cv: Optional[int] = None,
               output_filename: Optional[str] = None,
               drop_zero_variance_features: Optional[bool] = True,
-              verbose: bool = False) \
+              verbose: bool = False,
+              seed: int = Constants.default_seed) \
         -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
     """
     Benchmark with a given set of feature selectors.
@@ -495,6 +498,8 @@ def benchmark(selectors: Dict[str, Union[SelectionMethod.Correlation,
         Data of shape (n_samples, n_features) used for feature selection.
     labels: pd.Series, optional (default=None)
         The target values (class labels in classification, real numbers in regression).
+    cv: int, optional (default=None)
+        Number of folds to use for cross-validation.
     output_filename: str, optional (default=None)
         If not None, benchmarking output is saved.
         If file exists, results are appended, otherwise file is created.
@@ -502,6 +507,81 @@ def benchmark(selectors: Dict[str, Union[SelectionMethod.Correlation,
         Whether to drop features with zero variance before running feature selector methods or not.
     verbose: bool, optional (default=False)
         Whether to print progress messages or not.
+    seed: int, optional (default=Constants.default_seed)
+        The random seed to initialize the random number generator.
+
+    Returns
+    -------
+    Tuple of data frames with scores, selected features and runtime for each method.
+    If cv is not None, the data frames will contain the concatenated results from each fold.
+    """
+
+    check_true(selectors is not None, ValueError("Benchmark selectors cannot be none."))
+    check_true(data is not None, ValueError("Benchmark data cannot be none."))
+
+    if cv is None:
+        return _bench(selectors=selectors,
+                      data=data,
+                      labels=labels,
+                      output_filename=output_filename,
+                      drop_zero_variance_features=drop_zero_variance_features,
+                      verbose=verbose)
+    else:
+
+        # Create K-Fold object
+        kf = KFold(n_splits=cv, shuffle=True, random_state=seed)
+
+        # Initialize variables
+        t0 = time()
+        train_labels, test_labels = None, None
+        score_df, selected_df, runtime_df = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
+
+        # Split data into cv-folds and run _bench for each fold
+        if verbose:
+            print("\n>>> Running")
+        for fold, (train_index, _) in enumerate(kf.split(data)):
+
+            if verbose:
+                print("\tFold", fold, "...")
+
+            # Split data, labels into folds
+            train_data = data.iloc[train_index]
+            if labels is not None:
+                train_labels = labels.iloc[train_index]
+
+            # Run benchmark
+            score_cv_df, selected_cv_df, runtime_cv_df = _bench(selectors=selectors,
+                                                                data=train_data,
+                                                                labels=train_labels,
+                                                                output_filename=output_filename,
+                                                                drop_zero_variance_features=drop_zero_variance_features,
+                                                                verbose=False)
+
+            # Concatenate data frames
+            score_df = pd.concat((score_df, score_cv_df))
+            selected_df = pd.concat((selected_df, selected_cv_df))
+            runtime_df = pd.concat((runtime_df, runtime_cv_df))
+
+        if verbose:
+            print(f"<<< Done! Time taken: {(time() - t0) / 60:.2f} minutes")
+
+        return score_df, selected_df, runtime_df
+
+
+def _bench(selectors: Dict[str, Union[SelectionMethod.Correlation,
+                                      SelectionMethod.Linear,
+                                      SelectionMethod.TreeBased,
+                                      SelectionMethod.Statistical,
+                                      SelectionMethod.Variance]],
+           data: pd.DataFrame,
+           labels: Optional[pd.Series] = None,
+           output_filename: Optional[str] = None,
+           drop_zero_variance_features: Optional[bool] = True,
+           verbose: bool = False) \
+        -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """
+    Benchmark with a given set of feature selectors.
+    Return a tuple of data frames with scores, runtime and selected features for each method.

     Returns
     -------
@@ -552,7 +632,7 @@ def benchmark(selectors: Dict[str, Union[SelectionMethod.Correlation,
     if verbose:
         print(f"<<< Done! Time taken: {(time() - t0) / 60:.2f} minutes")

-    # Convert to series
+    # Format
     runtime_df = pd.Series(method_to_runtime).to_frame("runtime").rename_axis("method").reset_index()

     return score_df, selected_df, runtime_df
@@ -561,15 +641,19 @@
 def calculate_statistics(scores: pd.DataFrame,
                          selected: pd.DataFrame,
                          columns: Optional[list] = None,
-                         ignore_constant: Optional[bool] = True):
-    """Calculate statistics for each feature using scores/selections from list of methods.
+                         ignore_constant: Optional[bool] = True) -> pd.DataFrame:
+    """
+    Calculate statistics for each feature using scores/selections from list of methods.
+    Returns data frame with calculated statistics for each feature.

     Parameters
     ----------
     scores: pd.DataFrame
         Data frame with scores for each feature (index) and selector (columns).
+        Each feature could have multiple rows from different cross-validation folds.
     selected: pd.DataFrame
         Data frame with selection flag for each feature (index) and selector (columns).
+        Each feature could have multiple rows from different cross-validation folds.
     columns: list (default=None)
         List of methods (columns) to include in statistics.
         If None, all methods (columns) will be used.
@@ -584,9 +668,9 @@
     check_true(isinstance(scores, pd.DataFrame), ValueError("scores must be a data frame."))
     check_true(isinstance(selected, pd.DataFrame), ValueError("selection must be a data frame."))
     check_true(scores.shape == selected.shape, ValueError("Shapes of scores and selected data frames must match."))
-    check_true(len(scores.index.intersection(selected.index)) == selected.shape[0],
+    check_true(np.all(scores.index == selected.index),
               ValueError("Index of score and selection data frames must match."))
-    check_true(len(scores.columns.intersection(selected.columns)) == selected.shape[1],
+    check_true(np.all(scores.columns == selected.columns),
               ValueError("Columns of score and selection data frames must match."))

     # Get columns to use
@@ -597,25 +681,25 @@
     scores_df = scores[columns].copy()
     selected_df = selected[columns].copy()

+    # Group by feature for CV results
+    scores_df = scores_df.groupby(scores_df.index).mean()
+    selected_df = selected_df.groupby(selected_df.index).mean()
+
     # Drop methods with constant scores
     if ignore_constant:
         mask = ~np.isclose(np.var(scores_df, axis=0), 0)
         scores_df = scores_df.loc[:, mask]
         selected_df = selected_df.loc[:, mask]

-    # Sort by index
-    scores_df.sort_index(inplace=True)
-    selected_df.sort_index(inplace=True)
-
     # Calculate statistics
-    stats_df = pd.DataFrame(index=scores.index)
-    stats_df["_score_mean"] = scores_df.mean(axis=1)
-    stats_df["_score_mean_norm"] = normalize_columns(scores_df).mean(axis=1)
-    stats_df["_selection_freq"] = selected_df.sum(axis=1)
-    stats_df["_selection_freq_norm"] = normalize_columns(selected_df).sum(axis=1)
+    stats_df = pd.DataFrame(index=scores_df.index)
+    stats_df["score_mean"] = scores_df.mean(axis=1)
+    stats_df["score_mean_norm"] = normalize_columns(scores_df).mean(axis=1)
+    stats_df["selection_freq"] = selected_df.sum(axis=1)
+    stats_df["selection_freq_norm"] = normalize_columns(selected_df).sum(axis=1)

     # Sort
-    stats_df.sort_values(by="_score_mean_norm", ascending=False, inplace=True)
+    stats_df.sort_values(by="score_mean_norm", ascending=False, inplace=True)

     return stats_df

@@ -632,6 +716,7 @@ def plot_importance(scores: pd.DataFrame,
     ----------
     scores: pd.DataFrame
         Data frame with scores for each feature (index) and method (columns).
+        Each feature could have multiple rows from different cross-validation folds.
     columns: list (default=None)
         List of methods (columns) to include in statistics.
         If None, all methods (columns) will be used.
@@ -663,6 +748,9 @@ def plot_importance(scores: pd.DataFrame,
     df = scores[columns].copy()
     df.fillna(0, inplace=True)

+    # Group by feature for CV results
+    df = df.groupby(df.index).mean()
+
     # Get normalized scores such that scores for each method sums to 1
     if normalize:
         df = normalize_columns(df)
diff --git a/feature/tree_based.py b/feature/tree_based.py
index 69ea6d5..d31ccf5 100644
--- a/feature/tree_based.py
+++ b/feature/tree_based.py
@@ -50,14 +50,14 @@ def dispatch_model(self, labels: pd.Series, *args):
         # Custom estimator should be compatible with the task
         if "classification_" in task_str:
             if isinstance(self.estimator, CatBoost):
-                if self.estimator._estimator_type is not 'classifier':
+                if self.estimator._estimator_type != 'classifier':
                     raise TypeError(str(self.estimator) + " cannot be used for task: " + task_str)
             else:
                 if not isinstance(self.estimator, ClassifierMixin):
                     raise TypeError(str(self.estimator) + " cannot be used for task: " + task_str)
         else:
             if isinstance(self.estimator, CatBoost):
-                if self.estimator._estimator_type is not 'regressor':
+                if self.estimator._estimator_type != 'regressor':
                     raise TypeError(str(self.estimator) + " cannot be used for task: " + task_str)
             else:
                 if not isinstance(self.estimator, RegressorMixin):
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 33511d1..2f69c0d 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -17,43 +17,42 @@

 class TestBenchmark(BaseTest):

-    def test_benchmark_regression(self):
+    num_features = 3
+    corr_threshold = 0.5
+    alpha = 1000
+    tree_params = {"random_state": 123, "n_estimators": 100}
+
+    selectors = {
+        "corr_pearson": SelectionMethod.Correlation(corr_threshold, method="pearson"),
+        "corr_kendall": SelectionMethod.Correlation(corr_threshold, method="kendall"),
+        "corr_spearman": SelectionMethod.Correlation(corr_threshold, method="spearman"),
+        "univ_anova": SelectionMethod.Statistical(num_features, method="anova"),
+        "univ_chi_square": SelectionMethod.Statistical(num_features, method="chi_square"),
+        "univ_mutual_info": SelectionMethod.Statistical(num_features, method="mutual_info"),
+        "linear": SelectionMethod.Linear(num_features, regularization="none"),
+        "lasso": SelectionMethod.Linear(num_features, regularization="lasso", alpha=alpha),
+        "ridge": SelectionMethod.Linear(num_features, regularization="ridge", alpha=alpha),
+        "random_forest": SelectionMethod.TreeBased(num_features),
+        "xgboost_clf": SelectionMethod.TreeBased(num_features, estimator=XGBClassifier(**tree_params)),
+        "xgboost_reg": SelectionMethod.TreeBased(num_features, estimator=XGBRegressor(**tree_params)),
+        "extra_clf": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesClassifier(**tree_params)),
+        "extra_reg": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesRegressor(**tree_params)),
+        "lgbm_clf": SelectionMethod.TreeBased(num_features, estimator=LGBMClassifier(**tree_params)),
+        "lgbm_reg": SelectionMethod.TreeBased(num_features, estimator=LGBMRegressor(**tree_params)),
+        "gradient_clf": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingClassifier(**tree_params)),
+        "gradient_reg": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingRegressor(**tree_params)),
+        "adaboost_clf": SelectionMethod.TreeBased(num_features, estimator=AdaBoostClassifier(**tree_params)),
+        "adaboost_reg": SelectionMethod.TreeBased(num_features, estimator=AdaBoostRegressor(**tree_params)),
+        "catboost_clf": SelectionMethod.TreeBased(num_features, estimator=CatBoostClassifier(**tree_params, silent=True)),
+        "catboost_reg": SelectionMethod.TreeBased(num_features, estimator=CatBoostRegressor(**tree_params, silent=True))
+    }

+    def test_benchmark_regression(self):
         data, label = get_data_label(load_boston())
         data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"])

-        num_features = 3
-        corr_threshold = 0.5
-        alpha = 1000
-        tree_params = {"random_state": 123, "n_estimators": 100}
-
-        selectors = {
-            "corr_pearson": SelectionMethod.Correlation(corr_threshold, method="pearson"),
-            "corr_kendall": SelectionMethod.Correlation(corr_threshold, method="kendall"),
-            "corr_spearman": SelectionMethod.Correlation(corr_threshold, method="spearman"),
-            "univ_anova": SelectionMethod.Statistical(num_features, method="anova"),
-            "univ_chi_square": SelectionMethod.Statistical(num_features, method="chi_square"),
-            "univ_mutual_info": SelectionMethod.Statistical(num_features, method="mutual_info"),
-            "linear": SelectionMethod.Linear(num_features, regularization="none"),
-            "lasso": SelectionMethod.Linear(num_features, regularization="lasso", alpha=alpha),
-            "ridge": SelectionMethod.Linear(num_features, regularization="ridge", alpha=alpha),
-            "random_forest": SelectionMethod.TreeBased(num_features),
-            "xgboost_clf": SelectionMethod.TreeBased(num_features, estimator=XGBClassifier(**tree_params)),
-            "xgboost_reg": SelectionMethod.TreeBased(num_features, estimator=XGBRegressor(**tree_params)),
-            "extra_clf": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesClassifier(**tree_params)),
-            "extra_reg": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesRegressor(**tree_params)),
-            "lgbm_clf": SelectionMethod.TreeBased(num_features, estimator=LGBMClassifier(**tree_params)),
-            "lgbm_reg": SelectionMethod.TreeBased(num_features, estimator=LGBMRegressor(**tree_params)),
-            "gradient_clf": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingClassifier(**tree_params)),
-            "gradient_reg": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingRegressor(**tree_params)),
-            "adaboost_clf": SelectionMethod.TreeBased(num_features, estimator=AdaBoostClassifier(**tree_params)),
-            "adaboost_reg": SelectionMethod.TreeBased(num_features, estimator=AdaBoostRegressor(**tree_params)),
-            "catboost_clf": SelectionMethod.TreeBased(num_features, estimator=CatBoostClassifier(**tree_params, silent=True)),
-            "catboost_reg": SelectionMethod.TreeBased(num_features, estimator=CatBoostRegressor(**tree_params, silent=True))
-        }
-
         # Benchmark
-        score_df, selected_df, runtime_df = benchmark(selectors, data, label, output_filename=None)
+        score_df, selected_df, runtime_df = benchmark(self.selectors, data, label, output_filename=None)
         _ = calculate_statistics(score_df, selected_df)

         self.assertListAlmostEqual([0.4787777784012165, 0.47170429073431874, 0.5596288196730658, 0.4400410275414326, 0.5674082968785575],
@@ -86,42 +85,61 @@ def test_benchmark_regression(self):
         self.assertListAlmostEqual([0.10947144861974874, 0.020211076089938374, 0.08416074180466389, 0.045604950489313435, 0.7405517829963355],
                                    score_df["random_forest"].to_list())

-    def test_benchmark_classification(self):
+    def test_benchmark_regression_cv(self):
+        data, label = get_data_label(load_boston())
+        data = data.drop(columns=["CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"])

-        data, label = get_data_label(load_iris())
+        # Benchmark
+        score_df, selected_df, runtime_df = benchmark(self.selectors, data, label, cv=5, output_filename=None)
+        _ = calculate_statistics(score_df, selected_df)
+
+        # Aggregate scores from different cv-folds
+        score_df = score_df.groupby(score_df.index).mean()
+
+        self.assertListAlmostEqual(
+            [0.5598624197527886, 0.43999689309372514, 0.47947203347292133, 0.5677393697964164, 0.4718904343871402],
+            score_df["corr_pearson"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.5133150872001859, 0.33830236220280874, 0.5355471187677026, 0.4944995007684703, 0.4812959438381611],
+            score_df["corr_kendall"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.6266784101694156, 0.3922216387923788, 0.6538541627239243, 0.598348546553966, 0.5537572894805117],
+            score_df["corr_spearman"].to_list())

-        num_features = 3
-        corr_threshold = 0.5
-        alpha = 1000
-        tree_params = {"random_state": 123, "n_estimators": 100}
-
-        selectors = {
-            "corr_pearson": SelectionMethod.Correlation(corr_threshold, method="pearson"),
-            "corr_kendall": SelectionMethod.Correlation(corr_threshold, method="kendall"),
-            "corr_spearman": SelectionMethod.Correlation(corr_threshold, method="spearman"),
-            "univ_anova": SelectionMethod.Statistical(num_features, method="anova"),
-            "univ_chi_square": SelectionMethod.Statistical(num_features, method="chi_square"),
-            "univ_mutual_info": SelectionMethod.Statistical(num_features, method="mutual_info"),
-            "linear": SelectionMethod.Linear(num_features, regularization="none"),
-            "lasso": SelectionMethod.Linear(num_features, regularization="lasso", alpha=alpha),
-            "ridge": SelectionMethod.Linear(num_features, regularization="ridge", alpha=alpha),
-            "random_forest": SelectionMethod.TreeBased(num_features),
-            "xgboost_clf": SelectionMethod.TreeBased(num_features, estimator=XGBClassifier(**tree_params)),
-            "xgboost_reg": SelectionMethod.TreeBased(num_features, estimator=XGBRegressor(**tree_params)),
-            "extra_clf": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesClassifier(**tree_params)),
-            "extra_reg": SelectionMethod.TreeBased(num_features, estimator=ExtraTreesRegressor(**tree_params)),
-            "lgbm_clf": SelectionMethod.TreeBased(num_features, estimator=LGBMClassifier(**tree_params)),
-            "lgbm_reg": SelectionMethod.TreeBased(num_features, estimator=LGBMRegressor(**tree_params)),
-            "gradient_clf": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingClassifier(**tree_params)),
-            "gradient_reg": SelectionMethod.TreeBased(num_features, estimator=GradientBoostingRegressor(**tree_params)),
-            "adaboost_clf": SelectionMethod.TreeBased(num_features, estimator=AdaBoostClassifier(**tree_params)),
-            "adaboost_reg": SelectionMethod.TreeBased(num_features, estimator=AdaBoostRegressor(**tree_params)),
-            "catboost_clf": SelectionMethod.TreeBased(num_features, estimator=CatBoostClassifier(**tree_params, silent=True)),
-            "catboost_reg": SelectionMethod.TreeBased(num_features, estimator=CatBoostRegressor(**tree_params, silent=True))
-        }
+        self.assertListAlmostEqual(
+            [66.9096213925407, 50.470199216622746, 71.84642313219175, 481.0566386481166, 60.5346993182466],
+            score_df["univ_anova"].to_list())
+
+        self.assertListAlmostEqual([0, 0, 0, 0, 0],
+                                   score_df["univ_chi_square"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.31315151982855777, 0.16552049446241074, 0.3376809619388398, 0.681986210957143, 0.18450178283973817],
+            score_df["univ_mutual_info"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.06157747888912044, 0.006445566885590223, 0.06693250180688959, 0.9576028432508157, 0.053796504696545476],
+            score_df["linear"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.05329389111187177, 0.007117077997740284, 0.054563375238215125, 0.9260391103473467, 0.05071613235478144],
+            score_df["lasso"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.061567603158881413, 0.006446613222308434, 0.06694625250225411, 0.9575175129470551, 0.05379855880797472],
+            score_df["ridge"].to_list())
+
+        self.assertListAlmostEqual(
+            [0.07819877553940296, 0.04385018441841779, 0.11432712180337742, 0.7401304941703286, 0.023493424068473153],
+            score_df["random_forest"].to_list())
+
+    def test_benchmark_classification(self):
+        data, label = get_data_label(load_iris())

         # Benchmark
-        score_df, selected_df, runtime_df = benchmark(selectors, data, label, output_filename=None)
+        score_df, selected_df, runtime_df = benchmark(self.selectors, data, label, output_filename=None)
         _ = calculate_statistics(score_df, selected_df)

         self.assertListAlmostEqual([0.7018161715727902, 0.47803395524999537, 0.8157648279049796, 0.7867331225527027],
@@ -153,3 +171,43 @@ def test_benchmark_classification(self):

         self.assertListAlmostEqual([0.09210348279677849, 0.03045933928742506, 0.4257647994615192, 0.45167237845427727],
                                    score_df["random_forest"].to_list())
+
+    def test_benchmark_classification_cv(self):
+        data, label = get_data_label(load_iris())
+
+        # Benchmark
+        score_df, selected_df, runtime_df = benchmark(self.selectors, data, label, cv=5, output_filename=None)
+        _ = calculate_statistics(score_df, selected_df)
+
+        # Aggregate scores from different cv-folds
+        score_df = score_df.groupby(score_df.index).mean()
+
+        self.assertListAlmostEqual([0.8161221983271784, 0.7871883928143776, 0.7020705184086643, 0.4793198034473529],
+                                   score_df["corr_pearson"].to_list())
+
+        self.assertListAlmostEqual([0.6780266710547757, 0.6550828618428932, 0.6125815664695313, 0.35594860548691776],
+                                   score_df["corr_kendall"].to_list())
+
+        self.assertListAlmostEqual([0.78225620681015, 0.7652859083343029, 0.7201874607448919, 0.44222588698925963],
+                                   score_df["corr_spearman"].to_list())
+
+        self.assertListAlmostEqual([946.9891701851375, 781.7441886012473, 95.65931730842011, 39.49994604080157],
+                                   score_df["univ_anova"].to_list())
+
+        self.assertListAlmostEqual([92.9884264821005, 53.62326775665224, 8.659084856298207, 2.9711267637858163],
+                                   score_df["univ_chi_square"].to_list())
+
+        self.assertListAlmostEqual([0.994113677302704, 0.9907696444894937, 0.4998955427118911, 0.2298786031192229],
+                                   score_df["univ_mutual_info"].to_list())
+
+        self.assertListAlmostEqual([0.22327603204146848, 0.03543066514916661, 0.26254667473769594, 0.506591069316828],
+                                   score_df["linear"].to_list())
+
+        self.assertListAlmostEqual([0.280393459805252, 0.9489351779830099, 0.6627768115497065, 0.4761878539373159],
+                                   score_df["lasso"].to_list())
+
+        self.assertListAlmostEqual([1.1049393460379105e-15, 2.0872192862952944e-15, 6.504056552595708e-16, 4.218847493575594e-16],
+                                   score_df["ridge"].to_list())
+
+        self.assertListAlmostEqual([0.4185294825699565, 0.4472560913161835, 0.10091608418224696, 0.03329834193161316],
+                                   score_df["random_forest"].to_list())
\ No newline at end of file
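
For orientation only, a minimal usage sketch of the new `cv` option introduced by this patch, modeled on the README example and the tests above. Only `SelectionMethod`, `benchmark`, `calculate_statistics`, and the new `cv`/`seed` arguments come from this patch; the `feature.utils` import path, the dataset loader, and the small `selectors` dictionary are assumptions for illustration.

```python
# Sketch only, not part of the patch. Assumes get_data_label lives in feature.utils
# (mirroring the tests) and that load_boston is available in the installed sklearn.
from sklearn.datasets import load_boston

from feature.utils import get_data_label
from feature.selector import SelectionMethod, benchmark, calculate_statistics

data, label = get_data_label(load_boston())

# A small subset of the selectors used in the README/tests
selectors = {
    "corr_pearson": SelectionMethod.Correlation(0.5, method="pearson"),
    "univ_anova": SelectionMethod.Statistical(3, method="anova"),
    "random_forest": SelectionMethod.TreeBased(3),
}

# cv=5 runs every selector on 5 shuffled KFold training splits (reproducible via seed),
# so each returned frame stacks one block of rows per fold.
score_df, selected_df, runtime_df = benchmark(selectors, data, label, cv=5, seed=123)

# calculate_statistics averages the per-fold rows of each feature before ranking.
stats_df = calculate_statistics(score_df, selected_df)
print(stats_df)
```

This is also why `calculate_statistics` and `plot_importance` now group scores by feature index before aggregating: with `cv` set, each feature appears once per fold in the benchmark output.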