diff --git a/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb b/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb
index 151d0af4..c28fbabf 100644
--- a/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb
+++ b/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb
@@ -3351,7 +3351,7 @@
         }
       ],
       "source": [
-        "# use shap instead of feature importance to maintain consistency throughout the work\n",
+        "# use SHAP values to stay consistent with the rest of the work\n",
         "model.fit(X, y)\n",
         "explainer = shap.TreeExplainer(model)\n",
         "shap_values = explainer.shap_values(Pool(X, y, cat_features=cat_features))\n",
@@ -3804,9 +3804,10 @@
       "outputs": [],
       "source": [
         "# randomly sample 10 % of rows\n",
-        "train = train.sample(frac=0.1, random_state=seed)\n",
-        "valid = valid.sample(frac=0.1, random_state=seed)\n",
-        "test = test.sample(frac=0.1, random_state=seed)\n"
+        "frac= 0.1\n",
+        "train = train.sample(frac=frac, random_state=seed)\n",
+        "valid = valid.sample(frac=frac, random_state=seed)\n",
+        "test = test.sample(frac=frac, random_state=seed)\n"
       ]
     },
     {
@@ -4159,7 +4160,9 @@
         }
       ],
       "source": [
+        "# number of most frequent ROOT values compared across the splits\n",
         "n = 20\n",
+        "\n",
         "ser_train = train[\"ROOT\"].value_counts()[:n]\n",
         "ser_valid = valid[\"ROOT\"].value_counts()[:n]\n",
         "ser_test = test[\"ROOT\"].value_counts()[:n]\n",