From 98be9076232b9680842fd7d2a3fb01f0d53e9586 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Sun, 6 Nov 2022 17:24:08 +0100 Subject: [PATCH] =?UTF-8?q?Fixed=20some=20typos=20=F0=9F=AA=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relates to #11. --- .../2.0-mb-data_preprocessing_loading_splitting.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb b/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb index 151d0af4..c28fbabf 100644 --- a/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb +++ b/notebooks/2.0-mb-data_preprocessing_loading_splitting.ipynb @@ -3351,7 +3351,7 @@ } ], "source": [ - "# use shap instead of feature importance to maintain consistency throughout the work\n", + "# use shap to maintain consistency throughout the work\n", "model.fit(X, y)\n", "explainer = shap.TreeExplainer(model)\n", "shap_values = explainer.shap_values(Pool(X, y, cat_features=cat_features))\n", @@ -3804,9 +3804,10 @@ "outputs": [], "source": [ "# randomly sample 10 % of rows\n", - "train = train.sample(frac=0.1, random_state=seed)\n", - "valid = valid.sample(frac=0.1, random_state=seed)\n", - "test = test.sample(frac=0.1, random_state=seed)\n" + "frac= 0.1\n", + "train = train.sample(frac=frac, random_state=seed)\n", + "valid = valid.sample(frac=frac, random_state=seed)\n", + "test = test.sample(frac=frac, random_state=seed)\n" ] }, { @@ -4159,7 +4160,9 @@ } ], "source": [ + "# number of most frequent ROOT values compared\n", "n = 20\n", + "\n", "ser_train = train[\"ROOT\"].value_counts()[:n]\n", "ser_valid = valid[\"ROOT\"].value_counts()[:n]\n", "ser_test = test[\"ROOT\"].value_counts()[:n]\n",