From 9947165e0b3ff6a3f319a4321cadb3bb93f44778 Mon Sep 17 00:00:00 2001
From: Yury Kashnitsky <foofest@gmail.com>
Date: Mon, 19 Aug 2024 17:09:13 +0200
Subject: [PATCH] add a fix to #764 (#766)

* add a fix to #764

* run pre-commit

---------

Co-authored-by: Yury Kashnitsky <kashnitsky@google.com>
---
 .../topic1_pandas_data_analysis.ipynb         | 20 +++++++++++--------
 .../topic08_sgd_hashing_vowpal_wabbit.md      |  2 +-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb b/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb
index 617a7ce6c1..7292c89188 100644
--- a/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb
+++ b/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb
@@ -290,7 +290,10 @@
     "\n",
     "Let's use it to answer the question:\n",
     "\n",
-    "**What are average values of numerical features for churned users?**\n"
+    "**What are the average values of numerical features for churned users?**\n",
+    "\n",
+    "\n",
+    "Here we'll first use the `select_dtypes` method to keep only the numeric columns, since the mean cannot be computed for non-numeric ones."
    ]
   },
   {
@@ -299,14 +302,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df[df[\"Churn\"] == 1].mean()"
+    "df.select_dtypes(include=np.number)[df[\"Churn\"] == 1].mean()"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "**How much time (on average) do churned users spend on the phone during daytime?**"
+    "**How much time (on average) do churned users spend on the phone during daytime?**\n",
+    "\n"
    ]
   },
   {
@@ -396,7 +400,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df.apply(np.max)"
+    "df.apply('max')"
    ]
   },
   {
@@ -522,7 +526,7 @@
    "source": [
     "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n",
     "\n",
-    "df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])"
+    "df.groupby([\"Churn\"])[columns_to_show].agg(['mean', 'std', 'min', 'max'])"
    ]
   },
   {
@@ -809,7 +813,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -823,9 +827,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.12.4"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md b/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md
index 0ebc74e5f5..5fee9eb018 100644
--- a/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md
+++ b/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md
@@ -857,4 +857,4 @@ This section has been moved to Kaggle, please explore [this Notebook](https://ww
 - ["Convex Optimization" by Stephen Boyd](https://www.amazon.com/Convex-Optimization-Stephen-Boyd/dp/0521833787)
 - "Command-line Tools can be 235x Faster than your Hadoop Cluster" [post](https://adamdrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html)
 - Benchmarking various ML algorithms on Criteo 1TB dataset on [GitHub](https://github.com/rambler-digital-solutions/criteo-1tb-benchmark)
-- [VW on FastML.com](http://fastml.com/blog/categories/vw/)
\ No newline at end of file
+- [VW on FastML.com](http://fastml.com/blog/categories/vw/)