From 9947165e0b3ff6a3f319a4321cadb3bb93f44778 Mon Sep 17 00:00:00 2001 From: Yury Kashnitsky Date: Mon, 19 Aug 2024 17:09:13 +0200 Subject: [PATCH] add a fix to #764 (#766) * add a fix to #764 * run pre-commit --------- Co-authored-by: Yury Kashnitsky --- .../topic1_pandas_data_analysis.ipynb | 20 +++++++++++-------- .../topic08_sgd_hashing_vowpal_wabbit.md | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb b/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb index 617a7ce6c1..7292c89188 100644 --- a/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb +++ b/jupyter_english/topic01_pandas_data_analysis/topic1_pandas_data_analysis.ipynb @@ -290,7 +290,10 @@ "\n", "Let's use it to answer the question:\n", "\n", - "**What are average values of numerical features for churned users?**\n" + "**What are average values of numerical features for churned users?**\n", + "\n", + "\n", + "Here we'll resort to an additional method `select_dtypes` to select all numeric columns." ] }, { @@ -299,14 +302,15 @@ "metadata": {}, "outputs": [], "source": [ - "df[df[\"Churn\"] == 1].mean()" + "df.select_dtypes(include=np.number)[df[\"Churn\"] == 1].mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "**How much time (on average) do churned users spend on the phone during daytime?**" + "**How much time (on average) do churned users spend on the phone during daytime?**\n", + "\n" ] }, { @@ -396,7 +400,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.apply(np.max)" + "df.apply('max')" ] }, { @@ -522,7 +526,7 @@ "source": [ "columns_to_show = [\"Total day minutes\", \"Total eve minutes\", \"Total night minutes\"]\n", "\n", - "df.groupby([\"Churn\"])[columns_to_show].agg([np.mean, np.std, np.min, np.max])" + "df.groupby([\"Churn\"])[columns_to_show].agg(['mean', 'std', 'min', 'max'])" ] }, { @@ -809,7 +813,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -823,9 +827,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.12.4" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md b/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md index 0ebc74e5f5..5fee9eb018 100644 --- a/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md +++ b/mlcourse_ai_jupyter_book/book/topic08/topic08_sgd_hashing_vowpal_wabbit.md @@ -857,4 +857,4 @@ This section has been moved to Kaggle, please explore [this Notebook](https://ww - ["Convex Optimization" by Stephen Boyd](https://www.amazon.com/Convex-Optimization-Stephen-Boyd/dp/0521833787) - "Command-line Tools can be 235x Faster than your Hadoop Cluster" [post](https://adamdrake.com/command-line-tools-can-be-235x-faster-than-your-hadoop-cluster.html) - Benchmarking various ML algorithms on Criteo 1TB dataset on [GitHub](https://github.com/rambler-digital-solutions/criteo-1tb-benchmark) -- [VW on FastML.com](http://fastml.com/blog/categories/vw/) \ No newline at end of file +- [VW on FastML.com](http://fastml.com/blog/categories/vw/)