From 00c55c3d961fef089e11e0e64de37354ab455eeb Mon Sep 17 00:00:00 2001 From: Brian Caffo Date: Tue, 24 Oct 2023 10:42:41 -0400 Subject: [PATCH] Added some stuff --- .../ds4ph/24_regression_examples.ipynb | 178 +- .../ds4ph/24_regression_examples.slides.html | 15059 ++++++++++++++++ .../ds4ph/25_regression_interpretation.ipynb | 0 slides/ds4ph/readme.md | 1 + slides/ds4ph/slide_convert.ipynb | 20 +- 5 files changed, 15178 insertions(+), 80 deletions(-) rename book/regression_examples.ipynb => slides/ds4ph/24_regression_examples.ipynb (56%) create mode 100644 slides/ds4ph/24_regression_examples.slides.html rename book/regression_interpretation.ipynb => slides/ds4ph/25_regression_interpretation.ipynb (100%) diff --git a/book/regression_examples.ipynb b/slides/ds4ph/24_regression_examples.ipynb similarity index 56% rename from book/regression_examples.ipynb rename to slides/ds4ph/24_regression_examples.ipynb index c3c6d62..b1ee1d6 100644 --- a/book/regression_examples.ipynb +++ b/slides/ds4ph/24_regression_examples.ipynb @@ -3,27 +3,26 @@ { "cell_type": "markdown", "metadata": { - "colab_type": "text", - "id": "view-in-github" + "slideshow": { + "slide_type": "slide" + }, + "tags": [] }, - "source": [ - "\"Open [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/smart-stats/ds4bio_book/HEAD)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, "source": [ "# Linear models: a classic example" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 2, "metadata": { "colab": {}, "colab_type": "code", - "id": "oIimJROu2Hc_" + "id": "oIimJROu2Hc_", + "slideshow": { + "slide_type": "slide" + }, + "tags": [] }, "outputs": [], "source": [ @@ -35,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -43,7 +42,11 @@ }, "colab_type": "code", "id": "vsVJc6lS2Qov", - "outputId": "fc0b2fe3-eccd-4f2a-819f-084bc8e2602d" + "outputId": 
"fc0b2fe3-eccd-4f2a-819f-084bc8e2602d", + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] }, "outputs": [ { @@ -132,20 +135,23 @@ "" ], "text/plain": [ - " Region Fertility Agriculture ... Education Catholic Infant.Mortality\n", - "0 Courtelary 80.2 17.0 ... 12 9.96 22.2\n", - "1 Delemont 83.1 45.1 ... 9 84.84 22.2\n", - "2 Franches-Mnt 92.5 39.7 ... 5 93.40 20.2\n", - "3 Moutier 85.8 36.5 ... 7 33.77 20.3\n", - "4 Neuveville 76.9 43.5 ... 15 5.16 20.6\n", + " Region Fertility Agriculture Examination Education Catholic \\\n", + "0 Courtelary 80.2 17.0 15 12 9.96 \n", + "1 Delemont 83.1 45.1 6 9 84.84 \n", + "2 Franches-Mnt 92.5 39.7 5 5 93.40 \n", + "3 Moutier 85.8 36.5 12 7 33.77 \n", + "4 Neuveville 76.9 43.5 17 15 5.16 \n", "\n", - "[5 rows x 7 columns]" + " Infant.Mortality \n", + "0 22.2 \n", + "1 22.2 \n", + "2 20.2 \n", + "3 20.3 \n", + "4 20.6 " ] }, - "execution_count": 45, - "metadata": { - "tags": [] - }, + "execution_count": 3, + "metadata": {}, "output_type": "execute_result" } ], @@ -156,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -164,7 +170,11 @@ }, "colab_type": "code", "id": "T34j2o8u2cH_", - "outputId": "61309a3a-8ff6-489d-9dfd-c1ea40be99d8" + "outputId": "61309a3a-8ff6-489d-9dfd-c1ea40be99d8", + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] }, "outputs": [ { @@ -174,10 +184,8 @@ " array([-0.17211397, -0.25800824, -0.87094006, 0.10411533, 1.07704814])]" ] }, - "execution_count": 49, - "metadata": { - "tags": [] - }, + "execution_count": 4, + "metadata": {}, "output_type": "execute_result" } ], @@ -190,84 +198,99 @@ ] }, { - "cell_type": "code", - "execution_count": 50, + "cell_type": "markdown", "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Dwova9_d5BKe" + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] }, - "outputs": [], "source": [ - "x2 = x\n", - "x2['Test'] = x2.Agriculture + 
x2.Examination\n", - "fit2 = LinearRegression().fit(x2, y)\n", - "yhat2 = fit2.predict(x2)" + "## Example of adjustment effect" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 283 + "slideshow": { + "slide_type": "subslide" }, - "colab_type": "code", - "id": "RAIz1jJs6YFg", - "outputId": "de9beb6e-d3e6-4033-eba9-27689bac6ff4" + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "[]" + "[60.304375228005725, array([0.19420175])]" ] }, - "execution_count": 51, - "metadata": { - "tags": [] - }, + "execution_count": 10, + "metadata": {}, "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light", - "tags": [] - }, - "output_type": "display_data" } ], "source": [ - "plt.plot(yhat, yhat2)" + "fit_marginal = LinearRegression().fit(x['Agriculture'].to_numpy().reshape(-1,1), y)\n", + "[fit_marginal.intercept_, fit_marginal.coef_]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + }, + "tags": [] + }, + "source": [ + "## Adding a useless regressor does what?" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Dwova9_d5BKe", + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "x2 = x\n", + "x2['Test'] = x2.Agriculture + x2.Examination\n", + "fit2 = LinearRegression().fit(x2, y)\n", + "yhat2 = fit2.predict(x2)" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 51, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 283 }, "colab_type": "code", - "id": "626Z0Ql66qWx", - "outputId": "12617691-af4e-47d3-d080-10223101ec81" + "id": "RAIz1jJs6YFg", + "outputId": "de9beb6e-d3e6-4033-eba9-27689bac6ff4", + "slideshow": { + "slide_type": "subslide" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "[]" + "[]" ] }, - "execution_count": 53, + "execution_count": 51, "metadata": { "tags": [] }, @@ -275,7 +298,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -288,10 +311,7 @@ } ], "source": [ - "x3 = x2.drop(['Agriculture'], axis = 1)\n", - "fit3 = LinearRegression().fit(x3, y)\n", - "yhat3 = fit3.predict(x3)\n", - "plt.plot(yhat, yhat3)\n" + "plt.plot(yhat, yhat2);" ] } ], @@ -303,9 +323,9 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python [conda env:.conda-ds4bio]", "language": "python", - "name": "python3" + "name": "conda-env-.conda-ds4bio-py" }, "language_info": { "codemirror_mode": { @@ -317,7 +337,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.10.4" } }, "nbformat": 4, diff --git a/slides/ds4ph/24_regression_examples.slides.html b/slides/ds4ph/24_regression_examples.slides.html new file mode 100644 index 0000000..23a77b7 --- /dev/null +++ b/slides/ds4ph/24_regression_examples.slides.html @@ -0,0 +1,15059 @@ + + + + + + + + + +24_regression_examples slides + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+ + + + + + + + + + + diff --git a/book/regression_interpretation.ipynb b/slides/ds4ph/25_regression_interpretation.ipynb similarity index 100% rename from book/regression_interpretation.ipynb rename to slides/ds4ph/25_regression_interpretation.ipynb diff --git a/slides/ds4ph/readme.md b/slides/ds4ph/readme.md index 59f9574..e04319f 100644 --- a/slides/ds4ph/readme.md +++ b/slides/ds4ph/readme.md @@ -20,3 +20,4 @@ + [Lecture 21, Maximum likelihood](https://smart-stats.github.io/ds4bio_book/slides/ds4ph/21_ml.slides.html) + [Lecture 22, Linear separable](https://smart-stats.github.io/ds4bio_book/slides/ds4ph/22_linear_separable.slides.html) + [Lecture 23, Linear separable interpretation](https://smart-stats.github.io/ds4bio_book/slides/ds4ph/23_linear_separable_smf.slides.html) ++ [Lecture 24, Regression example](https://smart-stats.github.io/ds4bio_book/slides/ds4ph/24_regression_examples.slides.html) \ No newline at end of file diff --git a/slides/ds4ph/slide_convert.ipynb b/slides/ds4ph/slide_convert.ipynb index 4ffbc58..fd2c26d 100644 --- a/slides/ds4ph/slide_convert.ipynb +++ b/slides/ds4ph/slide_convert.ipynb @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -75,6 +75,24 @@ "source": [ "!jupyter nbconvert 23_linear_separable_smf.ipynb --to slides --SlidesExporter.reveal_theme=solarized" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[NbConvertApp] Converting notebook 23_linear_separable_smf.ipynb to slides\n", + "[NbConvertApp] Writing 590645 bytes to 23_linear_separable_smf.slides.html\n" + ] + } + ], + "source": [ + "!jupyter nbconvert 24_regression_examples.ipynb --to slides --SlidesExporter.reveal_theme=solarized" + ] } ], "metadata": {