diff --git a/fall2024/oct_16_session.ipynb b/fall2024/oct_16_session.ipynb
new file mode 100644
index 0000000..d234ec1
--- /dev/null
+++ b/fall2024/oct_16_session.ipynb
@@ -0,0 +1,3421 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyOpuaXhqrFc7vH6V6jrPoOP",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# 10/16/2024"
+ ],
+ "metadata": {
+ "id": "dYa59UPAm69U"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 230
+ },
+ "id": "miyRtEbHm2tR",
+ "outputId": "c394ccf4-4acd-472e-f1e0-3208bd06731a"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(4123, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "301 2020 Bangladesh 2233.305901 \n",
+ "2873 2008 Panama 7197.110786 \n",
+ "186 2019 Aruba 31902.809818 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "301 71.968 167420951.0 BGD \n",
+ "2873 76.225 3495276.0 PAN \n",
+ "186 76.248 106442.0 ABW \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "301 South Asia Lower middle income IDA \n",
+ "2873 Latin America & Caribbean High income IBRD \n",
+ "186 Latin America & Caribbean High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Country | \n",
+ " GDP per capita (current US$) | \n",
+ " Life expectancy at birth, total (years) | \n",
+ " Population, total | \n",
+ " Country Code | \n",
+ " Region | \n",
+ " Income Group | \n",
+ " Lending Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 301 | \n",
+ " 2020 | \n",
+ " Bangladesh | \n",
+ " 2233.305901 | \n",
+ " 71.968 | \n",
+ " 167420951.0 | \n",
+ " BGD | \n",
+ " South Asia | \n",
+ " Lower middle income | \n",
+ " IDA | \n",
+ "
\n",
+ " \n",
+ " 2873 | \n",
+ " 2008 | \n",
+ " Panama | \n",
+ " 7197.110786 | \n",
+ " 76.225 | \n",
+ " 3495276.0 | \n",
+ " PAN | \n",
+ " Latin America & Caribbean | \n",
+ " High income | \n",
+ " IBRD | \n",
+ "
\n",
+ " \n",
+ " 186 | \n",
+ " 2019 | \n",
+ " Aruba | \n",
+ " 31902.809818 | \n",
+ " 76.248 | \n",
+ " 106442.0 | \n",
+ " ABW | \n",
+ " Latin America & Caribbean | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2008,\n \"max\": 2020,\n \"num_unique_values\": 3,\n \"samples\": [\n 2020,\n 2008,\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Bangladesh\",\n \"Panama\",\n \"Aruba\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15891.771383786097,\n \"min\": 2233.30590129762,\n \"max\": 31902.8098183195,\n \"num_unique_values\": 3,\n \"samples\": [\n 2233.30590129762,\n 7197.11078610101,\n 31902.8098183195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4644464557651324,\n \"min\": 71.968,\n \"max\": 76.248,\n \"num_unique_values\": 3,\n \"samples\": [\n 71.968,\n 76.225,\n 76.248\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 95635816.20048375,\n \"min\": 106442.0,\n \"max\": 167420951.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 167420951.0,\n 3495276.0,\n 106442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"BGD\",\n \"PAN\",\n \"ABW\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Latin America & Caribbean \",\n \"South Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\",\n \"Lower middle income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"IDA\",\n \"IBRD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 1
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import plotly.express as px\n",
+ "\n",
+ "# Remove pandas' row-display cap so displayed frames are never truncated.\n",
+ "pd.set_option(\"display.max_rows\", None)\n",
+ "\n",
+ "# Load the World Development Indicators (WDI) CSV straight from the course repository on GitHub.\n",
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/wcj365/python-stats-dataviz/refs/heads/master/fall2024/data/World_Development_Indicators_(WDI).csv\")\n",
+ "\n",
+ "# Print (rows, columns), then preview three random rows.\n",
+ "# The sample is unseeded, so the rows shown differ from run to run.\n",
+ "print(df.shape)\n",
+ "df.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Map the verbose World Bank column headers to the short names used by later cells.\n",
+ "column_names_dict = {\n",
+ " \"GDP per capita (current US$)\" : \"GDP per Capita\",\n",
+ " \"Life expectancy at birth, total (years)\" : \"Life Expectancy\",\n",
+ " \"Population, total\" : \"Population\"\n",
+ "}\n",
+ "\n",
+ "# rename() returns a new frame; columns not listed in the mapping keep their names.\n",
+ "df = df.rename(columns=column_names_dict)\n",
+ "df.sample(3)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 143
+ },
+ "id": "5jsZ2HlMoBgO",
+ "outputId": "380916dc-a028-4990-acaa-e9053d17766f"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per Capita Life Expectancy Population Country Code \\\n",
+ "1663 2014 Hungary 14294.258418 75.763415 9866468.0 HUN \n",
+ "1591 2018 Guyana 6094.909837 68.896000 785514.0 GUY \n",
+ "2456 2009 Moldova 1898.439757 69.573000 2865213.0 MDA \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "1663 Europe & Central Asia High income Not classified \n",
+ "1591 Latin America & Caribbean High income IDA \n",
+ "2456 Europe & Central Asia Upper middle income IBRD "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Country | \n",
+ " GDP per Capita | \n",
+ " Life Expectancy | \n",
+ " Population | \n",
+ " Country Code | \n",
+ " Region | \n",
+ " Income Group | \n",
+ " Lending Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1663 | \n",
+ " 2014 | \n",
+ " Hungary | \n",
+ " 14294.258418 | \n",
+ " 75.763415 | \n",
+ " 9866468.0 | \n",
+ " HUN | \n",
+ " Europe & Central Asia | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 1591 | \n",
+ " 2018 | \n",
+ " Guyana | \n",
+ " 6094.909837 | \n",
+ " 68.896000 | \n",
+ " 785514.0 | \n",
+ " GUY | \n",
+ " Latin America & Caribbean | \n",
+ " High income | \n",
+ " IDA | \n",
+ "
\n",
+ " \n",
+ " 2456 | \n",
+ " 2009 | \n",
+ " Moldova | \n",
+ " 1898.439757 | \n",
+ " 69.573000 | \n",
+ " 2865213.0 | \n",
+ " MDA | \n",
+ " Europe & Central Asia | \n",
+ " Upper middle income | \n",
+ " IBRD | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 2009,\n \"max\": 2018,\n \"num_unique_values\": 3,\n \"samples\": [\n 2014,\n 2018,\n 2009\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Hungary\",\n \"Guyana\",\n \"Moldova\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6304.707217132986,\n \"min\": 1898.43975710988,\n \"max\": 14294.2584180751,\n \"num_unique_values\": 3,\n \"samples\": [\n 14294.2584180751,\n 6094.90983678918,\n 1898.43975710988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.7846388224844634,\n \"min\": 68.896,\n \"max\": 75.7634146341463,\n \"num_unique_values\": 3,\n \"samples\": [\n 75.7634146341463,\n 68.896,\n 69.573\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4757563.1902572075,\n \"min\": 785514.0,\n \"max\": 9866468.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 9866468.0,\n 785514.0,\n 2865213.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"HUN\",\n \"GUY\",\n \"MDA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Latin America & Caribbean \",\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income 
Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Upper middle income\",\n \"High income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Not classified\",\n \"IDA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Transpose a random 3-row sample so each record reads top-to-bottom, one field per line.\n",
+ "df.sample(3).T"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 331
+ },
+ "id": "jWfgC1osqnxg",
+ "outputId": "f3cf3399-7bca-477e-a7e7-45d865daf01f"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " 64 2542 \\\n",
+ "Year 2011 2019 \n",
+ "Country American Samoa Morocco \n",
+ "GDP per Capita 10495.304732 3498.582764 \n",
+ "Life Expectancy NaN 74.27 \n",
+ "Population 54310.0 36304408.0 \n",
+ "Country Code ASM MAR \n",
+ "Region East Asia & Pacific Middle East & North Africa \n",
+ "Income Group High income Lower middle income \n",
+ "Lending Type Not classified IBRD \n",
+ "\n",
+ " 2167 \n",
+ "Year 2005 \n",
+ "Country Liechtenstein \n",
+ "GDP per Capita 105751.498913 \n",
+ "Life Expectancy 80.668293 \n",
+ "Population 34603.0 \n",
+ "Country Code LIE \n",
+ "Region Europe & Central Asia \n",
+ "Income Group High income \n",
+ "Lending Type Not classified "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 64 | \n",
+ " 2542 | \n",
+ " 2167 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year | \n",
+ " 2011 | \n",
+ " 2019 | \n",
+ " 2005 | \n",
+ "
\n",
+ " \n",
+ " Country | \n",
+ " American Samoa | \n",
+ " Morocco | \n",
+ " Liechtenstein | \n",
+ "
\n",
+ " \n",
+ " GDP per Capita | \n",
+ " 10495.304732 | \n",
+ " 3498.582764 | \n",
+ " 105751.498913 | \n",
+ "
\n",
+ " \n",
+ " Life Expectancy | \n",
+ " NaN | \n",
+ " 74.27 | \n",
+ " 80.668293 | \n",
+ "
\n",
+ " \n",
+ " Population | \n",
+ " 54310.0 | \n",
+ " 36304408.0 | \n",
+ " 34603.0 | \n",
+ "
\n",
+ " \n",
+ " Country Code | \n",
+ " ASM | \n",
+ " MAR | \n",
+ " LIE | \n",
+ "
\n",
+ " \n",
+ " Region | \n",
+ " East Asia & Pacific | \n",
+ " Middle East & North Africa | \n",
+ " Europe & Central Asia | \n",
+ "
\n",
+ " \n",
+ " Income Group | \n",
+ " High income | \n",
+ " Lower middle income | \n",
+ " High income | \n",
+ "
\n",
+ " \n",
+ " Lending Type | \n",
+ " Not classified | \n",
+ " IBRD | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": 64,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"American Samoa\",\n \"East Asia & Pacific\",\n 2011\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2542,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Lower middle income\",\n \"Morocco\",\n \"MAR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2167,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"High income\",\n \"Liechtenstein\",\n \"LIE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Section One\n",
+ "\n",
+ "Summary statistics with the pandas `describe()` method, first for numeric columns and then for text (object) columns."
+ ],
+ "metadata": {
+ "id": "7xNKKN0sryqI"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Summary statistics for two selected numeric columns only.\n",
+ "df[[\"Population\",\"Life Expectancy\"]].describe()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "Z-sGYgrInbnV",
+ "outputId": "b8c0e8d0-8ede-468a-90e8-7ae7151e0528"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Population Life Expectancy\n",
+ "count 4.123000e+03 3777.000000\n",
+ "mean 3.319575e+07 71.060853\n",
+ "std 1.316430e+08 8.499806\n",
+ "min 9.791000e+03 42.125000\n",
+ "25% 7.436200e+05 65.351000\n",
+ "50% 5.872624e+06 72.765000\n",
+ "75% 2.148494e+07 77.529000\n",
+ "max 1.417173e+09 85.497561"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Population | \n",
+ " Life Expectancy | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 4.123000e+03 | \n",
+ " 3777.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 3.319575e+07 | \n",
+ " 71.060853 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 1.316430e+08 | \n",
+ " 8.499806 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 9.791000e+03 | \n",
+ " 42.125000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 7.436200e+05 | \n",
+ " 65.351000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 5.872624e+06 | \n",
+ " 72.765000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 2.148494e+07 | \n",
+ " 77.529000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1.417173e+09 | \n",
+ " 85.497561 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df[[\\\"Population\\\",\\\"Life Expectancy\\\"]]\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 493275444.7155919,\n \"min\": 4123.0,\n \"max\": 1417173173.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 33195748.93936454,\n 5872624.0,\n 4123.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1314.2456744171654,\n \"min\": 8.499806399270673,\n \"max\": 3777.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 71.06085323233692,\n 72.765,\n 3777.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Called without arguments on this mixed-type frame, describe() summarises only the numeric columns.\n",
+ "df.describe()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "rinla82UqOFO",
+ "outputId": "fe432e64-d97a-4676-9a30-e0cadf2b1134"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year GDP per Capita Life Expectancy Population\n",
+ "count 4123.00000 3962.000000 3777.000000 4.123000e+03\n",
+ "mean 2013.00000 17007.692848 71.060853 3.319575e+07\n",
+ "std 5.47789 25733.109164 8.499806 1.316430e+08\n",
+ "min 2004.00000 128.538423 42.125000 9.791000e+03\n",
+ "25% 2008.00000 1862.892400 65.351000 7.436200e+05\n",
+ "50% 2013.00000 6048.304202 72.765000 5.872624e+06\n",
+ "75% 2018.00000 22137.309568 77.529000 2.148494e+07\n",
+ "max 2022.00000 240862.182448 85.497561 1.417173e+09"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " GDP per Capita | \n",
+ " Life Expectancy | \n",
+ " Population | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 4123.00000 | \n",
+ " 3962.000000 | \n",
+ " 3777.000000 | \n",
+ " 4.123000e+03 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 2013.00000 | \n",
+ " 17007.692848 | \n",
+ " 71.060853 | \n",
+ " 3.319575e+07 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 5.47789 | \n",
+ " 25733.109164 | \n",
+ " 8.499806 | \n",
+ " 1.316430e+08 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 2004.00000 | \n",
+ " 128.538423 | \n",
+ " 42.125000 | \n",
+ " 9.791000e+03 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 2008.00000 | \n",
+ " 1862.892400 | \n",
+ " 65.351000 | \n",
+ " 7.436200e+05 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 2013.00000 | \n",
+ " 6048.304202 | \n",
+ " 72.765000 | \n",
+ " 5.872624e+06 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 2018.00000 | \n",
+ " 22137.309568 | \n",
+ " 77.529000 | \n",
+ " 2.148494e+07 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 2022.00000 | \n",
+ " 240862.182448 | \n",
+ " 85.497561 | \n",
+ " 1.417173e+09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1100.7234184651504,\n \"min\": 5.477889924083767,\n \"max\": 4123.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 4123.0,\n 2013.0,\n 2018.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 81846.43514470861,\n \"min\": 128.538422519456,\n \"max\": 240862.18244774,\n \"num_unique_values\": 8,\n \"samples\": [\n 17007.692847815495,\n 6048.30420227805,\n 3962.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1314.2456744171654,\n \"min\": 8.499806399270673,\n \"max\": 3777.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 71.06085323233692,\n 72.765,\n 3777.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 493275444.7155919,\n \"min\": 4123.0,\n \"max\": 1417173173.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 33195748.93936454,\n 5872624.0,\n 4123.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# include=\"object\" summarises the string columns instead: count, unique, top (most frequent value) and freq.\n",
+ "df.describe(include=\"object\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 174
+ },
+ "id": "Z9hiipFcpebK",
+ "outputId": "c23fb0b2-5922-4a63-cde7-fc50ef08e4cb"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Country Country Code Region Income Group \\\n",
+ "count 4123 4123 4123 4123 \n",
+ "unique 217 217 7 5 \n",
+ "top Afghanistan AFG Europe & Central Asia High income \n",
+ "freq 19 19 1102 1558 \n",
+ "\n",
+ " Lending Type \n",
+ "count 4123 \n",
+ "unique 4 \n",
+ "top Not classified \n",
+ "freq 1387 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Country | \n",
+ " Country Code | \n",
+ " Region | \n",
+ " Income Group | \n",
+ " Lending Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 4123 | \n",
+ " 4123 | \n",
+ " 4123 | \n",
+ " 4123 | \n",
+ " 4123 | \n",
+ "
\n",
+ " \n",
+ " unique | \n",
+ " 217 | \n",
+ " 217 | \n",
+ " 7 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " top | \n",
+ " Afghanistan | \n",
+ " AFG | \n",
+ " Europe & Central Asia | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " freq | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 1102 | \n",
+ " 1558 | \n",
+ " 1387 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 217,\n \"19\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 217,\n \"19\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 7,\n \"1102\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n \"1558\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 4,\n \"1387\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Same summary of the string columns, transposed so each column becomes one row.\n",
+ "df.describe(include=\"object\").T"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "5bY7fPmAqVlh",
+ "outputId": "57dc1947-7ab4-4395-8b0a-56c3571d9a78"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " count unique top freq\n",
+ "Country 4123 217 Afghanistan 19\n",
+ "Country Code 4123 217 AFG 19\n",
+ "Region 4123 7 Europe & Central Asia 1102\n",
+ "Income Group 4123 5 High income 1558\n",
+ "Lending Type 4123 4 Not classified 1387"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ " unique | \n",
+ " top | \n",
+ " freq | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Country | \n",
+ " 4123 | \n",
+ " 217 | \n",
+ " Afghanistan | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " Country Code | \n",
+ " 4123 | \n",
+ " 217 | \n",
+ " AFG | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " Region | \n",
+ " 4123 | \n",
+ " 7 | \n",
+ " Europe & Central Asia | \n",
+ " 1102 | \n",
+ "
\n",
+ " \n",
+ " Income Group | \n",
+ " 4123 | \n",
+ " 5 | \n",
+ " High income | \n",
+ " 1558 | \n",
+ "
\n",
+ " \n",
+ " Lending Type | \n",
+ " 4123 | \n",
+ " 4 | \n",
+ " Not classified | \n",
+ " 1387 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"count\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"4123\",\n \"max\": \"4123\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"unique\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": 4,\n \"max\": 217,\n \"num_unique_values\": 4,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"top\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"AFG\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"freq\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"19\",\n \"max\": \"1558\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"1102\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Section Two\n",
+ "\n",
+ "Compute quartiles and transform a numerical column into a categorical column based on the quartiles."
+ ],
+ "metadata": {
+ "id": "HgkmVl2Nr9W1"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def assign_quartile(gdp, q1=1862.89, q2=6048.30, q3=22137.31):\n",
+ "    \"\"\"Label a GDP-per-capita value with the quartile band it falls into.\n",
+ "\n",
+ "    The default cut-offs are the 25%, 50% and 75% values that df.describe()\n",
+ "    reports for the GDP per Capita column, rounded to two decimals.\n",
+ "\n",
+ "    Args:\n",
+ "        gdp: GDP per capita as a number; None/NaN when the value is missing.\n",
+ "        q1: Inclusive upper bound of the 1st quartile.\n",
+ "        q2: Inclusive upper bound of the 2nd quartile.\n",
+ "        q3: Inclusive upper bound of the 3rd quartile.\n",
+ "\n",
+ "    Returns:\n",
+ "        One of the four quartile labels, or None when gdp is missing.\n",
+ "    \"\"\"\n",
+ "    # NaN compares False against every threshold, so without this guard a\n",
+ "    # missing value would fall through to the last branch and be mislabelled\n",
+ "    # as 4th quartile.\n",
+ "    if pd.isna(gdp):\n",
+ "        return None\n",
+ "    # Each guard only runs when the previous one failed, so the lower bound\n",
+ "    # of every band is implied and does not need to be re-tested.\n",
+ "    if gdp <= q1:\n",
+ "        return \"1st Quartile (<=25%)\"\n",
+ "    if gdp <= q2:\n",
+ "        return \"2nd Quartile (25-50%)\"\n",
+ "    if gdp <= q3:\n",
+ "        return \"3rd Quartile (50-75%)\"\n",
+ "    return \"4th Quartile (>75%)\"\n",
+ "\n",
+ "\n",
+ "# Spot-check: 14294.25 lies above the 50% cut-off (6048.30) and below the 75% cut-off\n",
+ "# (22137.31), so the expected label is the 3rd quartile.\n",
+ "hungary = assign_quartile(14294.25)\n",
+ "hungary"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "n6WYClMCq2oF",
+ "outputId": "c0686b53-2f40-4857-a3ea-5de0a58c3b89"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'3rd Quartile (50-75%)'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Boolean-mask filter: keep only the rows whose Country Code equals USA, then display them.\n",
+ "df_usa = df[df[\"Country Code\"] == \"USA\"]\n",
+ "df_usa"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 645
+ },
+ "id": "X1407yvBuihV",
+ "outputId": "faa01e20-9193-4fe7-8361-9061cbadee98"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per Capita Life Expectancy Population \\\n",
+ "3914 2004 United States 41724.631629 77.487805 292805298.0 \n",
+ "3915 2005 United States 44123.407068 77.487805 295516599.0 \n",
+ "3916 2006 United States 46302.000880 77.687805 298379912.0 \n",
+ "3917 2007 United States 48050.223777 77.987805 301231207.0 \n",
+ "3918 2008 United States 48570.045980 78.039024 304093966.0 \n",
+ "3919 2009 United States 47194.943355 78.390244 306771529.0 \n",
+ "3920 2010 United States 48650.643128 78.541463 309327143.0 \n",
+ "3921 2011 United States 50065.966504 78.641463 311583481.0 \n",
+ "3922 2012 United States 51784.418574 78.741463 313877662.0 \n",
+ "3923 2013 United States 53291.127689 78.741463 316059947.0 \n",
+ "3924 2014 United States 55123.849787 78.841463 318386329.0 \n",
+ "3925 2015 United States 56762.729452 78.690244 320738994.0 \n",
+ "3926 2016 United States 57866.744934 78.539024 323071755.0 \n",
+ "3927 2017 United States 59907.754261 78.539024 325122128.0 \n",
+ "3928 2018 United States 62823.309438 78.639024 326838199.0 \n",
+ "3929 2019 United States 65120.394663 78.787805 328329953.0 \n",
+ "3930 2020 United States 63528.634303 76.980488 331511512.0 \n",
+ "3931 2021 United States 70219.472454 76.329268 332031554.0 \n",
+ "3932 2022 United States 76329.582265 NaN 333287557.0 \n",
+ "\n",
+ " Country Code Region Income Group Lending Type \n",
+ "3914 USA North America High income Not classified \n",
+ "3915 USA North America High income Not classified \n",
+ "3916 USA North America High income Not classified \n",
+ "3917 USA North America High income Not classified \n",
+ "3918 USA North America High income Not classified \n",
+ "3919 USA North America High income Not classified \n",
+ "3920 USA North America High income Not classified \n",
+ "3921 USA North America High income Not classified \n",
+ "3922 USA North America High income Not classified \n",
+ "3923 USA North America High income Not classified \n",
+ "3924 USA North America High income Not classified \n",
+ "3925 USA North America High income Not classified \n",
+ "3926 USA North America High income Not classified \n",
+ "3927 USA North America High income Not classified \n",
+ "3928 USA North America High income Not classified \n",
+ "3929 USA North America High income Not classified \n",
+ "3930 USA North America High income Not classified \n",
+ "3931 USA North America High income Not classified \n",
+ "3932 USA North America High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Country | \n",
+ " GDP per Capita | \n",
+ " Life Expectancy | \n",
+ " Population | \n",
+ " Country Code | \n",
+ " Region | \n",
+ " Income Group | \n",
+ " Lending Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3914 | \n",
+ " 2004 | \n",
+ " United States | \n",
+ " 41724.631629 | \n",
+ " 77.487805 | \n",
+ " 292805298.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3915 | \n",
+ " 2005 | \n",
+ " United States | \n",
+ " 44123.407068 | \n",
+ " 77.487805 | \n",
+ " 295516599.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3916 | \n",
+ " 2006 | \n",
+ " United States | \n",
+ " 46302.000880 | \n",
+ " 77.687805 | \n",
+ " 298379912.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3917 | \n",
+ " 2007 | \n",
+ " United States | \n",
+ " 48050.223777 | \n",
+ " 77.987805 | \n",
+ " 301231207.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3918 | \n",
+ " 2008 | \n",
+ " United States | \n",
+ " 48570.045980 | \n",
+ " 78.039024 | \n",
+ " 304093966.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3919 | \n",
+ " 2009 | \n",
+ " United States | \n",
+ " 47194.943355 | \n",
+ " 78.390244 | \n",
+ " 306771529.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3920 | \n",
+ " 2010 | \n",
+ " United States | \n",
+ " 48650.643128 | \n",
+ " 78.541463 | \n",
+ " 309327143.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3921 | \n",
+ " 2011 | \n",
+ " United States | \n",
+ " 50065.966504 | \n",
+ " 78.641463 | \n",
+ " 311583481.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3922 | \n",
+ " 2012 | \n",
+ " United States | \n",
+ " 51784.418574 | \n",
+ " 78.741463 | \n",
+ " 313877662.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3923 | \n",
+ " 2013 | \n",
+ " United States | \n",
+ " 53291.127689 | \n",
+ " 78.741463 | \n",
+ " 316059947.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3924 | \n",
+ " 2014 | \n",
+ " United States | \n",
+ " 55123.849787 | \n",
+ " 78.841463 | \n",
+ " 318386329.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3925 | \n",
+ " 2015 | \n",
+ " United States | \n",
+ " 56762.729452 | \n",
+ " 78.690244 | \n",
+ " 320738994.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3926 | \n",
+ " 2016 | \n",
+ " United States | \n",
+ " 57866.744934 | \n",
+ " 78.539024 | \n",
+ " 323071755.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3927 | \n",
+ " 2017 | \n",
+ " United States | \n",
+ " 59907.754261 | \n",
+ " 78.539024 | \n",
+ " 325122128.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3928 | \n",
+ " 2018 | \n",
+ " United States | \n",
+ " 62823.309438 | \n",
+ " 78.639024 | \n",
+ " 326838199.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3929 | \n",
+ " 2019 | \n",
+ " United States | \n",
+ " 65120.394663 | \n",
+ " 78.787805 | \n",
+ " 328329953.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3930 | \n",
+ " 2020 | \n",
+ " United States | \n",
+ " 63528.634303 | \n",
+ " 76.980488 | \n",
+ " 331511512.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3931 | \n",
+ " 2021 | \n",
+ " United States | \n",
+ " 70219.472454 | \n",
+ " 76.329268 | \n",
+ " 332031554.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ " 3932 | \n",
+ " 2022 | \n",
+ " United States | \n",
+ " 76329.582265 | \n",
+ " NaN | \n",
+ " 333287557.0 | \n",
+ " USA | \n",
+ " North America | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_usa",
+ "summary": "{\n \"name\": \"df_usa\",\n \"rows\": 19,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 2004,\n \"max\": 2022,\n \"num_unique_values\": 19,\n \"samples\": [\n 2004,\n 2009,\n 2015\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"United States\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9299.742535088688,\n \"min\": 41724.6316287624,\n \"max\": 76329.5822652029,\n \"num_unique_values\": 19,\n \"samples\": [\n 41724.6316287624\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7143185960056724,\n \"min\": 76.3292682926829,\n \"max\": 78.8414634146341,\n \"num_unique_values\": 15,\n \"samples\": [\n 78.690243902439\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12903610.421862261,\n \"min\": 292805298.0,\n \"max\": 333287557.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 292805298.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"USA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"North America\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"High income\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "assign_quartile(76329.58)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "tQnm_KEeu0vi",
+ "outputId": "adb60008-bdc9-453a-c40c-e835c8d9cd75"
+ },
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'4th Quartile (>75%)'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2020 = df[df[\"Year\"] == 2020]\n",
+ "print(df_2020.shape[0])\n",
+ "print(f\"There are {df_2020.shape[0]} countries in the 2020 dataset\")\n",
+ "df_2020.sample(3)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 178
+ },
+ "id": "CFCCKy52vITP",
+ "outputId": "e804911a-8b3a-44fb-8968-f3c75f24dfeb"
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "217\n",
+ "There are 217 countries in the 2020 dataset\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per Capita Life Expectancy Population \\\n",
+ "3531 2020 Sudan 608.332520 65.614000 44440486.0 \n",
+ "1574 2020 Guinea-Bissau 710.258133 59.999000 2015828.0 \n",
+ "1004 2020 Czechia 22992.879383 78.226829 10697858.0 \n",
+ "\n",
+ " Country Code Region Income Group Lending Type \n",
+ "3531 SDN Sub-Saharan Africa Low income IDA \n",
+ "1574 GNB Sub-Saharan Africa Low income IDA \n",
+ "1004 CZE Europe & Central Asia High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " Country | \n",
+ " GDP per Capita | \n",
+ " Life Expectancy | \n",
+ " Population | \n",
+ " Country Code | \n",
+ " Region | \n",
+ " Income Group | \n",
+ " Lending Type | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3531 | \n",
+ " 2020 | \n",
+ " Sudan | \n",
+ " 608.332520 | \n",
+ " 65.614000 | \n",
+ " 44440486.0 | \n",
+ " SDN | \n",
+ " Sub-Saharan Africa | \n",
+ " Low income | \n",
+ " IDA | \n",
+ "
\n",
+ " \n",
+ " 1574 | \n",
+ " 2020 | \n",
+ " Guinea-Bissau | \n",
+ " 710.258133 | \n",
+ " 59.999000 | \n",
+ " 2015828.0 | \n",
+ " GNB | \n",
+ " Sub-Saharan Africa | \n",
+ " Low income | \n",
+ " IDA | \n",
+ "
\n",
+ " \n",
+ " 1004 | \n",
+ " 2020 | \n",
+ " Czechia | \n",
+ " 22992.879383 | \n",
+ " 78.226829 | \n",
+ " 10697858.0 | \n",
+ " CZE | \n",
+ " Europe & Central Asia | \n",
+ " High income | \n",
+ " Not classified | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_2020\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2020,\n \"max\": 2020,\n \"num_unique_values\": 1,\n \"samples\": [\n 2020\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Sudan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12894.401478389347,\n \"min\": 608.33251953125,\n \"max\": 22992.8793833348,\n \"num_unique_values\": 3,\n \"samples\": [\n 608.33251953125\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9.335107988887337,\n \"min\": 59.999,\n \"max\": 78.2268292682927,\n \"num_unique_values\": 3,\n \"samples\": [\n 65.614\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22412028.748286963,\n \"min\": 2015828.0,\n \"max\": 44440486.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 44440486.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"SDN\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Tuple\n",
+ "df_2020.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oWNY3dBIvbTP",
+ "outputId": "5328eec1-0c83-43a3-a3bf-951583b47c00"
+ },
+ "execution_count": 22,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(217, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 22
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## In-class Exercise\n",
+ "\n",
+ "Create a new column called \"GDP Quartile\" in the `df_2020` dataframe and assign each country the quartile it belongs to by applying the `assign_quartile()` function to its \"GDP per Capita\" value.\n"
+ ],
+ "metadata": {
+ "id": "JJC4nFU9wC-s"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "2RTn_Z2wv_Ss"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework Question 1\n",
+ "\n",
+ "Save the summary statistics of all numerical columns and all categorical columns to a single Excel spreadsheet file with two worksheets: one for the numerical columns and one for the categorical columns."
+ ],
+ "metadata": {
+ "id": "oTB9WwHgq-_s"
+ }
+ }
+ ]
+}
\ No newline at end of file