diff --git a/fall2024/oct_16_session.ipynb b/fall2024/oct_16_session.ipynb new file mode 100644 index 0000000..d234ec1 --- /dev/null +++ b/fall2024/oct_16_session.ipynb @@ -0,0 +1,3421 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyOpuaXhqrFc7vH6V6jrPoOP", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# 10/16/2024" + ], + "metadata": { + "id": "dYa59UPAm69U" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 230 + }, + "id": "miyRtEbHm2tR", + "outputId": "c394ccf4-4acd-472e-f1e0-3208bd06731a" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(4123, 9)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Year Country GDP per capita (current US$) \\\n", + "301 2020 Bangladesh 2233.305901 \n", + "2873 2008 Panama 7197.110786 \n", + "186 2019 Aruba 31902.809818 \n", + "\n", + " Life expectancy at birth, total (years) Population, total Country Code \\\n", + "301 71.968 167420951.0 BGD \n", + "2873 76.225 3495276.0 PAN \n", + "186 76.248 106442.0 ABW \n", + "\n", + " Region Income Group Lending Type \n", + "301 South Asia Lower middle income IDA \n", + "2873 Latin America & Caribbean High income IBRD \n", + "186 Latin America & Caribbean High income Not classified " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearCountryGDP per capita (current US$)Life expectancy at birth, total (years)Population, totalCountry CodeRegionIncome GroupLending Type
3012020Bangladesh2233.30590171.968167420951.0BGDSouth AsiaLower middle incomeIDA
28732008Panama7197.11078676.2253495276.0PANLatin America & CaribbeanHigh incomeIBRD
1862019Aruba31902.80981876.248106442.0ABWLatin America & CaribbeanHigh incomeNot classified
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2008,\n \"max\": 2020,\n \"num_unique_values\": 3,\n \"samples\": [\n 2020,\n 2008,\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Bangladesh\",\n \"Panama\",\n \"Aruba\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 15891.771383786097,\n \"min\": 2233.30590129762,\n \"max\": 31902.8098183195,\n \"num_unique_values\": 3,\n \"samples\": [\n 2233.30590129762,\n 7197.11078610101,\n 31902.8098183195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4644464557651324,\n \"min\": 71.968,\n \"max\": 76.248,\n \"num_unique_values\": 3,\n \"samples\": [\n 71.968,\n 76.225,\n 76.248\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 95635816.20048375,\n \"min\": 106442.0,\n \"max\": 167420951.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 167420951.0,\n 3495276.0,\n 106442.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"BGD\",\n \"PAN\",\n \"ABW\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Latin America & Caribbean \",\n \"South Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\",\n \"Lower middle income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"IDA\",\n \"IBRD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 1 + } + ], + "source": [ + "import pandas as pd\n", + "import plotly.express as px\n", + "\n", + "pd.set_option(\"display.max_rows\", None)\n", + "\n", + "df = pd.read_csv(\"https://raw.githubusercontent.com/wcj365/python-stats-dataviz/refs/heads/master/fall2024/data/World_Development_Indicators_(WDI).csv\")\n", + "\n", + "print(df.shape)\n", + "df.sample(3)" + ] + }, + { + "cell_type": "code", + "source": [ + "column_names_dict = {\n", + " \"GDP per capita (current US$)\" : \"GDP per Capita\",\n", + " \"Life expectancy at birth, total (years)\" : \"Life Expectancy\",\n", + " \"Population, total\" : \"Population\"\n", + "}\n", + "\n", + "df = df.rename(columns=column_names_dict)\n", + "df.sample(3)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "5jsZ2HlMoBgO", + "outputId": "380916dc-a028-4990-acaa-e9053d17766f" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Year Country GDP per Capita Life Expectancy Population Country Code \\\n", + "1663 2014 Hungary 14294.258418 75.763415 9866468.0 HUN \n", + "1591 2018 Guyana 6094.909837 68.896000 785514.0 GUY \n", + "2456 2009 Moldova 1898.439757 69.573000 2865213.0 MDA \n", + "\n", + " Region Income Group Lending Type \n", + "1663 Europe & Central Asia High income Not classified \n", + "1591 Latin America & Caribbean High income IDA \n", + "2456 Europe & Central Asia Upper middle income IBRD " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearCountryGDP per CapitaLife ExpectancyPopulationCountry CodeRegionIncome GroupLending Type
16632014Hungary14294.25841875.7634159866468.0HUNEurope & Central AsiaHigh incomeNot classified
15912018Guyana6094.90983768.896000785514.0GUYLatin America & CaribbeanHigh incomeIDA
24562009Moldova1898.43975769.5730002865213.0MDAEurope & Central AsiaUpper middle incomeIBRD
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 2009,\n \"max\": 2018,\n \"num_unique_values\": 3,\n \"samples\": [\n 2014,\n 2018,\n 2009\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Hungary\",\n \"Guyana\",\n \"Moldova\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6304.707217132986,\n \"min\": 1898.43975710988,\n \"max\": 14294.2584180751,\n \"num_unique_values\": 3,\n \"samples\": [\n 14294.2584180751,\n 6094.90983678918,\n 1898.43975710988\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.7846388224844634,\n \"min\": 68.896,\n \"max\": 75.7634146341463,\n \"num_unique_values\": 3,\n \"samples\": [\n 75.7634146341463,\n 68.896,\n 69.573\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4757563.1902572075,\n \"min\": 785514.0,\n \"max\": 9866468.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 9866468.0,\n 785514.0,\n 2865213.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"HUN\",\n \"GUY\",\n \"MDA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Latin America & Caribbean \",\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Upper middle income\",\n \"High income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Not classified\",\n \"IDA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.sample(3).T" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "jWfgC1osqnxg", + "outputId": "f3cf3399-7bca-477e-a7e7-45d865daf01f" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 64 2542 \\\n", + "Year 2011 2019 \n", + "Country American Samoa Morocco \n", + "GDP per Capita 10495.304732 3498.582764 \n", + "Life Expectancy NaN 74.27 \n", + "Population 54310.0 36304408.0 \n", + "Country Code ASM MAR \n", + "Region East Asia & Pacific Middle East & North Africa \n", + "Income Group High income Lower middle income \n", + "Lending Type Not classified IBRD \n", + "\n", + " 2167 \n", + "Year 2005 \n", + "Country Liechtenstein \n", + "GDP per Capita 105751.498913 \n", + "Life Expectancy 80.668293 \n", + "Population 34603.0 \n", + "Country Code LIE \n", + "Region Europe & Central Asia \n", + "Income Group High income \n", + "Lending Type Not classified " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
6425422167
Year201120192005
CountryAmerican SamoaMoroccoLiechtenstein
GDP per Capita10495.3047323498.582764105751.498913
Life ExpectancyNaN74.2780.668293
Population54310.036304408.034603.0
Country CodeASMMARLIE
RegionEast Asia & PacificMiddle East & North AfricaEurope & Central Asia
Income GroupHigh incomeLower middle incomeHigh income
Lending TypeNot classifiedIBRDNot classified
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": 64,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"American Samoa\",\n \"East Asia & Pacific\",\n 2011\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2542,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Lower middle income\",\n \"Morocco\",\n \"MAR\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2167,\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"High income\",\n \"Liechtenstein\",\n \"LIE\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 15 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Section One\n", + "\n", + "Summary Statistics with describe() function" + ], + "metadata": { + "id": "7xNKKN0sryqI" + } + }, + { + "cell_type": "code", + "source": [ + "df[[\"Population\",\"Life Expectancy\"]].describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "Z-sGYgrInbnV", + "outputId": "b8c0e8d0-8ede-468a-90e8-7ae7151e0528" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Population Life Expectancy\n", + "count 4.123000e+03 3777.000000\n", + "mean 3.319575e+07 71.060853\n", + "std 1.316430e+08 8.499806\n", + "min 9.791000e+03 42.125000\n", + "25% 7.436200e+05 65.351000\n", + "50% 5.872624e+06 72.765000\n", + "75% 2.148494e+07 77.529000\n", + "max 1.417173e+09 85.497561" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PopulationLife Expectancy
count4.123000e+033777.000000
mean3.319575e+0771.060853
std1.316430e+088.499806
min9.791000e+0342.125000
25%7.436200e+0565.351000
50%5.872624e+0672.765000
75%2.148494e+0777.529000
max1.417173e+0985.497561
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df[[\\\"Population\\\",\\\"Life Expectancy\\\"]]\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 493275444.7155919,\n \"min\": 4123.0,\n \"max\": 1417173173.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 33195748.93936454,\n 5872624.0,\n 4123.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1314.2456744171654,\n \"min\": 8.499806399270673,\n \"max\": 3777.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 71.06085323233692,\n 72.765,\n 3777.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 8 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.describe()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "rinla82UqOFO", + "outputId": "fe432e64-d97a-4676-9a30-e0cadf2b1134" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Year GDP per Capita Life Expectancy Population\n", + "count 4123.00000 3962.000000 3777.000000 4.123000e+03\n", + "mean 2013.00000 17007.692848 71.060853 3.319575e+07\n", + "std 5.47789 25733.109164 8.499806 1.316430e+08\n", + "min 2004.00000 128.538423 42.125000 9.791000e+03\n", + "25% 2008.00000 1862.892400 65.351000 7.436200e+05\n", + "50% 2013.00000 6048.304202 72.765000 5.872624e+06\n", + "75% 2018.00000 22137.309568 77.529000 2.148494e+07\n", + "max 2022.00000 240862.182448 85.497561 1.417173e+09" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearGDP per CapitaLife ExpectancyPopulation
count4123.000003962.0000003777.0000004.123000e+03
mean2013.0000017007.69284871.0608533.319575e+07
std5.4778925733.1091648.4998061.316430e+08
min2004.00000128.53842342.1250009.791000e+03
25%2008.000001862.89240065.3510007.436200e+05
50%2013.000006048.30420272.7650005.872624e+06
75%2018.0000022137.30956877.5290002.148494e+07
max2022.00000240862.18244885.4975611.417173e+09
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1100.7234184651504,\n \"min\": 5.477889924083767,\n \"max\": 4123.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 4123.0,\n 2013.0,\n 2018.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 81846.43514470861,\n \"min\": 128.538422519456,\n \"max\": 240862.18244774,\n \"num_unique_values\": 8,\n \"samples\": [\n 17007.692847815495,\n 6048.30420227805,\n 3962.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1314.2456744171654,\n \"min\": 8.499806399270673,\n \"max\": 3777.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 71.06085323233692,\n 72.765,\n 3777.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 493275444.7155919,\n \"min\": 4123.0,\n \"max\": 1417173173.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 33195748.93936454,\n 5872624.0,\n 4123.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 12 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.describe(include=\"object\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 174 + }, + "id": "Z9hiipFcpebK", + "outputId": "c23fb0b2-5922-4a63-cde7-fc50ef08e4cb" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Country Country Code Region Income Group \\\n", + "count 4123 4123 4123 4123 \n", + "unique 217 217 7 5 \n", + "top Afghanistan AFG Europe & Central Asia High income \n", + "freq 19 19 1102 1558 \n", + "\n", + " Lending Type \n", + "count 4123 \n", + "unique 4 \n", + "top Not classified \n", + "freq 1387 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CountryCountry CodeRegionIncome GroupLending Type
count41234123412341234123
unique217217754
topAfghanistanAFGEurope & Central AsiaHigh incomeNot classified
freq1919110215581387
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 217,\n \"19\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 217,\n \"19\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 7,\n \"1102\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n \"1558\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n 4,\n \"1387\",\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.describe(include=\"object\").T" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "5bY7fPmAqVlh", + "outputId": "57dc1947-7ab4-4395-8b0a-56c3571d9a78" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " count unique top freq\n", + "Country 4123 217 Afghanistan 19\n", + "Country Code 4123 217 AFG 19\n", + "Region 4123 7 Europe & Central Asia 1102\n", + "Income Group 4123 5 High income 1558\n", + "Lending Type 4123 4 Not classified 1387" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countuniquetopfreq
Country4123217Afghanistan19
Country Code4123217AFG19
Region41237Europe & Central Asia1102
Income Group41235High income1558
Lending Type41234Not classified1387
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"count\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"4123\",\n \"max\": \"4123\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"4123\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"unique\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": 4,\n \"max\": 217,\n \"num_unique_values\": 4,\n \"samples\": [\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"top\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"AFG\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"freq\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"19\",\n \"max\": \"1558\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"1102\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Section Two\n", + "\n", + "Compute quartiles and transform a numerical column into a categorical column based on the quartiles." + ], + "metadata": { + "id": "HgkmVl2Nr9W1" + } + }, + { + "cell_type": "code", + "source": [ + "def assign_quartile(gdp):\n", + " if gdp <= 1862.89:\n", + " quartile = \"1st Quartile (<=25%)\"\n", + " elif gdp > 1862.89 and gdp <= 6048.30:\n", + " quartile = \"2nd Quartile (25-50%)\"\n", + " elif gdp > 6048.30 and gdp <= 22137.31:\n", + " quartile = \"3rd Quartile (50-75%)\"\n", + " else:\n", + " quartile = \"4th Quartile (>75%)\"\n", + " return quartile\n", + "\n", + "\n", + "hungary = assign_quartile(14294.25)\n", + "hungary" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "n6WYClMCq2oF", + "outputId": "c0686b53-2f40-4857-a3ea-5de0a58c3b89" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'3rd Quartile (50-75%)'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df_usa = df[df[\"Country Code\"] == \"USA\"]\n", + "df_usa" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 645 + }, + "id": "X1407yvBuihV", + "outputId": "faa01e20-9193-4fe7-8361-9061cbadee98" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Year Country GDP per Capita Life Expectancy Population \\\n", + "3914 2004 United States 41724.631629 77.487805 292805298.0 \n", + "3915 2005 United States 44123.407068 77.487805 295516599.0 \n", + "3916 2006 United States 46302.000880 77.687805 298379912.0 \n", + "3917 2007 United States 48050.223777 77.987805 301231207.0 \n", + "3918 2008 United States 48570.045980 78.039024 304093966.0 \n", + "3919 2009 United States 47194.943355 78.390244 306771529.0 \n", + "3920 2010 United States 48650.643128 78.541463 309327143.0 \n", + "3921 2011 United States 50065.966504 78.641463 311583481.0 \n", + "3922 2012 United States 51784.418574 78.741463 313877662.0 \n", + "3923 2013 United States 53291.127689 78.741463 316059947.0 \n", + "3924 2014 United States 55123.849787 78.841463 318386329.0 \n", + "3925 2015 United States 56762.729452 78.690244 320738994.0 \n", + "3926 2016 United States 57866.744934 78.539024 323071755.0 \n", + "3927 2017 United States 59907.754261 78.539024 325122128.0 \n", + "3928 2018 United States 62823.309438 78.639024 326838199.0 \n", + "3929 2019 United States 65120.394663 78.787805 328329953.0 \n", + "3930 2020 United States 63528.634303 76.980488 331511512.0 \n", + "3931 2021 United States 70219.472454 76.329268 332031554.0 \n", + "3932 2022 United States 76329.582265 NaN 333287557.0 \n", + "\n", + " Country Code Region Income Group Lending Type \n", + "3914 USA North America High income Not classified \n", + "3915 USA North America High income Not classified \n", + "3916 USA North America High income Not classified \n", + "3917 USA North America High income Not classified \n", + "3918 USA North America High income Not classified \n", + "3919 USA North America High income Not classified \n", + "3920 USA North America High income Not classified \n", + "3921 USA North America High income Not classified \n", + "3922 USA North America High income Not classified \n", + "3923 USA North America High income Not classified \n", + "3924 USA North America High income Not classified \n", + "3925 USA North America High income Not classified \n", + "3926 USA North America High income Not classified \n", + "3927 USA North America High income Not classified \n", + "3928 USA North America High income Not classified \n", + "3929 USA North America High income Not classified \n", + "3930 USA North America High income Not classified \n", + "3931 USA North America High income Not classified \n", + "3932 USA North America High income Not classified " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearCountryGDP per CapitaLife ExpectancyPopulationCountry CodeRegionIncome GroupLending Type
39142004United States41724.63162977.487805292805298.0USANorth AmericaHigh incomeNot classified
39152005United States44123.40706877.487805295516599.0USANorth AmericaHigh incomeNot classified
39162006United States46302.00088077.687805298379912.0USANorth AmericaHigh incomeNot classified
39172007United States48050.22377777.987805301231207.0USANorth AmericaHigh incomeNot classified
39182008United States48570.04598078.039024304093966.0USANorth AmericaHigh incomeNot classified
39192009United States47194.94335578.390244306771529.0USANorth AmericaHigh incomeNot classified
39202010United States48650.64312878.541463309327143.0USANorth AmericaHigh incomeNot classified
39212011United States50065.96650478.641463311583481.0USANorth AmericaHigh incomeNot classified
39222012United States51784.41857478.741463313877662.0USANorth AmericaHigh incomeNot classified
39232013United States53291.12768978.741463316059947.0USANorth AmericaHigh incomeNot classified
39242014United States55123.84978778.841463318386329.0USANorth AmericaHigh incomeNot classified
39252015United States56762.72945278.690244320738994.0USANorth AmericaHigh incomeNot classified
39262016United States57866.74493478.539024323071755.0USANorth AmericaHigh incomeNot classified
39272017United States59907.75426178.539024325122128.0USANorth AmericaHigh incomeNot classified
39282018United States62823.30943878.639024326838199.0USANorth AmericaHigh incomeNot classified
39292019United States65120.39466378.787805328329953.0USANorth AmericaHigh incomeNot classified
39302020United States63528.63430376.980488331511512.0USANorth AmericaHigh incomeNot classified
39312021United States70219.47245476.329268332031554.0USANorth AmericaHigh incomeNot classified
39322022United States76329.582265NaN333287557.0USANorth AmericaHigh incomeNot classified
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df_usa", + "summary": "{\n \"name\": \"df_usa\",\n \"rows\": 19,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 2004,\n \"max\": 2022,\n \"num_unique_values\": 19,\n \"samples\": [\n 2004,\n 2009,\n 2015\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"United States\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9299.742535088688,\n \"min\": 41724.6316287624,\n \"max\": 76329.5822652029,\n \"num_unique_values\": 19,\n \"samples\": [\n 41724.6316287624\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7143185960056724,\n \"min\": 76.3292682926829,\n \"max\": 78.8414634146341,\n \"num_unique_values\": 15,\n \"samples\": [\n 78.690243902439\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12903610.421862261,\n \"min\": 292805298.0,\n \"max\": 333287557.0,\n \"num_unique_values\": 19,\n \"samples\": [\n 292805298.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"USA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"North America\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"High income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "assign_quartile(76329.58)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "tQnm_KEeu0vi", + "outputId": "adb60008-bdc9-453a-c40c-e835c8d9cd75" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'4th Quartile (>75%)'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df_2020 = df[df[\"Year\"] == 2020]\n", + "print(df_2020.shape[0])\n", + "print(f\"There are {df_2020.shape[0]} countries in the 2020 dataset\")\n", + "df_2020.sample(3)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 178 + }, + "id": "CFCCKy52vITP", + "outputId": "e804911a-8b3a-44fb-8968-f3c75f24dfeb" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "217\n", + "There are 217 countries in the 2020 dataset\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Year Country GDP per Capita Life Expectancy Population \\\n", + "3531 2020 Sudan 608.332520 65.614000 44440486.0 \n", + "1574 2020 Guinea-Bissau 710.258133 59.999000 2015828.0 \n", + "1004 2020 Czechia 22992.879383 78.226829 10697858.0 \n", + "\n", + " Country Code Region Income Group Lending Type \n", + "3531 SDN Sub-Saharan Africa Low income IDA \n", + "1574 GNB Sub-Saharan Africa Low income IDA \n", + "1004 CZE Europe & Central Asia High income Not classified " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearCountryGDP per CapitaLife ExpectancyPopulationCountry CodeRegionIncome GroupLending Type
35312020Sudan608.33252065.61400044440486.0SDNSub-Saharan AfricaLow incomeIDA
15742020Guinea-Bissau710.25813359.9990002015828.0GNBSub-Saharan AfricaLow incomeIDA
10042020Czechia22992.87938378.22682910697858.0CZEEurope & Central AsiaHigh incomeNot classified
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"df_2020\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2020,\n \"max\": 2020,\n \"num_unique_values\": 1,\n \"samples\": [\n 2020\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Sudan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per Capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12894.401478389347,\n \"min\": 608.33251953125,\n \"max\": 22992.8793833348,\n \"num_unique_values\": 3,\n \"samples\": [\n 608.33251953125\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life Expectancy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9.335107988887337,\n \"min\": 59.999,\n \"max\": 78.2268292682927,\n \"num_unique_values\": 3,\n \"samples\": [\n 65.614\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22412028.748286963,\n \"min\": 2015828.0,\n \"max\": 44440486.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 44440486.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"SDN\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Tuple\n", + "df_2020.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "oWNY3dBIvbTP", + "outputId": "5328eec1-0c83-43a3-a3bf-951583b47c00" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(217, 9)" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## In-class Exercise\n", + "\n", + "Create a new column call \"GDP Quartile\" in the df_2020 dataframe and assign each country the quartile it belongs to by applying the assign_quartile() function.\n" + ], + "metadata": { + "id": "JJC4nFU9wC-s" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "2RTn_Z2wv_Ss" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Homework Question 1\n", + "\n", + "Save the summary statistics of all numerical columns and all categorical columns to only one Excel spreadsheet file with two worksheets, one for numerical and one for categorical columns." + ], + "metadata": { + "id": "oTB9WwHgq-_s" + } + } + ] +} \ No newline at end of file