diff --git a/content/notebooks/5. More Pandas.ipynb b/content/notebooks/5. More Pandas.ipynb index a929777..65f1edb 100644 --- a/content/notebooks/5. More Pandas.ipynb +++ b/content/notebooks/5. More Pandas.ipynb @@ -1078,9 +1078,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "df['city'].value_counts()" - ] + "source": [] }, { "cell_type": "code", diff --git a/content/solutions/04_06.py b/content/solutions/04_06.py index df28f41..9519c1a 100644 --- a/content/solutions/04_06.py +++ b/content/solutions/04_06.py @@ -9,4 +9,9 @@ def f(x): """Returns the argument multiplied by 3 and increased by 10.""" return (x * 3) + 10 -print(f(4)) # x = 2 is arbitrary. Try with other values. \ No newline at end of file + +print('def f(x):') +print('\t"""Returns the argument multiplied by 3 and increased by 10."""') +print('\treturn (x * 3) + 10\n') + +print('f(x) = ', f(4)) # x = 4 is arbitrary. Try with other values. \ No newline at end of file diff --git a/content/solutions/05_27.py b/content/solutions/05_27.py index 87c78b0..7426db5 100644 --- a/content/solutions/05_27.py +++ b/content/solutions/05_27.py @@ -1,3 +1,26 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + dict_codes = { "BG": "Bulgaria", "CZ": "Czech Republic", @@ -8,4 +31,6 @@ } country_in_codes = df["country"].isin(dict_codes.keys()) -df.loc[country_in_codes, "country"] = df.loc[country_in_codes, "country"].map(dict_codes) \ No newline at end of file +df.loc[country_in_codes, "country"] = df.loc[country_in_codes, "country"].map(dict_codes) + +print('df.loc[country_in_codes, "country"] = df.loc[country_in_codes, "country"].map(dict_codes)') \ No newline at end of file diff --git a/content/solutions/05_28.py b/content/solutions/05_28.py index e4b0e00..1acc786 100644 --- a/content/solutions/05_28.py +++ b/content/solutions/05_28.py @@ -1 +1,38 @@ -df.loc[df["city"] == "unknown", "country"] \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_codes = { + "BG": "Bulgaria", + "CZ": "Czech Republic", + "IT": "Italy", + "GR": "Greece", + "SI": "Slovenia", + "UK": "United Kingdom", +} + +country_in_codes = df["country"].isin(dict_codes.keys()) +df.loc[country_in_codes, "country"] = df.loc[country_in_codes, "country"].map(dict_codes) + +print('df.loc[df["city"] == "unknown", "country"]\n') + +display(df.loc[df["city"] == "unknown", "country"]) \ No newline at end of file diff --git a/content/solutions/05_29.py b/content/solutions/05_29.py index fc41745..a90e3df 100644 --- a/content/solutions/05_29.py +++ b/content/solutions/05_29.py @@ -1,3 +1,26 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + dict_capitals = { "Denmark": "copenhague", "France": "paris", @@ -7,4 +30,7 @@ } unknown_city = df["city"] == "unknown" -df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) \ No newline at end of file +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +print('unknown_city = df["city"] == "unknown"') +print('df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals)') \ No newline at end of file diff --git a/content/solutions/05_30.py b/content/solutions/05_30.py index 1f54a6c..cba83dd 100644 --- a/content/solutions/05_30.py +++ b/content/solutions/05_30.py @@ -1 +1,39 @@ -set(df["city"]) - dict_cities.keys() \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +print('set(df["city"]) - dict_cities.keys()\n') + +display(set(df["city"]) - dict_cities.keys()) diff --git a/content/solutions/05_31.py b/content/solutions/05_31.py index c4c9fec..2177ef0 100644 --- a/content/solutions/05_31.py +++ b/content/solutions/05_31.py @@ -1,3 +1,39 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + dict_cities.update( { "bristol": "United Kingdom", @@ -7,4 +43,6 @@ "murcia": "Spain", "parma": "Italy", }, -) \ No newline at end of file +) + +print("dict_cities.update(\n{\n\t\"bristol\": \"United Kingdom\",\n\t\"gothenburg\": \"Sweden\",\n\t\"graz\": \"Austria\",\n\t\"lyon\": \"France\",\n\t \"murcia\": \"Spain\",\n\t \"parma\": \"Italy\",\n},\n)\n") \ No newline at end of file diff --git a/content/solutions/05_32.py b/content/solutions/05_32.py index 3b4a6e2..3a0a599 100644 --- a/content/solutions/05_32.py +++ b/content/solutions/05_32.py @@ -1,2 +1,52 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + null_country = df["country"].isnull() -df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) \ No newline at end of file +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +print('null_country = df["country"].isnull()') +print('df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities)') \ No newline at end of file diff --git a/content/solutions/05_33.py b/content/solutions/05_33.py index 689a6ca..8085ccf 100644 --- a/content/solutions/05_33.py +++ b/content/solutions/05_33.py @@ -1 +1,53 @@ -df["country"].value_counts(dropna=False) \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +print('df["country"].value_counts(dropna=False)\n') + +display(df["country"].value_counts(dropna=False)) \ No newline at end of file diff --git a/content/solutions/05_34.py b/content/solutions/05_34.py index 63c70f8..eea2930 100644 --- a/content/solutions/05_34.py +++ b/content/solutions/05_34.py @@ -2,4 +2,12 @@ def f(x): if x == 1: return "single" else: - return "multiple" \ No newline at end of file + return "multiple" + +print('def f(x):') +print('\tif x == 1:') +print('\t\treturn "single"') +print('\telse:') +print('\t\treturn "multiple"\n') + +print('f(4) = ',f(4)) #x = 4 \ No newline at end of file diff --git a/content/solutions/05_35.py b/content/solutions/05_35.py index db3b33e..4e54e09 100644 --- a/content/solutions/05_35.py +++ b/content/solutions/05_35.py @@ -1 +1,62 @@ -df["Attendees"].apply(f) \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +print('df["Attendees"].apply(f)\n') + +display(df["Attendees"].apply(f)) diff --git a/content/solutions/05_36.py b/content/solutions/05_36.py index 82e7848..75ac002 100644 --- a/content/solutions/05_36.py +++ b/content/solutions/05_36.py @@ -1 +1,5 @@ -languages = pd.read_csv("../data/food_training/languages.csv") \ No newline at end of file +import pandas as pd + +languages = pd.read_csv("../data/food_training/languages.csv") + +print('languages = pd.read_csv("../data/food_training/languages.csv")') \ No newline at end of file diff --git a/content/solutions/05_37.py b/content/solutions/05_37.py index f712fd9..3277a91 100644 --- a/content/solutions/05_37.py +++ b/content/solutions/05_37.py @@ -1 +1,68 @@ -df = df.merge(languages, how="left", left_on="country", right_on="Country") \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +print('df = df.merge(languages, how="left", left_on="country", right_on="Country")\n') +display(df) \ No newline at end of file diff --git a/content/solutions/05_38.py b/content/solutions/05_38.py index f2614a1..4c27890 100644 --- a/content/solutions/05_38.py +++ b/content/solutions/05_38.py @@ -1,5 +1,75 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + df = df.drop("Country", axis=1) +print('df = df.drop("Country", axis=1)') + +display(df) + # N.B. You can only run this cell once! If you try run it again, it will throw an error! # Why? Because if you drop the Country column, it will be removed...so you can't # drop it a second time as the column isn't there to drop! \ No newline at end of file diff --git a/content/solutions/05_39.py b/content/solutions/05_39.py index d9d3e45..fc708eb 100644 --- a/content/solutions/05_39.py +++ b/content/solutions/05_39.py @@ -1 +1,71 @@ -df["DateFrom"].dtype \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +print('df["DateFrom"].dtype)\n') + +display(df["DateFrom"].dtype) \ No newline at end of file diff --git a/content/solutions/05_40.py b/content/solutions/05_40.py index eef6325..d0d8bfa 100644 --- a/content/solutions/05_40.py +++ b/content/solutions/05_40.py @@ -1,2 +1,73 @@ +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +print('df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d")\n') +print('df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d")') + df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") \ No newline at end of file diff --git a/content/solutions/05_41.py b/content/solutions/05_41.py index 3a34721..213b28e 100644 --- a/content/solutions/05_41.py +++ b/content/solutions/05_41.py @@ -1 +1,73 @@ -df[df["DateFrom"] > "2017-02-01"] \ No newline at end of file +import pandas as pd + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +print('df[df["DateFrom"] > "2017-02-01"]\n') +display(df[df["DateFrom"] > "2017-02-01"]) diff --git a/content/solutions/05_42.py b/content/solutions/05_42.py index 73aa5d3..4341063 100644 --- a/content/solutions/05_42.py +++ b/content/solutions/05_42.py @@ -1 +1,75 @@ -df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) \ No newline at end of file +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +print('df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1)\n') +df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) +display(df.head()) \ No newline at end of file diff --git a/content/solutions/05_43.py b/content/solutions/05_43.py index 49c09b5..58cf6a7 100644 --- a/content/solutions/05_43.py +++ b/content/solutions/05_43.py @@ -1,2 +1,79 @@ +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) + +print('df["month"] = df["DateFrom"].dt.month\n') +print('df["month"].hist()\n') + df["month"] = df["DateFrom"].dt.month -df["month"].hist() \ No newline at end of file +# display(df) +display(df["month"].hist()) \ No newline at end of file diff --git a/content/solutions/05_44.py b/content/solutions/05_44.py index bad939c..1dedd50 100644 --- a/content/solutions/05_44.py +++ b/content/solutions/05_44.py @@ -1 +1,69 @@ -df.sort_values("city") \ No newline at end of file +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +print('df.sort_values("city")\n') + +display(df.sort_values("city")) \ No newline at end of file diff --git a/content/solutions/05_45.py b/content/solutions/05_45.py index 3491680..b1bb085 100644 --- a/content/solutions/05_45.py +++ b/content/solutions/05_45.py @@ -1 +1,77 @@ -df.sort_values(["duration", "Attendees"], ascending=[True, False]) \ No newline at end of file +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) + +df["month"] = df["DateFrom"].dt.month + +print('df.sort_values(["duration", "Attendees"], ascending=[True, False])') +display(df.sort_values(["duration", "Attendees"], ascending=[True, False])) \ No newline at end of file diff --git a/content/solutions/05_46.py b/content/solutions/05_46.py index 67dd12e..f3856a6 100644 --- a/content/solutions/05_46.py +++ b/content/solutions/05_46.py @@ -1 +1,79 @@ -df_gr = df.groupby("city") \ No newline at end of file +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) + +df["month"] = df["DateFrom"].dt.month +df.sort_values(["duration", "Attendees"], ascending=[True, False]) + +print('df_gr = df.groupby("city")') +df_gr = df.groupby("city") + diff --git a/content/solutions/05_47.py b/content/solutions/05_47.py index c7520eb..21e7bb5 100644 --- a/content/solutions/05_47.py +++ b/content/solutions/05_47.py @@ -1 +1,80 @@ -df_gr["Attendees"].mean() \ No newline at end of file +import pandas as pd +import datetime + +df_2014 = pd.read_csv("../data/food_training/training_2014.csv", header=1) +df_2015 = pd.read_csv("../data/food_training/training_2015.csv", header=1) +df_2016 = pd.read_csv("../data/food_training/training_2016.csv", header=1) + +frames = [df_2014, df_2015, df_2016] +df = pd.concat(frames) + +df = df.reset_index() +df.index + +cols_to_remove = ["Unnamed: 5", "Unnamed: 6"] +df = df.drop(cols_to_remove, axis=1) + +df[["city", "country"]] = df["Location"].str.split(pat=";", expand=True) + +df = df.drop("Location", axis=1) + +df["city"] = df["city"].str.lower() + +df["city"] = df["city"].str.replace(r"/\w*", "", regex=True) + +dict_capitals = { + "Denmark": "copenhague", + "France": "paris", + "Italy": "rome", + "Spain": "madrid", + "United Kingdom": "london", +} + +unknown_city = df["city"] == "unknown" +df.loc[unknown_city, "city"] = df.loc[unknown_city, "country"].map(dict_capitals) + +dict_cities = df.loc[df['country'].notnull(), ['city', 'country']].set_index('city').to_dict()['country'] + +dict_cities.update( + { + "bristol": "United Kingdom", + "gothenburg": "Sweden", + "graz": "Austria", + "lyon": "France", + "murcia": "Spain", + "parma": "Italy", + }, +) + +null_country = df["country"].isnull() +df.loc[null_country, "country"] = df.loc[null_country, "city"].map(dict_cities) + +df["country"].value_counts(dropna=False) + + +def f(x): + if x == 1: + return "single" + else: + return "multiple" + +df["Attendees"].apply(f) + +languages = pd.read_csv("../data/food_training/languages.csv") + +df = df.merge(languages, how="left", left_on="country", right_on="Country") + +df = df.drop("Country", axis=1) + +df["DateFrom"] = pd.to_datetime(df["DateFrom"], format="%Y-%m-%d") +df["DateTo"] = pd.to_datetime(df["DateTo"], format="%Y-%m-%d") + +df["duration"] = df["DateTo"] - df["DateFrom"] + datetime.timedelta(days=1) + +df["month"] = df["DateFrom"].dt.month +df.sort_values(["duration", "Attendees"], ascending=[True, False]) + +df_gr = df.groupby("city") + +print('df_gr["Attendees"].mean()') +display(df_gr["Attendees"].mean()) \ No newline at end of file