# Patch summary (free-text preamble; patch tools skip text before the first
# "diff --git" header).
#
# colombia/datasets.py
#   * Adds read_trade4digit_rcpy_country(): loads Trade/exp_rcpy_rc_p4.dta via
#     pd.read_stata(prefix_path(...)), sets the "r" column to the constant
#     "COL", and returns the frame.
#   * Adds the trade4digit_rcpy_country dataset spec. It maps r -> location,
#     ctry_dest -> country, p -> product, yr -> year, X_rcpy_d -> export_value
#     and NP_rcpy -> export_num_plants; classifies location at level "country",
#     product at "4digit" and country at "country"; pads country to 3 digits and
#     product to 4. Facets: (country_id, location_id, year) aggregated with
#     sum_group, and (country_id, location_id, product_id, year) with first.
#   * Switches the (country_id, location_id, year) facet from first to sum_group
#     for export_value and export_num_plants. The hunk sits between the opening
#     of trade4digit_rcpy_department and trade4digit_rcpy_municipality, so this
#     is presumably the department spec -- TODO confirm against the full file.
#   * Adds the trade4digit_rcpy_msa dataset spec (Trade/exp_rcpy_ra_p4.dta,
#     location level "msa", same field mapping and padding) with a single
#     (country_id, location_id, year) facet aggregated with sum_group.
#
# colombia/import.py
#   * Imports trade4digit_rcpy_msa and trade4digit_rcpy_country.
#   * For each of the two new datasets: runs process_dataset(...), takes the
#     ("country_id", "location_id", "year") facet, calls reset_index(), and
#     appends it with to_sql(..., index=False, chunksize=10000,
#     if_exists="append") to "country_country_year" (country dataset) and
#     "country_msa_year" (MSA dataset).
#
# NOTE(review): the country spec also defines a product-level facet, but no
#   visible hunk writes it to a table -- confirm whether that is intentional.
# NOTE(review): the patch text below appears to have lost its original line
#   breaks and indentation (each hunk is collapsed into running text), so it
#   cannot be applied as-is; recover the patch from the original commit
#   (index 6f61d63..3237e4c and f7d4a97..09777bf) before applying.
diff --git a/colombia/datasets.py b/colombia/datasets.py index 6f61d63..3237e4c 100644 --- a/colombia/datasets.py +++ b/colombia/datasets.py @@ -320,6 +320,53 @@ def load_trade4digit_municipality(): } +def read_trade4digit_rcpy_country(): + df = pd.read_stata(prefix_path("Trade/exp_rcpy_rc_p4.dta")) + df["r"] = "COL" + return df + + +trade4digit_rcpy_country = { + "read_function": read_trade4digit_rcpy_country, + "field_mapping": { + "r": "location", + "ctry_dest": "country", + "p": "product", + "yr": "year", + "X_rcpy_d": "export_value", + "NP_rcpy": "export_num_plants" + }, + "classification_fields": { + "location": { + "classification": location_classification, + "level": "country" + }, + "product": { + "classification": product_classification, + "level": "4digit" + }, + "country": { + "classification": country_classification, + "level": "country" + }, + }, + "digit_padding": { + "country": 3, + "product": 4 + }, + "facet_fields": ["location", "country", "product", "year"], + "facets": { + ("country_id", "location_id", "year"): { + "export_value": sum_group, + "export_num_plants": sum_group + }, + ("country_id", "location_id", "product_id", "year"): { + "export_value": first, + "export_num_plants": first + } + } +} + trade4digit_rcpy_department = { "read_function": lambda: pd.read_stata(prefix_path("Trade/exp_rcpy_r2_p4.dta")), "field_mapping": { @@ -352,8 +399,8 @@ def load_trade4digit_municipality(): "facet_fields": ["location", "country", "product", "year"], "facets": { ("country_id", "location_id", "year"): { - "export_value": first, - "export_num_plants": first + "export_value": sum_group, + "export_num_plants": sum_group }, ("country_id", "location_id", "product_id", "year"): { "export_value": first, @@ -363,6 +410,43 @@ def load_trade4digit_municipality(): } +trade4digit_rcpy_msa = { + "read_function": lambda: pd.read_stata(prefix_path("Trade/exp_rcpy_ra_p4.dta")), + "field_mapping": { + "r": "location", + "ctry_dest": "country", + "p": "product", + "yr": 
"year", + "X_rcpy_d": "export_value", + "NP_rcpy": "export_num_plants" + }, + "classification_fields": { + "location": { + "classification": location_classification, + "level": "msa" + }, + "product": { + "classification": product_classification, + "level": "4digit" + }, + "country": { + "classification": country_classification, + "level": "country" + }, + }, + "digit_padding": { + "country": 3, + "product": 4 + }, + "facet_fields": ["location", "country", "product", "year"], + "facets": { + ("country_id", "location_id", "year"): { + "export_value": sum_group, + "export_num_plants": sum_group + } + } +} + trade4digit_rcpy_municipality = { "read_function": lambda: pd.read_stata(prefix_path("Trade/exp_rcpy_r5_p4.dta")), "field_mapping": { diff --git a/colombia/import.py b/colombia/import.py index f7d4a97..09777bf 100644 --- a/colombia/import.py +++ b/colombia/import.py @@ -10,7 +10,8 @@ industry4digit_msa, industry2digit_department, industry4digit_municipality, trade4digit_rcpy_municipality, - trade4digit_rcpy_department, population, + trade4digit_rcpy_department, trade4digit_rcpy_msa, + trade4digit_rcpy_country, population, gdp_nominal_department, gdp_real_department, occupation2digit, occupation2digit_industry2digit) @@ -130,6 +131,21 @@ df.to_sql("msa_year", db.engine, index=False, chunksize=10000, if_exists="append") + # Country - trade rcpy + ret = process_dataset(trade4digit_rcpy_country) + + df = ret[("country_id", "location_id", "year")].reset_index() + df.to_sql("country_country_year", db.engine, + index=False, chunksize=10000, if_exists="append") + + + # MSA - trade rcpy + ret = process_dataset(trade4digit_rcpy_msa) + + df = ret[("country_id", "location_id", "year")].reset_index() + df.to_sql("country_msa_year", db.engine, + index=False, chunksize=10000, if_exists="append") + # Municipality - trade rcpy ret = process_dataset(trade4digit_rcpy_municipality)