Skip to content

Commit

Permalink
All industry data now uses neave's new data files
Browse files Browse the repository at this point in the history
  • Loading branch information
makmanalp committed Aug 10, 2015
1 parent dea6c90 commit e120010
Showing 1 changed file with 39 additions and 37 deletions.
76 changes: 39 additions & 37 deletions colombia/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,26 +188,21 @@ def prefix_path(to_prefix):
}


# Column-name translation used as the "field_mapping" for the PILA Stata
# extracts: source column name -> field name used by the rest of the dataset
# definitions. This base table is the department-level variant.
pila_to_atlas = dict([
    ("r", "department"),
    ("i", "industry"),
    ("year", "year"),
    ("E_yir", "employment"),
    ("W_yir", "wages"),
    ("rca", "rca"),
    ("density", "density"),
    ("cog", "cog"),
    ("coi", "coi"),
    ("pci", "complexity"),
])

# Municipality-level variant: an independent shallow copy of the table above
# in which only the "r" column is redirected to "municipality".
pila_to_atlas_muni = pila_to_atlas.copy()
pila_to_atlas_muni.update({"r": "municipality"})

industry4digit_department = {
"read_function": lambda: pd.read_stata(prefix_path("Atlas/Colombia/beta/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_dpto.dta")),
"field_mapping": pila_to_atlas,
"hook_pre_merge": lambda df: df[df.industry != "."],
"read_function": lambda: pd.read_hdf(prefix_path("Atlas/Colombia/beta/Industries/industries_state.hdf"), "data"),
"hook_pre_merge": lambda df: df.drop_duplicates(["department", "industry", "year"]),
"field_mapping": {
"state_code": "department",
"p_code": "industry",
"year": "year",
"state_p_emp": "employment",
"state_p_wage": "wages",
"state_p_wagemonth": "monthly_wages",
"state_p_rca": "rca",
"state_p_distance_ps_pred": "density",
"state_p_cog_ps_pred1": "cog",
"all_p_pci": "complexity"
},
"classification_fields": {
"department": {
"classification": location_classification,
Expand Down Expand Up @@ -238,16 +233,22 @@ def prefix_path(to_prefix):
"wages": first,
"density": first,
"cog": first,
"coi": first,
"rca": first
}
}
}

industry4digit_municipality = {
"read_function": lambda: pd.read_stata(prefix_path("Atlas/Colombia/beta/PILA_andres/COL_PILA_ecomp-E_yir_2008-2012_rev3_mun.dta")),
"field_mapping": pila_to_atlas_muni,
"hook_pre_merge": lambda df: df[df.industry != "."],
"read_function": lambda: pd.read_hdf(prefix_path("Atlas/Colombia/beta/Industries/industries_muni.hdf"), "data"),
"hook_pre_merge": lambda df: df.drop_duplicates(["municipality", "industry", "year"]),
"field_mapping": {
"muni_code": "municipality",
"p_code": "industry",
"year": "year",
"muni_p_emp": "employment",
"muni_p_wage": "wages",
"muni_p_wagemonth": "monthly_wages",
},
"classification_fields": {
"municipality": {
"classification": location_classification,
Expand All @@ -264,16 +265,10 @@ def prefix_path(to_prefix):
},
"facet_fields": ["municipality", "industry", "year"],
"facets": {
("industry_id", "year"): {
"complexity": first
},
("municipality_id", "industry_id", "year"): {
"employment": first,
"wages": first,
"density": first,
"cog": first,
"coi": first,
"rca": first
#"monthly_wages": first,
}
}
}
Expand Down Expand Up @@ -333,16 +328,19 @@ def prefix_path(to_prefix):


industry2digit_department = {
"read_function": lambda: pd.read_stata(prefix_path("Atlas/Colombia/beta/Industries/output2008-2013_d2industrydescriptives.dta")),
"hook_pre_merge": lambda df: df[df.industry != ""].drop_duplicates(["department", "industry", "year"]),
"read_function": lambda: pd.read_hdf(prefix_path("Atlas/Colombia/beta/Industries/industries_state.hdf"), "data"),
"hook_pre_merge": lambda df: df.drop_duplicates(["department", "industry", "year"]),
"field_mapping": {

"state_code": "department",
"d2_code": "industry",
"d3_code": "industry",
"year": "year",
"state_d2_establisments": "num_establishments",
"state_d2_annualwages": "wages",
"state_d2_employment": "employment"
"state_d3_est": "num_establishments",
"state_d3_wage": "wages",
"state_d3_emp": "employment",
#"state_d3_rca": "rca",
"state_d3_distance_ps_pred1": "density",
"state_d3_cog_ps_pred1": "cog",
"all_d3_pci": "complexity"
},
"classification_fields": {
"department": {
Expand All @@ -364,12 +362,16 @@ def prefix_path(to_prefix):
"wages": sumGroup,
"employment": sumGroup,
"num_establishments": sumGroup,
"complexity": first
},

("department_id", "industry_id", "year"): {
"wages": first,
"employment": first,
"num_establishments": first,
"density": first,
"cog": first,
#"rca": first
}
}
}

0 comments on commit e120010

Please sign in to comment.