Skip to content

Commit

Permalink
Casting data types in table querist (#55)
Browse files Browse the repository at this point in the history
* Rewrite tests to include casted table dimensions

* Format code base

* Implement casting in table queries

* Re-run notebooks with tables in them

* Reformat table in README

* Make dtype checking explicit (for backward compatibility with Python 3.8)

* Use pandas tools to handle dtype checking...
  • Loading branch information
daffidwilde authored Dec 19, 2023
1 parent 9258428 commit 33836cc
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 61 deletions.
26 changes: 13 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,19 @@ Here's a basic example of how to use the `CensusAPI` class to retrieve a table:
>>> # Submit the parameters to the table querist method
>>> table = api.query_table(population_type, area_type, dimensions)
>>> print(table)
ctry sex hh_deprivation_housing count population_type
0 E92000001 1 -8 0 UR_HH
1 E92000001 1 0 24993178 UR_HH
2 E92000001 1 1 3340293 UR_HH
3 E92000001 2 -8 0 UR_HH
4 E92000001 2 0 23890474 UR_HH
5 E92000001 2 1 3280355 UR_HH
6 W92000004 1 -8 0 UR_HH
7 W92000004 1 0 1457330 UR_HH
8 W92000004 1 1 100914 UR_HH
9 W92000004 2 -8 0 UR_HH
10 W92000004 2 0 1391731 UR_HH
11 W92000004 2 1 101574 UR_HH
ctry sex hh_deprivation_housing count population_type
0 E92000001 1 -8 0 UR_HH
1 E92000001 1 0 24993178 UR_HH
2 E92000001 1 1 3340293 UR_HH
3 E92000001 2 -8 0 UR_HH
4 E92000001 2 0 23890474 UR_HH
5 E92000001 2 1 3280355 UR_HH
6 W92000004 1 -8 0 UR_HH
7 W92000004 1 0 1457330 UR_HH
8 W92000004 1 1 100914 UR_HH
9 W92000004 2 -8 0 UR_HH
10 W92000004 2 0 1391731 UR_HH
11 W92000004 2 1 101574 UR_HH

```

Expand Down
74 changes: 37 additions & 37 deletions docs/how-to-guides/query-table.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -353,43 +353,43 @@
"</div>"
],
"text/plain": [
" ctry sex hh_tenure_9a count population_type\n",
"0 E92000001 1 -8 0 HRP\n",
"1 E92000001 1 0 3276595 HRP\n",
"2 E92000001 1 1 2178228 HRP\n",
"3 E92000001 1 2 109635 HRP\n",
"4 E92000001 1 3 1064598 HRP\n",
"5 E92000001 1 4 1108579 HRP\n",
"6 E92000001 1 5 1785514 HRP\n",
"7 E92000001 1 6 210267 HRP\n",
"8 E92000001 1 7 17429 HRP\n",
"9 E92000001 2 -8 0 HRP\n",
"10 E92000001 2 0 4348098 HRP\n",
"11 E92000001 2 1 4566144 HRP\n",
"12 E92000001 2 2 126316 HRP\n",
"13 E92000001 2 3 880554 HRP\n",
"14 E92000001 2 4 951932 HRP\n",
"15 E92000001 2 5 2488175 HRP\n",
"16 E92000001 2 6 310933 HRP\n",
"17 E92000001 2 7 13088 HRP\n",
"18 W92000004 1 -8 0 HRP\n",
"19 W92000004 1 0 219732 HRP\n",
"20 W92000004 1 1 129053 HRP\n",
"21 W92000004 1 2 2020 HRP\n",
"22 W92000004 1 3 66360 HRP\n",
"23 W92000004 1 4 56478 HRP\n",
"24 W92000004 1 5 87615 HRP\n",
"25 W92000004 1 6 14552 HRP\n",
"26 W92000004 1 7 1247 HRP\n",
"27 W92000004 2 -8 0 HRP\n",
"28 W92000004 2 0 292357 HRP\n",
"29 W92000004 2 1 248774 HRP\n",
"30 W92000004 2 2 2263 HRP\n",
"31 W92000004 2 3 50277 HRP\n",
"32 W92000004 2 4 48978 HRP\n",
"33 W92000004 2 5 107229 HRP\n",
"34 W92000004 2 6 19245 HRP\n",
"35 W92000004 2 7 933 HRP"
" ctry sex hh_tenure_9a count population_type\n",
"0 E92000001 1 -8 0 HRP\n",
"1 E92000001 1 0 3276595 HRP\n",
"2 E92000001 1 1 2178228 HRP\n",
"3 E92000001 1 2 109635 HRP\n",
"4 E92000001 1 3 1064598 HRP\n",
"5 E92000001 1 4 1108579 HRP\n",
"6 E92000001 1 5 1785514 HRP\n",
"7 E92000001 1 6 210267 HRP\n",
"8 E92000001 1 7 17429 HRP\n",
"9 E92000001 2 -8 0 HRP\n",
"10 E92000001 2 0 4348098 HRP\n",
"11 E92000001 2 1 4566144 HRP\n",
"12 E92000001 2 2 126316 HRP\n",
"13 E92000001 2 3 880554 HRP\n",
"14 E92000001 2 4 951932 HRP\n",
"15 E92000001 2 5 2488175 HRP\n",
"16 E92000001 2 6 310933 HRP\n",
"17 E92000001 2 7 13088 HRP\n",
"18 W92000004 1 -8 0 HRP\n",
"19 W92000004 1 0 219732 HRP\n",
"20 W92000004 1 1 129053 HRP\n",
"21 W92000004 1 2 2020 HRP\n",
"22 W92000004 1 3 66360 HRP\n",
"23 W92000004 1 4 56478 HRP\n",
"24 W92000004 1 5 87615 HRP\n",
"25 W92000004 1 6 14552 HRP\n",
"26 W92000004 1 7 1247 HRP\n",
"27 W92000004 2 -8 0 HRP\n",
"28 W92000004 2 0 292357 HRP\n",
"29 W92000004 2 1 248774 HRP\n",
"30 W92000004 2 2 2263 HRP\n",
"31 W92000004 2 3 50277 HRP\n",
"32 W92000004 2 4 48978 HRP\n",
"33 W92000004 2 5 107229 HRP\n",
"34 W92000004 2 6 19245 HRP\n",
"35 W92000004 2 7 933 HRP"
]
},
"execution_count": 1,
Expand Down
8 changes: 4 additions & 4 deletions docs/tutorials/getting-started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -180,7 +180,7 @@
"25 UR_HH "
]
},
"execution_count": 9,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -207,7 +207,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -369,7 +369,7 @@
"11 79227 UR_HH "
]
},
"execution_count": 6,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
9 changes: 6 additions & 3 deletions src/census21api/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,13 @@ def query_table(
table_json["observations"], use_id
)
columns = (area_type, *dimensions, "count")
data = pd.DataFrame(records, columns=columns)
data["population_type"] = population_type
table = pd.DataFrame(records, columns=columns)
table["population_type"] = population_type

return data
if use_id:
table = table.astype({dim: int for dim in dimensions})

return table

def _get_population_types(self) -> Set[str]:
"""
Expand Down
4 changes: 2 additions & 2 deletions tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def st_records_and_queries(draw, max_nrows=10):
for _ in range(nrows):
record = (
draw(st.text()),
*(draw(st.text()) for _ in dimensions),
draw(st.integers()),
*(str(draw(st.integers(-1, 10))) for _ in dimensions),
draw(st.integers(0, 1000)),
)
records.append(record)

Expand Down
17 changes: 15 additions & 2 deletions tests/test_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,24 @@ def test_query_table_valid(records_and_query, use_id):
assert len(data) == len(records)

expected_columns = [area_type, *dimensions, "count", "population_type"]
assert data.columns.to_list() == expected_columns
assert all(data.columns == expected_columns)
assert (data["population_type"] == population_type).all()

if use_id:
assert all(data.select_dtypes("int").columns == [*dimensions, "count"])
assert all(
data.select_dtypes("object").columns
== [area_type, "population_type"]
)
else:
assert all(data.select_dtypes("int").columns == ["count"])
assert all(
data.select_dtypes("object").columns
== [area_type, *dimensions, "population_type"]
)

for i, row in data.drop("population_type", axis=1).iterrows():
assert tuple(row) == records[i]
assert (*map(str, row[:-1]), row[-1]) == records[i]

querist.assert_called_once_with(population_type, area_type, dimensions)
extract.assert_called_once_with("foo", use_id)
Expand Down

0 comments on commit 33836cc

Please sign in to comment.