From f7c3e28d6837c3ea1e1944a04f8ea7d8f0f5811f Mon Sep 17 00:00:00 2001 From: Henry Wilde Date: Wed, 13 Dec 2023 13:33:23 +0000 Subject: [PATCH] Docs: how-to guides and clearer writing (#49) * Fix link to blocked combo issue in README * Move API reference material within reference sect * Expand tutorials spiel * Start writing how-to guides * Expand population querist how-to * Include how-to guides in top-level site * Sort population type querist output * Run population how-to with sorted outputs * Reset index when sorting in population querist * Remove QA document So much of it is irrelevant to the work. * Write how-to on querying features * Reset indices on all metadata querists * Rerun population type how-to * Fix the category querist (#52) * Migrate tests to area type category querist The old implementation only serves area type category queries. * Finish tests for area type category querist * Write tests for remaining category querist methods * Implement category querist * Remove dictionary union operators * Add category querist how-to * Change area type category how-to to MSOA Let's aim for highest coverage in our integration tests... 
* Add sections to category how-to * Add coverage checking to integration doctests * Run doctests on Python 3.8 as well * Add python version to quartodoc dependency * Fix typo in qualification * Add table how-to; skim some cream off tutorial --- .docscoveragerc | 3 + .github/workflows/tests.yml | 9 +- QA.md | 92 -- README.md | 6 +- _quarto.yml | 12 +- docs/how-to-guides/index.qmd | 13 + docs/how-to-guides/query-categories.ipynb | 308 +++++ docs/how-to-guides/query-feature.ipynb | 1121 +++++++++++++++++ .../query-population-types.ipynb | 247 ++++ docs/how-to-guides/query-table.ipynb | 857 +++++++++++++ docs/reference/index.qmd | 27 + docs/tutorials/getting-started.ipynb | 7 +- docs/tutorials/index.qmd | 6 +- index.qmd | 16 +- pyproject.toml | 2 +- src/census21api/wrapper.py | 142 ++- tests/strategies.py | 55 +- tests/test_wrapper.py | 257 +++- 18 files changed, 2974 insertions(+), 206 deletions(-) create mode 100644 .docscoveragerc delete mode 100644 QA.md create mode 100644 docs/how-to-guides/index.qmd create mode 100644 docs/how-to-guides/query-categories.ipynb create mode 100644 docs/how-to-guides/query-feature.ipynb create mode 100644 docs/how-to-guides/query-population-types.ipynb create mode 100644 docs/how-to-guides/query-table.ipynb create mode 100644 docs/reference/index.qmd diff --git a/.docscoveragerc b/.docscoveragerc new file mode 100644 index 0000000..fd210c1 --- /dev/null +++ b/.docscoveragerc @@ -0,0 +1,3 @@ +[report] +exclude_also = + def _process_response \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ce6e4fb..c89db47 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -34,12 +34,17 @@ jobs: run: | python -m pytest tests --cov=census21api --cov-fail-under=100 - name: Test documentation - if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11 + if: matrix.os == 'ubuntu-latest' run: | python -m pip install ".[docs]" python -m doctest README.md python -m 
pytest docs \ - --nbval --nbval-current-env --randomly-dont-reorganize + --nbval \ + --nbval-current-env \ + --randomly-dont-reorganize \ + --cov=census21api \ + --cov-config=.docscoveragerc \ + --cov-fail-under=99 - name: Install and run linters if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.11 run: | diff --git a/QA.md b/QA.md deleted file mode 100644 index 69062fc..0000000 --- a/QA.md +++ /dev/null @@ -1,92 +0,0 @@ -## Quality assurance checklist - -Quality assurance checklist from [the quality assurance of code for analysis and research guidance](https://best-practice-and-impact.github.io/qa-of-code-guidance/intro.html). - -### Modular code - -- [X] Individual pieces of logic are written as functions. Classes are used if more appropriate. -- [X] Repetition in the code is minimalised. For example, by moving reusable code into functions or classes. - -### Good coding practices - -- [X] Names used in the code are informative and concise. -- [X] Code logic is clear and avoids unnecessary complexity. -- [X] Code follows a standard style, e.g. [PEP8 for Python](https://www.python.org/dev/peps/pep-0008/) and [Google](https://google.github.io/styleguide/Rguide.html) or [tidyverse](https://style.tidyverse.org/) for R. - -### Project structure - -- [x] A clear, standard directory structure is used to separate input data, outputs, code and documentation. - -### Code documentation - -- [ ] Comments are used to describe why code is written in a particular way, rather than describing what the code is doing. -- [ ] Comments are kept up to date, so they do not confuse the reader. -- [x] Code is not commented out to adjust which lines of code run. -- [x] All functions and classes are documented to describe what they do, what inputs they take and what they return. -- [x] Python code is [documented using docstrings](https://www.python.org/dev/peps/pep-0257/). 
R code is [documented using `roxygen2` comments](https://cran.r-project.org/web/packages/roxygen2/vignettes/roxygen2.html). - -### Project documentation - -- [ ] A README file details the purpose of the project, basic installation instructions, and examples of usage. -- [ ] Where appropriate, guidance for prospective contributors is available including a code of conduct. -- [ ] If the code's users are not familiar with the code, desk instructions are provided to guide lead users through example use cases. -- [ ] The extent of analytical quality assurance conducted on the project is clearly documented. -- [ ] Assumptions in the analysis and their quality are documented next to the code that implements them. These are also made available to users. -- [ ] Copyright and licenses are specified for both documentation and code. -- [ ] Instructions for how to cite the project are given. - -### Version control - -- [x] Code is [version controlled using Git](https://git-scm.com/). -- [x] Code is committed regularly, preferably when a discrete unit of work has been completed. -- [x] An appropriate branching strategy is defined and used throughout development. -- [x] Code is open-sourced. Any sensitive data are omitted or replaced with dummy data. - -### Configuration - -- [x] Credentials and other secrets are not written in code but are configured as environment variables. -- [x] Configuration is clearly separated from code used for analysis, so that it is simple to identify and update. -- [ ] The configuration used to generate particular outputs, releases and publications is recorded. - -### Data management - -- [ ] Published outputs meet [accessibility regulations](https://analysisfunction.civilservice.gov.uk/area_of_work/accessibility/). -- [ ] All data for analysis are stored in an open format, so that specific software is not required to access them. -- [ ] Input data are stored safely and are treated as read-only. -- [ ] Input data are versioned. 
All changes to the data result in new versions being created, or [changes are recorded as new records](https://en.wikipedia.org/wiki/Slowly_changing_dimension). -- [ ] All input data is documented in a data register, including where they come from and their importance to the analysis. -- [ ] Outputs from your analysis are disposable and are regularly deleted and regenerated while analysis develops. Your analysis code is able to reproduce them at any time. -- [ ] Non-sensitive data are made available to users. If data are sensitive, dummy data is made available so that the code can be run by others. -- [ ] Data quality is monitored, as per [the government data quality framework](https://www.gov.uk/government/publications/the-government-data-quality-framework/the-government-data-quality-framework). - -### Peer review - -- [x] Peer review is conducted and recorded near to the code. Merge or pull requests are used to document review, when relevant. - -### Testing - -- [x] Core functionality is unit tested as code. See [`pytest` for Python](https://docs.pytest.org/en/stable/) and [`testthat` for R](https://testthat.r-lib.org/). -- [x] Code based tests are run regularly, ideally being automated using continuous integration. -- [x] Bug fixes include implementing new unit tests to ensure that the same bug does not reoccur. -- [ ] Informal tests are recorded near to the code. -- [ ] Stakeholder or user acceptance sign-offs are recorded near to the code. - -### Dependency management - -- [ ] Required passwords, secrets and tokens are documented, but are stored outside of version control. -- [x] Required libraries and packages are documented, including their versions. -- [x] Working operating system environments are documented. -- [ ] Example configuration files are provided. - -### Logging - -- [x] Misuse or failure in the code produces informative error messages. -- [ ] Code configuration is recorded when the code is run. 
- -### Project management - -- [ ] The roles and responsibilities of team members are clearly defined. -- [ ] An issue tracker (e.g GitHub Project, Trello or Jira) is used to record development tasks. -- [x] New issues or tasks are guided by users’ needs and stories. -- [ ] Acceptance criteria are noted for issues and tasks. Fulfilment of acceptance criteria is recorded. -- [x] Quality assurance standards and processes for the project are defined. These are based around [the quality assurance of code for analysis and research guidance document](https://best-practice-and-impact.github.io/qa-of-code-guidance/intro.html). \ No newline at end of file diff --git a/README.md b/README.md index d905157..e82d41f 100644 --- a/README.md +++ b/README.md @@ -87,9 +87,9 @@ contribution or opening an issue. ### Blocked dimension combinations -Some combinations of columns (dimensions) cannot be queried at once. See #39 -for an example. This is a deliberate block put in place by the developers of -the API. +Some combinations of columns (dimensions) cannot be queried at once. See +[#39](https://github.com/datasciencecampus/census21api/issues/39) for an example. This is a +deliberate block put in place by the developers of the API. ### Some columns are missing diff --git a/_quarto.yml b/_quarto.yml index cb58a49..18ae0a1 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -12,6 +12,8 @@ website: text: About - href: docs/tutorials/index.qmd text: Tutorials + - href: docs/how-to-guides/index.qmd + text: How-to guides - href: docs/reference/index.qmd text: Reference right: @@ -25,8 +27,10 @@ website: left: > All content is available under the [Open Government Licence V3.0](https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/), - except where otherwise stated.
- Built using [Quarto](https://quarto.org/). + except where otherwise stated. + center: > + Built using [Quarto](https://quarto.org/) and + [Diátaxis](https://diataxis.fr/). format: html: @@ -37,10 +41,10 @@ format: quartodoc: title: API reference package: census21api - dir: docs/reference + dir: docs/reference/api sections: - title: CensusAPI - desc: Core class for connecting to the Census API. + desc: Core class for connecting to the Census API package: census21api.wrapper contents: - CensusAPI diff --git a/docs/how-to-guides/index.qmd b/docs/how-to-guides/index.qmd new file mode 100644 index 0000000..925b091 --- /dev/null +++ b/docs/how-to-guides/index.qmd @@ -0,0 +1,13 @@ +--- +title: How-to guides +listing: + type: table + contents: + - "*.ipynb" + fields: [title, description] +--- + +This section of the documentation comprises bite-size, task-oriented guides on +how to make different queries using `census21api`. + +
\ No newline at end of file diff --git a/docs/how-to-guides/query-categories.ipynb b/docs/how-to-guides/query-categories.ipynb new file mode 100644 index 0000000..c615ff6 --- /dev/null +++ b/docs/how-to-guides/query-categories.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "title: Querying feature item categories\n", + "description: How to query metadata on a particular feature item\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Some of the features (area types and dimensions) for a population type may be\n", + "unfamiliar or difficult to understand. This guide shows you how to get metadata\n", + "on an item from a feature.\n", + "\n", + "## Querying a dimension's categorisations\n", + "\n", + "To do this for a given dimension, pass your population type and item to the\n", + "`CensusAPI.query_categories()` method." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeldimensionpopulation_type
0-8Does not applyhealth_in_generalUR_HH
11Very good healthhealth_in_generalUR_HH
22Good healthhealth_in_generalUR_HH
33Fair healthhealth_in_generalUR_HH
44Bad healthhealth_in_generalUR_HH
55Very bad healthhealth_in_generalUR_HH
\n", + "
" + ], + "text/plain": [ + " id label dimension population_type\n", + "0 -8 Does not apply health_in_general UR_HH\n", + "1 1 Very good health health_in_general UR_HH\n", + "2 2 Good health health_in_general UR_HH\n", + "3 3 Fair health health_in_general UR_HH\n", + "4 4 Bad health health_in_general UR_HH\n", + "5 5 Very bad health health_in_general UR_HH" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from census21api import CensusAPI\n", + "\n", + "api = CensusAPI()\n", + "\n", + "population_type = \"UR_HH\"\n", + "feature = \"dimensions\"\n", + "item = \"health_in_general\"\n", + "\n", + "api.query_categories(population_type, feature, item)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Querying an area type's areas\n", + "\n", + "You can use the same method to query category metadata for area types:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabelarea_typepopulation_type
0E02000001City of London 001msoaUR_HH
1E02000002Barking and Dagenham 001msoaUR_HH
2E02000003Barking and Dagenham 002msoaUR_HH
3E02000004Barking and Dagenham 003msoaUR_HH
4E02000005Barking and Dagenham 004msoaUR_HH
...............
7259W02000424Wrexham 021msoaUR_HH
7260W02000425Wrexham 022msoaUR_HH
7261W02000426Wrexham 023msoaUR_HH
7262W02000427Neath Port Talbot 021msoaUR_HH
7263W02000428Swansea 032msoaUR_HH
\n", + "

7264 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " id label area_type population_type\n", + "0 E02000001 City of London 001 msoa UR_HH\n", + "1 E02000002 Barking and Dagenham 001 msoa UR_HH\n", + "2 E02000003 Barking and Dagenham 002 msoa UR_HH\n", + "3 E02000004 Barking and Dagenham 003 msoa UR_HH\n", + "4 E02000005 Barking and Dagenham 004 msoa UR_HH\n", + "... ... ... ... ...\n", + "7259 W02000424 Wrexham 021 msoa UR_HH\n", + "7260 W02000425 Wrexham 022 msoa UR_HH\n", + "7261 W02000426 Wrexham 023 msoa UR_HH\n", + "7262 W02000427 Neath Port Talbot 021 msoa UR_HH\n", + "7263 W02000428 Swansea 032 msoa UR_HH\n", + "\n", + "[7264 rows x 4 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature = \"area-types\"\n", + "item = \"msoa\"\n", + "\n", + "api.query_categories(population_type, feature, item)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "centhesus", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/how-to-guides/query-feature.ipynb b/docs/how-to-guides/query-feature.ipynb new file mode 100644 index 0000000..cf84381 --- /dev/null +++ b/docs/how-to-guides/query-feature.ipynb @@ -0,0 +1,1121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "title: Querying feature metadata\n", + "description: How to retrieve metadata on area types and dimensions \n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Two parameters you have to set to query a table from the API are an area type\n", + "and some dimensions. 
In `census21api`, we call these parameters \"features\".\n", + "This guide shows you how to get metadata on each of these features with\n", + "`census21api` using the `CensusAPI.query_feature()` method.\n", + "\n", + "## Getting area types\n", + "\n", + "Given a population type (here we use households, `HH`), we can retrieve area\n", + "type metadata like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeldescriptiontotal_counthierarchy_orderpopulation_type
0natEngland and WalesData for both England and Wales.11400HH
1ctryCountriesData for either the whole of England or Wales.21300HH
2rgnRegionsData for the nine regions in England, and Wale...101200HH
3lepLocal enterprise partnershipsLocal enterprise partnerships (LEPs) are volun...371150HH
4nhserNHS England regionsEach NHS region is responsible for planning lo...81100HH
5lhbLocal health boardsLocal health boards in Wales are responsible f...81000HH
6icbIntegrated care boardsIntegrated care boards in England are responsi...43900HH
7sicblSub integrated care board locationsSub integrated care board locations have repla...107800HH
8utla232023 Upper tier local authoritiesUpper tier local authorities provide a range o...175750HH
9utlaUpper tier local authoritiesUpper tier local authorities provide a range o...174700HH
10ltla232023 Lower tier local authoritiesLower tier local authorities provide a range o...318650HH
11ltlaLower tier local authoritiesLower tier local authorities provide a range o...331600HH
12sencSenedd constituenciesAreas used to elect a member of the Senedd (MS...41550HH
13p19wpcPost-2019 Westminster Parliamentary constituen...Areas that will be used to elect members of pa...575525HH
14senerSenedd electoral regionsThere are five Senedd electoral regions each m...6520HH
15wpcWestminster Parliamentary constituenciesAreas used to elect a member of parliament (MP...573500HH
16wdElectoral wards and divisionsAreas used to elect local authority councillor...7638400HH
17msoaMiddle layer Super Output AreasMiddle layer Super Output Areas (MSOAs) are ma...7264300HH
18lsoaLower layer Super Output AreasLower layer Super Output Areas (LSOAs) are mad...35672200HH
19oaOutput AreasThe lowest level of geographical area for cens...188880100HH
\n", + "
" + ], + "text/plain": [ + " id label \\\n", + "0 nat England and Wales \n", + "1 ctry Countries \n", + "2 rgn Regions \n", + "3 lep Local enterprise partnerships \n", + "4 nhser NHS England regions \n", + "5 lhb Local health boards \n", + "6 icb Integrated care boards \n", + "7 sicbl Sub integrated care board locations \n", + "8 utla23 2023 Upper tier local authorities \n", + "9 utla Upper tier local authorities \n", + "10 ltla23 2023 Lower tier local authorities \n", + "11 ltla Lower tier local authorities \n", + "12 senc Senedd constituencies \n", + "13 p19wpc Post-2019 Westminster Parliamentary constituen... \n", + "14 sener Senedd electoral regions \n", + "15 wpc Westminster Parliamentary constituencies \n", + "16 wd Electoral wards and divisions \n", + "17 msoa Middle layer Super Output Areas \n", + "18 lsoa Lower layer Super Output Areas \n", + "19 oa Output Areas \n", + "\n", + " description total_count \\\n", + "0 Data for both England and Wales. 1 \n", + "1 Data for either the whole of England or Wales. 2 \n", + "2 Data for the nine regions in England, and Wale... 10 \n", + "3 Local enterprise partnerships (LEPs) are volun... 37 \n", + "4 Each NHS region is responsible for planning lo... 8 \n", + "5 Local health boards in Wales are responsible f... 8 \n", + "6 Integrated care boards in England are responsi... 43 \n", + "7 Sub integrated care board locations have repla... 107 \n", + "8 Upper tier local authorities provide a range o... 175 \n", + "9 Upper tier local authorities provide a range o... 174 \n", + "10 Lower tier local authorities provide a range o... 318 \n", + "11 Lower tier local authorities provide a range o... 331 \n", + "12 Areas used to elect a member of the Senedd (MS... 41 \n", + "13 Areas that will be used to elect members of pa... 575 \n", + "14 There are five Senedd electoral regions each m... 6 \n", + "15 Areas used to elect a member of parliament (MP... 573 \n", + "16 Areas used to elect local authority councillor... 
7638 \n", + "17 Middle layer Super Output Areas (MSOAs) are ma... 7264 \n", + "18 Lower layer Super Output Areas (LSOAs) are mad... 35672 \n", + "19 The lowest level of geographical area for cens... 188880 \n", + "\n", + " hierarchy_order population_type \n", + "0 1400 HH \n", + "1 1300 HH \n", + "2 1200 HH \n", + "3 1150 HH \n", + "4 1100 HH \n", + "5 1000 HH \n", + "6 900 HH \n", + "7 800 HH \n", + "8 750 HH \n", + "9 700 HH \n", + "10 650 HH \n", + "11 600 HH \n", + "12 550 HH \n", + "13 525 HH \n", + "14 520 HH \n", + "15 500 HH \n", + "16 400 HH \n", + "17 300 HH \n", + "18 200 HH \n", + "19 100 HH " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from census21api import CensusAPI\n", + "\n", + "api = CensusAPI()\n", + "\n", + "population_type = \"HH\"\n", + "api.query_feature(population_type, \"area-types\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This data frame contains lots of information about the available area types for\n", + "our population type:\n", + "\n", + "- `id`: the code for the area type\n", + "- `label`: its full name\n", + "- `description`: a description of the area type\n", + "- `total_count`: the number of categories in the area type\n", + "- `hierarchy_order`: where the area type sits in the hierarchy of area types\n", + "- `population_type`: the population type you queried\n", + "\n", + "## Getting dimensions\n", + "\n", + "We can retrieve dimensions (features of the data) in a similar way:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeldescriptiontotal_countquality_statement_textpopulation_type
0accommodation_typeAccommodation type (8 categories)The type of building or structure used or avai...8We have made changes to housing definitions si...HH
1accom_by_dwelling_typeAccommodation by type of dwelling (9 categories)Classifies dwellings by their type of accommod...9HH
2heating_typeType of central heating in household (13 categ...Central heating is a heating system used to he...13HH
3hh_adults_and_children_11aAdults and children in household (11 categories)Classifies households by the age of the people...11We have made methodological adjustments to the...HH
4hh_adults_disabled_4aNumber of disabled adults in household (4 cate...The number of adults in a household who assess...4HH
5hh_adults_employment_5aNumber of adults in employment in household (5...The number of employed adults in a household.\\...5As Census 2021 was during a unique period of r...HH
6hh_adults_num_3aNumber of adults in household (3 categories)An adult in a household is any person who is n...3HH
7hh_adult_welsh_speakersWelsh-speaking people in household (5 categories)Classifies households by the ability of people...5HH
8hh_away_student_4aHouseholds with students or schoolchildren liv...This measures households where resident studen...4HH
9hh_carers_6aNumber of unpaid carers in household (6 catego...An unpaid carer may look after, give help or s...6We did not ask people aged under five years wh...HH
10hh_dependent_children_3aDependent children in household and their age ...Classifies the number of dependent children in...3HH
11hh_deprivationHousehold deprivation (6 categories)The dimensions of deprivation used to classify...6Caution should be used in interpreting this va...HH
12hh_deprivation_educationHousehold deprived in the education dimension ...A household is classified as deprived in the e...3HH
13hh_deprivation_employmentHousehold deprived in the employment dimension...A household is classified as deprived in the e...3HH
14hh_deprivation_healthHousehold deprived in the health and disabilit...A household is classified as deprived in the h...3HH
15hh_deprivation_housingHousehold deprived in the housing dimension (3...A household is classified as deprived in the h...3HH
16hh_disabled_4aNumber of disabled people in household (4 cate...The number of people in a household who assess...4HH
17hh_families_countNumber of families in household (7 categories)Number of families in household.7HH
18hh_families_type_12aHousehold type (12 categories)Classifies households in an alternative way to...12HH
19hh_family_composition_37aHousehold composition (37 categories)Households according to the relationships betw...37There are quality considerations around consis...HH
20hh_hrp_veteranHousehold Reference Person previously served i...Identifies whether the Household Reference Per...5Take care when comparing characteristics of UK...HH
21hh_languageHousehold language (English and Welsh) (5 cate...Classifies households by the combination of ad...5HH
22hh_lifestage_13aLifestage of Household Reference Person(13 cat...Household lifestage classifies households acco...13HH
23hh_multi_ethnic_combination_8aCombination of ethnic groups in household (8 c...Classifies households by whether household mem...8HH
24hh_multi_ethnic_groupMultiple ethnic groups in household (6 categor...Classifies households by whether members ident...6HH
25hh_multi_languageMultiple main languages in household (6 catego...Classifies households by whether members speak...6HH
26hh_multi_religionMultiple religions in household (10 categories)Classifies households by whether members ident...10HH
27hh_multi_religion_combinationCombination of religions in household (15 cate...Classifies households by the religious affilia...15HH
28hh_not_limited_4aNumber of people in household with a long-term...The number of people in the household who asse...4HH
29hh_no_condition_4aNumber of people in household with no long-ter...The number of people in the household who asse...4HH
30hh_number_limited_little_4aNumber of disabled people in household whose d...The number of people in the household who have...4HH
31hh_number_limited_lot_4aNumber of disabled people in household whose d...The number of people in the household who have...4HH
32hh_persons_per_bedroomNumber of people per bedroom in household (5 c...The number of household members is divided by ...5HH
33hh_persons_per_roomNumber of people per room in household (5 cate...The number of household members is divided by ...5It is inappropriate to measure change in numbe...HH
34hh_size_9aHousehold size (9 categories)The number of people in the household.\\n\\nVisi...9HH
35hh_tenure_9aTenure of household (9 categories)Whether a household owns or rents the accommod...9There is evidence of people incorrectly identi...HH
36hh_veterans_5aNumber of people in household who previously s...Number of people in the household who have pre...5Take care when comparing characteristics of UK...HH
37hh_welsh_speaking_adults_3aWelsh-speaking adults in household (3 categories)Number of adults (aged 16 years and over) who ...3HH
38hh_welsh_speak_3_plus_8aWelsh-speakers in household (8 categories)Number of people (aged 3 years and over) who c...8HH
39number_bedrooms_6aNumber of Bedrooms (6 categories)The number of bedrooms in a household’s accomm...6HH
40number_of_cars_6aCar or van availability (6 categories)The number of cars or vans owned or available ...6HH
41occupancy_rating_bedrooms_6aOccupancy rating for bedrooms (6 categories)Whether a household's accommodation is overcro...6HH
42occupancy_rating_rooms_6aOccupancy rating for rooms (6 categories)Whether a household's accommodation is overcro...6It is inappropriate to measure change in numbe...HH
43voa_number_of_rooms_9aNumber of rooms (Valuation Office Agency) (9 c...A room can be any room in a dwelling apart fro...9It is inappropriate to measure change in numbe...HH
44workers_transportNumber of people who work in household and the...This groups the number of people who work in a...18As Census 2021 was during a unique period of r...HH
\n", + "
" + ], + "text/plain": [ + " id \\\n", + "0 accommodation_type \n", + "1 accom_by_dwelling_type \n", + "2 heating_type \n", + "3 hh_adults_and_children_11a \n", + "4 hh_adults_disabled_4a \n", + "5 hh_adults_employment_5a \n", + "6 hh_adults_num_3a \n", + "7 hh_adult_welsh_speakers \n", + "8 hh_away_student_4a \n", + "9 hh_carers_6a \n", + "10 hh_dependent_children_3a \n", + "11 hh_deprivation \n", + "12 hh_deprivation_education \n", + "13 hh_deprivation_employment \n", + "14 hh_deprivation_health \n", + "15 hh_deprivation_housing \n", + "16 hh_disabled_4a \n", + "17 hh_families_count \n", + "18 hh_families_type_12a \n", + "19 hh_family_composition_37a \n", + "20 hh_hrp_veteran \n", + "21 hh_language \n", + "22 hh_lifestage_13a \n", + "23 hh_multi_ethnic_combination_8a \n", + "24 hh_multi_ethnic_group \n", + "25 hh_multi_language \n", + "26 hh_multi_religion \n", + "27 hh_multi_religion_combination \n", + "28 hh_not_limited_4a \n", + "29 hh_no_condition_4a \n", + "30 hh_number_limited_little_4a \n", + "31 hh_number_limited_lot_4a \n", + "32 hh_persons_per_bedroom \n", + "33 hh_persons_per_room \n", + "34 hh_size_9a \n", + "35 hh_tenure_9a \n", + "36 hh_veterans_5a \n", + "37 hh_welsh_speaking_adults_3a \n", + "38 hh_welsh_speak_3_plus_8a \n", + "39 number_bedrooms_6a \n", + "40 number_of_cars_6a \n", + "41 occupancy_rating_bedrooms_6a \n", + "42 occupancy_rating_rooms_6a \n", + "43 voa_number_of_rooms_9a \n", + "44 workers_transport \n", + "\n", + " label \\\n", + "0 Accommodation type (8 categories) \n", + "1 Accommodation by type of dwelling (9 categories) \n", + "2 Type of central heating in household (13 categ... \n", + "3 Adults and children in household (11 categories) \n", + "4 Number of disabled adults in household (4 cate... \n", + "5 Number of adults in employment in household (5... \n", + "6 Number of adults in household (3 categories) \n", + "7 Welsh-speaking people in household (5 categories) \n", + "8 Households with students or schoolchildren liv... 
\n", + "9 Number of unpaid carers in household (6 catego... \n", + "10 Dependent children in household and their age ... \n", + "11 Household deprivation (6 categories) \n", + "12 Household deprived in the education dimension ... \n", + "13 Household deprived in the employment dimension... \n", + "14 Household deprived in the health and disabilit... \n", + "15 Household deprived in the housing dimension (3... \n", + "16 Number of disabled people in household (4 cate... \n", + "17 Number of families in household (7 categories) \n", + "18 Household type (12 categories) \n", + "19 Household composition (37 categories) \n", + "20 Household Reference Person previously served i... \n", + "21 Household language (English and Welsh) (5 cate... \n", + "22 Lifestage of Household Reference Person(13 cat... \n", + "23 Combination of ethnic groups in household (8 c... \n", + "24 Multiple ethnic groups in household (6 categor... \n", + "25 Multiple main languages in household (6 catego... \n", + "26 Multiple religions in household (10 categories) \n", + "27 Combination of religions in household (15 cate... \n", + "28 Number of people in household with a long-term... \n", + "29 Number of people in household with no long-ter... \n", + "30 Number of disabled people in household whose d... \n", + "31 Number of disabled people in household whose d... \n", + "32 Number of people per bedroom in household (5 c... \n", + "33 Number of people per room in household (5 cate... \n", + "34 Household size (9 categories) \n", + "35 Tenure of household (9 categories) \n", + "36 Number of people in household who previously s... 
\n", + "37 Welsh-speaking adults in household (3 categories) \n", + "38 Welsh-speakers in household (8 categories) \n", + "39 Number of Bedrooms (6 categories) \n", + "40 Car or van availability (6 categories) \n", + "41 Occupancy rating for bedrooms (6 categories) \n", + "42 Occupancy rating for rooms (6 categories) \n", + "43 Number of rooms (Valuation Office Agency) (9 c... \n", + "44 Number of people who work in household and the... \n", + "\n", + " description total_count \\\n", + "0 The type of building or structure used or avai... 8 \n", + "1 Classifies dwellings by their type of accommod... 9 \n", + "2 Central heating is a heating system used to he... 13 \n", + "3 Classifies households by the age of the people... 11 \n", + "4 The number of adults in a household who assess... 4 \n", + "5 The number of employed adults in a household.\\... 5 \n", + "6 An adult in a household is any person who is n... 3 \n", + "7 Classifies households by the ability of people... 5 \n", + "8 This measures households where resident studen... 4 \n", + "9 An unpaid carer may look after, give help or s... 6 \n", + "10 Classifies the number of dependent children in... 3 \n", + "11 The dimensions of deprivation used to classify... 6 \n", + "12 A household is classified as deprived in the e... 3 \n", + "13 A household is classified as deprived in the e... 3 \n", + "14 A household is classified as deprived in the h... 3 \n", + "15 A household is classified as deprived in the h... 3 \n", + "16 The number of people in a household who assess... 4 \n", + "17 Number of families in household. 7 \n", + "18 Classifies households in an alternative way to... 12 \n", + "19 Households according to the relationships betw... 37 \n", + "20 Identifies whether the Household Reference Per... 5 \n", + "21 Classifies households by the combination of ad... 5 \n", + "22 Household lifestage classifies households acco... 13 \n", + "23 Classifies households by whether household mem... 
8 \n", + "24 Classifies households by whether members ident... 6 \n", + "25 Classifies households by whether members speak... 6 \n", + "26 Classifies households by whether members ident... 10 \n", + "27 Classifies households by the religious affilia... 15 \n", + "28 The number of people in the household who asse... 4 \n", + "29 The number of people in the household who asse... 4 \n", + "30 The number of people in the household who have... 4 \n", + "31 The number of people in the household who have... 4 \n", + "32 The number of household members is divided by ... 5 \n", + "33 The number of household members is divided by ... 5 \n", + "34 The number of people in the household.\\n\\nVisi... 9 \n", + "35 Whether a household owns or rents the accommod... 9 \n", + "36 Number of people in the household who have pre... 5 \n", + "37 Number of adults (aged 16 years and over) who ... 3 \n", + "38 Number of people (aged 3 years and over) who c... 8 \n", + "39 The number of bedrooms in a household’s accomm... 6 \n", + "40 The number of cars or vans owned or available ... 6 \n", + "41 Whether a household's accommodation is overcro... 6 \n", + "42 Whether a household's accommodation is overcro... 6 \n", + "43 A room can be any room in a dwelling apart fro... 9 \n", + "44 This groups the number of people who work in a... 18 \n", + "\n", + " quality_statement_text population_type \n", + "0 We have made changes to housing definitions si... HH \n", + "1 HH \n", + "2 HH \n", + "3 We have made methodological adjustments to the... HH \n", + "4 HH \n", + "5 As Census 2021 was during a unique period of r... HH \n", + "6 HH \n", + "7 HH \n", + "8 HH \n", + "9 We did not ask people aged under five years wh... HH \n", + "10 HH \n", + "11 Caution should be used in interpreting this va... HH \n", + "12 HH \n", + "13 HH \n", + "14 HH \n", + "15 HH \n", + "16 HH \n", + "17 HH \n", + "18 HH \n", + "19 There are quality considerations around consis... 
HH \n", + "20 Take care when comparing characteristics of UK... HH \n", + "21 HH \n", + "22 HH \n", + "23 HH \n", + "24 HH \n", + "25 HH \n", + "26 HH \n", + "27 HH \n", + "28 HH \n", + "29 HH \n", + "30 HH \n", + "31 HH \n", + "32 HH \n", + "33 It is inappropriate to measure change in numbe... HH \n", + "34 HH \n", + "35 There is evidence of people incorrectly identi... HH \n", + "36 Take care when comparing characteristics of UK... HH \n", + "37 HH \n", + "38 HH \n", + "39 HH \n", + "40 HH \n", + "41 HH \n", + "42 It is inappropriate to measure change in numbe... HH \n", + "43 It is inappropriate to measure change in numbe... HH \n", + "44 As Census 2021 was during a unique period of r... HH " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "api.query_feature(population_type, \"dimensions\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This data frame contains much of the same information as the area type\n", + "metadata. However, it replaces `hierarchy_order` with `quality_statement_text`,\n", + "which contains any information users should consider when interpreting\n", + "data involving that dimension.\n", + "\n", + "## Filtering for items in a feature\n", + "\n", + "You can also specify to only see some items in a feature by passing the item\n", + "codes as arguments. For instance, let's filter down our area types to only see\n", + "the Output Areas:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idlabeldescriptiontotal_counthierarchy_orderpopulation_type
0msoaMiddle layer Super Output AreasMiddle layer Super Output Areas (MSOAs) are ma...7264300HH
1lsoaLower layer Super Output AreasLower layer Super Output Areas (LSOAs) are mad...35672200HH
2oaOutput AreasThe lowest level of geographical area for cens...188880100HH
\n", + "
" + ], + "text/plain": [ + " id label \\\n", + "0 msoa Middle layer Super Output Areas \n", + "1 lsoa Lower layer Super Output Areas \n", + "2 oa Output Areas \n", + "\n", + " description total_count \\\n", + "0 Middle layer Super Output Areas (MSOAs) are ma... 7264 \n", + "1 Lower layer Super Output Areas (LSOAs) are mad... 35672 \n", + "2 The lowest level of geographical area for cens... 188880 \n", + "\n", + " hierarchy_order population_type \n", + "0 300 HH \n", + "1 200 HH \n", + "2 100 HH " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "api.query_feature(population_type, \"area-types\", \"msoa\", \"lsoa\", \"oa\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "centhesus", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/how-to-guides/query-population-types.ipynb b/docs/how-to-guides/query-population-types.ipynb new file mode 100644 index 0000000..59ec47d --- /dev/null +++ b/docs/how-to-guides/query-population-types.ipynb @@ -0,0 +1,247 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "title: Querying population types\n", + "description: How to retrieve metadata on population types \n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The API has many endpoints. One of them returns information about the\n", + "population types available to users. 
This guide demonstrates how to get those\n", + "metadata with `census21api`.\n", + "\n", + "## Getting all types\n", + "\n", + "We can retrieve population type metadata with the\n", + "`CensusAPI.query_population_types()` method like so:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namelabeldescriptiontype
0HHAll householdsEither one usual resident living alone or a gr...microdata
1HRPAll Household Reference PersonsA person who serves as a reference point, main...microdata
2URAll usual residentsThe main population base for census statistics...microdata
3UR_CEAll usual residents in communal establishmentsA usual resident who lives in a place that pro...microdata
4UR_HHAll usual residents in householdsA person who usually lives in England or Wales...microdata
\n", + "
" + ], + "text/plain": [ + " name label \\\n", + "0 HH All households \n", + "1 HRP All Household Reference Persons \n", + "2 UR All usual residents \n", + "3 UR_CE All usual residents in communal establishments \n", + "4 UR_HH All usual residents in households \n", + "\n", + " description type \n", + "0 Either one usual resident living alone or a gr... microdata \n", + "1 A person who serves as a reference point, main... microdata \n", + "2 The main population base for census statistics... microdata \n", + "3 A usual resident who lives in a place that pro... microdata \n", + "4 A person who usually lives in England or Wales... microdata " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from census21api import CensusAPI\n", + "\n", + "api = CensusAPI()\n", + "api.query_population_types()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This data frame contains the code, full name, description, and type\n", + "of every population type exposed through the API.\n", + "\n", + ":::{.callout-note}\n", + "Note that every population type has type `microdata`; these are the\n", + "only population types we can query for tables. The API exposes many more but\n", + "they are not currently usable by `census21api`.\n", + ":::\n", + "\n", + "## Filtering for specific populations\n", + "\n", + "You can also specify to only see some population types when you call the\n", + "population querist by listing their codes as arguments:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namelabeldescriptiontype
0URAll usual residentsThe main population base for census statistics...microdata
1UR_CEAll usual residents in communal establishmentsA usual resident who lives in a place that pro...microdata
2UR_HHAll usual residents in householdsA person who usually lives in England or Wales...microdata
\n", + "
" + ], + "text/plain": [ + " name label \\\n", + "0 UR All usual residents \n", + "1 UR_CE All usual residents in communal establishments \n", + "2 UR_HH All usual residents in households \n", + "\n", + " description type \n", + "0 The main population base for census statistics... microdata \n", + "1 A usual resident who lives in a place that pro... microdata \n", + "2 A person who usually lives in England or Wales... microdata " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "api.query_population_types(\"UR\", \"UR_CE\", \"UR_HH\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "centhesus", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/how-to-guides/query-table.ipynb b/docs/how-to-guides/query-table.ipynb new file mode 100644 index 0000000..53acf00 --- /dev/null +++ b/docs/how-to-guides/query-table.ipynb @@ -0,0 +1,857 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "title: Querying a table\n", + "description: How to retrieve a custom table from the Census API\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To retrieve a table from the Census API, you need three things:\n", + "\n", + "- a **population type** (e.g. household reference persons, usual residents)\n", + "- an **area type** (e.g. local authorities, Senedd electoral regions)\n", + "- some **dimensions** (e.g. 
English proficiency, economic activity)\n", + "\n", + "## Retrieving a table\n", + "\n", + "With those selected, we can use the `CensusAPI.query_table()` method to get our\n", + "table:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ctrysexhh_tenure_9acountpopulation_type
0E920000011-80HRP
1E92000001103276595HRP
2E92000001112178228HRP
3E9200000112109635HRP
4E92000001131064598HRP
5E92000001141108579HRP
6E92000001151785514HRP
7E9200000116210267HRP
8E920000011717429HRP
9E920000012-80HRP
10E92000001204348098HRP
11E92000001214566144HRP
12E9200000122126316HRP
13E9200000123880554HRP
14E9200000124951932HRP
15E92000001252488175HRP
16E9200000126310933HRP
17E920000012713088HRP
18W920000041-80HRP
19W9200000410219732HRP
20W9200000411129053HRP
21W92000004122020HRP
22W920000041366360HRP
23W920000041456478HRP
24W920000041587615HRP
25W920000041614552HRP
26W92000004171247HRP
27W920000042-80HRP
28W9200000420292357HRP
29W9200000421248774HRP
30W92000004222263HRP
31W920000042350277HRP
32W920000042448978HRP
33W9200000425107229HRP
34W920000042619245HRP
35W9200000427933HRP
\n", + "
" + ], + "text/plain": [ + " ctry sex hh_tenure_9a count population_type\n", + "0 E92000001 1 -8 0 HRP\n", + "1 E92000001 1 0 3276595 HRP\n", + "2 E92000001 1 1 2178228 HRP\n", + "3 E92000001 1 2 109635 HRP\n", + "4 E92000001 1 3 1064598 HRP\n", + "5 E92000001 1 4 1108579 HRP\n", + "6 E92000001 1 5 1785514 HRP\n", + "7 E92000001 1 6 210267 HRP\n", + "8 E92000001 1 7 17429 HRP\n", + "9 E92000001 2 -8 0 HRP\n", + "10 E92000001 2 0 4348098 HRP\n", + "11 E92000001 2 1 4566144 HRP\n", + "12 E92000001 2 2 126316 HRP\n", + "13 E92000001 2 3 880554 HRP\n", + "14 E92000001 2 4 951932 HRP\n", + "15 E92000001 2 5 2488175 HRP\n", + "16 E92000001 2 6 310933 HRP\n", + "17 E92000001 2 7 13088 HRP\n", + "18 W92000004 1 -8 0 HRP\n", + "19 W92000004 1 0 219732 HRP\n", + "20 W92000004 1 1 129053 HRP\n", + "21 W92000004 1 2 2020 HRP\n", + "22 W92000004 1 3 66360 HRP\n", + "23 W92000004 1 4 56478 HRP\n", + "24 W92000004 1 5 87615 HRP\n", + "25 W92000004 1 6 14552 HRP\n", + "26 W92000004 1 7 1247 HRP\n", + "27 W92000004 2 -8 0 HRP\n", + "28 W92000004 2 0 292357 HRP\n", + "29 W92000004 2 1 248774 HRP\n", + "30 W92000004 2 2 2263 HRP\n", + "31 W92000004 2 3 50277 HRP\n", + "32 W92000004 2 4 48978 HRP\n", + "33 W92000004 2 5 107229 HRP\n", + "34 W92000004 2 6 19245 HRP\n", + "35 W92000004 2 7 933 HRP" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from census21api import CensusAPI\n", + "\n", + "api = CensusAPI()\n", + "\n", + "population_type = \"HRP\"\n", + "area_type = \"ctry\"\n", + "dimensions = (\"sex\", \"hh_tenure_9a\")\n", + "\n", + "api.query_table(population_type, area_type, dimensions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retrieving labelled data\n", + "\n", + "Sometimes the encoded data can be a bit opaque. 
If you would prefer to use the\n", + "labels for the area type and dimensions, use the `use_id` argument:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ctrysexhh_tenure_9acountpopulation_type
0EnglandFemaleDoes not apply0HRP
1EnglandFemaleOwned: Owns outright3276595HRP
2EnglandFemaleOwned: Owns with a mortgage or loan2178228HRP
3EnglandFemaleShared ownership: Shared ownership109635HRP
4EnglandFemaleSocial rented: Rents from council or Local Aut...1064598HRP
5EnglandFemaleSocial rented: Other social rented1108579HRP
6EnglandFemalePrivate rented: Private landlord or letting ag...1785514HRP
7EnglandFemalePrivate rented: Other private rented210267HRP
8EnglandFemaleLives rent free17429HRP
9EnglandMaleDoes not apply0HRP
10EnglandMaleOwned: Owns outright4348098HRP
11EnglandMaleOwned: Owns with a mortgage or loan4566144HRP
12EnglandMaleShared ownership: Shared ownership126316HRP
13EnglandMaleSocial rented: Rents from council or Local Aut...880554HRP
14EnglandMaleSocial rented: Other social rented951932HRP
15EnglandMalePrivate rented: Private landlord or letting ag...2488175HRP
16EnglandMalePrivate rented: Other private rented310933HRP
17EnglandMaleLives rent free13088HRP
18WalesFemaleDoes not apply0HRP
19WalesFemaleOwned: Owns outright219732HRP
20WalesFemaleOwned: Owns with a mortgage or loan129053HRP
21WalesFemaleShared ownership: Shared ownership2020HRP
22WalesFemaleSocial rented: Rents from council or Local Aut...66360HRP
23WalesFemaleSocial rented: Other social rented56478HRP
24WalesFemalePrivate rented: Private landlord or letting ag...87615HRP
25WalesFemalePrivate rented: Other private rented14552HRP
26WalesFemaleLives rent free1247HRP
27WalesMaleDoes not apply0HRP
28WalesMaleOwned: Owns outright292357HRP
29WalesMaleOwned: Owns with a mortgage or loan248774HRP
30WalesMaleShared ownership: Shared ownership2263HRP
31WalesMaleSocial rented: Rents from council or Local Aut...50277HRP
32WalesMaleSocial rented: Other social rented48978HRP
33WalesMalePrivate rented: Private landlord or letting ag...107229HRP
34WalesMalePrivate rented: Other private rented19245HRP
35WalesMaleLives rent free933HRP
\n", + "
" + ], + "text/plain": [ + " ctry sex hh_tenure_9a \\\n", + "0 England Female Does not apply \n", + "1 England Female Owned: Owns outright \n", + "2 England Female Owned: Owns with a mortgage or loan \n", + "3 England Female Shared ownership: Shared ownership \n", + "4 England Female Social rented: Rents from council or Local Aut... \n", + "5 England Female Social rented: Other social rented \n", + "6 England Female Private rented: Private landlord or letting ag... \n", + "7 England Female Private rented: Other private rented \n", + "8 England Female Lives rent free \n", + "9 England Male Does not apply \n", + "10 England Male Owned: Owns outright \n", + "11 England Male Owned: Owns with a mortgage or loan \n", + "12 England Male Shared ownership: Shared ownership \n", + "13 England Male Social rented: Rents from council or Local Aut... \n", + "14 England Male Social rented: Other social rented \n", + "15 England Male Private rented: Private landlord or letting ag... \n", + "16 England Male Private rented: Other private rented \n", + "17 England Male Lives rent free \n", + "18 Wales Female Does not apply \n", + "19 Wales Female Owned: Owns outright \n", + "20 Wales Female Owned: Owns with a mortgage or loan \n", + "21 Wales Female Shared ownership: Shared ownership \n", + "22 Wales Female Social rented: Rents from council or Local Aut... \n", + "23 Wales Female Social rented: Other social rented \n", + "24 Wales Female Private rented: Private landlord or letting ag... \n", + "25 Wales Female Private rented: Other private rented \n", + "26 Wales Female Lives rent free \n", + "27 Wales Male Does not apply \n", + "28 Wales Male Owned: Owns outright \n", + "29 Wales Male Owned: Owns with a mortgage or loan \n", + "30 Wales Male Shared ownership: Shared ownership \n", + "31 Wales Male Social rented: Rents from council or Local Aut... \n", + "32 Wales Male Social rented: Other social rented \n", + "33 Wales Male Private rented: Private landlord or letting ag... 
\n", + "34 Wales Male Private rented: Other private rented \n", + "35 Wales Male Lives rent free \n", + "\n", + " count population_type \n", + "0 0 HRP \n", + "1 3276595 HRP \n", + "2 2178228 HRP \n", + "3 109635 HRP \n", + "4 1064598 HRP \n", + "5 1108579 HRP \n", + "6 1785514 HRP \n", + "7 210267 HRP \n", + "8 17429 HRP \n", + "9 0 HRP \n", + "10 4348098 HRP \n", + "11 4566144 HRP \n", + "12 126316 HRP \n", + "13 880554 HRP \n", + "14 951932 HRP \n", + "15 2488175 HRP \n", + "16 310933 HRP \n", + "17 13088 HRP \n", + "18 0 HRP \n", + "19 219732 HRP \n", + "20 129053 HRP \n", + "21 2020 HRP \n", + "22 66360 HRP \n", + "23 56478 HRP \n", + "24 87615 HRP \n", + "25 14552 HRP \n", + "26 1247 HRP \n", + "27 0 HRP \n", + "28 292357 HRP \n", + "29 248774 HRP \n", + "30 2263 HRP \n", + "31 50277 HRP \n", + "32 48978 HRP \n", + "33 107229 HRP \n", + "34 19245 HRP \n", + "35 933 HRP " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "api.query_table(population_type, area_type, dimensions, use_id=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "centhesus", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/reference/index.qmd b/docs/reference/index.qmd new file mode 100644 index 0000000..e9c1cfc --- /dev/null +++ b/docs/reference/index.qmd @@ -0,0 +1,27 @@ +--- +title: Reference +listing: + id: reference + type: table + contents: + - api/index.qmd + fields: [title, description] + sort: false +--- + +This section of the documentation covers the reference material for +`census21api`. 
According to the Diátaxis framework, reference guides are: + +> technical descriptions of the machinery and how to operate it. Reference +> material is information-oriented. + +Right now, the only reference material we need is our API reference, which +contains the technical details of the core `CensusAPI` class and its methods. + +We automatically generate this reference material using +[`quartodoc`](https://machow.github.io/quartodoc/get-started/overview.html). + +
+ +:::{#reference} +::: \ No newline at end of file diff --git a/docs/tutorials/getting-started.ipynb b/docs/tutorials/getting-started.ipynb index 81ff3c2..fa03c71 100644 --- a/docs/tutorials/getting-started.ipynb +++ b/docs/tutorials/getting-started.ipynb @@ -42,11 +42,8 @@ "source": [ "## Defining a query\n", "\n", - "Now, to query a table, we need three parameters:\n", - "\n", - "- a **population type** (e.g. household reference persons, usual residents)\n", - "- an **area type** (e.g. local authorities, Senedd electoral regions)\n", - "- some **dimensions** (e.g. english proficiency, economic activity)\n", + "To query a table, we need to identify our population type, area type, and\n", + "dimensions to form our query parameters.\n", "\n", "Let's say we want the counts for different levels of deprivation between the\n", "sexes at a national level. Deprivation statistics are recorded at the household\n", diff --git a/docs/tutorials/index.qmd b/docs/tutorials/index.qmd index 3f8ba74..1f64224 100644 --- a/docs/tutorials/index.qmd +++ b/docs/tutorials/index.qmd @@ -10,7 +10,9 @@ listing: sort: false --- -In these tutorials, we walk through how to install the package and perform some -basic workflows to interact with Census data. +In these tutorials, we walk you through some small projects using +`census21api`. The purpose of this section of the documentation is for you to +learn how to use the package for your own projects. For closer details of the +package's innards, see the [Reference](docs/reference/index.qmd) material.
diff --git a/index.qmd b/index.qmd index 43e5a3a..e63f8a9 100644 --- a/index.qmd +++ b/index.qmd @@ -32,12 +32,20 @@ for National Statistics data engineering and architecture apprentices -- with support from the [Data Science Campus](https://datasciencecampus.ons.gov.uk). See our [citation file](https://github.com/datasciencecampus/census21api/blob/main/CITATION.cff) -for a list of our authors. +for a list of the original authors or our +[GitHub repository](https://github.com/datasciencecampus/census21api/graphs/contributors) +for all the contributors. ## Where do I go now? -Get stuck in with the package by checking out our -[tutorials](docs/tutorials/index.qmd). For closer details, consider our -[API reference](docs/reference/index.qmd). +Our documentation follows the [Diátaxis](https://diataxis.fr/) framework and is +split into the following sections: + +- [Tutorials](docs/tutorials/index.qmd) are walkthroughs of projects using + `census21api` --- if you're new, start here. +- [How-to guides](docs/how-to-guides/index.qmd) are short guides with a very + specific goal in mind. +- [Reference](docs/reference/index.qmd) holds the technical documentation + covering the details of how `census21api` works. 
::: diff --git a/pyproject.toml b/pyproject.toml index 235fe09..ccc8189 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ lint = [ ] docs = [ "nbval>=0.10.0", - "quartodoc>=0.5.0", + "quartodoc>=0.5.0; python_version>='3.9'", "seaborn>=0.12.1" ] dev = [ diff --git a/src/census21api/wrapper.py b/src/census21api/wrapper.py index 51af3c7..ec4ad36 100644 --- a/src/census21api/wrapper.py +++ b/src/census21api/wrapper.py @@ -2,7 +2,7 @@ import warnings from json import JSONDecodeError -from typing import Any, Dict, List, Literal, Optional, Set +from typing import Any, Dict, List, Literal, Optional, Set, Union import pandas as pd import requests @@ -10,7 +10,7 @@ from census21api.constants import API_ROOT -JSONLike = Optional[Dict[str, Any]] +JSONLike = Optional[Union[List[dict], Dict[str, Any]]] DataLike = Optional[pd.DataFrame] @@ -36,7 +36,7 @@ class CensusAPI: def __init__(self, logger: bool = False) -> None: self._logger: bool = logger self._current_url: str = None - self._current_data: dict = None + self._current_data: JSONLike = None def _process_response(self, response: Response) -> JSONLike: """ @@ -56,7 +56,6 @@ def _process_response(self, response: Response) -> JSONLike: Data dictionary if the response is valid and `None` if not. """ - data = None if not 200 <= response.status_code <= 299: if self._logger: warnings.warn( @@ -69,10 +68,10 @@ def _process_response(self, response: Response) -> JSONLike: ), UserWarning, ) - return data + return None try: - data = response.json() + return response.json() except JSONDecodeError as e: if self._logger: warnings.warn( @@ -85,8 +84,6 @@ def _process_response(self, response: Response) -> JSONLike: UserWarning, ) - return data - def get(self, url: str) -> JSONLike: """ Make a call to, and retrieve some data from, the API. 
@@ -183,7 +180,6 @@ def query_table( table_json = self._query_table_json( population_type, area_type, dimensions ) - data = None if isinstance(table_json, dict) and "observations" in table_json: records = _extract_records_from_observations( @@ -266,14 +262,12 @@ def query_population_types(self, *population_types: str) -> DataLike: if isinstance(meta, dict) and "name" in meta: metas.append(meta) - if not metas: - return None + if metas: + metadata = pd.DataFrame(metas) + if population_types: + metadata = metadata[metadata["name"].isin(population_types)] - metadata = pd.DataFrame(metas) - if population_types: - metadata = metadata[metadata["name"].isin(population_types)] - - return metadata + return metadata.sort_values("name", ignore_index=True) def query_feature( self, @@ -310,7 +304,6 @@ def query_feature( url = "/".join((API_ROOT, population_type, f"{feature}?limit=500")) json = self.get(url) - metadata = None if isinstance(json, dict) and "items" in json: metadata = pd.json_normalize(json["items"]) @@ -318,7 +311,92 @@ def query_feature( if items: metadata = metadata[metadata["id"].isin(items)] - return metadata + return metadata.reset_index(drop=True) + + def _query_area_type_categories_json( + self, population_type: str, area_type: str + ) -> JSONLike: + """ + Query metadata for an area type's categories in JSON format. + + Parameters + ---------- + population_type : str + Population type to query. + area_type : str + Area type to query. + + Returns + ------- + areas : dict or None + Dictionary with the area type categories if the calls + succeed, and `None` if any fail. 
+ """ + + url = "/".join( + ( + API_ROOT, + population_type, + "area-types", + area_type, + "areas?limit=500", + ) + ) + json = self.get(url) + + if isinstance(json, dict) and "items" in json: + areas = json["items"] + total_counted = json["count"] + while total_counted < json["total_count"]: + json = self.get(url + f"&offset={total_counted}") + if not (isinstance(json, dict) and "items" in json): + return None + + areas.extend(json["items"]) + total_counted += json["count"] + + return areas + + def _query_dimension_categories_json( + self, population_type: str, dimension: str + ) -> JSONLike: + """ + Query metadata for a dimension's categories in JSON format. + + Parameters + ---------- + population_type : str + Population type to query. + dimension : str + Dimension to query. + + Returns + ------- + categorisations : list or None + List with the dimension category metadata if the call + succeeds, and `None` if not. + """ + + url = "/".join( + ( + API_ROOT, + population_type, + "dimensions", + dimension, + "categorisations?limit=500", + ) + ) + json = self.get(url) + + if isinstance(json, dict) and "items" in json: + item = next( + item for item in json["items"] if item["id"] == dimension + ) + categorisations = [ + {**cat, "dimension": dimension} for cat in item["categories"] + ] + + return categorisations def query_categories( self, @@ -351,26 +429,20 @@ def query_categories( succeeds, and `None` if not. 
""" - endpoint = "areas" if feature == "area-types" else "categorisations" - url = "/".join( - (API_ROOT, population_type, feature, item, f"{endpoint}?limit=500") - ) - json = self.get(url) - categories = None - - if isinstance(json, dict) and "items" in json: - items = json["items"] - - total_counted = json["count"] - while total_counted < json["total_count"]: - json = self.get(url + f"&offset={total_counted}") - items.extend(json["items"]) - total_counted += json["count"] + if feature == "area-types": + categories = self._query_area_type_categories_json( + population_type, item + ) + if feature == "dimensions": + categories = self._query_dimension_categories_json( + population_type, item + ) - categories = pd.json_normalize(items) + if isinstance(categories, (dict, list)): + categories = pd.json_normalize(categories) categories["population_type"] = population_type - return categories + return categories def _extract_records_from_observations( diff --git a/tests/strategies.py b/tests/strategies.py index 98ab2c2..ca28552 100644 --- a/tests/strategies.py +++ b/tests/strategies.py @@ -86,28 +86,6 @@ def st_feature_queries(draw): return population_type, endpoint, items, result -@st.composite -def st_category_queries(draw): - """Create a category metadata query pack for testing.""" - - population_type = draw(st.sampled_from(POPULATION_TYPES)) - feature = draw(st.sampled_from(("area-types", "dimensions"))) - endpoint = "areas" if feature == "area-types" else "categorisations" - - items_by_population_type = ( - AREA_TYPES_BY_POPULATION_TYPE - if feature == "area-types" - else DIMENSIONS_BY_POPULATION_TYPE - ) - possible_items = items_by_population_type[population_type] - item = draw(st.sampled_from(possible_items)) - - num_items = draw(st.integers(1, 5)) - items = [{"item": st.text()} for _ in range(num_items)] - - return population_type, feature, item, endpoint, items - - @st.composite def st_population_types(draw, include_interested=False): """Sample a set of 
population types and their metadata.""" @@ -131,3 +109,36 @@ def st_population_types(draw, include_interested=False): ) return population_types, json_metadata, interested + + +@st.composite +def st_category_queries(draw, feature=None): + """Create a category metadata query pack for testing.""" + + population_type = draw(st.sampled_from(POPULATION_TYPES)) + feature = feature or draw(st.sampled_from(("area-types", "dimensions"))) + num_categories = draw(st.integers(1, 5)) + + if feature == "area-types": + item = draw( + st.sampled_from(AREA_TYPES_BY_POPULATION_TYPE[population_type]) + ) + categories = [ + { + "id": draw(st.text()), + "label": draw(st.text()), + "area_type": item, + } + for _ in range(num_categories) + ] + + if feature == "dimensions": + item = draw( + st.sampled_from(DIMENSIONS_BY_POPULATION_TYPE[population_type]) + ) + categories = [ + {"id": draw(st.text()), "label": draw(st.text())} + for _ in range(num_categories) + ] + + return population_type, item, categories diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index 60a9d06..1c7813e 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -300,7 +300,9 @@ def test_query_population_types_valid_all_types(params): "description", "type", ] - for (_, row), meta in zip(metadata.iterrows(), json_metadata): + + sorted_json_metadata = sorted(json_metadata, key=lambda x: x["name"]) + for (_, row), meta in zip(metadata.iterrows(), sorted_json_metadata): assert dict(row) == meta get_pop_types.assert_called_once_with() @@ -420,38 +422,43 @@ def test_query_feature_invalid(query, result): ) -@given(st_category_queries()) -def test_query_categories_single_call(query): - """Test the category querist works on a single call.""" +@given(st_category_queries(feature="area-types")) +def test_query_area_type_categories_json_single_call(params): + """Test the category querist works for area types.""" - population_type, feature, item, endpoint, items = query + population_type, area_type, categories = 
params api = CensusAPI() with mock.patch("census21api.wrapper.CensusAPI.get") as get: - get.return_value = {"count": 1, "total_count": 1, "items": items} - categories = api.query_categories(population_type, feature, item) - - assert isinstance(categories, pd.DataFrame) - assert len(categories) == len(items) - assert set(categories["item"]) == set(i["item"] for i in items) - assert (categories["population_type"] == population_type).all() + get.return_value = {"count": 1, "total_count": 1, "items": categories} + areas = api._query_area_type_categories_json( + population_type, area_type + ) - expected_columns = ["item", "population_type"] - assert categories.columns.to_list() == expected_columns + assert isinstance(areas, list) + assert len(areas) == len(categories) + assert all(isinstance(area, dict) for area in areas) + assert areas == categories get.assert_called_once_with( "/".join( - (API_ROOT, population_type, feature, item, f"{endpoint}?limit=500") + ( + API_ROOT, + population_type, + "area-types", + area_type, + "areas?limit=500", + ) ) ) -@given(st_category_queries()) -def test_query_categories_multiple_calls(query): - """Test the category querist works with multiple calls.""" +@given(st_category_queries(feature="area-types")) +def test_query_area_type_categories_json_multiple_calls(params): + """Test the area type category querist works for multiple calls.""" - population_type, feature, item, endpoint, items = query + population_type, area_type, categories = params api = CensusAPI() @@ -459,15 +466,88 @@ def test_query_categories_multiple_calls(query): get.return_value = { "count": 1, "total_count": 2, - "items": items.copy(), + "items": categories.copy(), + } + areas = api._query_area_type_categories_json( + population_type, area_type + ) + + assert isinstance(areas, list) + assert len(areas) == 2 * len(categories) + assert all(isinstance(area, dict) for area in areas) + assert areas == 2 * categories + + assert get.call_count == 2 + get.assert_called_with( 
+ "/".join( + ( + API_ROOT, + population_type, + "area-types", + area_type, + "areas?limit=500&offset=1", + ) + ) + ) + + +@given( + st_category_queries(feature="area-types"), + st.one_of((st.just(None), st.dictionaries(st.integers(), st.integers()))), +) +def test_query_area_type_categories_json_single_call_invalid(params, result): + """Test the category querist gives None if the first call fails.""" + + population_type, area_type, _ = params + + api = CensusAPI() + + with mock.patch("census21api.wrapper.CensusAPI.get") as get: + get.return_value = result + areas = api._query_area_type_categories_json( + population_type, area_type + ) + + assert areas is None + + get.assert_called_once_with( + "/".join( + ( + API_ROOT, + population_type, + "area-types", + area_type, + "areas?limit=500", + ) + ) + ) + + +@given( + st_category_queries(feature="area-types"), + st.one_of((st.just(None), st.dictionaries(st.integers(), st.integers()))), +) +def test_query_area_type_categories_json_multiple_call_invalid( + params, second_response +): + """Test the category querist gives None if the first call fails.""" + + population_type, area_type, categories = params + + api = CensusAPI() + + with mock.patch("census21api.wrapper.CensusAPI.get") as get: + first_response = { + "count": 1, + "total_count": 2, + "items": categories.copy(), } - categories = api.query_categories(population_type, feature, item) + get.side_effect = [first_response, second_response] + areas = api._query_area_type_categories_json( + population_type, area_type + ) - assert isinstance(categories, pd.DataFrame) - assert categories.columns.to_list() == ["item", "population_type"] - assert len(categories) == len(items) * 2 - assert set(categories["item"]) == set(i["item"] for i in items) - assert (categories["population_type"] == population_type).all() + assert areas is None assert get.call_count == 2 get.assert_called_with( @@ -475,31 +555,136 @@ def test_query_categories_multiple_calls(query): ( API_ROOT, 
population_type, - feature, - item, - f"{endpoint}?limit=500&offset=1", + "area-types", + area_type, + "areas?limit=500&offset=1", + ) + ) + ) + + +@given(st_category_queries(feature="dimensions")) +def test_query_dimension_categories_json_valid(params): + """Test the dimension category querist works for a valid call.""" + + population_type, dimension, categories = params + + api = CensusAPI() + + with mock.patch("census21api.wrapper.CensusAPI.get") as get: + get.return_value = { + "count": 0, + "total_count": 1, + "items": [ + {"id": dimension, "label": dimension, "categories": categories} + ], + } + categorisations = api._query_dimension_categories_json( + population_type, dimension + ) + + assert isinstance(categorisations, list) + assert len(categorisations) == len(categories) + assert all( + isinstance(categorisation, dict) for categorisation in categorisations + ) + assert categorisations == [ + {**cat, "dimension": dimension} for cat in categories + ] + + get.assert_called_once_with( + "/".join( + ( + API_ROOT, + population_type, + "dimensions", + dimension, + "categorisations?limit=500", ) ) ) @given( - st_category_queries(), + st_category_queries(feature="dimensions"), st.one_of((st.just(None), st.dictionaries(st.integers(), st.integers()))), ) -def test_query_categories_invalid(query, result): - """Test the category querist returns nothing on a failed call.""" +def test_query_dimension_categories_json_invalid(params, result): + """Test the dimension category querist works on a failed call.""" - population_type, feature, item, endpoint, _ = query + population_type, dimension, _ = params api = CensusAPI() with mock.patch("census21api.wrapper.CensusAPI.get") as get: get.return_value = result - categories = api.query_categories(population_type, feature, item) + categorisations = api._query_dimension_categories_json( + population_type, dimension + ) - assert categories is None + assert categorisations is None get.assert_called_once_with( - 
f"{API_ROOT}/{population_type}/{feature}/{item}/{endpoint}?limit=500" + "/".join( + ( + API_ROOT, + population_type, + "dimensions", + dimension, + "categorisations?limit=500", + ) + ) + ) + + +@given(st_category_queries()) +def test_query_categories_valid(params): + """Test the category querist works for a valid call.""" + + population_type, item, categories = params + + api = CensusAPI() + + feature = "area-types" if "area_type" in categories[0] else "dimensions" + querist_to_patch = ( + "census21api.wrapper.CensusAPI." + f"_query_{feature.replace('-', '_')[:-1]}_categories_json" ) + + with mock.patch(querist_to_patch) as query: + query.return_value = categories + result = api.query_categories(population_type, feature, item) + + assert isinstance(result, pd.DataFrame) + assert (result["population_type"] == population_type).all() + assert pd.DataFrame(categories).equals( + result.drop("population_type", axis=1) + ) + + query.assert_called_once_with(population_type, item) + + +@given(st_category_queries()) +def test_query_categories_invalid(params): + """Test the category querist returns none on a failed call.""" + + population_type, item, categories = params + + api = CensusAPI() + + feature = "area-types" if "area_type" in categories[0] else "dimensions" + querist_to_patch = ( + "census21api.wrapper.CensusAPI." + f"_query_{feature.replace('-', '_')[:-1]}_categories_json" + ) + + with mock.patch(querist_to_patch) as query, mock.patch( + "census21api.wrapper.pd.json_normalize" + ) as json: + query.return_value = None + result = api.query_categories(population_type, feature, item) + + assert result is None + + query.assert_called_once_with(population_type, item) + json.assert_not_called()