From 594197937ddca6eb350f476919b83d7d6b7e9c5d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Mar 2018 21:16:30 +0000 Subject: [PATCH 001/375] Update traceability descriptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …According to IATI/IATI-Stats@4b4582fa. --- static/templates/comprehensiveness_base.html | 4 ++-- static/templates/comprehensiveness_financials.html | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/static/templates/comprehensiveness_base.html b/static/templates/comprehensiveness_base.html index 1e52e64842..fdc4efac17 100644 --- a/static/templates/comprehensiveness_base.html +++ b/static/templates/comprehensiveness_base.html @@ -199,7 +199,7 @@

Pseudocode

Else Ignore activity Else If the comprehensiveness test is 'Transaction - Traceability' - If transaction/transaction-type[@code="IF"] exists (1.xx) or transaction/transaction-type[@code="1"] exists (2.xx) + If transaction/transaction-type[@code="IF"] exists (1.xx) or transaction/transaction-type[@code="1"] exists (2.xx) or transaction/transaction-type[@code="11"] exists or transaction/transaction-type[@code="13"] exists Use activity Else Ignore activity @@ -309,7 +309,7 @@

Pseudocode

Financials Transaction - Traceability - All transactions of @type=IF (1.xx) or @type=1 (2.xx) must contain provider-org/@provider-activity-id + All transactions with @type of 'Incoming Funds' (i.e. `IF` (1.xx) or `1` (2.xx)) or 'Incoming Commitment' (i.e. `11`) or 'Incoming Pledge' (i.e. `13`) must contain provider-org/@provider-activity-id diff --git a/static/templates/comprehensiveness_financials.html b/static/templates/comprehensiveness_financials.html index c9e2a5dc4c..a950c232ef 100644 --- a/static/templates/comprehensiveness_financials.html +++ b/static/templates/comprehensiveness_financials.html @@ -29,7 +29,7 @@
Transaction - Disbursement or Expenditure
Transaction - Traceability
-

For the data at the publishers' lowest hierarchy, the percentage of current activities containing a transaction of type Incoming Funds that also contain the IATI identifier for the funding organisation's activity. This links the funds disbursed by one organisation and received by another. (NB activities that do not contain incoming funds transactions are excluded from the calculation.) (In future the syntax of the provider-activity-id will also be validated.)

+

For the data at the publishers' lowest hierarchy, the percentage of current activities containing a transaction of type Incoming Funds, Incoming Commitment or Incoming Pledge that also contain the IATI identifier for the funding organisation's activity. This links the funds disbursed by one organisation and received by another. (NB activities that do not contain incoming funds transactions are excluded from the calculation.) (In future the syntax of the provider-activity-id will also be validated.)

Donor publishers who list themselves within as a participating-org of either 1 (i.e. 'Funding') or 3 (i.e. 'Extending') AND who are not listed as type 4 (i.e. 'Implementing') will be given credit for traceability, as they are at the top of the funding chain.

From 7f69f7c178cda89ea92916837f9bc8328e8de0d9 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 24 Jul 2018 12:14:15 +0100 Subject: [PATCH 002/375] Remove codelist conditions notice --- data.py | 3 --- make_html.py | 1 - static/templates/codelist.html | 3 --- static/templates/publisher.html | 3 --- 4 files changed, 10 deletions(-) diff --git a/data.py b/data.py index 87fe02b797..d56c64a016 100644 --- a/data.py +++ b/data.py @@ -217,9 +217,6 @@ def create_codelist_mapping(major_version): MAJOR_VERSIONS = ['2', '1'] codelist_mapping = { v:create_codelist_mapping(v) for v in MAJOR_VERSIONS } -codelist_conditions = { - major_version: transform_codelist_mapping_keys({ x['path']:x.get('condition') for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))}) - for major_version in MAJOR_VERSIONS } # Create a big dictionary of all codelist values by version and codelist name codelist_sets = { diff --git a/make_html.py b/make_html.py index 3b9521a624..852c266db0 100644 --- a/make_html.py +++ b/make_html.py @@ -119,7 +119,6 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['slugs'] = slugs app.jinja_env.globals['codelist_mapping'] = codelist_mapping -app.jinja_env.globals['codelist_conditions'] = codelist_conditions app.jinja_env.globals['codelist_sets'] = codelist_sets app.jinja_env.globals['get_codelist_values'] = get_codelist_values diff --git a/static/templates/codelist.html b/static/templates/codelist.html index fc4ee5c588..5eddf8720b 100644 --- a/static/templates/codelist.html +++ b/static/templates/codelist.html @@ -8,9 +8,6 @@

Codelist values used for {{element}}

Who uses {{codelist_mapping[major_version].get(element)}} in {{element}}?

(for files published to version {{major_version}}.xx of the standard)

-{% if codelist_conditions[major_version][element] %} -
Note: this attribute can be on multiple codelists (specified by the @vocabulary attribute) but the dashboard only currently checks against the default codelist - see issue #174. Therefore some publishers may incorrectly show up as "Not On Codelist".
-{% endif %}

(This page in JSON format)

Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

{% endblock %} diff --git a/static/templates/publisher.html b/static/templates/publisher.html index b340eedd0b..ac3ad7dfd9 100644 --- a/static/templates/publisher.html +++ b/static/templates/publisher.html @@ -351,9 +351,6 @@

Codelist Values (version {{major_version}}.xx)

{{codes|count}} - {% if codelist_conditions[major_version][element] %} -
Note: this attribute can be on multiple codelists (specified by the @vocabulary attribute) but the Dashboard only currently checks against the default codelist - see issue #174. Therefore some publishers may incorrectly show up as "Not On Codelist".
- {% endif %} {%else%} {{codes|count}} {%endif%} From ae341b9112bc7604713da5f83df5ce1033a24699 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 20 Nov 2019 15:23:18 +0000 Subject: [PATCH 003/375] Use the default mapping (if we have one) Default mappings should be preferred. --- data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/data.py b/data.py index 55c4e32d1a..00567724e4 100644 --- a/data.py +++ b/data.py @@ -211,7 +211,13 @@ def transform_codelist_mapping_keys(codelist_mapping): return codelist_mapping def create_codelist_mapping(major_version): - codelist_mapping = {x['path']:x['codelist'] for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))} + codelist_mapping = {} + for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version))): + if 'condition' in x \ + and x['path'] in codelist_mapping \ + and not re.search(r'not\([^)]+\)', x['condition']): + continue + codelist_mapping[x['path']] = x['codelist'] return transform_codelist_mapping_keys(codelist_mapping) MAJOR_VERSIONS = ['2', '1'] From b32baed8442e33aef7ea747f7461d87bbae675cc Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 18:23:26 +0000 Subject: [PATCH 004/375] README fixes --- README.rst | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 71f21a1537..192509d73f 100644 --- a/README.rst +++ b/README.rst @@ -1,22 +1,19 @@ IATI Dashboard ============== -.. image:: https://requires.io/github/IATI/IATI-Dashboard/requirements.svg?branch=master - :target: https://requires.io/github/IATI/IATI-Dashboard/requirements/?branch=master - :alt: Requirements Status .. 
image:: https://img.shields.io/badge/license-GPLv3-blue.svg - :target: https://github.com/IATI/IATI-Dashboard/blob/master/GPL.md + :target: https://github.com/codeforIATI/IATI-Dashboard/blob/main/GPL.md Introduction ------------ The IATI dashboard displays key numbers and graphs about the data on the `IATI registry `__. -See the Dashboard in action at http://dashboard.iatistandard.org +See the Dashboard in action at https://dashboard.codeforiati.org The Dashboard is in beta, all contents/urls/machine readable downloads are subject to change. -This repository is the code for the Dashboard frontend. Stats are generated from the Registry by stats code in a separate repository - https://github.com/IATI/IATI-Stats +This repository is the code for the Dashboard frontend. Stats are generated from the Registry by stats code in a separate repository - https://github.com/codeforIATI/IATI-Stats Technology Overview ^^^^^^^^^^^^^^^^^^^ @@ -31,7 +28,7 @@ Python scripts: Bash helper scripts: -* The main source of data for the Dashboard is stats generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. (or see calculating your own stats section below) +* The main source of data for the Dashboard is stats generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. (or see calculating your own stats section below) * The Dashboard also uses various other data from online sources (including GitHub). These can be fetched using ``fetch_data.sh``. * ``git.sh`` runs all the above commands, see Usage below. @@ -56,7 +53,7 @@ To install: .. 
code-block:: bash ## Get the code - git clone https://github.com/IATI/IATI-Dashboard.git + git clone https://github.com/codeforIATI/IATI-Dashboard.git cd IATI-Dashboard ## Set up a virtual environment (recommended) @@ -76,7 +73,7 @@ To install: Usage ^^^^^ -The following steps are performed routinely on our `deployed Dashboard `__. (On our servers, the Dashboard is actually deployed using `this salt file `__. +The following steps are performed routinely on our `deployed Dashboard `__. .. code-block:: bash @@ -108,9 +105,9 @@ Using the live development server is highly recommended, because it displays ful Calculating your own stats for the dashboard ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/IATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. +The Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/codeforIATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. -Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the stats-calculated/current directory with the out directory produced by running the `loop, aggregate and invert commands individually `__. +Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the stats-calculated/current directory with the out directory produced by running the `loop, aggregate and invert commands individually `__. 
License ^^^^^^^ From 9dafc341937c88b39798abecb21bc63c64248013 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 20:52:45 +0000 Subject: [PATCH 005/375] Move templates out of /static --- make_html.py | 2 +- {static/templates => templates}/activities.html | 0 {static/templates => templates}/base.html | 0 {static/templates => templates}/booleans.html | 0 {static/templates => templates}/boxes.html | 0 {static/templates => templates}/codelist.html | 0 {static/templates => templates}/codelists.html | 0 {static/templates => templates}/comment.html | 0 {static/templates => templates}/data_quality.html | 0 {static/templates => templates}/dates.html | 0 {static/templates => templates}/download.html | 0 {static/templates => templates}/element.html | 0 {static/templates => templates}/elements.html | 0 {static/templates => templates}/exploring_data.html | 0 {static/templates => templates}/faq.html | 0 {static/templates => templates}/files.html | 0 {static/templates => templates}/headlines.html | 0 {static/templates => templates}/humanitarian.html | 0 {static/templates => templates}/identifiers.html | 0 {static/templates => templates}/index.html | 0 {static/templates => templates}/license.html | 0 {static/templates => templates}/licenses.html | 0 {static/templates => templates}/organisation.html | 0 {static/templates => templates}/publisher.html | 0 {static/templates => templates}/publishers.html | 0 {static/templates => templates}/publishing_stats.html | 0 {static/templates => templates}/registration_agencies.html | 0 {static/templates => templates}/reporting_orgs.html | 0 {static/templates => templates}/section_index.html | 0 {static/templates => templates}/sparkwise.html | 0 {static/templates => templates}/tablesorter_instructions.html | 0 {static/templates => templates}/validation.html | 0 {static/templates => templates}/versions.html | 0 {static/templates => templates}/xml.html | 0 34 files changed, 1 insertion(+), 1 deletion(-) rename {static/templates => 
templates}/activities.html (100%) rename {static/templates => templates}/base.html (100%) rename {static/templates => templates}/booleans.html (100%) rename {static/templates => templates}/boxes.html (100%) rename {static/templates => templates}/codelist.html (100%) rename {static/templates => templates}/codelists.html (100%) rename {static/templates => templates}/comment.html (100%) rename {static/templates => templates}/data_quality.html (100%) rename {static/templates => templates}/dates.html (100%) rename {static/templates => templates}/download.html (100%) rename {static/templates => templates}/element.html (100%) rename {static/templates => templates}/elements.html (100%) rename {static/templates => templates}/exploring_data.html (100%) rename {static/templates => templates}/faq.html (100%) rename {static/templates => templates}/files.html (100%) rename {static/templates => templates}/headlines.html (100%) rename {static/templates => templates}/humanitarian.html (100%) rename {static/templates => templates}/identifiers.html (100%) rename {static/templates => templates}/index.html (100%) rename {static/templates => templates}/license.html (100%) rename {static/templates => templates}/licenses.html (100%) rename {static/templates => templates}/organisation.html (100%) rename {static/templates => templates}/publisher.html (100%) rename {static/templates => templates}/publishers.html (100%) rename {static/templates => templates}/publishing_stats.html (100%) rename {static/templates => templates}/registration_agencies.html (100%) rename {static/templates => templates}/reporting_orgs.html (100%) rename {static/templates => templates}/section_index.html (100%) rename {static/templates => templates}/sparkwise.html (100%) rename {static/templates => templates}/tablesorter_instructions.html (100%) rename {static/templates => templates}/validation.html (100%) rename {static/templates => templates}/versions.html (100%) rename {static/templates => templates}/xml.html 
(100%) diff --git a/make_html.py b/make_html.py index 94d2d7a602..d3e592cc98 100644 --- a/make_html.py +++ b/make_html.py @@ -10,7 +10,7 @@ from collections import defaultdict from flask import Flask, render_template, redirect, abort, Response -app = Flask(__name__, template_folder="static/templates") +app = Flask(__name__) import licenses from vars import expected_versions diff --git a/static/templates/activities.html b/templates/activities.html similarity index 100% rename from static/templates/activities.html rename to templates/activities.html diff --git a/static/templates/base.html b/templates/base.html similarity index 100% rename from static/templates/base.html rename to templates/base.html diff --git a/static/templates/booleans.html b/templates/booleans.html similarity index 100% rename from static/templates/booleans.html rename to templates/booleans.html diff --git a/static/templates/boxes.html b/templates/boxes.html similarity index 100% rename from static/templates/boxes.html rename to templates/boxes.html diff --git a/static/templates/codelist.html b/templates/codelist.html similarity index 100% rename from static/templates/codelist.html rename to templates/codelist.html diff --git a/static/templates/codelists.html b/templates/codelists.html similarity index 100% rename from static/templates/codelists.html rename to templates/codelists.html diff --git a/static/templates/comment.html b/templates/comment.html similarity index 100% rename from static/templates/comment.html rename to templates/comment.html diff --git a/static/templates/data_quality.html b/templates/data_quality.html similarity index 100% rename from static/templates/data_quality.html rename to templates/data_quality.html diff --git a/static/templates/dates.html b/templates/dates.html similarity index 100% rename from static/templates/dates.html rename to templates/dates.html diff --git a/static/templates/download.html b/templates/download.html similarity index 100% rename from 
static/templates/download.html rename to templates/download.html diff --git a/static/templates/element.html b/templates/element.html similarity index 100% rename from static/templates/element.html rename to templates/element.html diff --git a/static/templates/elements.html b/templates/elements.html similarity index 100% rename from static/templates/elements.html rename to templates/elements.html diff --git a/static/templates/exploring_data.html b/templates/exploring_data.html similarity index 100% rename from static/templates/exploring_data.html rename to templates/exploring_data.html diff --git a/static/templates/faq.html b/templates/faq.html similarity index 100% rename from static/templates/faq.html rename to templates/faq.html diff --git a/static/templates/files.html b/templates/files.html similarity index 100% rename from static/templates/files.html rename to templates/files.html diff --git a/static/templates/headlines.html b/templates/headlines.html similarity index 100% rename from static/templates/headlines.html rename to templates/headlines.html diff --git a/static/templates/humanitarian.html b/templates/humanitarian.html similarity index 100% rename from static/templates/humanitarian.html rename to templates/humanitarian.html diff --git a/static/templates/identifiers.html b/templates/identifiers.html similarity index 100% rename from static/templates/identifiers.html rename to templates/identifiers.html diff --git a/static/templates/index.html b/templates/index.html similarity index 100% rename from static/templates/index.html rename to templates/index.html diff --git a/static/templates/license.html b/templates/license.html similarity index 100% rename from static/templates/license.html rename to templates/license.html diff --git a/static/templates/licenses.html b/templates/licenses.html similarity index 100% rename from static/templates/licenses.html rename to templates/licenses.html diff --git a/static/templates/organisation.html 
b/templates/organisation.html similarity index 100% rename from static/templates/organisation.html rename to templates/organisation.html diff --git a/static/templates/publisher.html b/templates/publisher.html similarity index 100% rename from static/templates/publisher.html rename to templates/publisher.html diff --git a/static/templates/publishers.html b/templates/publishers.html similarity index 100% rename from static/templates/publishers.html rename to templates/publishers.html diff --git a/static/templates/publishing_stats.html b/templates/publishing_stats.html similarity index 100% rename from static/templates/publishing_stats.html rename to templates/publishing_stats.html diff --git a/static/templates/registration_agencies.html b/templates/registration_agencies.html similarity index 100% rename from static/templates/registration_agencies.html rename to templates/registration_agencies.html diff --git a/static/templates/reporting_orgs.html b/templates/reporting_orgs.html similarity index 100% rename from static/templates/reporting_orgs.html rename to templates/reporting_orgs.html diff --git a/static/templates/section_index.html b/templates/section_index.html similarity index 100% rename from static/templates/section_index.html rename to templates/section_index.html diff --git a/static/templates/sparkwise.html b/templates/sparkwise.html similarity index 100% rename from static/templates/sparkwise.html rename to templates/sparkwise.html diff --git a/static/templates/tablesorter_instructions.html b/templates/tablesorter_instructions.html similarity index 100% rename from static/templates/tablesorter_instructions.html rename to templates/tablesorter_instructions.html diff --git a/static/templates/validation.html b/templates/validation.html similarity index 100% rename from static/templates/validation.html rename to templates/validation.html diff --git a/static/templates/versions.html b/templates/versions.html similarity index 100% rename from 
static/templates/versions.html rename to templates/versions.html diff --git a/static/templates/xml.html b/templates/xml.html similarity index 100% rename from static/templates/xml.html rename to templates/xml.html From f5e7b3ff46da3e2161b21061e85c058507a44014 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 21:14:25 +0000 Subject: [PATCH 006/375] Reinstate development server --- make_html.py | 57 ++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/make_html.py b/make_html.py index d3e592cc98..508257cf7c 100644 --- a/make_html.py +++ b/make_html.py @@ -250,33 +250,38 @@ def image_development_publisher(image): parser.add_argument("--url", help="Link to connect dashboard to publishing stats", default="") + parser.add_argument("--live", action="store_true", + help="Run a development server") args = parser.parse_args() app.jinja_env.globals['pubstats_url'] = args.url - from flask_frozen import Freezer - app.config['FREEZER_DESTINATION'] = 'out' - app.config['FREEZER_REMOVE_EXTRA_FILES'] = False - app.debug = False # Comment to turn off debugging - app.testing = True # Comment to turn off debugging - freezer = Freezer(app) - - @freezer.register_generator - def url_generator(): - for page_name in basic_page_names: - yield 'basic_page', {'page_name': page_name} - for publisher in current_stats['inverted_publisher']['activities'].keys(): - yield 'publisher', {'publisher': publisher} - for slug in slugs['element']['by_slug']: - yield 'element', {'slug': slug} - for major_version, codelist_slugs in slugs['codelist'].items(): - for slug in codelist_slugs['by_slug']: - yield 'codelist', { - 'slug': slug, - 'major_version': major_version - } - for license in licenses.licenses: - if license is None: - license = 'None' - yield 'licenses_individual_license', {'license': license} - + if args.live: + app.debug = True + app.run() + else: + from flask_frozen import Freezer + 
app.config['FREEZER_DESTINATION'] = 'out' + app.config['FREEZER_REMOVE_EXTRA_FILES'] = False + app.debug = False # Comment to turn off debugging + app.testing = True # Comment to turn off debugging + freezer = Freezer(app) + + @freezer.register_generator + def url_generator(): + for page_name in basic_page_names: + yield 'basic_page', {'page_name': page_name} + for publisher in current_stats['inverted_publisher']['activities'].keys(): + yield 'publisher', {'publisher': publisher} + for slug in slugs['element']['by_slug']: + yield 'element', {'slug': slug} + for major_version, codelist_slugs in slugs['codelist'].items(): + for slug in codelist_slugs['by_slug']: + yield 'codelist', { + 'slug': slug, + 'major_version': major_version + } + for license in licenses.licenses: + if license is None: + license = 'None' + yield 'licenses_individual_license', {'license': license} freezer.freeze() From 90d79f7254ef8b9546dbde7710204cb57b909fa8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 21:16:02 +0000 Subject: [PATCH 007/375] Add a nicer gitignore --- .gitignore | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 3554e82638..cc8af738d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,142 @@ -*.pyc -*.swp - -pyenv stats-calculated* data out + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ From 4e62bb268d4747fe266adebb7ac1deddf9c8b629 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Tue, 17 Sep 2019 11:26:37 +0100 Subject: [PATCH 008/375] Updating requirements --- requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 93eda8ebe6..267388ac00 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,9 +5,11 @@ Frozen-Flask==0.13 unicodecsv==0.14.1 lxml==3.7.2 pytz==2016.10 + +# These are dependencies of the above, but are specificied explicitly in order to pin versions Flask==0.12.3 MarkupSafe==0.23 -Werkzeug==0.11.15 +Werkzeug==0.12.2 argparse==1.4.0 itsdangerous==0.24 mock==2.0.0 @@ -16,4 +18,3 @@ numpy==1.12.0 pyparsing==2.1.10 python-dateutil==2.6.0 six==1.10.0 -wsgiref==0.1.2 From d1c449c89339103a92392bb342a399947afcd99e Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Tue, 17 Sep 2019 12:06:47 +0100 Subject: [PATCH 009/375] Updating and linting data.py --- data.py | 126 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/data.py b/data.py index 55c4e32d1a..eddbca599a 100644 --- a/data.py +++ b/data.py @@ -1,8 +1,9 @@ +from collections import OrderedDict, MutableMapping import json -from collections import OrderedDict, defaultdict -import sys, os, re, copy, datetime, unicodecsv -import UserDict +import os +import re import csv +from decimal import Decimal publisher_re = re.compile('(.*)\-[^\-]') @@ -23,27 +24,30 @@ 
def wrapper(self, key): return wrapper -class GroupFiles(object, UserDict.DictMixin): +class GroupFiles(MutableMapping): def __init__(self, inputdict): self.inputdict = inputdict self.cache = {} + def __getitem__(self, key): - if key in self.cache: return self.cache[key] + if key in self.cache: + return self.cache[key] self.inputdict[key] out = OrderedDict() - for k2,v2 in self.inputdict[key].items(): + for k2, v2 in self.inputdict[key].items(): if type(v2) == OrderedDict: out[k2] = OrderedDict() for listitem, v3 in v2.items(): m = publisher_re.match(listitem) if m: publisher = m.group(1) - if not publisher in out[k2]: + if publisher not in out[k2]: out[k2][publisher] = OrderedDict() out[k2][publisher][listitem] = v3 else: - pass # FIXME + pass + # FIXME else: out[k2] = v2 @@ -51,7 +55,26 @@ def __getitem__(self, key): return out -class JSONDir(object, UserDict.DictMixin): + def __len__(self): + return len(self.inputdict) + + + def __iter__(self): + return iter(self.inputdict) + + + def __delitem__(self, key): + try: + del self.inputdict[key] + except KeyError: + pass + + + def __setitem__(self, key, value): + super(GroupFiles, self).__setitem__(key, value) + + +class JSONDir(MutableMapping): """Produces an object, to be used to access JSON-formatted publisher data and return this as an ordered dictionary (with nested dictionaries, if appropriate). Use of this class removes the need to load large amounts of data into memory. 
@@ -63,6 +86,22 @@ def __init__(self, folder): """ self.folder = folder + + def __len__(self): + return len(self.keys()) + + + def __delitem__(self, key): + try: + del self.folder[key] + except KeyError: + pass + + + def __setitem__(self, key, value): + super(JSONDir, self).__setitem__(key, value) + + @memoize def __getitem__(self, key): """Define how variables are gathered from the raw JSON files and then parsed into @@ -75,9 +114,9 @@ def __getitem__(self, key): if os.path.exists(os.path.join(self.folder, key)): # The data being sought is a directory data = JSONDir(os.path.join(self.folder, key)) - elif os.path.exists(os.path.join(self.folder, key+'.json')): + elif os.path.exists(os.path.join(self.folder, key + '.json')): # The data being sought is a json file - with open(os.path.join(self.folder, key+'.json')) as fp: + with open(os.path.join(self.folder, key + '.json')) as fp: data = json.load(fp, object_pairs_hook=OrderedDict) # Deal with publishers who had an old registry ID @@ -87,7 +126,7 @@ def __getitem__(self, key): # Look over the set of changed registry IDs for previous_id, current_id in get_registry_id_matches().items(): folder = self.folder - previous_path = os.path.join(folder.replace(current_id,previous_id), key+'.json') + previous_path = os.path.join(folder.replace(current_id, previous_id), key + '.json') # If this publisher has had an old ID and there is data for it if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher @@ -97,7 +136,7 @@ def __getitem__(self, key): # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? 
else: # No value found as either a folder or json file - raise KeyError, key + raise KeyError(key) return data @@ -105,7 +144,8 @@ def keys(self): """Method to return a list of keys that are contained within the data folder that is being accessed within this instance. """ - return [ x[:-5] if x.endswith('.json') else x for x in os.listdir(self.folder) ] + return [x[:-5] if x.endswith('.json') else x for x in os.listdir(self.folder) ] + def __iter__(self): """Custom iterable, to iterate over the keys that are contained within the data @@ -131,7 +171,6 @@ def get_publisher_name(self): return None - def get_publisher_stats(publisher, stats_type='aggregated'): """Function to obtain current data for a given publisher. Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher @@ -184,7 +223,7 @@ def deep_merge(obj1, obj2): # If it's a dictionary we need to go deeper, by running this function recursively else: if key in obj2: - deep_merge(obj1[key],obj2[key]) + deep_merge(obj1[key], obj2[key]) current_stats = { @@ -203,59 +242,54 @@ def deep_merge(obj1, obj2): if line != '.\n': current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) + def transform_codelist_mapping_keys(codelist_mapping): # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 - codelist_mapping = {k:v for k,v in codelist_mapping.items() if not k.startswith('//iati-organisation') } - codelist_mapping = {re.sub('^\/\/iati-activity', './', k):v for k,v in codelist_mapping.items() } - codelist_mapping = {re.sub('^\/\/', './/', k):v for k,v, in codelist_mapping.items() } + codelist_mapping = {k: v for k, v in codelist_mapping.items() if not k.startswith('//iati-organisation')} + codelist_mapping = {re.sub('^\/\/iati-activity', './', k): v for k, v in codelist_mapping.items()} + codelist_mapping = {re.sub('^\/\/', './/', k): v for k, v, in codelist_mapping.items()} return codelist_mapping + def 
create_codelist_mapping(major_version): - codelist_mapping = {x['path']:x['codelist'] for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))} + codelist_mapping = {x['path']: x['codelist'] for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))} return transform_codelist_mapping_keys(codelist_mapping) MAJOR_VERSIONS = ['2', '1'] -codelist_mapping = { v:create_codelist_mapping(v) for v in MAJOR_VERSIONS } +codelist_mapping = {v: create_codelist_mapping(v) for v in MAJOR_VERSIONS} codelist_conditions = { - major_version: transform_codelist_mapping_keys({ x['path']:x.get('condition') for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))}) - for major_version in MAJOR_VERSIONS } + major_version: transform_codelist_mapping_keys({x['path']: x.get('condition') for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))}) + for major_version in MAJOR_VERSIONS} # Create a big dictionary of all codelist values by version and codelist name codelist_sets = { major_version: { - cname:set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() - } for major_version in MAJOR_VERSIONS } + cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() + } for major_version in MAJOR_VERSIONS} #Simple look up to map publisher id to a publishers given name (title) -publisher_name={publisher:publisher_json['result']['title'] for publisher,publisher_json in ckan_publishers.items()} +publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in ckan_publishers.items()} #Create a list of tuples ordered by publisher given name titles - this allows us to display lists of publishers in alphabetical order -publishers_ordered_by_title 
= [ - (publisher_name[publisher], publisher) - for publisher in current_stats['inverted_publisher']['activities'] - if publisher in publisher_name] -publishers_ordered_by_title.sort(key=lambda x: unicode.lower(x[0])) +publishers_ordered_by_title = [(publisher_name[publisher], publisher) for publisher in current_stats['inverted_publisher']['activities']] +publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher secondary_publishers = [publisher for publisher, stats in JSONDir('./stats-calculated/current/aggregated-publisher').items() - if int(stats['activities']) == len(stats['activities_secondary_reported']) - and int(stats['activities']) > 0] + if int(stats['activities']) == len(stats['activities_secondary_reported']) and + int(stats['activities']) > 0] -import csv -from decimal import Decimal try: - dac2012 = {x[0]:Decimal(x[1].replace(',','')) for x in csv.reader(open('data/dac2012.csv'))} + dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open('data/dac2012.csv'))} except IOError: dac2012 = {} - - def make_slugs(keys): - out = {'by_slug':{}, 'by_i':{}} - for i,key in enumerate(keys): - slug = re.sub('[^a-zA-Z0-9:@\-_]', '', re.sub('{[^}]*}', '', key.replace('{http://www.w3.org/XML/1998/namespace}','xml:').replace('/','_'))).strip('_') + out = {'by_slug': {}, 'by_i': {}} + for i, key in enumerate(keys): + slug = re.sub('[^a-zA-Z0-9:@\-_]', '', re.sub('{[^}]*}', '', key.replace('{http://www.w3.org/XML/1998/namespace}','xml:').replace('/', '_'))).strip('_') while slug in out['by_slug']: slug += '_' out['by_slug'][slug] = i @@ -263,10 +297,10 @@ def make_slugs(keys): return out slugs = { - 'codelist': { major_version:( - make_slugs(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()) - if major_version in current_stats['inverted_publisher']['codelist_values_by_major_version'] - else make_slugs([]) - ) for major_version in 
MAJOR_VERSIONS }, + 'codelist': {major_version: ( + make_slugs(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()) + if major_version in current_stats['inverted_publisher']['codelist_values_by_major_version'] + else make_slugs([]) + ) for major_version in MAJOR_VERSIONS}, 'element': make_slugs(current_stats['inverted_publisher']['elements'].keys()) } From 9c9a33b45f87b6b65d9ae0c06aebcee66e275b91 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Tue, 17 Sep 2019 15:45:45 +0100 Subject: [PATCH 010/375] Upgrading and linting plots.py --- plots.py | 92 +++++++++++++++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 47 deletions(-) diff --git a/plots.py b/plots.py index 6fcf8b7175..b8d09958c2 100644 --- a/plots.py +++ b/plots.py @@ -19,82 +19,81 @@ import matplotlib.pyplot as plt import matplotlib.dates as mdates from collections import defaultdict - import os -import unicodecsv +import csv import common import data +from vars import expected_versions +mpl.use('Agg') # Import failed_downloads as a global -failed_downloads = unicodecsv.reader(open('data/downloads/history.csv')) +failed_downloads = csv.reader(open('data/downloads/history.csv')) gitaggregate_publisher = data.JSONDir('./stats-calculated/gitaggregate-publisher-dated') class AugmentedJSONDir(data.JSONDir): def __getitem__(self, key): if key == 'failed_downloads': - return dict((row[0],row[1]) for row in failed_downloads) + return dict((row[0], row[1]) for row in failed_downloads) elif key == 'publisher_types': out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.iteritems(): + for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] - for datestring,count in publisher_data['activities'].iteritems(): + for datestring, count in publisher_data['activities'].items(): 
out[datestring][organization_type] += 1 else: print('Publisher not matched:', publisher) return out elif key == 'activities_per_publisher_type': out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.iteritems(): + for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] - for datestring,count in publisher_data['activities'].iteritems(): + for datestring, count in publisher_data['activities'].items(): out[datestring][organization_type] += count else: print('Publisher not matched:', publisher) return out - else: + else: return super(AugmentedJSONDir, self).__getitem__(key) -from vars import expected_versions - def make_plot(stat_path, git_stats, img_prefix=''): if type(stat_path) == tuple: stat_name = stat_path[0] else: stat_name = stat_path - + stat_dict = git_stats.get(stat_name) if not stat_dict: return items = sorted(stat_dict.items()) - x_values = [ datetime.date(int(x[0:4]), int(x[5:7]), int(x[8:10])).toordinal() for x,y in items ] + x_values = [datetime.date(int(x[0:4]), int(x[5:7]), int(x[8:10])).toordinal() for x, y in items] if type(stat_path) == tuple: - y_values = [ dict((k,v) for k,v in y.iteritems() if stat_path[1](k)) for x,y in items ] + y_values = [dict((k, v) for k, v in y.items() if stat_path[1](k)) for x, y in items] else: - y_values = [ y for x,y in items ] + y_values = [y for x, y in items] - #years = mdates.YearLocator() # every year - #months = mdates.MonthLocator() # every month - dateFmt = mdates.DateFormatter('%Y-%m-%d') + # years = mdates.YearLocator() # every year + # months = mdates.MonthLocator() # every month + datefmt = mdates.DateFormatter('%Y-%m-%d') fig, ax = plt.subplots() - ax.set_color_cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k', '#00ff00', '#fc5ab8', '#af31f2']) + ax.set_prop_cycle('color', ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#00ff00', '#fc5ab8', '#af31f2']) fig_legend = 
plt.figure() dpi = 96 - fig.set_size_inches(600.0/dpi, 600.0/dpi) + fig.set_size_inches(600.0 / dpi, 600.0 / dpi) if type(y_values[0]) == dict: - keys = set([ tm for y in y_values for tm in y.keys() ]) + keys = set([tm for y in y_values for tm in y.keys()]) plots = {} for key in keys: - plots[key], = ax.plot(x_values, [ y.get(key) or 0 for y in y_values ]) + plots[key], = ax.plot(x_values, [y.get(key) or 0 for y in y_values]) if stat_name in ['publisher_types', 'activities_per_publisher_type']: # Sort by the most recent value for the key sorted_items = sorted(plots.items(), key=lambda x: y_values[-1][x[0]], reverse=True) fig_legend.legend([x[1] for x in sorted_items], [x[0] for x in sorted_items], 'center', ncol=1) - fig_legend.set_size_inches(600.0/dpi, 300.0/dpi) + fig_legend.set_size_inches(600.0 / dpi, 300.0 / dpi) else: fig_legend.legend(plots.values(), plots.keys(), 'center', ncol=4) fig_legend.set_size_inches(600.0/dpi, 100.0/dpi) @@ -103,19 +102,18 @@ def make_plot(stat_path, git_stats, img_prefix=''): keys = None ax.plot(x_values, y_values) - # format the ticks - #ax.xaxis.set_major_locator(years) - ax.xaxis.set_major_formatter(dateFmt) - #ax.xaxis.set_minor_locator(months) + # ax.xaxis.set_major_locator(years) + ax.xaxis.set_major_formatter(datefmt) + # ax.xaxis.set_minor_locator(months) - #datemin = datetime.date(r.date.min().year, 1, 1) - #datemax = datetime.date(r.date.max().year+1, 1, 1) - #ax.set_xlim(datemin, datemax) + # datemin = datetime.date(r.date.min().year, 1, 1) + # datemax = datetime.date(r.date.max().year+1, 1, 1) + # ax.set_xlim(datemin, datemax) # format the coords message box - #def price(x): return '$%1.2f'%x - #ax.format_ydata = price + # def price(x): return '$%1.2f'%x + # ax.format_ydata = price ax.xaxis_date() ax.format_xdata = mdates.DateFormatter('%Y-%m-%d') ax.grid(True) @@ -124,22 +122,22 @@ def make_plot(stat_path, git_stats, img_prefix=''): # axes up to make room for them fig.autofmt_xdate() - 
fig.savefig('out/{0}{1}{2}.png'.format(img_prefix,stat_name,stat_path[2] if type(stat_path) == tuple else ''), dpi=dpi) + fig.savefig('out/{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) == tuple else ''), dpi=dpi) plt.close('all') - fn = 'out/{0}{1}.csv'.format(img_prefix,stat_name) + fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) with open(fn, 'w') as fp: - writer = unicodecsv.writer(fp) + writer = csv.writer(fp) if keys: sorted_keys = sorted(list(keys)) writer.writerow(['date'] + sorted_keys) else: writer.writerow(['date', 'value']) - for k,v in items: + for k, v in items: if keys: - writer.writerow([k] + [ v.get(key) for key in sorted_keys ]) + writer.writerow([k] + [v.get(key) for key in sorted_keys]) else: - writer.writerow([k,v]) + writer.writerow([k, v]) del writer @@ -157,17 +155,17 @@ def make_plot(stat_path, git_stats, img_prefix=''): 'invalidxml', 'nonstandardroots', 'unique_identifiers', - ('validation', lambda x: x=='fail', ''), - ('publishers_validation', lambda x: x=='fail', ''), - ('publisher_has_org_file', lambda x: x=='no', ''), + ('validation', lambda x: x == 'fail', ''), + ('publishers_validation', lambda x: x == 'fail', ''), + ('publisher_has_org_file', lambda x: x == 'no', ''), ('versions', lambda x: x in expected_versions, '_expected'), ('versions', lambda x: x not in expected_versions, '_other'), ('publishers_per_version', lambda x: x in expected_versions, '_expected'), ('publishers_per_version', lambda x: x not in expected_versions, '_other'), ('file_size_bins', lambda x: True, ''), - ('publisher_types', lambda x: True, '' ), - ('activities_per_publisher_type', lambda x: True, '' ) - ]: + ('publisher_types', lambda x: True, ''), + ('activities_per_publisher_type', lambda x: True, '') +]: make_plot(stat_path, git_stats) # Delete git_stats variable to save memory @@ -179,7 +177,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): pass git_stats_publishers = 
AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') -for publisher, git_stats_publisher in git_stats_publishers.iteritems(): +for publisher, git_stats_publisher in git_stats_publishers.items(): for stat_path in [ 'activities', 'activity_files', @@ -188,7 +186,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): 'invalidxml', 'nonstandardroots', 'publisher_unique_identifiers', - ('validation', lambda x: x=='fail', ''), + ('validation', lambda x: x == 'fail', ''), ('versions', lambda x: True, ''), - ]: + ]: make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) From 6242fe0cd2f53a98db83576c041420c983210cc2 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Tue, 17 Sep 2019 16:23:31 +0100 Subject: [PATCH 011/375] Upgrading and linting make_csv.py --- data.py | 1 + make_csv.py | 24 +++++++++++------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/data.py b/data.py index eddbca599a..ea9b99178f 100644 --- a/data.py +++ b/data.py @@ -140,6 +140,7 @@ def __getitem__(self, key): return data + def keys(self): """Method to return a list of keys that are contained within the data folder that is being accessed within this instance. 
diff --git a/make_csv.py b/make_csv.py index b145dcf304..3fa8b52200 100644 --- a/make_csv.py +++ b/make_csv.py @@ -1,12 +1,12 @@ # Script to generate CSV files from data in the 'stats-calculated' folder, # and extra logic in other files in this repository - -import unicodecsv +import csv import os import data from collections import OrderedDict -publisher_name={publisher:publisher_json['result']['title'] for publisher,publisher_json in data.ckan_publishers.items()} +publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} + def publisher_dicts(): for publisher, activities in data.current_stats['inverted_publisher']['activities'].items(): @@ -18,7 +18,7 @@ def publisher_dicts(): 'Publisher Registry Id': publisher, 'Activities': activities, 'Organisations': publisher_stats['organisations'], - 'Files': publisher_stats['activity_files']+publisher_stats['organisation_files'], + 'Files': publisher_stats['activity_files'] + publisher_stats['organisation_files'], 'Activity Files': publisher_stats['activity_files'], 'Organisation Files': publisher_stats['organisation_files'], 'Total File Size': publisher_stats['file_size'], @@ -30,7 +30,7 @@ def publisher_dicts(): } with open(os.path.join('out', 'publishers.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ + writer = csv.DictWriter(fp, [ 'Publisher Name', 'Publisher Registry Id', 'Activities', @@ -44,24 +44,22 @@ def publisher_dicts(): 'Reporting Orgs in Data', 'Hierarchies (count)', 'Hierarchies', - ]) + ]) writer.writeheader() for d in publisher_dicts(): writer.writerow(d) - - -publishers = data.current_stats['inverted_publisher']['activities'].keys() +publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) with open(os.path.join('out', 'elements.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ 'Element' ] + publishers ) + writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, 
publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): publisher_dict['Element'] = element writer.writerow(publisher_dict) with open(os.path.join('out', 'elements_total.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ 'Element' ] + publishers ) + writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): publisher_dict['Element'] = element @@ -69,7 +67,7 @@ def publisher_dicts(): with open(os.path.join('out', 'registry.csv'), 'w') as fp: keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] - writer = unicodecsv.DictWriter(fp, keys) + writer = csv.DictWriter(fp, keys) writer.writeheader() for publisher_json in data.ckan_publishers.values(): - writer.writerow({x:publisher_json['result'].get(x) or 0 for x in keys}) + writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) From cd08d37092bed1c889bd58268beffc8fda61225f Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Wed, 18 Sep 2019 11:01:54 +0100 Subject: [PATCH 012/375] Upgrade and lint speakers_kit.py --- speakers_kit.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/speakers_kit.py b/speakers_kit.py index f126bbad94..cae6567102 100644 --- a/speakers_kit.py +++ b/speakers_kit.py @@ -1,13 +1,15 @@ from __future__ import print_function import json import data -import unicodecsv +import csv from collections import defaultdict -from 
itertools import izip_longest +from itertools import zip_longest + def codelist_dict(codelist_path): codelist_json = json.load(open(codelist_path)) - return {c['code']:c['name'] for c in codelist_json['data']} + return {c['code']: c['name'] for c in codelist_json['data']} + organisation_type_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/OrganisationType.json') country_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/Country.json') @@ -21,7 +23,7 @@ def codelist_dict(codelist_path): for publisher, publisher_data in aggregated_publisher.items(): if publisher in data.ckan_publishers: organization_type = data.ckan_publishers[publisher]['result']['publisher_organization_type'] - #activities_by['type'][organisation_type_dict[organization_type]] += publisher_data['activities'] + # activities_by['type'][organisation_type_dict[organization_type]] += publisher_data['activities'] publishers_by['type'][organisation_type_dict[organization_type]] += 1 publisher_country_code = data.ckan_publishers[publisher]['result']['publisher_country'] @@ -47,20 +49,20 @@ def codelist_dict(codelist_path): publishers_quarterly = [] publishers_by_date = json.load(open('./stats-calculated/gitaggregate-dated/publishers.json')) for date, publishers in sorted(publishers_by_date.items()): - if (date[8:10] == '30' and date[5:7] in ['06','09']) or (date[8:10] == '31' and date[5:7] in ['03','12']): + if (date[8:10] == '30' and date[5:7] in ['06', '09']) or (date[8:10] == '31' and date[5:7] in ['03', '12']): publishers_quarterly.append((date, publishers)) with open('out/speakers_kit.csv', 'w') as fp: - writer = unicodecsv.DictWriter(fp, fieldnames) + writer = csv.DictWriter(fp, fieldnames) writer.writeheader() sort_second = lambda x: sorted(x, key=lambda y: y[1], reverse=True) - for publishers_by_type, publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in izip_longest( + for publishers_by_type, publishers_by_country, publishers_quarterly_, 
activities_by_country, activities_by_region in zip_longest( sort_second(publishers_by['type'].items()), sort_second(publishers_by['country'].items()), publishers_quarterly, sort_second(activities_by['country'].items()), sort_second(activities_by['region'].items()), - ): + ): writer.writerow({ 'publisher_type': publishers_by_type[0] if publishers_by_type else '', 'publishers_by_type': publishers_by_type[1] if publishers_by_type else '', From e6e6c8bd84a1d28ce3b3c5197d73aaccac146b10 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Fri, 20 Sep 2019 14:21:55 +0100 Subject: [PATCH 013/375] Upgrading make_html and licenses.py --- data.py | 4 ++ licenses.py | 49 +++++++++-------- make_html.py | 116 ++++++++++++++++++++++----------------- templates/publisher.html | 6 +- 4 files changed, 98 insertions(+), 77 deletions(-) diff --git a/data.py b/data.py index ea9b99178f..29d8369d3a 100644 --- a/data.py +++ b/data.py @@ -98,6 +98,10 @@ def __delitem__(self, key): pass + def __repr__(self): + return '{}, JSONDIR({})'.format(super(JSONDir, self).__repr__(), self.__dict__) + + def __setitem__(self, key, value): super(JSONDir, self).__setitem__(key, value) diff --git a/licenses.py b/licenses.py index 949e950753..eb35bdffa7 100644 --- a/licenses.py +++ b/licenses.py @@ -1,3 +1,8 @@ +import json +from collections import OrderedDict +from flask import render_template + + license_names = { 'notspecified': 'Other::License Not Specified', 'odc-pddl': 'OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)', @@ -79,14 +84,11 @@ 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} -import json -from collections import OrderedDict -from flask import render_template - with open('./stats-calculated/ckan.json') as handler: ckan = json.load(handler, object_pairs_hook=OrderedDict) -licenses = [ package.get('license_id') for _, publisher in ckan.items() for _, package in publisher.items() ] +licenses = [package.get('license_id') for _, 
publisher in ckan.items() for _, package in publisher.items()] + def licenses_for_publisher(publisher_name): # Check publisher is in the compiled list of CKAN data @@ -95,29 +97,30 @@ def licenses_for_publisher(publisher_name): return set() # Return unique licenses used - return set([ package.get('license_id') for package in ckan[publisher_name].values() ]) + return set([package.get('license_id') for package in ckan[publisher_name].values()]) -def main(): - licenses_and_publisher = set([ (package.get('license_id'), publisher_name) for publisher_name, publisher in ckan.items() for package_name, package in publisher.items() ]) - licenses_per_publisher = [ license for license, publisher in licenses_and_publisher ] +def main(): + licenses_and_publisher = set([(package.get('license_id') if package.get('license_id') else 'notspecified', publisher_name) for publisher_name, publisher in ckan.items() for package_name, package in publisher.items()]) + licenses_per_publisher = [license for license, publisher in licenses_and_publisher] return render_template('licenses.html', - license_names=license_names, - license_count = dict((x,licenses.count(x)) for x in set(licenses)), - publisher_license_count = dict((x,licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), - sorted=sorted, - page='licenses', - licenses=True) + license_names=license_names, + license_count=dict((x if x else 'notspecified', licenses.count(x)) for x in set(licenses)), + publisher_license_count=dict((x, licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), + sorted=sorted, + page='licenses', + licenses=True) + def individual_license(license): if license == 'None': license = None - publishers = [ publisher_name for publisher_name, publisher in ckan.items() for _, package in publisher.items() if package.get('license_id') == license ] - publisher_counts = [ (publisher, publishers.count(publisher)) for publisher in set(publishers) ] + publishers = [publisher_name for 
publisher_name, publisher in ckan.items() for _, package in publisher.items() if package.get('license_id') == license] + publisher_counts = [(publisher, publishers.count(publisher)) for publisher in set(publishers) ] return render_template('license.html', - url=lambda x: '../'+x, - license=license, - license_names=license_names, - publisher_counts=publisher_counts, - page='licenses', - licenses=True) + url=lambda x: '../' + x, + license=license, + license_names=license_names, + publisher_counts=publisher_counts, + page='licenses', + licenses=True) diff --git a/make_html.py b/make_html.py index 508257cf7c..9286663f26 100644 --- a/make_html.py +++ b/make_html.py @@ -24,17 +24,19 @@ def dictinvert(d): inv = defaultdict(list) - for k, v in d.iteritems(): + for k, v in d.items(): inv[v].append(k) return inv + def nested_dictinvert(d): inv = defaultdict(lambda: defaultdict(int)) - for k, v in d.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in d.items(): + for k2, v2 in v.items(): inv[k2][k] += v2 return inv + def dataset_to_publisher(publisher_slug): """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher slug e.g. dfid """ @@ -46,6 +48,7 @@ def firstint(s): m = re.search('\d+', s[0]) return int(m.group(0)) + def xpath_to_url(path): path = path.strip('./') if path.startswith('iati-activity'): @@ -55,11 +58,14 @@ def xpath_to_url(path): else: return 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/'+path.split('@')[0] + def registration_agency(orgid): for code in codelist_sets['2']['OrganisationRegistrationAgency']: if orgid.startswith(code): return code + + def get_codelist_values(codelist_values_for_element): """Return a list of unique values present within a one-level nested dictionary. 
Envisaged usage is to gather the codelist values used by each publisher, as in @@ -67,7 +73,7 @@ def get_codelist_values(codelist_values_for_element): Input: Set of codelist values for a given element (listed by publisher), for example: current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] """ - return list(set([y for x in codelist_values_for_element.items() for y in x[1].keys()])) + return list(set([y for x in codelist_values_for_element.items() for y in list(x[1])])) # Store data processing times date_time_data_str = max(gitdate.values()) @@ -138,6 +144,7 @@ def get_codelist_values(codelist_values_for_element): 'faq', ] + @app.route('/.html') def basic_page(page_name): if page_name in basic_page_names: @@ -147,70 +154,73 @@ def basic_page(page_name): else: abort(404) + @app.route('/data/download_errors.json') def download_errors_json(): - return Response(json.dumps(current_stats['download_errors'], indent=2), mimetype='application/json'), + return Response(json.dumps(current_stats['download_errors'], indent=2), mimetype='application/json'), app.add_url_rule('/', 'index_redirect', lambda: redirect('index.html')) app.add_url_rule('/licenses.html', 'licenses', licenses.main) app.add_url_rule('/license/.html', 'licenses_individual_license', licenses.individual_license) + @app.route('/publisher/.html') def publisher(publisher): publisher_stats = get_publisher_stats(publisher) budget_table = [{ - 'year': 'Total', - 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), - 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, - 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: sum(v.values()) for k, v in 
publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - }] + [ - { - 'year': year, - 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), - 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, - 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((x.keys() for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) - ] + 'year': 'Total', + 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), + 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, + 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 
'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + }] + [{'year': year, + 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), + 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, + 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) + ] return render_template('publisher.html', - url=lambda x: '../' + x, - publisher=publisher, - publisher_stats=publisher_stats, - publisher_inverted=get_publisher_stats(publisher, 'inverted-file'), - publisher_licenses=licenses.licenses_for_publisher(publisher), - budget_table=budget_table - ) + url=lambda x: '../' + x, + publisher=publisher, + publisher_stats=publisher_stats, + 
publisher_inverted=get_publisher_stats(publisher, 'inverted-file'), + publisher_licenses=licenses.licenses_for_publisher(publisher), + budget_table=budget_table, + codelist_by_major_version=list(publisher_stats['codelist_values_by_major_version'].items()), + publisher_elements=list(publisher_stats['elements'].items()),) + @app.route('/codelist//.html') def codelist(major_version, slug): i = slugs['codelist'][major_version]['by_slug'][slug] - element = current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()[i] - values = nested_dictinvert(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values()[i]) + element = list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] + values = nested_dictinvert(list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values())[i]) return render_template('codelist.html', - element=element, - values=values, - reverse_codelist_mapping={major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() }, - url=lambda x: '../../' + x, - major_version=major_version, - page='codelists') + element=element, + values=values, + reverse_codelist_mapping={major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() }, + url=lambda x: '../../' + x, + major_version=major_version, + page='codelists') + @app.route('/element/.html') def element(slug): i = slugs['element']['by_slug'][slug] - element = current_stats['inverted_publisher']['elements'].keys()[i] - publishers = current_stats['inverted_publisher']['elements'].values()[i] - file_grouped = current_stats['inverted_file_grouped']['elements'].values()[i] + element = list(current_stats['inverted_publisher']['elements'])[i] + publishers = list(current_stats['inverted_publisher']['elements'].values())[i] + file_grouped = list(current_stats['inverted_file_grouped']['elements'].values())[i] return 
render_template('element.html', - element=element, - publishers=publishers, - file_grouped=file_grouped, - url=lambda x: '../' + x, - page='elements') + element=element, + publishers=publishers, + file_grouped=file_grouped, + url=lambda x: '../' + x, + page='elements') @app.route('/registration_agencies.html') @@ -222,29 +232,33 @@ def registration_agencies(): reg_ag = registration_agency(orgid) if reg_ag: registration_agencies[reg_ag] += 1 - registration_agencies_publishers[reg_ag] += publishers.keys() + registration_agencies_publishers[reg_ag] += list(publishers) else: nonmatching.append((orgid, publishers)) return render_template('registration_agencies.html', - page='registration_agencies', - registration_agencies=registration_agencies, - registration_agencies_publishers=registration_agencies_publishers, - nonmatching=nonmatching) + page='registration_agencies', + registration_agencies=registration_agencies, + registration_agencies_publishers=registration_agencies_publishers, + nonmatching=nonmatching) + # Server an image through the development server (--live) @app.route('/.png') def image_development(image): return Response(open(os.path.join('out', image + '.png')).read(), mimetype='image/png') + @app.route('/.csv') def csv_development(name): return Response(open(os.path.join('out', name + '.csv')).read(), mimetype='text/csv') + @app.route('/publisher_imgs/.png') def image_development_publisher(image): print(image) return Response(open(os.path.join('out', 'publisher_imgs', image + '.png')).read(), mimetype='image/png') + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--url", diff --git a/templates/publisher.html b/templates/publisher.html index ab605b7395..e7d5099b4c 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -319,7 +319,7 @@

Files

{% for major_version in MAJOR_VERSIONS %} - {% if major_version in publisher_stats.codelist_values_by_major_version %} + {% if major_version in codelist_by_major_version %}
@@ -336,7 +336,7 @@

Codelist Values (version {{major_version}}.xx)

- {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %} + {% for element, values in codelist_by_major_version[major_version] %} {% with element_i=current_stats.inverted_publisher.codelist_values_by_major_version[major_version].keys().index(element) %} @@ -385,7 +385,7 @@

Elements and Attributes Published

- {% for element, count in publisher_stats.elements.items() %} + {% for element, count in publisher_elements[] %} {% with element_i=current_stats.inverted_publisher.elements.keys().index(element) %} From 12e93c030b07e67969aabb41a8202c9be65dd722 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Wed, 2 Oct 2019 11:04:15 +0100 Subject: [PATCH 014/375] data.py copy object before modifying --- data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data.py b/data.py index 29d8369d3a..3283e880ee 100644 --- a/data.py +++ b/data.py @@ -158,6 +158,7 @@ def __iter__(self): """ return iter(self.keys()) + def get_publisher_name(self): """Find the name of the publisher that this data relates to. Note, this is a super hacky way to do this, prize available if a better way is found to do this! @@ -212,7 +213,7 @@ def deep_merge(obj1, obj2): """ # Iterate through keys - for key in obj1: + for key in obj1.copy(): # If this is value, we've hit the bottom, copy all of obj2 into obj1 if type(obj1[key]) is not OrderedDict: for key2 in obj2: @@ -260,6 +261,7 @@ def create_codelist_mapping(major_version): codelist_mapping = {x['path']: x['codelist'] for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))} return transform_codelist_mapping_keys(codelist_mapping) + MAJOR_VERSIONS = ['2', '1'] codelist_mapping = {v: create_codelist_mapping(v) for v in MAJOR_VERSIONS} From f290079427ff58412b18ce40c65a9ba6ada2b1e2 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Thu, 3 Oct 2019 10:56:03 +0100 Subject: [PATCH 015/375] Only add publishers within the ckan_publisher list --- data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.py b/data.py index 3283e880ee..e3ddbfc97a 100644 --- a/data.py +++ b/data.py @@ -279,7 +279,7 @@ def create_codelist_mapping(major_version): #Simple look up to map publisher id to a publishers given name (title) publisher_name = {publisher: publisher_json['result']['title'] for publisher, 
publisher_json in ckan_publishers.items()} #Create a list of tuples ordered by publisher given name titles - this allows us to display lists of publishers in alphabetical order -publishers_ordered_by_title = [(publisher_name[publisher], publisher) for publisher in current_stats['inverted_publisher']['activities']] +publishers_ordered_by_title = [(publisher_name[publisher], publisher) for publisher in current_stats['inverted_publisher']['activities'] if publisher in publisher_name] publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher From 33dce9d87d289f3a5e720af81089971f2241c770 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Fri, 4 Oct 2019 10:23:30 +0100 Subject: [PATCH 016/375] Clear data/ckan_publishers/ directory before downloading --- fetch_data.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fetch_data.sh b/fetch_data.sh index 83bd950103..9380496d03 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -5,6 +5,7 @@ mkdir -p data/downloads/ wget "https://gist.githubusercontent.com/codeforIATIbot/f117c9be138aa94c9762d57affc51a64/raw/errors" -O data/downloads/errors # Get CKAN (IATI Registry) data +rm -rf data/ckan_publishers/ python fetch_data.py # Generate a csv file with the number of download errors logged since 2013 @@ -45,7 +46,7 @@ if [ ! 
-d IATI-Codelists-2 ]; then git clone https://github.com/IATI/IATI-Codelists.git IATI-Codelists-2 fi cd IATI-Codelists-2 -echo "checking out Codelists-1" +echo "checking out Codelists-2" git checkout version-2.03 > /dev/null git pull > /dev/null echo "running gen.sh for Codelist-2" From 8999ac2a303d2200d4af2857b4802c255d4bb7cb Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Wed, 9 Oct 2019 12:49:57 +0100 Subject: [PATCH 017/375] Remove timeliness.py --- timeliness.py | 198 -------------------------------------------------- 1 file changed, 198 deletions(-) delete mode 100644 timeliness.py diff --git a/timeliness.py b/timeliness.py deleted file mode 100644 index 4775bfeb3e..0000000000 --- a/timeliness.py +++ /dev/null @@ -1,198 +0,0 @@ -# This file converts raw timeliness data into the associated Dashboard assessments - -from __future__ import print_function -from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches -import datetime -from dateutil.relativedelta import relativedelta -from collections import defaultdict, Counter - - -def short_month(month_str): - """Return the 'short month' represeentation of a date which is inputted as a string, seperated with dashes - For example '01-03-2012' returns 'Mar' - """ - short_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - return short_months[int(month_str.split('-')[1]) - 1] - - -def parse_iso_date(d): - """Parse a string representation of a date into a datetime object - """ - try: - return datetime.date(int(d[:4]), int(d[5:7]), int(d[8:10])) - except (ValueError, TypeError): - return None - - -def previous_months_generator(d): - """Returns a generator object with the previous month for a given datetime object - """ - year = d.year - month = d.month - for i in range(0,12): - month -= 1 - if month <= 0: - year -= 1 - month = 12 - yield year,month - -# Store lists of previous months -previous_months = ['{}-{}'.format(year,str(month).zfill(2)) for 
year,month in previous_months_generator(datetime.date.today())] -previous_months_reversed=list(reversed(previous_months)) - -# Store the current month as a string -today = datetime.date.today() -this_month = '{}-{}'.format(today.year, str(today.month).zfill(2)) - -# Store a list of the past 12 months from today -previous_month_days = [today - relativedelta(months=x) for x in range(1, 13)] - -# Store the current month and year numbers -this_month_number = datetime.datetime.today().month -this_year = datetime.datetime.today().year - - -def publisher_frequency(): - """Generate the publisher frequency data - """ - - # Load all the data from 'gitaggregate-publisher-dated' into memory - gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') - - # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' - for publisher, agg in gitaggregate_publisher.items(): - - # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher - if not 'most_recent_transaction_date' in agg: - continue - - # Skip if this publisher appears in the list of publishers who have since changed their Registry ID - if publisher in get_registry_id_matches().keys(): - continue - - updates_per_month = defaultdict(int) - previous_transaction_date = datetime.date(1,1,1) - - # Find the most recent transaction date and parse into a datetime object - for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()): - transaction_date = parse_iso_date(transaction_date_str) - - # If transaction date has increased - if transaction_date is not None and transaction_date > previous_transaction_date: - previous_transaction_date = transaction_date - updates_per_month[gitdate[:7]] += 1 - - # Find the first date that this publisher made data available, and parse into a datetime object - first_published_string = sorted(agg['most_recent_transaction_date'])[0] - first_published = 
parse_iso_date(first_published_string) - - # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment - - if first_published >= previous_month_days[2]: - # This is a publisher of less than 3 months - #if True in [ x in updates_per_month for x in previous_months[:3] ]: - frequency = 'Annual' - elif first_published >= previous_month_days[5]: - # This is a publisher of less than 6 months - if all([ x in updates_per_month for x in previous_months[:3] ]): - frequency = 'Monthly' - else: - frequency = 'Annual' - elif first_published >= previous_month_days[11]: - # This is a publisher of less than 12 months - if [ x in updates_per_month for x in previous_months[:6] ].count(True) >= 4: - frequency = 'Monthly' - elif any([ x in updates_per_month for x in previous_months[:3] ]) and any([ x in updates_per_month for x in previous_months[3:6] ]): - frequency = 'Quarterly' - else: - frequency = 'Annual' - else: - # This is a publisher of 1 year or more - if ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 7) and ([ x in updates_per_month for x in previous_months[:2] ].count(True) >= 1): - # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. - frequency = 'Monthly' - elif ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 3) and ([ x in updates_per_month for x in previous_months[:4] ].count(True) >= 1): - # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months. 
- frequency = 'Quarterly' - elif any([ x in updates_per_month for x in previous_months[:6] ]) and any([ x in updates_per_month for x in previous_months[6:12] ]): - # There has been an update in 2 of the last 6 month periods - frequency = 'Six-Monthly' - elif any([ x in updates_per_month for x in previous_months[:12] ]): - # There has been an update in 1 of the last 12 months - frequency = 'Annual' - else: - # There has been an update in none of the last 12 months - frequency = 'Less than Annual' - - # If the publisher is in the list of current publishers, return a generator object - if publisher in publisher_name: - yield publisher, publisher_name.get(publisher), updates_per_month, frequency - - -def frequency_index(frequency): - return ['Monthly', 'Quarterly', 'Six-Monthly', 'Annual', 'Less than Annual'].index(frequency) - -def publisher_frequency_sorted(): - return sorted(publisher_frequency(), key=lambda (publisher, publisher_title , _, frequency): ( - frequency_index(frequency), - publisher_title - )) - -def publisher_frequency_dict(): - publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0] ) - data = {} - for v in publisher_data_list: - data[v[0]] = v - return data - -def publisher_frequency_summary(): - return Counter(frequency for _,_,_,frequency in publisher_frequency()) - -def timelag_index(timelag): - return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) - -def publisher_timelag_sorted(): - publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] - return sorted(publisher_timelags, key=lambda (publisher, publisher_title, _, timelag): ( - timelag_index(timelag), - publisher_title - )) - -def publisher_timelag_dict(): - publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], 
agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] - data = {} - for v in publisher_timelags: - data[v[0]] = v - return data - -def publisher_timelag_summary(): - return Counter(timelag for _,_,_,timelag in publisher_timelag_sorted()) - -blacklist_publisher = JSONDir('./stats-blacklist/gitaggregate-publisher-dated') - -def has_future_transactions(publisher): - """ - returns 0, 1 or 2 - Returns 2 if the most recent data for a publisher has future transactions. - Returns 1 if the publisher has ever had future transactions. - Returns -1 if the publisher has not been checked for some reason. - Returns 0 otherwise. - """ - publisher_stats = get_publisher_stats(publisher) - if 'transaction_dates' in publisher_stats: - for transaction_type, transaction_counts in publisher_stats['transaction_dates'].items(): - for transaction_date_string, count in transaction_counts.items(): - transaction_date = parse_iso_date(transaction_date_string) - if transaction_date and transaction_date > datetime.date.today(): - return 2 - if publisher not in blacklist_publisher: - return -1 - today = datetime.date.today() - mindate = datetime.date(today.year-1, today.month, 1) - for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items(): - if parse_iso_date(date) >= mindate and activity_blacklist: - return 1 - return 0 - -def sort_first(list_, key): - return sorted(list_, key=lambda x: key(x[0])) From e493e1f3c81f8e1768df22a23738e4b9af0a6597 Mon Sep 17 00:00:00 2001 From: Alex Miller Date: Tue, 18 Aug 2020 11:04:58 -0400 Subject: [PATCH 018/375] Decode date byte string --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index 9286663f26..c8ea5dcce3 100644 --- a/make_html.py +++ b/make_html.py @@ -86,7 +86,7 @@ def get_codelist_values(codelist_values_for_element): # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x 
-app.jinja_env.globals['datetime_generated'] = subprocess.check_output(['date', '+%Y-%m-%d %H:%M:%S %z']).strip() +app.jinja_env.globals['datetime_generated'] = subprocess.check_output(['date', '+%Y-%m-%d %H:%M:%S %z']).strip().decode('utf-8') app.jinja_env.globals['datetime_data'] = date_time_data_str app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') app.jinja_env.globals['stats_url'] = 'http://dashboard.iatistandard.org/stats' From b092b4af2df7c7d85832982afe61eba7ab68784f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 21:45:57 +0000 Subject: [PATCH 019/375] There are no tests, so remove requirements_dev.txt --- requirements_dev.txt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 requirements_dev.txt diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index a32183852f..0000000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,4 +0,0 @@ --r requirements.txt -pytest==3.0.6 -pytest-cov==2.4.0 -coveralls==1.1 From e244196bdf768e814c74ec14602ef0d36a60a611 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 22:12:00 +0000 Subject: [PATCH 020/375] Fix typo in template --- templates/base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/base.html b/templates/base.html index 6ccac26844..e56df05e86 100644 --- a/templates/base.html +++ b/templates/base.html @@ -143,7 +143,7 @@ From d1308178218d6151af4695194343619fc45dfb6b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 22:13:14 +0000 Subject: [PATCH 021/375] Remove all instances of print_function import --- make_html.py | 1 - plots.py | 1 - speakers_kit.py | 1 - 3 files changed, 3 deletions(-) diff --git a/make_html.py b/make_html.py index c8ea5dcce3..9e6a41ce89 100644 --- a/make_html.py +++ b/make_html.py @@ -2,7 +2,6 @@ # This uses Jinja templating to render the HTML templates in the 'templates' folder # Data is based on the files in the 
'stats-calculated' folder, and extra logic in other files in this repository -from __future__ import print_function import argparse import os import re diff --git a/plots.py b/plots.py index b8d09958c2..4604ff7c5a 100644 --- a/plots.py +++ b/plots.py @@ -11,7 +11,6 @@ and num2date """ -from __future__ import print_function import datetime import numpy as np import matplotlib as mpl diff --git a/speakers_kit.py b/speakers_kit.py index cae6567102..814e6c1583 100644 --- a/speakers_kit.py +++ b/speakers_kit.py @@ -1,4 +1,3 @@ -from __future__ import print_function import json import data import csv From 91d4b5adc0c4680783aa1823dca1fff42b113731 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 22:24:13 +0000 Subject: [PATCH 022/375] Simplify requirements --- requirements.txt | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 267388ac00..075796baee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,8 @@ +Flask==0.12.3 +Frozen-Flask==0.13 Jinja2==2.9.5 +lxml +pytz matplotlib==2.0.0 -requests==2.20.0 -Frozen-Flask==0.13 -unicodecsv==0.14.1 -lxml==3.7.2 -pytz==2016.10 - -# These are dependencies of the above, but are specificied explicitly in order to pin versions -Flask==0.12.3 -MarkupSafe==0.23 +requests Werkzeug==0.12.2 -argparse==1.4.0 -itsdangerous==0.24 -mock==2.0.0 -nose==1.3.7 -numpy==1.12.0 -pyparsing==2.1.10 -python-dateutil==2.6.0 -six==1.10.0 From 198998aa7bfce70d6a733ff385937891176c9ecc Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:11:12 +0000 Subject: [PATCH 023/375] Try creating a build workflow --- .github/workflows/build.yml | 45 +++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000000..1e9906de85 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,45 
@@ +name: Build +on: + schedule: + - cron: '0 6 * * *' +jobs: + generate_stats: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + - uses: actions/cache@v2 + name: Cache dependencies + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Fetch data + run: ./fetch_data.sh + - name: Fetch stats + run: | + mkdir stats-calculated out + curl --compressed "http://dashboard.iatistandard.org/stats/ckan.json" > stats-calculated/ckan.json + curl --compressed "http://dashboard.iatistandard.org/stats/gitdate.json" > stats-calculated/gitdate.json + git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated/current + - name: Build HTML output + run: python make_html.py + - name: Add CNAME to output + run: echo "dashboard.codeforiati.org" > out/CNAME + - name: Deploy 🚀 + uses: JamesIves/github-pages-deploy-action@3.7.1 + with: + GIT_CONFIG_NAME: Code for IATI bot + GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com + GITHUB_TOKEN: ${{ secrets.TOKEN }} + BRANCH: gh-pages + FOLDER: out + CLEAN: true From 5f42be6c97c1bfd1415bcc854d72d7dae5a12079 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:11:26 +0000 Subject: [PATCH 024/375] Python3 --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 192509d73f..a12fa144bd 100644 --- a/README.rst +++ b/README.rst @@ -39,7 +39,7 @@ Requirements: * Unix based setup (e.g. Linux, Mac OS X) with bash etc. * wget and curl installed -* Python 2.7 +* Python 3 * Python dev library ``python-dev`` * python-virtualenv (optional) * Development files for libfreetype, libpng, libxml and libxslt e.g. 
``libfreetype6-dev libpng-dev libxml2-dev libxslt-dev``. From 83a74dc3150f4ba6c94cd9e7d86905e007ff1084 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:11:46 +0000 Subject: [PATCH 025/375] Temporary hack! --- data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.py b/data.py index e3ddbfc97a..1d4f2555fc 100644 --- a/data.py +++ b/data.py @@ -170,7 +170,7 @@ def get_publisher_name(self): # Loop over this list and return the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir('./stats-calculated/gitaggregate-publisher-dated').keys(): + if x in JSONDir('./stats-calculated/current/aggregated-publisher').keys(): return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher From f4ae069221d1f9c9f676ed61cd76a88662bb7462 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:29:39 +0000 Subject: [PATCH 026/375] Build CSV output --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1e9906de85..210d1ab039 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,6 +30,8 @@ jobs: curl --compressed "http://dashboard.iatistandard.org/stats/ckan.json" > stats-calculated/ckan.json curl --compressed "http://dashboard.iatistandard.org/stats/gitdate.json" > stats-calculated/gitdate.json git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated/current + - name: Build CSV output + run: python make_csv.py - name: Build HTML output run: python make_html.py - name: Add CNAME to output From a46a1ebc2b69f60ad9ae7250ec3747a6575e3cfe Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:31:19 +0000 Subject: [PATCH 027/375] Rename workflow --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml index 210d1ab039..ab7a26ceee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,7 @@ on: schedule: - cron: '0 6 * * *' jobs: - generate_stats: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 From f2fb1c70efa201be135b1c4a240e7b74922b078b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 23 Mar 2021 23:31:28 +0000 Subject: [PATCH 028/375] Build on push --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ab7a26ceee..85f76feef1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,6 @@ name: Build on: + push: schedule: - cron: '0 6 * * *' jobs: From b3d96e8a37a0280eca3cb370da6f9818686067e4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 00:55:37 +0000 Subject: [PATCH 029/375] Remove some superfluous stuff from base template --- templates/base.html | 99 ++++++++++----------------------------------- 1 file changed, 22 insertions(+), 77 deletions(-) diff --git a/templates/base.html b/templates/base.html index e56df05e86..2d6391e5e3 100644 --- a/templates/base.html +++ b/templates/base.html @@ -65,29 +65,9 @@ #footer{background-color:#f5f5f5;margin-top: 30px} .container .text-muted{margin: 20px 0;} - .notify-banner { - background-color: #fbde6b; - text-align: center; - padding-top: 10px; - padding-bottom: 10px; - } .navbar-default { margin-bottom: 0px; } - - #new-menu { - /* padding-top: 20px; */ - } - - #new-menu .container { - position: relative; - max-width: 1200px; - margin-top: 2em; - padding: 2em; - padding-top: 1em; - padding-bottom: 1em; - box-shadow: 0 0 50px 0 rgba(0, 0, 0, 0.15); - } @@ -99,67 +79,32 @@
- -
- IATI Publishing Statistics now has a new home: http://publishingstats.iatistandard.org. -
-
{% block container %} {% block page_header_div %} From 16032250ed3da5b8515749669bed9e95116a3186 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:10:57 +0000 Subject: [PATCH 030/375] Use metadata instead of gitdate --- .github/workflows/build.yml | 4 ++-- data.py | 2 +- make_html.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 85f76feef1..bf9d5fee3a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,8 @@ jobs: - name: Fetch stats run: | mkdir stats-calculated out - curl --compressed "http://dashboard.iatistandard.org/stats/ckan.json" > stats-calculated/ckan.json - curl --compressed "http://dashboard.iatistandard.org/stats/gitdate.json" > stats-calculated/gitdate.json + curl --compressed "https://dashboard-stats.codeforiati.org/ckan.json" > stats-calculated/ckan.json + curl --compressed "https://dashboard-stats.codeforiati.org/metadata.json" > stats-calculated/metadata.json git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated/current - name: Build CSV output run: python make_csv.py diff --git a/data.py b/data.py index 1d4f2555fc..1a87571221 100644 --- a/data.py +++ b/data.py @@ -242,7 +242,7 @@ def deep_merge(obj1, obj2): current_stats['inverted_file_grouped'] = GroupFiles(current_stats['inverted_file']) ckan_publishers = JSONDir('./data/ckan_publishers') ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) -gitdate = json.load(open('./stats-calculated/gitdate.json'), object_pairs_hook=OrderedDict) +metadata = json.load(open('./stats-calculated/metadata.json'), object_pairs_hook=OrderedDict) with open('./data/downloads/errors') as fp: for line in fp: if line != '.\n': diff --git a/make_html.py b/make_html.py index 9e6a41ce89..e75cdfb83c 100644 --- a/make_html.py +++ b/make_html.py @@ -75,8 +75,8 @@ def 
get_codelist_values(codelist_values_for_element): return list(set([y for x in codelist_values_for_element.items() for y in list(x[1])])) # Store data processing times -date_time_data_str = max(gitdate.values()) -date_time_data_obj = datetime.datetime.strptime(date_time_data_str[:19], '%Y-%m-%d %H:%M:%S') # Ignores timezone as this is unhelpful for user output +date_time_data_str = metadata['created_at'] +date_time_data_obj = datetime.datetime.strptime(date_time_data_str[:19], '%Y-%m-%dT%H:%M:%S') # Ignores timezone as this is unhelpful for user output # Custom Jinja filters app.jinja_env.filters['xpath_to_url'] = xpath_to_url From 930a13bbf94dab2811437c329f2490a7e0bcacbe Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:11:15 +0000 Subject: [PATCH 031/375] =?UTF-8?q?Don=E2=80=99t=20build=20on=20push?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bf9d5fee3a..f0257e0b5c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,5 @@ name: Build on: - push: schedule: - cron: '0 6 * * *' jobs: From 278d90f744515a8c020ab2c4a966e266332ab361 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:17:46 +0000 Subject: [PATCH 032/375] =?UTF-8?q?Another=20hack=20=E2=80=93=20create=20t?= =?UTF-8?q?he=20failed=20downloads=20plot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 2 + plots.py | 74 ++++++++++++++++++------------------- 2 files changed, 39 insertions(+), 37 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f0257e0b5c..64f6d50667 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,6 +30,8 @@ jobs: curl --compressed "https://dashboard-stats.codeforiati.org/ckan.json" > 
stats-calculated/ckan.json curl --compressed "https://dashboard-stats.codeforiati.org/metadata.json" > stats-calculated/metadata.json git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated/current + - name: Make plots + run: python plots.py - name: Build CSV output run: python make_csv.py - name: Build HTML output diff --git a/plots.py b/plots.py index 4604ff7c5a..7354b389a6 100644 --- a/plots.py +++ b/plots.py @@ -145,47 +145,47 @@ def make_plot(stat_path, git_stats, img_prefix=''): git_stats = AugmentedJSONDir('./stats-calculated/gitaggregate-dated') for stat_path in [ - 'activities', - 'publishers', - 'activity_files', - 'organisation_files', - 'file_size', + # 'activities', + # 'publishers', + # 'activity_files', + # 'organisation_files', + # 'file_size', 'failed_downloads', - 'invalidxml', - 'nonstandardroots', - 'unique_identifiers', - ('validation', lambda x: x == 'fail', ''), - ('publishers_validation', lambda x: x == 'fail', ''), - ('publisher_has_org_file', lambda x: x == 'no', ''), - ('versions', lambda x: x in expected_versions, '_expected'), - ('versions', lambda x: x not in expected_versions, '_other'), - ('publishers_per_version', lambda x: x in expected_versions, '_expected'), - ('publishers_per_version', lambda x: x not in expected_versions, '_other'), - ('file_size_bins', lambda x: True, ''), - ('publisher_types', lambda x: True, ''), - ('activities_per_publisher_type', lambda x: True, '') + # 'invalidxml', + # 'nonstandardroots', + # 'unique_identifiers', + # ('validation', lambda x: x == 'fail', ''), + # ('publishers_validation', lambda x: x == 'fail', ''), + # ('publisher_has_org_file', lambda x: x == 'no', ''), + # ('versions', lambda x: x in expected_versions, '_expected'), + # ('versions', lambda x: x not in expected_versions, '_other'), + # ('publishers_per_version', lambda x: x in expected_versions, '_expected'), + # ('publishers_per_version', lambda x: x not in expected_versions, '_other'), + # 
('file_size_bins', lambda x: True, ''), + # ('publisher_types', lambda x: True, ''), + # ('activities_per_publisher_type', lambda x: True, '') ]: make_plot(stat_path, git_stats) # Delete git_stats variable to save memory del git_stats -try: - os.makedirs('out/publisher_imgs') -except OSError: - pass - -git_stats_publishers = AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') -for publisher, git_stats_publisher in git_stats_publishers.items(): - for stat_path in [ - 'activities', - 'activity_files', - 'organisation_files', - 'file_size', - 'invalidxml', - 'nonstandardroots', - 'publisher_unique_identifiers', - ('validation', lambda x: x == 'fail', ''), - ('versions', lambda x: True, ''), - ]: - make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) +# try: +# os.makedirs('out/publisher_imgs') +# except OSError: +# pass + +# git_stats_publishers = AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') +# for publisher, git_stats_publisher in git_stats_publishers.items(): +# for stat_path in [ +# 'activities', +# 'activity_files', +# 'organisation_files', +# 'file_size', +# 'invalidxml', +# 'nonstandardroots', +# 'publisher_unique_identifiers', +# ('validation', lambda x: x == 'fail', ''), +# ('versions', lambda x: True, ''), +# ]: +# make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) From e9b2ab3ba7b687adddd5cc494f1dbf5f0e2dd9c1 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:27:23 +0000 Subject: [PATCH 033/375] Remove zendesk widget --- templates/base.html | 5 ----- 1 file changed, 5 deletions(-) diff --git a/templates/base.html b/templates/base.html index 2d6391e5e3..f7f0db087d 100644 --- a/templates/base.html +++ b/templates/base.html @@ -70,11 +70,6 @@ } - - - - {% block extrahead %}{% endblock %} From 2dabc8f744e48ff0176fa59c6722e71a502a01d8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:27:33 +0000 Subject: [PATCH 034/375] 
Remove google analytics --- templates/base.html | 9 --------- 1 file changed, 9 deletions(-) diff --git a/templates/base.html b/templates/base.html index f7f0db087d..e550fa9602 100644 --- a/templates/base.html +++ b/templates/base.html @@ -145,15 +145,6 @@

{{page_titles[page]}}

{% block tablesorterscript %}{% endblock %} - {% block extrafooter %}{% endblock %} From 0f831cfdcbb5cd6143eb89182378bd6d6e3156c7 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:28:03 +0000 Subject: [PATCH 035/375] Fix some links --- templates/base.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/base.html b/templates/base.html index e550fa9602..4ecd46aa5a 100644 --- a/templates/base.html +++ b/templates/base.html @@ -123,15 +123,15 @@

{{page_titles[page]}}

From 2c5976a2470f84b486d3c6c1ec76f69758a97a19 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 01:34:09 +0000 Subject: [PATCH 037/375] Various FAQ fixes --- templates/faq.html | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/templates/faq.html b/templates/faq.html index a5974691ac..64c94b3722 100644 --- a/templates/faq.html +++ b/templates/faq.html @@ -16,13 +16,13 @@

When is the Dashboard updated?

Why is there a difference between the data download and Dashboard generation time?

The data is downloaded, and then there is a period of computing time to generate the statistics that inform the Dashboard and Publishing Statistics.

-

Usually, there is a small period of time between the two. However, we do track this as sometimes things break - and the site fails to regenerate. If you spot something, please also let us know via support@iatistandard.org.

+

Usually, there is a small period of time between the two. However, we do track this as sometimes things break - and the site fails to regenerate. If you spot something, please also let us know by creating an issue on github.

Does a graph going up or down mean something good?

No. There may be a number of reasons why a certain graph or number rises or falls.

In some cases, a fall in the graph may well be down to the fact that the Dashboard script failed to download the relevant data. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

Therefore, we stress to treat the graphs, numbers and statistics on the IATI Dashboard with caution, context and consideration.

-

Should you think something is really wrong, please contact us via support@iatistandard.org.

+

Should you think something is really wrong, please let us know by creating an issue on github.

What do the Publishing Statistics numbers mean?

Different tables capture how timely, forward looking and comprehensive a publisher’s IATI data is. A summary of this information is displayed on the Summary Statistics page, where each quality is marked out of 100.

@@ -30,7 +30,7 @@

What do the Publishing Statistics numbers mean?

In some cases, a low number may well be down to the fact that the Dashboard script failed to download the relevant data or the methodology for an element within IATI may need amending. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

Only the timeliness tables track an organisation’s publishing over time. If the Publishing Statistics doesn’t pick up your data one month we are unable to back fill the table.

Therefore, we stress to treat the tables and their corresponding marks with caution, context and consideration.

-

Should you think something is really wrong, please contact us via support@iatistandard.org.

+

Should you think something is really wrong, please create an issue on github.

Can I build my own version of this Dashboard?

Yes - the source code is all open source:

@@ -41,9 +41,7 @@

Can I build my own version of this Dashboard?

We advise you to check through the technical specifications.

How can I suggest a new function?

-

Ideally, we’d suggest to check through the list of issues we have logged in our Github repositories.

- -

Alternatively, please email us via support@iatistandard.org

+

Ideally, we’d suggest to check through the list of issues we have logged in our Github repositories.

We’ve published/updated our IATI data, but the numbers haven’t updated.

We’d suggest two initial checks:

@@ -53,14 +51,14 @@

We’ve published/updated our IATI data, but the numbers haven’t updated.<
  • Has the dashboard updated since you published (check the times at the footer of this page)?
  • -

    Should you still believe that data is missing from the Dashboard, we’d love to hear from you - please contact us on support@iatistandard.org

    +

    Should you still believe that data is missing from the Dashboard, we’d love to hear from you - please contact us by creating an issue on github.

    I want to get to the raw data of a publisher - how can I do that?

    Two ways:

    1. Visit the IATI Registry and access the relevant links to the XML files - these links are often found on the relevant Dashboard page.
    2. -
    3. Try a query via the IATI Datastore
    4. +
    5. Try a query via the IATI Datastore Classic
    {% endblock %} From 9661b07c7191d3b382b461dbb5e3de08afa4812f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 02:34:47 +0000 Subject: [PATCH 038/375] Improve url_to_filename --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index e75cdfb83c..6b3286e721 100644 --- a/make_html.py +++ b/make_html.py @@ -80,7 +80,7 @@ def get_codelist_values(codelist_values_for_element): # Custom Jinja filters app.jinja_env.filters['xpath_to_url'] = xpath_to_url -app.jinja_env.filters['url_to_filename'] = lambda x: x.split('/')[-1] +app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher # Custom Jinja globals From 0618f4393b8ce423bdf606497fdcd0c0bf4c9772 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 02:57:17 +0000 Subject: [PATCH 039/375] =?UTF-8?q?Don=E2=80=99t=20bother=20with=20github?= =?UTF-8?q?=20pages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 64f6d50667..fde4c24e84 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,8 +27,8 @@ jobs: - name: Fetch stats run: | mkdir stats-calculated out - curl --compressed "https://dashboard-stats.codeforiati.org/ckan.json" > stats-calculated/ckan.json - curl --compressed "https://dashboard-stats.codeforiati.org/metadata.json" > stats-calculated/metadata.json + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/ckan.json" > stats-calculated/ckan.json + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/metadata.json" > stats-calculated/metadata.json git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats 
stats-calculated/current - name: Make plots run: python plots.py From aae4b0e4dc8257292afb722a70946c44d8ac7dff Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 08:54:29 +0000 Subject: [PATCH 040/375] Fix some more FAQ links --- templates/faq.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/faq.html b/templates/faq.html index 64c94b3722..758b3a0ecd 100644 --- a/templates/faq.html +++ b/templates/faq.html @@ -35,8 +35,8 @@

    What do the Publishing Statistics numbers mean?

    Can I build my own version of this Dashboard?

    Yes - the source code is all open source:

      -
    1. https://github.com/IATI/IATI-Dashboard/.
    2. -
    3. https://github.com/IATI/IATI-Publishing-Statistics
    4. +
    5. https://github.com/codeforIATI/IATI-Dashboard/.
    6. +
    7. https://github.com/codeforIATI/IATI-Publishing-Statistics

    We advise you to check through the technical specifications.

    From aafad4c13d101e839b19054dadba54927ecf8617 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 12:46:26 +0000 Subject: [PATCH 041/375] Favicon is never used, so remove it completely --- git.sh | 1 - static/img/favicon.png | Bin 313 -> 0 bytes templates/base.html | 4 +--- 3 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 static/img/favicon.png diff --git a/git.sh b/git.sh index c7b6c780a5..4cd15706b2 100755 --- a/git.sh +++ b/git.sh @@ -22,7 +22,6 @@ echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_html.py" python make_html.py $1 $2|| exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" -cp static/img/favicon.png out/ cp static/img/tablesorter-icons.gif out/ echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" diff --git a/static/img/favicon.png b/static/img/favicon.png deleted file mode 100644 index f70593250e25c0deebc178d8a185678f2885f873..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 313 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdz#^NA%Cx&(BWL^R}Y)RhkE)4%c zaKYZ?lYt_f1s;*bK<(E-n9)gNb_Gz7y~NYkmHi%*w4j#S&RthP>UMg%IEF+VetX@P z>yU#0>jQnIj^2dC^oOiRE-6>?eq`r=P`PHyrjJb5#4^|Du$Y>yOkr+P^SaTPm~h|d z_l(-##xB1%s^*`0ncNi3a62*B?4;o)rup@=_IzBbHPy>uEvJcgL~_4l!k*?bQI=gV zTzUU&MIU=IPEJoY}%y#;ebxRqI}p!)<}^=6#>pGkJMrw zN*y@wu~8$#XIteQu_t~T&+SiLf32(bVf+o@>Zyv$0|GdJo@8L~boFyt=akR{0A2@l AK>z>% diff --git a/templates/base.html b/templates/base.html index 7bb787bc20..8686127ba2 100644 --- a/templates/base.html +++ b/templates/base.html @@ -131,9 +131,7 @@

    {{page_titles[page]}}

    (NB This is the time the download task completed. Any changes made after this time will not be reflected).
    - For details on how often these updates are applied, see IATI Dashboard update FAQ.
    - - Favicon created from: Statistics designed by Nate Eul from the Noun Project
    + For details on how often these updates are applied, see IATI Dashboard update FAQ.
    From a43d840e64c2bc269a34cbd16e801de759be6def Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 13:21:52 +0000 Subject: [PATCH 042/375] Standardise timestamps in the footer --- make_html.py | 14 +++++++------- templates/base.html | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/make_html.py b/make_html.py index 6b3286e721..e84eec80ee 100644 --- a/make_html.py +++ b/make_html.py @@ -5,16 +5,17 @@ import argparse import os import re -import subprocess from collections import defaultdict from flask import Flask, render_template, redirect, abort, Response app = Flask(__name__) +import pytz import licenses from vars import expected_versions import text -import datetime +from datetime import datetime +from dateutil import parser print('Doing initial data import') from data import * @@ -75,8 +76,7 @@ def get_codelist_values(codelist_values_for_element): return list(set([y for x in codelist_values_for_element.items() for y in list(x[1])])) # Store data processing times -date_time_data_str = metadata['created_at'] -date_time_data_obj = datetime.datetime.strptime(date_time_data_str[:19], '%Y-%m-%dT%H:%M:%S') # Ignores timezone as this is unhelpful for user output +date_time_data_obj = parser.parse(metadata['created_at']) # Custom Jinja filters app.jinja_env.filters['xpath_to_url'] = xpath_to_url @@ -85,8 +85,8 @@ def get_codelist_values(codelist_values_for_element): # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x -app.jinja_env.globals['datetime_generated'] = subprocess.check_output(['date', '+%Y-%m-%d %H:%M:%S %z']).strip().decode('utf-8') -app.jinja_env.globals['datetime_data'] = date_time_data_str +app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') +app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at 
%H:%M)') app.jinja_env.globals['stats_url'] = 'http://dashboard.iatistandard.org/stats' app.jinja_env.globals['sorted'] = sorted @@ -109,7 +109,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['set'] = set app.jinja_env.globals['firstint'] = firstint app.jinja_env.globals['expected_versions'] = expected_versions -app.jinja_env.globals['current_year'] = datetime.datetime.now().year +app.jinja_env.globals['current_year'] = datetime.now().year # Following variables set in coverage branch but not in master # app.jinja_env.globals['float'] = float # app.jinja_env.globals['dac2012'] = dac2012 diff --git a/templates/base.html b/templates/base.html index 8686127ba2..746d7bb907 100644 --- a/templates/base.html +++ b/templates/base.html @@ -127,7 +127,7 @@

    {{page_titles[page]}}

    Report bugs, and request features using Github issues.
    - Generated on {{datetime_generated}} from data downloaded {{datetime_data}}.
    + Generated on {{ datetime_generated() }} from data downloaded {{ datetime_data }}.
    (NB This is the time the download task completed. Any changes made after this time will not be reflected).
    From 52866c744e167dab31e43777a41fd1763afae843 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 13:39:40 +0000 Subject: [PATCH 043/375] Error codes link should link to curl docs --- templates/download.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/download.html b/templates/download.html index 24c3a1ad07..49c42c66ad 100644 --- a/templates/download.html +++ b/templates/download.html @@ -21,7 +21,7 @@
    - + From 6fafa215d8d541f6356c081942854409c955394a Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 19:47:45 +0000 Subject: [PATCH 044/375] =?UTF-8?q?Remove=20the=20"I=E2=80=99m=20always=20?= =?UTF-8?q?frustrated"=20text?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/ISSUE_TEMPLATE/feature_request.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index bbcbbe7d61..0214585569 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -8,7 +8,7 @@ assignees: '' --- **Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] +A clear and concise description of what the problem is. **Describe the solution you'd like** A clear and concise description of what you want to happen. 
From 33d602b4d1ae144055b51df67144700c3caa1039 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 19:48:06 +0000 Subject: [PATCH 045/375] Remove "Other" issue template Github provides this functionality already --- .github/ISSUE_TEMPLATE/other.md | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/other.md diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md deleted file mode 100644 index 9d633eb4d2..0000000000 --- a/.github/ISSUE_TEMPLATE/other.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Other -about: A blank template -title: '' -labels: '' -assignees: '' - ---- - - From c907598b8e19a6aa8e39f91b2267e20fb5913b80 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:50:36 +0000 Subject: [PATCH 046/375] Fix some URLs --- templates/codelist.html | 2 +- templates/codelists.html | 4 ++-- templates/publisher.html | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/codelist.html b/templates/codelist.html index fc4ee5c588..d82da01075 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -12,7 +12,7 @@

    Codelist values used for {{element}}

    Note: this attribute can be on multiple codelists (specified by the @vocabulary attribute) but the dashboard only currently checks against the default codelist - see issue #174. Therefore some publishers may incorrectly show up as "Not On Codelist".
    {% endif %}

    (This page in JSON format)

    -

    Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

    +

    Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

    {% endblock %} {% block content %} diff --git a/templates/codelists.html b/templates/codelists.html index 6bac7a6616..29d262d371 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -26,9 +26,9 @@

    Codelists for version {{major_version}}.xx

    {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %}
    - + - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} {% endwith %} diff --git a/templates/publisher.html b/templates/publisher.html index e7d5099b4c..778f39097b 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -257,7 +257,7 @@

    Financial

    Budgets

    -

    The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

    +

    The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

    {{element}}
    {{element}}Publisher Registry Dataset URLError CodeError Code
    {{element}}{{codelist_mapping[major_version].get(element)}}{{codelist_mapping[major_version].get(element)}} {{values|length}}{{codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length}}{{codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length}}{{codes|length}}
    @@ -340,7 +340,7 @@

    Codelist Values (version {{major_version}}.xx)

    {% with element_i=current_stats.inverted_publisher.codelist_values_by_major_version[major_version].keys().index(element) %} - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %}
    {{element}}{{codelist_mapping[major_version].get(element)}}{{codelist_mapping[major_version].get(element)}}{% if codes|count %} {{codes|count}} From 9cab7b13f318a908be49f2d48c33404697115f8e Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:51:20 +0000 Subject: [PATCH 047/375] Whitespace --- templates/codelist.html | 2 +- templates/codelists.html | 2 +- templates/elements.html | 2 +- templates/identifiers.html | 4 ++-- templates/validation.html | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/templates/codelist.html b/templates/codelist.html index d82da01075..101d2c08a2 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -20,7 +20,7 @@

    Codelist values used for {{element}}

    {% with elements=reverse_codelist_mapping[major_version][codelist_mapping[element]] %} {% if elements|count > 1 %} - Other elements/attributes on this codelist: + Other elements/attributes on this codelist:

      {% for el in elements%} {% if el in current_stats.inverted_publisher.codelist_values[major_version].keys() %} diff --git a/templates/codelists.html b/templates/codelists.html index 29d262d371..426602f154 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% import 'boxes.html' as boxes %} {% block content %} -

      Count of the different values used by all publishers, per codelist.

      +

      Count of the different values used by all publishers, per codelist.

      Note: some Elements/Attributes may use the same codelist. In each case, the values counted are different for each use.

      {% include 'tablesorter_instructions.html' %} diff --git a/templates/elements.html b/templates/elements.html index d5e6081ad2..caf4c8d584 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -44,6 +44,6 @@ {% endfor%}
    -
    + {% endblock %} diff --git a/templates/identifiers.html b/templates/identifiers.html index 3a4215e5c9..0c2e44778e 100644 --- a/templates/identifiers.html +++ b/templates/identifiers.html @@ -11,7 +11,7 @@

    Duplicate identifiers: a count of the unique iati-identifier that are duplicated. Instances of duplicate identifiers: the total number of activities that contain a duplicate iati-identifier, within a publisher dataset. - Example: two identifiers could be found as having duplicates. Across the dataset, these duplicates could account for 200 activities. + Example: two identifiers could be found as having duplicates. Across the dataset, these duplicates could account for 200 activities.

    {% include 'tablesorter_instructions.html' %} @@ -34,6 +34,6 @@

    {% endfor %} - + {% endblock %} diff --git a/templates/validation.html b/templates/validation.html index 1e71a9ea3d..bae27368aa 100644 --- a/templates/validation.html +++ b/templates/validation.html @@ -11,7 +11,7 @@

    Breakdown By Publisher

    - +

    List of files that fail validation, grouped by publisher

    {% for publisher,datasets in current_stats.inverted_file_grouped.validation.fail.items() %} @@ -47,7 +47,7 @@

    List of files that fail validation, grouped by publisher

    {% endfor%} - +

    Count of files that fail validation, per publisher.

    @@ -67,6 +67,6 @@

    Count of files that fail validation, per publisher.

    {% endfor%} -
    + {% endblock %} From 3128f334c5ecfc33a86f487bc3f6586797502e4a Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:52:56 +0000 Subject: [PATCH 048/375] Fix stats_url --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index e84eec80ee..59a638841d 100644 --- a/make_html.py +++ b/make_html.py @@ -88,7 +88,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') -app.jinja_env.globals['stats_url'] = 'http://dashboard.iatistandard.org/stats' +app.jinja_env.globals['stats_url'] = 'https://github.com/codeforIATI/IATI-Stats/tree/gh-pages' app.jinja_env.globals['sorted'] = sorted app.jinja_env.globals['enumerate'] = enumerate app.jinja_env.globals['top_titles'] = text.top_titles From 8e856ed81ffc13f7c3a217d235bf4f19cc064e14 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:53:14 +0000 Subject: [PATCH 049/375] =?UTF-8?q?Comment=20this=20out=20for=20now=20(sin?= =?UTF-8?q?ce=20it=20won=E2=80=99t=20work)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- templates/boxes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/boxes.html b/templates/boxes.html index 4e7c6b930c..c4c93c30d7 100644 --- a/templates/boxes.html +++ b/templates/boxes.html @@ -8,7 +8,7 @@

    {{title}}

    {% if json %} - (J) + {% endif %}
    From de98f2e31274eb7d4b1c3c0224841a0e0caedc9f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:58:45 +0000 Subject: [PATCH 050/375] Point to gh pages for JSON; gh for listings --- make_html.py | 3 ++- templates/publisher.html | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/make_html.py b/make_html.py index 59a638841d..12b93fe0eb 100644 --- a/make_html.py +++ b/make_html.py @@ -88,7 +88,8 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') -app.jinja_env.globals['stats_url'] = 'https://github.com/codeforIATI/IATI-Stats/tree/gh-pages' +app.jinja_env.globals['stats_url'] = 'https://dashboard-stats.codeforiati.org' +app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats/tree/gh-pages' app.jinja_env.globals['sorted'] = sorted app.jinja_env.globals['enumerate'] = enumerate app.jinja_env.globals['top_titles'] = text.top_titles diff --git a/templates/publisher.html b/templates/publisher.html index 778f39097b..5e2e5ccfdb 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -4,7 +4,7 @@ {{ super () }} Publisher: {{publisher_name[publisher]}} {% endblock %} {% block page_header %} -(Publisher Stats JSON) +(Publisher Stats JSON)

    Publisher: {{publisher_name[publisher]}}

    {% endblock %} From 900613c9f73809f102080bb80386692c7cbcc609 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 21:59:51 +0000 Subject: [PATCH 051/375] Clone to stats-calculated --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fde4c24e84..8c073381ee 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,7 +29,7 @@ jobs: mkdir stats-calculated out curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/ckan.json" > stats-calculated/ckan.json curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/metadata.json" > stats-calculated/metadata.json - git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated/current + git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated - name: Make plots run: python plots.py - name: Build CSV output From ced44680135f55f54066dab6393e28a3549d502d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 22:46:34 +0000 Subject: [PATCH 052/375] Run at 5am --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8c073381ee..08417b0e49 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,7 @@ name: Build on: schedule: - - cron: '0 6 * * *' + - cron: '0 5 * * *' jobs: build: runs-on: ubuntu-latest From 20cc867d7b94db3d41b035bb96116ecd1137f361 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 23:22:24 +0000 Subject: [PATCH 053/375] Add dateutil to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 075796baee..4497adc0b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ Flask==0.12.3 Frozen-Flask==0.13 
Jinja2==2.9.5 lxml +python-dateutil==2.8.1 pytz matplotlib==2.0.0 requests From 0cdf79efe5e4b0fc14f7aa85b0f1daa9c3e81b1a Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 23:29:16 +0000 Subject: [PATCH 054/375] Remove IATI css and js --- templates/base.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/templates/base.html b/templates/base.html index 746d7bb907..c01986497c 100644 --- a/templates/base.html +++ b/templates/base.html @@ -4,7 +4,6 @@ - {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} @@ -140,7 +139,6 @@

    {{page_titles[page]}}

    {% block tablesorterscript %}{% endblock %} - {% block extrafooter %}{% endblock %} From 975cde43ecd6860abb93de1a6f13ca87b95f7052 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 24 Mar 2021 23:29:40 +0000 Subject: [PATCH 055/375] Upgrade bootstrap --- templates/base.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/base.html b/templates/base.html index c01986497c..611783d8d8 100644 --- a/templates/base.html +++ b/templates/base.html @@ -4,7 +4,7 @@ - + {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} {% block extrahead %}{% endblock %} @@ -76,6 +84,7 @@
    {% if publisher in ckan and dataset_name in ckan[publisher] %} - validator + {% endif %}
    From 85a2706364cf7ccf718831fb2403d8343d4dd7af Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Apr 2021 13:55:47 +0100 Subject: [PATCH 117/375] Revert "Merge pull request #560 from IATI/removing-publisher-stats" This reverts commit 4391812af26edec2e004221d8095c82dd3f03620, reversing changes made to 9071fa9ade3cbc77ddcfd0d86f2c37bcfea498ce. --- comprehensiveness.py | 189 +++++++++ forwardlooking.py | 88 ++++ git.sh | 2 +- humanitarian.py | 60 +++ make_csv.py | 119 ++++++ make_html.py | 49 ++- static/templates/base.html | 219 ++++++++++ summary_stats.py | 121 ++++++ templates/comprehensiveness.html | 47 +++ templates/comprehensiveness_base.html | 443 ++++++++++++++++++++ templates/comprehensiveness_core.html | 68 +++ templates/comprehensiveness_financials.html | 54 +++ templates/comprehensiveness_valueadded.html | 65 +++ templates/coverage.html | 47 +++ templates/forwardlooking.html | 239 +++++++++++ templates/summary_stats.html | 166 ++++++++ templates/timeliness.html | 272 ++++++++++++ templates/timeliness_base.html | 47 +++ templates/timeliness_timelag.html | 205 +++++++++ text.py | 2 +- 20 files changed, 2494 insertions(+), 8 deletions(-) create mode 100644 comprehensiveness.py create mode 100644 forwardlooking.py create mode 100644 humanitarian.py create mode 100644 static/templates/base.html create mode 100644 summary_stats.py create mode 100644 templates/comprehensiveness.html create mode 100644 templates/comprehensiveness_base.html create mode 100644 templates/comprehensiveness_core.html create mode 100644 templates/comprehensiveness_financials.html create mode 100644 templates/comprehensiveness_valueadded.html create mode 100644 templates/coverage.html create mode 100644 templates/forwardlooking.html create mode 100644 templates/summary_stats.html create mode 100644 templates/timeliness.html create mode 100644 templates/timeliness_base.html create mode 100644 templates/timeliness_timelag.html diff --git a/comprehensiveness.py 
b/comprehensiveness.py new file mode 100644 index 0000000000..962e6d567e --- /dev/null +++ b/comprehensiveness.py @@ -0,0 +1,189 @@ +# This file converts raw comprehensiveness data to percentages, and calculates averages. + +from data import publishers_ordered_by_title, get_publisher_stats, publisher_name + +columns = { + 'summary': [ + # Format for elements within this list - and similar lists below ('core', 'financials', etc): + # slug, header, weighting when calculating average + ('core_average', 'Core Average', 2), + ('financials_average', 'Financials Average', 1), + ('valueadded_average', 'Value Added Average', 1), + ('summary_average', 'Weighted Average', 0), # i.e. don't include the average within the calculation of the average + ], + 'core': [ + ('version', 'Version', 1), + ('reporting-org', 'Reporting-Org', 1), + ('iati-identifier', 'Iati-identifier', 1), + ('participating-org', 'Participating Organisation', 1), + ('title', 'Title', 1), + ('description', 'Description', 1), + ('activity-status', 'Status', 1), + ('activity-date', 'Activity Date', 1), + ('sector', 'Sector', 1), + ('country_or_region', 'Country or Region', 1), + ('core_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + ], + 'financials': [ + ('transaction_commitment', 'Transaction - Commitment', 1, 'first_hierarchy_with_commitments'), + ('transaction_spend', 'Transaction - Disbursement or Expenditure', 1, 'bottom_hierarchy'), + ('transaction_traceability', 'Transaction - Traceability', 1, 'bottom_hierarchy'), + ('budget', 'Budget', 1, 'hierarchy_with_most_budgets'), + ('financials_average', 'Average', 0), # i.e. 
don't include the average within the calculation of the average + ], + 'valueadded':[ + ('contact-info', 'Contacts', 1), + ('location', 'Location Details', 1), + ('location_point_pos', 'Geographic Coordinates', 1), + ('sector_dac', 'DAC Sectors', 1), + ('capital-spend', 'Capital Spend', 1), + ('document-link', 'Activity Documents', 1), + ('aid_type', 'Aid Type', 1), + ('recipient_language', 'Recipient Language', 1), + ('result_indicator', 'Result/ Indicator', 1), + ('valueadded_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + ]} + +# Build dictionaries for all the column_headers and column_slugs defined above +column_headers = {tabname:[x[1] for x in values] for tabname, values in columns.items()} +column_slugs = {tabname:[x[0] for x in values] for tabname, values in columns.items()} + +# Build directory to lookup the hierarchy which should be used in the numerator +# e.g. {'activity-date': 'all', 'activity-status': 'all', [...] budget': 'hierarchy_with_most_budgets', [etc]} +column_base_lookup = { + col[0]: col[3] if len(col) > 3 else 'all' + for col_group, col_components in columns.items() + for col in col_components + } + + +def denominator(key, stats): + """Return the appropriate denominator value for a given key. + Returns either the specifc demominator calculated, or a default denominator value. + """ + + # If stats not pased to this function, return zero + if not stats: + return 0 + + # If there is a specific denominator for the given key, return this + if key in stats['comprehensiveness_denominators']: + return int(stats['comprehensiveness_denominators'][key]) + + # Otherwise, return the default denominator + else: + return int(stats['comprehensiveness_denominator_default']) + + +def get_hierarchy_with_most_budgets(stats): + """Find the hierarchy which contains the greatest number of budgets. + Will only count hierarchies where the default denominator is greater than zero. 
+ Input: + stats -- a JSONDir object of publisher stats + Returns: + Key of the hierarchy with greatest number of budgets, or None + """ + + try: + # Get the key with the largest number of budgets + budgets = max(stats['by_hierarchy'], key=(lambda x: + stats['by_hierarchy'][x]['comprehensiveness'].get('budget', 0) + + stats['by_hierarchy'][x]['comprehensiveness'].get('budget_not_provided', 0) + if stats['by_hierarchy'][x]['comprehensiveness_denominator_default'] > 0 else None) + ) + return budgets + except KeyError: + # Return None if this publisher has no comprehensiveness data in any hierarchy - i.e. KeyError + return None + except ValueError: + # Some publishers have no data in 'by_hierarchy' at all - i.e. ValueError: max() arg is an empty sequence + return None + + +def get_first_hierarchy_with_commitments(stats): + """Return the number of the first hierarchy that contains at least 1 commitment + (according to the comprehensiveness counts) + Returns: + Number of first hierarchy with commitments or None if no commitments in any hierarchy + """ + hierarchies_with_commitments = {x: y['comprehensiveness']['transaction_commitment'] + for x,y in stats.get('by_hierarchy',{}).iteritems() + if y['comprehensiveness'].get('transaction_commitment', 0) > 0} + return min(hierarchies_with_commitments) if len(hierarchies_with_commitments) else None + + +def generate_row(publisher): + """Generate comprehensiveness table data for a given publisher + """ + + publisher_stats = get_publisher_stats(publisher) + + # Set an inital dictionary, which will later be populated further + row = {} + row['publisher'] = publisher + row['publisher_title'] = publisher_name[publisher] + + + # Calculate percentages for publisher data populated with any data + for slug in column_slugs['core'] + column_slugs['financials'] + column_slugs['valueadded']: + + # Set the stats base for calculating the numerator. 
This is based on the hierarchy set in the lookup + if column_base_lookup[slug] == 'bottom_hierarchy': + publisher_base = publisher_stats.get('bottom_hierarchy', {}) + + elif column_base_lookup[slug] == 'hierarchy_with_most_budgets': + publisher_base = publisher_stats['by_hierarchy'].get(get_hierarchy_with_most_budgets(publisher_stats), {}) + + elif column_base_lookup[slug] == 'first_hierarchy_with_commitments': + if get_first_hierarchy_with_commitments(publisher_stats): + publisher_base = publisher_stats['by_hierarchy'].get(get_first_hierarchy_with_commitments(publisher_stats), {}) + else: + publisher_base = publisher_stats.get('bottom_hierarchy', {}) + + else: + # Most common case will be column_base_lookup[slug] == 'all': + publisher_base = publisher_stats + + if slug == 'budget': + budget_all = publisher_base.get('comprehensiveness', {}).get(slug, 0) + budget_not_provided_all = publisher_base.get('comprehensiveness', {}).get('budget_not_provided', 0) + numerator_all = budget_all + budget_not_provided_all + budget_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0) + budget_not_provided_valid = publisher_base.get('comprehensiveness_with_validation', {}).get('budget_not_provided', 0) + numerator_valid = budget_valid + budget_not_provided_valid + else: + numerator_all = publisher_base.get('comprehensiveness', {}).get(slug, 0) + numerator_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0) + + if denominator(slug, publisher_base) != 0: + # Populate the row with the %age + row[slug] = int(round( + float(numerator_all)/denominator(slug, publisher_base)*100 + )) + row[slug+'_valid'] = int(round( + float(numerator_valid)/denominator(slug, publisher_base)*100 + )) + + # Loop for averages + # Calculate the average for each grouping, and the overall 'summary' average + for page in ['core', 'financials', 'valueadded', 'summary']: + # Note that the summary must be last, so that it can use the average calculations from 
the other groupings + row[page+'_average'] = int(round( + sum((row.get(x[0]) or 0)*x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + )) + row[page+'_average_valid'] = int(round( + sum((row.get(x[0]+'_valid') or 0)*x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + )) + + return row + + +def table(): + """Generate comprehensiveness table data for every publisher and return as a generator object + """ + + # Loop over the data for each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Generate a row object + yield generate_row(publisher) diff --git a/forwardlooking.py b/forwardlooking.py new file mode 100644 index 0000000000..e843ec0c2d --- /dev/null +++ b/forwardlooking.py @@ -0,0 +1,88 @@ +# This file converts raw forward-looking data to percentages + +from data import publishers_ordered_by_title, get_publisher_stats, publisher_name +import datetime + +# Create a variable with the current year as an integer +this_year = datetime.date.today().year + +# Create a list containing three years: the current year and two following +years = map(str, range(this_year, this_year + 3)) + +# Set column groupings, to be displayed in the user output +column_headers = [ + 'Current activities at the start of each year', + 'Current activities with budgets for each year', + 'Percentage of current activities with budgets' +] + +def generate_row(publisher): + """Generate forward-looking table data for a given publisher + """ + + # Store the data for this publisher as a new variable + publisher_stats = get_publisher_stats(publisher) + + # Create a list for publisher data, and populate it with basic data + row = {} + row['publisher'] = publisher + row['publisher_title'] = publisher_name[publisher] + row['year_columns'] = [{},{},{}] + row['budget_not_provided'] = False + # Work with hierarchies + by_hierarchy = publisher_stats['by_hierarchy'] + hierarchies_with_nonzero_budgets = [ + h for h, stats in 
by_hierarchy.items() + if not all(x == 0 for x in stats['forwardlooking_activities_with_budgets'].values()) + ] + + # Flag if budgets on current activities are reported at more than one hierarchy + row['flag'] = len(hierarchies_with_nonzero_budgets) > 1 + + hierarchies_with_budget_not_provided = [ + h for h, stats in by_hierarchy.items() + if not all(x == 0 for x in stats['forwardlooking_activities_with_budget_not_provided'].values()) + ] + + # Loop over each of the three years (i.e. this year and the following two years) to generate the statistics for the table + for year in years: + if(len(hierarchies_with_budget_not_provided) > 0): + row['budget_not_provided'] = True + # If 'forwardlooking_activities_current' and 'forwardlooking_activities_with_budgets' or 'forwardlooking_activities_with_budget_not_provided' are in the bottom hierarchy + if 'forwardlooking_activities_current' in publisher_stats['bottom_hierarchy'] and ('forwardlooking_activities_with_budgets' in publisher_stats['bottom_hierarchy'] or 'forwardlooking_activities_with_budget_not_provided' in publisher_stats['bottom_hierarchy']): + if len(hierarchies_with_nonzero_budgets) != 1: + # If budgets are at more than one hierarchy (or no hierarchies), just use activities at all hierarchies + row['year_columns'][0][year] = publisher_stats['forwardlooking_activities_current'].get(year) or 0 + row['year_columns'][1][year] = publisher_stats['forwardlooking_activities_with_budgets'].get(year) or 0 + if row['budget_not_provided']: + row['year_columns'][1][year] += publisher_stats['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 + else: + # Else, use the hierarchy which they are reported at + row['year_columns'][0][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_current'].get(year) or 0 + row['year_columns'][1][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budgets'].get(year) or 0 + if row['budget_not_provided']: + 
row['year_columns'][1][year] += by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 + + if not int(row['year_columns'][0][year]): + row['year_columns'][2][year] = '-' + else: + row['year_columns'][2][year] = int(round(float(row['year_columns'][1][year])/float(row['year_columns'][0][year])*100)) + else: + # Else if either 'forwardlooking_activities_current' or 'forwardlooking_activities_with_budgets' are not in the bottom hierarchy, set data zero + # This should only occur if a publisher has 0 activities + row['year_columns'][0][year] = '0' + row['year_columns'][1][year] = '0' + row['year_columns'][2][year] = '-' + + return row + + +def table(): + """Generate forward-looking table data for every publisher and return as a generator object + """ + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Return a generator object + yield generate_row(publisher) diff --git a/git.sh b/git.sh index f28b187d3b..06f77926c3 100755 --- a/git.sh +++ b/git.sh @@ -19,7 +19,7 @@ echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" python speakers_kit.py || exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_html.py" -python make_html.py $1 $2|| exit 1 +python make_html.py || exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" cp -r static/* out/ diff --git a/humanitarian.py b/humanitarian.py new file mode 100644 index 0000000000..cf29d01203 --- /dev/null +++ b/humanitarian.py @@ -0,0 +1,60 @@ +# This file builds a table to show humanitarian reporting for each publisher + +from data import publishers_ordered_by_title, get_publisher_stats +import common + +# Set column groupings, to be displayed in the user output +columns = [ + # slug, header + ('publisher_type', 'Publisher Type'), + ('num_activities', 'Number of Activities'), + ('publishing_humanitarian', 'Publishing Humanitarian?'), + ('humanitarian_attrib', 'Using Humanitarian 
Attribute?'), + ('appeal_emergency', 'Appeal or Emergency Details'), + ('clusters', 'Clusters'), + ('average', 'Average') + ] + + +def table(): + """Generate data for the humanitarian table + """ + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + # Store the data for this publisher as a new variable + publisher_stats = get_publisher_stats(publisher) + + # Create a list for publisher data, and populate it with basic data + row = {} + row['publisher'] = publisher + row['publisher_title'] = publisher_title + row['publisher_type'] = common.get_publisher_type(publisher)['name'] + + # Get data from IATI-Stats output + row['num_activities'] = publisher_stats.get('humanitarian', {}).get('is_humanitarian', '0') + row['publishing_humanitarian'] = 100 if int(row['num_activities']) > 0 else 0 + + # Calculate percentage of all humanitarian activities that are defined using the @humanitarian attribute + row['humanitarian_attrib'] = ( + publisher_stats.get('humanitarian', {}).get('is_humanitarian_by_attrib', '0') / float(row['num_activities']) + if int(row['num_activities']) > 0 else 0 + ) * 100 + + # Calculate percentage of all humanitarian activities that use the element to define an appeal or emergency + row['appeal_emergency'] = ( + publisher_stats.get('humanitarian', {}).get('contains_humanitarian_scope', '0') / float(row['num_activities']) + if int(row['num_activities']) > 0 else 0 + ) * 100 + + # Calculate percentage of all humanitarian activities that use clusters + row['clusters'] = ( + publisher_stats.get('humanitarian', {}).get('uses_humanitarian_clusters_vocab', '0') / float(row['num_activities']) + if int(row['num_activities']) > 0 else 0 + ) * 100 + + # Calculate the mean average + row['average'] = (row['publishing_humanitarian'] + row['humanitarian_attrib'] + row['appeal_emergency'] + row['clusters']) / float(4) + + # Return a generator object + yield row diff --git a/make_csv.py b/make_csv.py index 
c46e62ee9d..b3c9026fc0 100644 --- a/make_csv.py +++ b/make_csv.py @@ -71,3 +71,122 @@ def publisher_dicts(): writer.writeheader() for publisher_json in data.ckan_publishers.values(): writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) + + +# Timeliness CSV files (frequency and timelag) +import timeliness +previous_months = timeliness.previous_months_reversed + +for fname, f, assessment_label in ( + ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), + ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') + ): + with open(os.path.join('out', fname), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) + for publisher, publisher_title, per_month,assessment in f(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) + + +# Forward-looking CSV file +import forwardlooking + +with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [ '{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) + for row in forwardlooking.table(): + writer.writerow([row['publisher_title'], row['publisher']] + [ year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) + + +# Comprehensiveness CSV files ('summary', 'core', 'financials' and 'valueadded') +import comprehensiveness + +for tab in comprehensiveness.columns.keys(): + with open(os.path.join('out', 'comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + + [ x+' (with valid data)' for x in comprehensiveness.column_headers[tab] ] + + [ x+' (with any data)' for x in comprehensiveness.column_headers[tab] ]) + for row in comprehensiveness.table(): + 
writer.writerow([row['publisher_title'], row['publisher']] + + [ row[slug+'_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ] + + [ row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ]) + + +# Coverage CSV file +import coverage + +with open(os.path.join('out', 'coverage.csv'), 'w') as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow([ + 'Publisher Name', + 'Publisher Registry Id', + '2014 IATI Spend (US $m)', + '2015 IATI Spend (US $m)', + '2014 Reference Spend (US $m)', + '2015 Reference Spend (US $m)', + '2015 Official Forecast (US $m)', + 'Spend Ratio (%)', + 'No reference data available (Historic publishers)', + 'No reference data available (New publishers)', + 'Data quality issue reported' + ]) + for row in coverage.table(): + # Write each row + writer.writerow([ + row['publisher_title'], + row['publisher'], + row['iati_spend_2014'], + row['iati_spend_2015'], + row['reference_spend_2014'], + row['reference_spend_2015'], + row['official_forecast_2015'], + row['spend_ratio'], + row['no_data_flag_red'], + row['no_data_flag_amber'], + row['spend_data_error_reported_flag'] + ]) + + +# Summary Stats CSV file +import summary_stats + +with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) + for row in summary_stats.table(): + # Write each row + writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) + + +# Humanitarian CSV file +import humanitarian + +with open(os.path.join('out', 'humanitarian.csv'), 'w') as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow([ + 'Publisher Name', + 'Publisher Registry Id', + 'Publisher Type', + 'Number of Activities', + 'Publishing Humanitarian', + 'Using Humanitarian Attribute', + 'Appeal or 
Emergency Details', + 'Clusters', + 'Humanitarian Score' + ]) + for row in humanitarian.table(): + writer.writerow([ + row['publisher_title'], + row['publisher'], + row['publisher_type'], + row['num_activities'], + row['publishing_humanitarian'], + row['humanitarian_attrib'], + row['appeal_emergency'], + row['clusters'], + row['average'] + ]) diff --git a/make_html.py b/make_html.py index c42f6c2250..c5e7fff91c 100644 --- a/make_html.py +++ b/make_html.py @@ -12,6 +12,12 @@ import pytz import licenses +import timeliness +import forwardlooking +import comprehensiveness +import coverage +import summary_stats +import humanitarian from vars import expected_versions import text from datetime import datetime @@ -57,7 +63,7 @@ def nested_dictinvert(d): def dataset_to_publisher(publisher_slug): """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher slug e.g. dfid """ - return publisher_slug.rsplit('-', 1)[0] + return publisher_slug.rsplit('-',1)[0] def firstint(s): @@ -100,6 +106,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.filters['xpath_to_url'] = xpath_to_url app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher +app.jinja_env.filters['has_future_transactions'] = timeliness.has_future_transactions # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x @@ -146,6 +153,17 @@ def get_codelist_values(codelist_values_for_element): 'data_quality', 'exploring_data', 'publishers', + 'publishing_stats', + 'timeliness', + 'timeliness_timelag', + 'forwardlooking', + 'comprehensiveness', + 'comprehensiveness_core', + 'comprehensiveness_financials', + 'comprehensiveness_valueadded', + 'coverage', + 'summary_stats', + 'humanitarian', 'files', 'activities', 'download', @@ -167,8 +185,27 @@ def get_codelist_values(codelist_values_for_element): def basic_page(page_name): if page_name in basic_page_names: kwargs = {} - 
parent_page_name = page_name - return render_template(page_name + '.html', page=parent_page_name, **kwargs) + if page_name.startswith('timeliness'): + kwargs['timeliness'] = timeliness + parent_page_name = 'timeliness' + elif page_name.startswith('forwardlooking'): + kwargs['forwardlooking'] = forwardlooking + parent_page_name = 'forwardlooking' + elif page_name.startswith('comprehensiveness'): + kwargs['comprehensiveness'] = comprehensiveness + parent_page_name = 'comprehensiveness' + elif page_name.startswith('coverage'): + kwargs['coverage'] = coverage + parent_page_name = 'coverage' + elif page_name.startswith('summary_stats'): + kwargs['summary_stats'] = summary_stats + parent_page_name = 'summary_stats' + elif page_name.startswith('humanitarian'): + kwargs['humanitarian'] = humanitarian + parent_page_name = 'humanitarian' + else: + parent_page_name = page_name + return render_template(page_name+'.html', page=parent_page_name, **kwargs) else: abort(404) @@ -201,7 +238,7 @@ def publisher(publisher): 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) + } for year in sorted(set(sum((x.keys() for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) ] failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) return render_template('publisher.html', @@ -273,7 +310,7 @@ def csv_development(name): @app.route('/publisher_imgs/.png') def 
image_development_publisher(image): print(image) - return Response(open(os.path.join('out', 'publisher_imgs', image + '.png')).read(), mimetype='image/png') + return Response(open(os.path.join('out', 'publisher_imgs', image+'.png')).read(), mimetype='image/png') if __name__ == '__main__': @@ -311,4 +348,4 @@ def url_generator(): license = 'None' yield 'licenses_individual_license', {'license': license} - freezer.freeze() + freezer.freeze() diff --git a/static/templates/base.html b/static/templates/base.html new file mode 100644 index 0000000000..8581a28b0b --- /dev/null +++ b/static/templates/base.html @@ -0,0 +1,219 @@ + + + + + + + + + + {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} + + + + + + + {% block extrahead %}{% endblock %} + + +
    + + + + +
    + {% block container %} + {% block page_header_div %} + + {% endblock %} + + {% block content %} + {% endblock %} + + {% endblock %} +
    + +
    + + + + + + + +{% block tablesorterscript %}{% endblock %} + + + {% block extrafooter %}{% endblock %} + + diff --git a/summary_stats.py b/summary_stats.py new file mode 100644 index 0000000000..31301edc2a --- /dev/null +++ b/summary_stats.py @@ -0,0 +1,121 @@ +# This file converts a range of transparency data to percentages + +from data import publishers_ordered_by_title, get_publisher_stats, secondary_publishers +import common +import timeliness +import forwardlooking +import comprehensiveness +import coverage + +# Set column groupings, to be displayed in the user output +columns = [ + # slug, header + ('publisher_type', 'Publisher Type'), + ('timeliness', 'Timeliness'), + ('forwardlooking', 'Forward looking'), + ('comprehensive', 'Comprehensive'), + ('score', 'Score') + ] + + +def is_number(s): + """ @todo Document this function + """ + try: + float(s) + return True + except ValueError: + return False + +def convert_to_int(x): + """ @todo Document this function + """ + if is_number(x): + return int(x) + else: + return 0 + + +def table(): + """Generate data for the publisher forward-looking table + """ + + # Store timeliness data in variable + timeliness_frequency_data = timeliness.publisher_frequency_dict() + timeliness_timelag_data = timeliness.publisher_timelag_dict() + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Store the data for this publisher as a new variable + publisher_stats = get_publisher_stats(publisher) + + # Skip if all activities from this publisher are secondary reported + if publisher in secondary_publishers: + continue + + # Create a list for publisher data, and populate it with basic data + row = {} + row['publisher'] = publisher + row['publisher_title'] = publisher_title + row['publisher_type'] = common.get_publisher_type(publisher)['name'] + + # Compute timeliness statistic + # Assign frequency score + # Get initial frequency assessment, or use empty set in the case where the 
publisher is not found + frequency_assessment_data = timeliness_frequency_data.get(publisher, ()) + frequency_assessment = None if len(frequency_assessment_data) < 4 else frequency_assessment_data[3] + if frequency_assessment == 'Monthly': + frequency_score = 4 + elif frequency_assessment == 'Quarterly': + frequency_score = 3 + elif frequency_assessment == 'Six-Monthly': + frequency_score = 2 + elif frequency_assessment == 'Annual': + frequency_score = 1 + else: # frequency_assessment == 'Less than Annual' or something else! + frequency_score = 0 + + # Assign timelag score + # Get initial timelag assessment, or use empty set in the case where the publisher is not found + timelag_assessment_data = timeliness_timelag_data.get(publisher, ()) + timelag_assessment = None if len(timelag_assessment_data) < 4 else timelag_assessment_data[3] + if timelag_assessment == 'One month': + timelag_score = 4 + elif timelag_assessment == 'A quarter': + timelag_score = 3 + elif timelag_assessment == 'Six months': + timelag_score = 2 + elif timelag_assessment == 'One year': + timelag_score = 1 + else: # timelag_assessment == 'More than one year' or something else! 
+ timelag_score = 0 + + # Compute the percentage + row['timeliness'] = int( round((float(frequency_score + timelag_score) / 8) * 100)) + + + # Compute forward-looking statistic + # Get the forward-looking data for this publisher + publisher_forwardlooking_data = forwardlooking.generate_row(publisher) + + # Convert the data for this publishers 'Percentage of current activities with budgets' fields into integers + numbers = [ int(x) for x in publisher_forwardlooking_data['year_columns'][2].itervalues() if is_number(x) ] + + # Compute and store the mean average for these fields + row['forwardlooking'] = sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data['year_columns'][2]) + + + # Compute comprehensive statistic + # Get the comprehensiveness data for this publisher + publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) + + # Set the comprehensive value to be the summary average for valid data + row['comprehensive'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid']) + + + # Compute score + row['score'] = int( round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3 )) + + # Return a generator object + yield row diff --git a/templates/comprehensiveness.html b/templates/comprehensiveness.html new file mode 100644 index 0000000000..a29025cdca --- /dev/null +++ b/templates/comprehensiveness.html @@ -0,0 +1,47 @@ +{% set tab='summary' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Summary Table of Comprehensiveness Values +{% endblock %} + + +{% block heading_detail %} +

    This tab summarises the average percentage of current activities where elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The columns show the average for each of the core, financial and value-added elements as defined in the associated tabs. The Weighted Average column provides an indication of the overall comprehensiveness of the data. It gives a double weighting to the Core Average as these elements are essential for IATI data to be valid for use.

    + +

    {Weighted Average} = (({Core Average} * 2) + {Financials Average} + {Value Added Average}) / 4

    +{% endblock %} + + +{% block narrative_text %} +

    Overview

    +

    To assess comprehensiveness, publication of selected elements of the standard have been aggregated into three sections. "Core" are the mandatory fields specified by version 2.01 of the Activity Standard. Financials cover publishing of both financial transactions and budgets. Value Added are optional elements of widespread benefit to users.

    + +

    Core Average

    +

    An average of the percentages assigned to the ten mandatory activity elements as specified on the Core tab.

    + +

    Financials Average

    +

    An average of the percentages assigned to four financial elements as specified on the Financials tab.

    + +

    Value Added Average

    +

    An average of the percentages assigned to the ten most useful recommended (non-mandatory) elements as specified on the Value Added tab.

    + +

    Weighted Average

    +

    Twice the Core average plus the Financials average plus the Value-Added average, divided by 4.

    +{% endblock %} + + +{% block assessment_text %} +

    No assessments are currently employed.

    +{% endblock %} + + +{% block exceptions_text %} +

    See, where applicable, the exceptions for each individual element on the associated tabs.

    +{% endblock %} + + +{% block comparison_text %} +

    The original methodology counted all IATI elements. This approach is more selective, is fairer to publishers as it excludes fields not applicable to all types of organisation, and places more emphasis on mandatory fields.

    +{% endblock %} diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html new file mode 100644 index 0000000000..dcccc034cf --- /dev/null +++ b/templates/comprehensiveness_base.html @@ -0,0 +1,443 @@ +{% extends 'base.html' %} +{% import 'boxes.html' as boxes %} + +{% block container %} + + {% block page_header_div %} + {{ super() }} + {% endblock %} + + + + + + {% block content %} +
    +
    + (This table as CSV) +

    {% block table_title %}Table of Comprehensiveness values{% endblock %}

    +
    + + {% if self.heading_detail() %} +
    + {% block heading_detail %}{% endblock %} + {% include 'tablesorter_instructions.html' %} +
    + {% endif %} + + + + + + {% for column_header in comprehensiveness.column_headers[tab] %} + + {% endfor %} + + + + {% for row in comprehensiveness.table() %} + + + {% for column_slug in comprehensiveness.column_slugs[tab] %} + + {% endif %} + {% else %}-{% endif %} + {% endfor %} + + {% endfor %} + +
    Publisher Name{{column_header}}
    {{row.publisher_title}}{% if column_slug in row %} + {{row[column_slug+'_valid']}} + {% if row[column_slug+'_valid'] != row[column_slug] %} + ({{row[column_slug]}})
    +
    + + + {% block narrative %} +
    +
    +

    Narrative

    +
    +
    + {% block narrative_text %}{% endblock %} +
    +
    + {% endblock %} + + + {% block assessment %} +
    +
    +

    Assessment

    +
    +
    + {% block assessment_text %}{% endblock %} +
    +
    + {% endblock %} + + + {% block exceptions %} +
    +
    +

    Exceptions

    +
    +
    + {% block exceptions_text %}{% endblock %} +
    +
    + {% endblock %} + + + {% block comparison %} +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    + {% block comparison_text %} +

    These tests are more targeted than the original methodology which merely checked for the existence of all fields irrespective of their importance.

    + {% endblock %} +
    +
    + {% endblock %} + + +
    +
    +

    Pseudocode

    +
    +
    + +

    For the purpose of this calculation, each iati-activity XML block is an activity.

    + +

    To determine the lowest hierarchy:

    +
    +Lowest hierarchy =
    +   Largest integer reported in the hierarchy attribute of any iati-activity element
    +
    + +

    To determine whether an activity is at the lowest hierarchical level:

    + +
    +If the @hierarchy attribute is missing
    +    If the lowest hierarchical level is 1
    +        Activity is at lowest hierarchical level
    +    Else
    +        Activity is NOT at lowest hierarchical level
    +Else if the @hierarchy attribute == lowest hierarchy
    +    Activity is at lowest hierarchical level
    +Else
    +    Activity is NOT at lowest hierarchical level
    +
    + +

    To test whether an activity is current:

    + +
    +end dates =
    +    For each activity-date of type end-planned or end-actual
    +        Parse activity-date/@iso-date as an iso date ('yyyy-mm-dd...')
    +        If this does not work parse activity-date/text() as an iso date ('yyyy-mm-dd...')
    +        If neither work, ignore this activity-date
    +
    +If activity-status/@code exists
    +    If activity-status/@code is 2
    +        The activity is current
    +    Else
    +        The activity is not current
    +Else
    +    If end dates (see above) is empty
    +        The activity is current
    +    Else
    +        If there is an end date where (date year >= current year)
    +            The activity is current
    +        Else
    +            The activity is not current
    +
    +
    + +

    To determine whether we use an activity is relevant for a given comprehensiveness test.

    + +
    +start date =
    +    If activity-date[@type="start-actual"] exists
    +        Parse activity-date[@type="start-actual"]/@iso_date as an iso date ('yyyy-mm-dd...')
    +            If this works, we have the start date, else null
    +    Else If activity-date[@type="start-planned"] exists
    +        Parse activity-date[@type="start-planned"]/@iso_date as an iso date ('yyyy-mm-dd...')
    +            If this works, we have the start date, else null
    +    Else
    +        null
    +
    +If the activity is current
    +    If we are on the financials tab
    +        If hierarchy = lowest level
    +            If the comprehensiveness test is 'Transaction - Disbursement or Expenditure'
    +                If (start date isn't null
    +                        and start date < today
    +                        and today - start date < 365 days)
    +                    Use activity
    +                Else
    +                    Ignore activity
    +            Else If the comprehensiveness test is 'Transaction - Traceability'
    +                If transaction/transaction-type[@code="IF"] exists (1.xx) or transaction/transaction-type[@code="1"] exists (2.xx)
    +                    Use activity
    +                Else
    +                    Ignore activity
    +            Else
    +                Use activity
    +        Else
    +            Ignore activity
    +    Else
    +        Use activity
    +Else
    +    Ignore activity
    +
    + + {% block table_test_methodology_full %} + + {% block table_test_methodology_header %} + + + {% endblock %} + + {% block table_test_methodology_core %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + + {% block table_test_methodology_financials %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + + {% block table_test_methodology_valueadded %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + +
    Tab + Comprehensiveness Test + Basic Methodology + Validation Methodology +
    CoreVersion + iati-activities/@version must exist for the file the activity is in + iati-activities/@version must be on the Version codelist +
    CoreReporting-Orgreporting-org/@ref and must exist and reporting-org must have textual content
    CoreIati-identifierActivity must contain a nonempty iati-identifierIf publishing at v2.xx, the iati-identifier must start with either a) the value in reporting-org/@ref or b) a value in other-identifier/@ref where @type="B1" (i.e. 'Previous Reporting Organisation Identifier'). No validation check is applied for v1.xx data, as it only became possible to add other-identifier/@type="B1" from v2.01.
    CoreParticipating OrganisationActivity must contain at least 1 participating-orgAt least one participating-org must have @role Funding (1.xx) or 1 (2.xx)
    CoreTitleActivity must contain a non-empty title element
    CoreDescriptionActivity must contain a non-empty description element
    CoreStatusActivity must contain an activity-status elementactivity-status/@code must be on the ActivityStats codelist
    CoreActivity DateActivity must contain at least 1 activity-date elementAt least 1 activity-date element in the activity must have @type equal to start-planned or start-actual (1.xx) or 1 or 2 (2.xx)
    CoreSectorAt least one sector element present at activity level OR in all transactionsIf activity level AND more than 1 per vocab, percentage must add up per vocab
    CoreCountry or Region(recipient-country OR recipient-region) at activity level OR (2.xx only) on all transactionsIf activity level AND more than 1 THEN percentages must add up
    FinancialsTransaction - Commitment + Activity must have at least 1 transaction with @type of 'Commitment' (i.e. C (1.xx) or 2 (2.xx)) or 'Incoming commitment' (i.e. 11).All transactions of this @type must have a value that is a valid xs:decimal AND on of (transaction-date/@iso-date OR value/@value-date) that is a valid xs:date +
    FinancialsTransaction - Disbursement or ExpenditureActivity must have at least 1 transaction with @type D or E (1.xx) or 3 or 4 (2.xx)All transactions of these @type's must have a value that is a valid xs:decimal AND on of (transaction-date/@iso-date OR value/@value-date) that is a valid xs:date +
    FinancialsTransaction - CurrencyAll transactions must have value/@currency OR the activity must have a @default-currency attribute. All transactions must have value/@value-dateAll currency values must be on the Currency codelist. value/@value-date must be valid xs:date.
    FinancialsTransaction - TraceabilityAll transactions of @type=IF (1.xx) or @type=1 (2.xx) must contain provider-org/@provider-activity-id
    FinancialsBudgetActivity must have at least 1 budget element OR the activity must have the budget-not-provided attribute AND no budget elementsEach budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elements
    Value addedContacts + Activity must contain 1 contact-info/email element + +
    Value addedLocation DetailsActivity must contain location/name OR location/description OR location/location-administrative) OR location/point/pos
    Value addedGeographic CoordinatesActivity must contain location/point/poslocation/point/pos must contain valid coordinates (two space separated decimals), and not be 0 0
    Value addedDAC SectorsAt least 1 sector where @vocabulary is DAC or DAC-3 (1.xx) or 1 or 2 (2.xx), must be reported at activity level, unless there is no @vocabulary attribute, in which case DAC/1 is assumed. If there is no DAC sector element at activity level it must be reported within all transactions contained within that activity.Must be valid code on the DAC or DAC-3 lists as appropriate.
    Value addedCapital SpendActivity must contain the capital-spend/@percentage attribute
    Value addedActivity DocumentsActivity must contain at least 1 document-linkEach document-link must contain valid document category code and a url that is a valid xs:anyURI and contains the string //.
    Value addedAid TypeActivity must contain either i) a value in default-aid-type/@code or ii) each transaction must contain a value in transaction/aid-type/@code.Must be valid code on the AidType codelist.
    Value addedRecipient LanguageOnly activities containing only one recipient-country are assessed. Activity must contain title and description elements containing at least one of the official languages spoken for the defined recipient-country/@code.
    Value addedResult/IndicatorActivity must contain the result/indicator element
    + {% endblock %} + +

    Where the Validation Methodology column is blank, no extra conditions are imposed over the basic methodology.

    +

    The main percentage is the percentage of relevant activities that satisfy the basic and validation methodology for the given Comprehensiveness Test and publisher.

    +

    The bracketed percentage is the percentage of relevant activities that satisfy the basic methodology for the given Comprehensiveness Test and publisher. This is only shown if it is different to the main percentage.

    +
    + + +
    + {% endblock %} +{% endblock %} + +{% block tablesorteroptions %}{ + widgets: ['stickyHeaders'], + textExtraction:{ + 1: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 2: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 3: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 4: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 5: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 6: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 7: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 8: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 9: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 10: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 11: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + } + } +} +{% endblock %} +{% block tablesortertarget %}table#main_table{% endblock %} diff --git a/templates/comprehensiveness_core.html b/templates/comprehensiveness_core.html new file mode 100644 index 0000000000..c8c00ede6c --- /dev/null +++ b/templates/comprehensiveness_core.html @@ -0,0 +1,68 @@ +{% set tab='core' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block heading_detail %} +

    Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting-Organisation, IATI-identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

    + +

    This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

    + +

    Key:
    + Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. +

    +{% endblock %} + + +{% block narrative_text %} +

    Overview

    +

    Version 2.01 introduced a more stringent approach to the way in which data can be validated (through the requirement that elements are published in a specified order) and as a result ten elements are mandatory for all activities under all conditions. This tab measures how comprehensively publishers are meeting these requirements, irrespective of whether they are using V2.01 or not.

    + +

    Only current activities are assessed. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date.

    + +

    Details

    +
    Version
    +

    Percentage of all current activities which contain a valid version number in the <iati-activities> file header element.

    + +
    Reporting-Org
    +

    Percentage of all current activities which contain both a name and an identifier for the reporting organisation. (In future this will also check that the identifier contains a valid prefix identifying a registration agency.)

    + +
    Iati-identifier
    +

    Percentage of all current activities that contain a valid activity identifier. This MUST be prefixed with either the identifier reported for the reporting organisation, or (if publishing at v2.xx) an identifier reported in the <other-identifier> element. (In future this will also check that each identifier is globally unique.)

    + +
    Participating Organisation
    +

    Percentage of all current activities that contain a participating organisation of type funding.

    + +
    Title
    +

    Percentage of all current activities that contain a title.

    + +
    Description
    +

    Percentage of all current activities that contain a description.

    + +
    Status
    +

    Percentage of all current activities that contain a validly coded activity status.

    + +
    Activity Date
    +

    Percentage of all current activities that contain a valid planned or actual start date.

    + +
    Sector
    +

    Percentage of all current activities that EITHER contain at least one valid activity-level sector element OR all transactions contain a valid sector element. If multiple sectors are reported per vocabulary at activity level reported percentages must add up to 100% for the activity to be assessed as valid.

    + +
    Country or Region
    +

    Percentage of all current activities that EITHER contain at least one valid activity level recipient country or region OR all transactions containing only one valid country or region. If more than one country and/or region is reported at activity then they must all contain percentages adding up to 100%.

    +{% endblock %} + + +{% block assessment_text %} +

    Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

    +{% endblock %} + + +{% block exceptions_text %} +

    Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. Note that publisher rows consisting of dashes reflect those that have published to IATI in the past but whose portfolio contains no current activities.

    +{% endblock %} + + +{% block table_test_methodology_financials %} +{% endblock %} +{% block table_test_methodology_valueadded %} +{% endblock %} diff --git a/templates/comprehensiveness_financials.html b/templates/comprehensiveness_financials.html new file mode 100644 index 0000000000..43db1eb72f --- /dev/null +++ b/templates/comprehensiveness_financials.html @@ -0,0 +1,54 @@ +{% set tab='financials' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Table of Financial values +{% endblock %} + + +{% block heading_detail %} +

    Four aspects of financial reporting are tracked: the reporting of commitments and spend, the ability to track funds across activities and organisations, and the existence of activity budgets (which are also given added weight in the forward-looking component). Only current activities are assessed.

    + +

    This table shows the percentage of current activities where these financial elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.)

    + +

    The statistics on this page are calculated based on either i) the data in each publishers' lowest hierarchy, or ii) data in the hierarchy which contains the greatest number of budgets for the given publisher - see the narrative section for full details.

    + +

    Key:
    + Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. +

    +{% endblock %} + + +{% block narrative_text %} +
    Transaction - Commitment
    +

    For the data in the chosen hierarchy, the percentage of all current activities that contain at least one transaction of type Commitment or Incoming Commitment. The hierarchy chosen for this calculation is the highest hierarchy that contains an above-defined commitment transaction. Only one hierarchy is selected in line with stated IATI rules on multi-level reporting. + +

    Transaction - Disbursement or Expenditure
    +

    For the data at the publishers' lowest hierarchy, the percentage of current activities that contain at least one transaction of type Disbursement or Expenditure.

    + + +
    Transaction - Traceability
    +

    For the data at the publishers' lowest hierarchy, the percentage of current activities containing a transaction of type Incoming Funds that also contain the IATI identifier for the funding organisation's activity. This links the funds disbursed by one organisation and received by another. (NB activities that do not contain incoming funds transactions are excluded from the calculation.) (In future the syntax of the provider-activity-id will also be validated.)

    + +

    Donor publishers who list themselves within as a participating-org of either 1 (i.e. 'Funding') or 3 (i.e. 'Extending') AND who are not listed as type 4 (i.e. 'Implementing') will be given credit for traceability, as they are at the top of the funding chain.

    + +
    Budget
    +

    For the hierarchy which contains the greatest number of budgets, the percentage of all current activities that contain at least one valid budget entry. A valid budget entry must contain a valid period-start AND a valid period-end AND a valid value AND a valid value-date OR the activity has the @budget-not-provided attribute.

    +{% endblock %} + + +{% block assessment_text %} +

    Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

    +{% endblock %} + + +{% block exceptions_text %} +

    Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. For disbursements and expenditure activities less than one year old (based on activity start date) are also excluded.

    +{% endblock %} + + +{% block table_test_methodology_core %} +{% endblock %} +{% block table_test_methodology_valueadded %} +{% endblock %} diff --git a/templates/comprehensiveness_valueadded.html b/templates/comprehensiveness_valueadded.html new file mode 100644 index 0000000000..fb21e4649e --- /dev/null +++ b/templates/comprehensiveness_valueadded.html @@ -0,0 +1,65 @@ +{% set tab='valueadded' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Table of Value-Added values +{% endblock %} + + +{% block heading_detail %} +

    This table attempts to capture the breadth, richness and usefulness of high quality IATI data. The value added elements included in this table are: Contacts, Location Details, Geographic Coordinates, DAC Sectors, Capital Spend, Activity Documents, Activity Website, Conditions Attached, and Result/Indicator.

    + +

    This table shows the percentage of current activities where these value added elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.)

    + +

    Key:
    + Dashes (all fields except recipient language): Where a publisher has published to IATI in the past but whose portfolio contains no current activities.
    + Dashes (recipient language field): Where a publisher has no current activities targeted at only one recipient country. +

    +{% endblock %} + + +{% block narrative_text %} +
    Contacts
    +

    The percentage of all current activities that contain at least one contact email address.

    + +
    Location Details
    +

    The percentage of all current activities that contain at least one location name, location description, administrative area, or coordinates.

    + +
    Geographic Coordinates
    +

    The importance of geocoded data is the reason why coordinates are assessed in addition to basic location details. The percentage of all current activities that contain at least one set of geographic coordinates.

    + +
    DAC Sectors
    +

    Sector is a mandatory element and is assessed in the Core component. While it is not mandatory for publishers to utilise OECD DAC Sector/Purpose codes it is highly recommended that they do so as these codes are in widespread use and allow for comparison across activities. The percentage of all current activities that contain at least 1 valid DAC-CRS purpose code.

    + +
    Capital Spend
    +

    For sustainable planning it is useful for developing countries to know how project funding is split between capital and recurrent expenditure. The percentage of all current activities that contain a capital spend percentage. (N.B. that 0% is a valid entry.)

    + +
    Activity Documents
    +

    The percentage of all current activities that contain at least 1 document link.

    + +
    Aid Type
    +

    The percentage of all current activities that contain details of the type of aid being supplied. This can be done at activity level using default-aid-type, or at transaction level using aid-type.

    + +
    Recipient Language
    +

    The percentage of activities targeted at only one recipient-country that contain title and description elements with at least one of the official languages spoken in that country. These calculations are based on a list of official languages within each country.

    + +
    Result/Indicator
    +

    The percentage of all current activities that contain at least one validly reported results indicator.

    +{% endblock %} + + +{% block assessment_text %} +

    Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

    +{% endblock %} + + +{% block exceptions_text %} +

    Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. Publisher rows containing dashes indicate that no current activities are published.

    +{% endblock %} + + +{% block table_test_methodology_core %} +{% endblock %} +{% block table_test_methodology_financials %} +{% endblock %} \ No newline at end of file diff --git a/templates/coverage.html b/templates/coverage.html new file mode 100644 index 0000000000..b357228ace --- /dev/null +++ b/templates/coverage.html @@ -0,0 +1,47 @@ +{% extends 'base.html' %} +{% import 'boxes.html' as boxes %} +{% block content %} + + + +{% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { 8: function(node,table,cellIndex) { return $(node).attr('data-severity'); } } +} +{% endblock %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html new file mode 100644 index 0000000000..f33fe85b57 --- /dev/null +++ b/templates/forwardlooking.html @@ -0,0 +1,239 @@ +{% extends 'base.html' %} +{% import 'boxes.html' as boxes %} +{% block content %} + + +
    + +
    + (This table as CSV) +

    Activities with Forward Looking Budget Allocations

    +
    + +
    +

    + The number of activities with budgets reported for each year is compared against the total number of activities current at the start of each year. The first block shows the number of activities that are, or will be, current in this and the next two years. The second block shows how many of these activities contain a budget for the corresponding year. The third block expresses this as a percentage. +

    + +

    + Activities are excluded from forward looking calculations if they contain commitment transactions and 90% of the total commitment value has already been disbursed or expended in the corresponding year or previously. Additionally, activities are excluded if they have less than six months left to run. +

    + +

    Key:
    + Dashes: Where a percentage cannot be calculated, because the denominator is zero.
    + Red flag: Publisher currently publishing forward looking budgets at more than one hierarchical level.
    + Yellow flag: Publisher currently publishing the 'budget not provided' attribute for some or all activities.
    +

    + {% include 'tablesorter_instructions.html' %} +
    + + + + + + {% for column_header in forwardlooking.column_headers %} + + {% endfor %} + + + {% for i in range(0,3) %} + {% for year in forwardlooking.years %} + + {% endfor %} + {% endfor %} + + + + {% for row in forwardlooking.table() %} + + + + {% for column in row.year_columns %} + {% for year in forwardlooking.years %} + + {% endfor %} + {% endfor %} + + + {% endfor %} + +
    Publisher Name{{column_header}} +
    {{year}}
    {{row.publisher_title}} + {{column[year]}} + *{% elif row['budget_not_provided'] %};background-color: #fcf8aa" data-severity="{{row['budget-not-provided']}}">{% elif row['flag'] %};background-color: #f2aaaa" data-severity="{{row['flag']}}">{% else %}">{% endif %}
    +
    + + + +
    +
    +

    Narrative

    +
    +
    +

    Developing countries have, since 2008, been asking their development partners to provide forward-looking data which can be used for both planning and budget preparation. While aggregated country-level budgets have a certain political value it is activity-level data that is of greatest benefit, and which this dimension attempts to assess.

    +

    The standard asks publishers to break down their total commitment to an activity into annual or quarterly budgets - i.e. the sum of the reported budgets matches the sum of commitments. It is stressed that these budget breakdowns are indicative and are in no way binding.

    +

    This assessment counts the number of current activities for this and the next two years that contain budgets. It is based on a number of assumptions:

    +
      +
    • For any given future year all current activities should contain a budget.
    • +
    • Activities are deemed to be current in any given year if their end date is reported to be in this year or beyond (or if there is no end date)
    • +
    • Counting the number of activities that contain budgets provides a fairer result than summing the value of these budgets. The proportion of a publisher's total commitment for a future year that has already been committed to existing projects may vary greatly (e.g. you may have earmarked an amount to spend in three-years’ time, but not yet agreed on how to spend it.)
    • +
    • For publishers reporting multiple hierarchical levels ONLY the level that budgets are reported at is used in this calculation. However if budgets are reported at multiple levels, all activities are counted, and the publisher is marked with a red flag.
    • +
    +

    As noted above, activities are excluded from forward looking calculations if they contain commitment transactions and 90% of the total commitment value has already been disbursed or expended in the corresponding year or previously. Additionally, activities are excluded if they have less than six months left to run (based on the reported actual or planned end date).

    +
    +
    + + +
    +
    +

    Assessment

    +
    +
    +

    No separate assessment is provided as the percentage of current activities containing budgets for this and the next two years is the de facto assessment. No attempt is currently being made to turn these into a descriptive summary (as, for example, "Frequency = "Monthly"). The percentage for the middle year (i.e. 'next year') is of most relevance to developing countries.

    + +
    +
    + + + +
    +
    +

    Exceptions

    +
    +
    +

    Dashes

    +

    Where a percentage cannot be calculated, because the denominator is zero, a dash is used.

    +

    Red Flags

    +

    Publishers currently publishing forward looking budgets at more than one hierarchical level.

    +

    Yellow Flags

    +

    Publishers currently publishing the 'budget not provided' attribute for some or all activities.

    +
    +
    + + + +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    +

    This methodology differs substantially from the original GP Indicator in two ways.

    +
      +
    • All current activities are assessed, NOT only those containing Country Programmable Aid. CPA is calculated by the Forward Spending Survey by excluding activities based on a complex filtering of purpose codes, finance types and aid types. Firstly this is difficult to explain. Secondly the provision, or lack thereof, of forward looking data is not believed to be determined by CPA status. Thirdly, as a multi-stakeholder standard many IATI publishers, in particular implementing agencies, do not necessarily report CRS-specific fields.
    • +
    • As explained above the number of activities with budgets, NOT the value of budgets is counted.
    • +
    +
    +
    + + +
    +
    +

    Pseudocode

    +
    +
    + +

    For the purpose of this calculation, each iati-activity XML block is an activity.

    + +

    To test whether an activity is current in a given year:

    + +
    +end dates =
    +    For each activity-date that describes an end date (type is end-planned or end-actual in 1.xx, 3 or 4 in 2.xx)
    +        Parse activity-date/@iso-date as an iso date ('yyyy-mm-dd...')
    +        If this does not work parse activity-date/text() as an iso date ('yyyy-mm-dd...')
    +        If neither work, ignore this activity-date
    +If no end dates were successfully parsed
    +    The activity is current
    +Else
    +    If there is an end date where (date year >= given year)
    +        The activity is current
    +    Else
    +        The activity is not current
    +
    + +

    To find the year for a budget:

    + +
    +start =
    +    Parse period-start/@iso-date as an iso date ('yyyy-mm-dd...')
    +    If this does not work parse period-start/text() as an iso date ('yyyy-mm-dd...')
    +    Otherwise null
    +end =
    +    Parse period-end/@iso-date as an iso date ('yyyy-mm-dd...')
    +    If this does not work parse period-end/text() as an iso date ('yyyy-mm-dd...')
    +    Otherwise null
    +
    +If start and end are both not null
    +    If (end - start <= 370 days)
    +        If end month >= 7
    +            budget year = end year
    +        Else
    +            budget year = end year - 1
    +    Else ignore the budget
    +Else budget year is null
    +
    + +

    The relevant hierarchical level is:

    + +
    +If the @hierarchy attribute is missing the hierarchy value is 1
    +Else the hierarchy value is the value of the @hierarchy attribute
    +
    +If all budgets for current activities in the given years have the same hierarchy value
    +    Only activities with that hierarchy value are at a relevant hierarchical level
    +Else
    +    All activities are considered to be at a relevant hierarchical level
    +
    + +

    To calculate the "Current activities" column, count the number of activities that are:

    +
      +
    • at a relevant hierarchical level (see above)
    • +
    • AND current
    • +
    +

    To calculate the "Current activities with budgets" column, count the number of activities that are: +

      +
    • at a relevant hierarchical level (see above)
    • +
    • AND current
    • +
    • AND contain at least one budget with a budget year (as described above) that matches the year of the column OR contains the budget-not-provided attribute
    • +
    +

    + +
    +
    +{% endblock %} + +{% block tablesorteroptions %}{ + widgets: ['stickyHeaders'], + textExtraction:{ + 7: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + }, + 8: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + }, + 9: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + } + } +}{% endblock %} +{% block tablesortertarget %}table#main_table{% endblock %} + diff --git a/templates/summary_stats.html b/templates/summary_stats.html new file mode 100644 index 0000000000..0f3615b63c --- /dev/null +++ b/templates/summary_stats.html @@ -0,0 +1,166 @@ +{% extends 'base.html' %} +{% import 'boxes.html' as boxes %} +{% block content %} + + + +
    +
    + (This table as CSV) +

    Summary Statistics

    +
    + + +
    +

    This table assesses all IATI publishers by scoring three dimensions – Timeliness, Forward-looking and Comprehensiveness. The methodology is explained below the table and in the related Publisher Statistics pages. In summary:

    + +

    {Score} = ( {Timeliness} + {Forward looking} + {Comprehensive} ) / 3  

    + + {% include 'tablesorter_instructions.html' %} +
    + + + + + + {% for column_slug, column_header in summary_stats.columns %} + + {% endfor %} + + + + {% for row in summary_stats.table() %} + + + {% for column_slug, column_header in summary_stats.columns %} + + + {% endfor %} + +
    Publisher Name{{column_header}}
    {{row.publisher_title}}{{row[column_slug]}} + {% endfor %} +
    +
    + + +
    +
    +

    Narrative

    +
    +
    +

    Timeliness

    +

    This is calculated by scoring the assessments made on the + frequency and timelag pages on a scale of + 0 to 4 (as below), dividing the sum of the two scores by 8, and expressing the result as + a percentage. The methodology used in making the assesments is detailed on the frequency and timelag pages. +

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Frequency assessmentScore
    Monthly4
    Quarterly3
    Six-Monthly2
    Annual1
    Less than Annual0
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Time lag assessmentScore
    One month4
    A quarter3
    Six months2
    One year1
    More than one year0
    + + +

    Forward looking

    +

    The average percentage of current activities with budgets for each of the years {{current_year}} - {{current_year + 2}}. + The component values and a detailed methodology are displayed on the forward looking page. +

    + + +

    Comprehensive

    +

    The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

    + + +

    Score

    +

    The mean average of the three values above.

    + +

    {Score} = ( {Timeliness} + {Forward looking} + {Comprehensive} ) / 3

    + +
    +
    + + +
    +
    +

    Exceptions

    +
    +
    +
    Secondary reporters
    +

    Publishers who publish all of their activities as a secondary reporter do not appear in this table. + {% if summary_stats.secondary_publishers|length > 0 %} + Therefore, a total of {{ summary_stats.secondary_publishers|length }} publishers have been excluded in this regard: +

      + {% for publisher in summary_stats.secondary_publishers %} +
    • {{ publisher_name[publisher] }}
    • + {% endfor %} +
    + {% endif %} +

    +
    +
    + + +{% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'] +} +{% endblock %} diff --git a/templates/timeliness.html b/templates/timeliness.html new file mode 100644 index 0000000000..657ea59561 --- /dev/null +++ b/templates/timeliness.html @@ -0,0 +1,272 @@ +{% extends 'timeliness_base.html' %} +{% import 'boxes.html' as boxes %} + +{% block frequency_li %} class="active"{% endblock %} + +{% block content %} +
    + +
    + (This table as CSV) +

    Table of Frequency assessments

    +
    + + + +
    +

    This table seeks to measure how often a publisher updates their data. There is no simple answer as to what constitutes an update. Can any trivial edit be regarded as an update? As transactions are the most numerous element reported in IATI the adopted methodology assumes that a publisher has updated their data if a transaction with a more recent transaction date than previously published is detected across the publisher's entire portfolio.

    + +

    The table records the number of days in each of the last twelve months on which the most recently recorded transaction date was observed by the Dashboard to have changed. (The current month is also displayed for informational purposes, but is not used in the assessment.)

    + +

    Key:
    + Red flag: Publisher currently publishing future transaction dates.
    + Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

    + +

    You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum

    + {% include 'tablesorter_instructions.html' %} +
    + + + + + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_frequency_sorted() %} + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + {% set hft=publisher|has_future_transactions %} + + + + {% endfor %} + +
    Publisher Name + {{timeliness.this_year-1}} + {{timeliness.this_year}} + + Frequency +
    {{timeliness.short_month(month)}}{{timeliness.short_month(timeliness.this_month)}}
    {{publisher_title}}{{per_month[month] or 0}}{{per_month[timeliness.this_month] or 0}}{% if hft %}*{% endif %}{{assessment}}
    +
    + + +
    +
    +

    Summary of Publisher Performance

    +
    + + + + + + + + + {% set summary = timeliness.publisher_frequency_summary() %} + {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.frequency_index) %} + + + + + {% endfor %} + + + + + + +
    FrequencyCount
    {{assessment}}{{count}}
    Total{{summary.values()|sum}}
    +
    + + + +
    +
    +

    Narrative

    +
    +
    +

    The frequency statistics attempt to assess how often any part of a publisher's data is substantively updated.

    + +

    For the purposes of these statistics an update is assumed to have taken place on any given day when the most recently recorded transaction date across a publisher's entire portfolio is observed to have changed to a more recent date. This approach has been adopted as transactions are the most numerous and most frequently updated elements in the reporting of activities.

    + +

    The table of statistics records the number of days in each of the last twelve calendar months (the current month is also displayed for informational purposes, but is not used in the assessment) on which the most recently recorded transaction date was observed by the Dashboard to have changed. The Dashboard maintains a statistical snapshot of each day, which allows for this data to be recalculated using historical recordings.

    +
    +
    + + + +
    +
    +

    Assessment

    +
    +
    +

    To assess these statistics one also has to take into account how long a publisher has been publishing to IATI. This is calculated based on when a publisher first appeared in the statistical snapshot.

    +

    These statistics are then assessed as follows:

    + +

    For publishers of 1 year or more

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    7 or more of the past 12 full months AND data updated at least once in last 2 full monthsMonthly
    3 or more of the past 12 full months AND data updated at least once in last 4 full monthsQuarterly
    2 of the last 6 month periodsSix-monthly
    1 of the last 12 monthsAnnual
    None of the last 12 monthsLess than annual
    + +

    For publishers of six months or more

    + + + + + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    4 of the last 6 monthsMonthly
    2 of the last 2 quartersQuarterly
    1 of the last 12 monthsAnnual
    + +

    For publishers of three months or more

    + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    3 of the last 3 monthsMonthly
    1 of the last 6 monthsAnnual
    + +

    For publishers of less than 3 months

    + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    1 of the last 3 monthsAnnual
    +
    +
    + + + +
    +
    +

    Exceptions

    +
    +
    +

    Future transaction dates disrupt these statistics. For example a publisher might today report a transaction date for each month for the next year and never refresh their data. Over the next year, as each of these future dates move into the past, the statistics would incorrectly give the publisher a frequency assessment of monthly, even though they did not refresh their data.

    + +

    Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

    + +

    In our further work on this dashboard page, we plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

    +
    +
    + + + +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    +

    This methodology is substantially different. + +

    In the original Indicator methodology the IATI Registry log dates were analysed to assess when updates had been made. This approach was flawed as the Registry logs record any change, no matter how trivial. A spelling correction, for example, would count as an update. Similarly if a publisher's file was inaccessible, its reappearance would count as an update.

    +
    +
    + + + +
    +
    +

    Pseudocode

    +
    +
    +

    To get a count of updates by calendar month (for a given publisher):

    +
    +For data captured each day over the past year
    +    For each transaction (of any type) in the publisher's data
    +        Get the transaction date as follows:
    +            If transaction-date exists
    +                If transaction-date/@iso-date exists
    +                    Use transaction-date/@iso-date
    +                Else
    +                    Use transaction-date/text()
    +            Else if value/@value-date exists
    +                Use value/@value-date
    +            Else the transaction is ignored
    +        Parse the start of the transaction date as an iso date (yyyy-mm-dd...).
    +            If it does not match, the transaction is ignored.
    +        Store a record of this transaction date.
    +    Of the recorded dates, find the latest date that is on or before the date the data was captured.
    +    Record this date against the date of data capture
    +previous transaction date = 0001-01-01
    +Loop over the list of dates
    +    If transaction date > previous transaction date
    +        previous transaction date = transaction date
    +        Record an update as having happened on this day
    +Count the updates by calendar month
    +
    +
    +
    +{% endblock %} diff --git a/templates/timeliness_base.html b/templates/timeliness_base.html new file mode 100644 index 0000000000..d0618b9a2e --- /dev/null +++ b/templates/timeliness_base.html @@ -0,0 +1,47 @@ +{% extends 'base.html' %} +{% import 'boxes.html' as boxes %} + +{% block container %} + + {% block page_header_div %} + {{ super() }} + {% endblock %} + + + + + + {% block content %} + {% endblock %} +{% endblock %} + + + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { 14: function(node,table,cellIndex) { return $(node).attr('data-severity'); }, + 15: function(node,table,cellIndex) { return $(node).attr('data-index'); } } +} +{% endblock %} +{% block tablesortertarget %}table#main_table{% endblock %} + diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html new file mode 100644 index 0000000000..191b8401bc --- /dev/null +++ b/templates/timeliness_timelag.html @@ -0,0 +1,205 @@ +{% extends 'timeliness_base.html' %} +{% import 'boxes.html' as boxes %} + +{% block timelag_li %} class="active"{% endblock %} + +{% block content %} +
    + +
    + (This table as CSV) +

    Table of Time lag assessments

    +
    + + +
    +

    The time-lag statistics attempt to assess how up to date the data is at the point that it is refreshed. For instance a publisher may refresh their data monthly, but the refreshed data is in fact three months old. Alternatively a publisher may only refresh their data once a year, but when they do it contains current data that is less than one month out of date. Transactions are the most numerous and most regularly refreshed elements in reported IATI activities and they are therefore used to make this assessment. The table of statistics shows the number of transaction dates reported in each of the last twelve calendar months. The current month is shown for informational purposes, but excluded from the assessment.

    + +

    Key:
    + Red flag: Publisher currently publishing future transaction dates.
    + Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

    + +

    You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum.

    + + {% include 'tablesorter_instructions.html' %} +
    + + + + + + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_timelag_sorted() %} + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + {% set hft=publisher|has_future_transactions %} + + + + {% endfor %} + +
    Publisher Name + {{timeliness.this_year-1}} + {{timeliness.this_year}} + + Time lag +
    {{timeliness.short_month(month)}}{{timeliness.short_month(timeliness.this_month)}}
    {{publisher_title}}{{per_month[month] or 0}}{{per_month[timeliness.this_month] or 0}}{% if hft %}*{% endif %}{{assessment}}
    +
    + + + + +
    +
    +

    Summary of Publisher Performance

    +
    + + + + + + + + + {% set summary = timeliness.publisher_timelag_summary() %} + {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.timelag_index) %} + + + + + {% endfor %} + + + + + + +
    FrequencyCount
    {{assessment}}{{count}}
    Total{{summary.values()|sum}}
    +
    + + + + +
    +
    +

    Narrative

    +
    +
    +

    The time-lag statistics attempt to assess how up to date the data is at the point that it is refreshed. For instance a publisher may refresh their data monthly, but the refreshed data is in fact three months old. Alternatively a publisher may only refresh their data once a year, but when they do it contains current data that is less than one month out of date.

    + +

    Transactions are the most numerous and most regularly refreshed elements in reported IATI activities and they are therefore used to make this assessment.

    + +

    The table of statistics shows the number of transaction dates reported in each of the last twelve calendar months. The current month is shown for informational purposes, but excluded from the assessment.

    +
    +
    + + + + + +
    +
    +

    Assessment

    +
    +
    +

    These statistics are assessed as follows:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Transactions reported for ...Assessment
    2 of the last 3 monthsOne month in arrears
    1 of the last 3 monthsA quarter in arrears
    Any of the last 6 monthsSix months in arrears
    1 of the last 12 monthsOne year in arrears
    None of the last 12 monthsMore than one year in arrears
    +
    +
    + + + + +
    +
    +

    Exceptions

    +
    +
    +

    Future transaction dates disrupt these statistics. For example a publisher might today report a transaction date for each month for the next year and never refresh their data. Over the next year, as each of these future dates move into the past, the statistics would incorrectly give the publisher a time-lag assessment of one month in arrears, even though they did not refresh their data.

    + +

    Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

    + +

    In our further work on this dashboard page, we plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

    +
    +
    + + + + +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    +

    No change.

    +
    +
    + + + +
    +
    +

    Pseudocode

    +
    +
    +

    To get a count of transactions by calendar month (for a given publisher):

    +
    +Using most recently captured data.
    +For each transaction (of any type) in the publisher's data:
    +    Get the transaction date as follows:
    +        If transaction-date exists
    +            If transaction-date/@iso-date exists
    +                Use transaction-date/@iso-date
    +            Else
    +                Use transaction-date/text()
    +        Else if value/@value-date exists
    +            Use value/@value-date
    +        Else the transaction is ignored
    +    Parse the start of the transaction date as an iso date (yyyy-mm-dd...).
    +        If it does not match, the transaction is ignored.
    +    Record a count of the transaction date against the calendar year and month
    +
    +
    +
    +{% endblock %} diff --git a/text.py b/text.py index 1263decad5..c75c1abc92 100644 --- a/text.py +++ b/text.py @@ -94,7 +94,7 @@ top_navigation = ['index', 'headlines', 'data_quality', 'exploring_data', 'faq'] navigation = { - 'headlines': ['publishers', 'files', 'activities'], + 'headlines': [ 'publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], 'exploring_data': ['elements', 'codelists', 'booleans', 'dates'], 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'coverage', 'summary_stats', 'humanitarian'] From 8778f3b866b9fdbee7f4001cb4f547fbbba88f22 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Apr 2021 22:10:21 +0100 Subject: [PATCH 118/375] Revert "Remove timeliness.py" This reverts commit 8999ac2a303d2200d4af2857b4802c255d4bb7cb. --- timeliness.py | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 timeliness.py diff --git a/timeliness.py b/timeliness.py new file mode 100644 index 0000000000..4775bfeb3e --- /dev/null +++ b/timeliness.py @@ -0,0 +1,198 @@ +# This file converts raw timeliness data into the associated Dashboard assessments + +from __future__ import print_function +from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches +import datetime +from dateutil.relativedelta import relativedelta +from collections import defaultdict, Counter + + +def short_month(month_str): + """Return the 'short month' represeentation of a date which is inputted as a string, seperated with dashes + For example '01-03-2012' returns 'Mar' + """ + short_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + return short_months[int(month_str.split('-')[1]) - 1] + + +def parse_iso_date(d): + """Parse a string representation of a date into a datetime object + """ + try: + return datetime.date(int(d[:4]), 
int(d[5:7]), int(d[8:10])) + except (ValueError, TypeError): + return None + + +def previous_months_generator(d): + """Returns a generator object with the previous month for a given datetime object + """ + year = d.year + month = d.month + for i in range(0,12): + month -= 1 + if month <= 0: + year -= 1 + month = 12 + yield year,month + +# Store lists of previous months +previous_months = ['{}-{}'.format(year,str(month).zfill(2)) for year,month in previous_months_generator(datetime.date.today())] +previous_months_reversed=list(reversed(previous_months)) + +# Store the current month as a string +today = datetime.date.today() +this_month = '{}-{}'.format(today.year, str(today.month).zfill(2)) + +# Store a list of the past 12 months from today +previous_month_days = [today - relativedelta(months=x) for x in range(1, 13)] + +# Store the current month and year numbers +this_month_number = datetime.datetime.today().month +this_year = datetime.datetime.today().year + + +def publisher_frequency(): + """Generate the publisher frequency data + """ + + # Load all the data from 'gitaggregate-publisher-dated' into memory + gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') + + # Loop over each publisher - i.e. 
a publisher folder within 'gitaggregate-publisher-dated' + for publisher, agg in gitaggregate_publisher.items(): + + # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher + if not 'most_recent_transaction_date' in agg: + continue + + # Skip if this publisher appears in the list of publishers who have since changed their Registry ID + if publisher in get_registry_id_matches().keys(): + continue + + updates_per_month = defaultdict(int) + previous_transaction_date = datetime.date(1,1,1) + + # Find the most recent transaction date and parse into a datetime object + for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()): + transaction_date = parse_iso_date(transaction_date_str) + + # If transaction date has increased + if transaction_date is not None and transaction_date > previous_transaction_date: + previous_transaction_date = transaction_date + updates_per_month[gitdate[:7]] += 1 + + # Find the first date that this publisher made data available, and parse into a datetime object + first_published_string = sorted(agg['most_recent_transaction_date'])[0] + first_published = parse_iso_date(first_published_string) + + # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment + + if first_published >= previous_month_days[2]: + # This is a publisher of less than 3 months + #if True in [ x in updates_per_month for x in previous_months[:3] ]: + frequency = 'Annual' + elif first_published >= previous_month_days[5]: + # This is a publisher of less than 6 months + if all([ x in updates_per_month for x in previous_months[:3] ]): + frequency = 'Monthly' + else: + frequency = 'Annual' + elif first_published >= previous_month_days[11]: + # This is a publisher of less than 12 months + if [ x in updates_per_month for x in previous_months[:6] ].count(True) >= 4: + frequency = 'Monthly' + elif any([ x in updates_per_month for x in previous_months[:3] ]) and any([ x 
in updates_per_month for x in previous_months[3:6] ]): + frequency = 'Quarterly' + else: + frequency = 'Annual' + else: + # This is a publisher of 1 year or more + if ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 7) and ([ x in updates_per_month for x in previous_months[:2] ].count(True) >= 1): + # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. + frequency = 'Monthly' + elif ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 3) and ([ x in updates_per_month for x in previous_months[:4] ].count(True) >= 1): + # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months. + frequency = 'Quarterly' + elif any([ x in updates_per_month for x in previous_months[:6] ]) and any([ x in updates_per_month for x in previous_months[6:12] ]): + # There has been an update in 2 of the last 6 month periods + frequency = 'Six-Monthly' + elif any([ x in updates_per_month for x in previous_months[:12] ]): + # There has been an update in 1 of the last 12 months + frequency = 'Annual' + else: + # There has been an update in none of the last 12 months + frequency = 'Less than Annual' + + # If the publisher is in the list of current publishers, return a generator object + if publisher in publisher_name: + yield publisher, publisher_name.get(publisher), updates_per_month, frequency + + +def frequency_index(frequency): + return ['Monthly', 'Quarterly', 'Six-Monthly', 'Annual', 'Less than Annual'].index(frequency) + +def publisher_frequency_sorted(): + return sorted(publisher_frequency(), key=lambda (publisher, publisher_title , _, frequency): ( + frequency_index(frequency), + publisher_title + )) + +def publisher_frequency_dict(): + publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0] ) + data = {} + for v in publisher_data_list: + data[v[0]] = v + return data + +def publisher_frequency_summary(): + return 
Counter(frequency for _,_,_,frequency in publisher_frequency()) + +def timelag_index(timelag): + return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) + +def publisher_timelag_sorted(): + publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] + return sorted(publisher_timelags, key=lambda (publisher, publisher_title, _, timelag): ( + timelag_index(timelag), + publisher_title + )) + +def publisher_timelag_dict(): + publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] + data = {} + for v in publisher_timelags: + data[v[0]] = v + return data + +def publisher_timelag_summary(): + return Counter(timelag for _,_,_,timelag in publisher_timelag_sorted()) + +blacklist_publisher = JSONDir('./stats-blacklist/gitaggregate-publisher-dated') + +def has_future_transactions(publisher): + """ + returns 0, 1 or 2 + Returns 2 if the most recent data for a publisher has future transactions. + Returns 1 if the publisher has ever had future transactions. + Returns -1 if the publisher has not been checked for some reason. + Returns 0 otherwise. 
+ """ + publisher_stats = get_publisher_stats(publisher) + if 'transaction_dates' in publisher_stats: + for transaction_type, transaction_counts in publisher_stats['transaction_dates'].items(): + for transaction_date_string, count in transaction_counts.items(): + transaction_date = parse_iso_date(transaction_date_string) + if transaction_date and transaction_date > datetime.date.today(): + return 2 + if publisher not in blacklist_publisher: + return -1 + today = datetime.date.today() + mindate = datetime.date(today.year-1, today.month, 1) + for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items(): + if parse_iso_date(date) >= mindate and activity_blacklist: + return 1 + return 0 + +def sort_first(list_, key): + return sorted(list_, key=lambda x: key(x[0])) From c49ea5d89ac4bb6fe6298c617844c5dbdabf62f1 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Apr 2021 22:33:23 +0100 Subject: [PATCH 119/375] Bring pubstats code up to date --- comprehensiveness.py | 51 ++++++++------- forwardlooking.py | 7 ++- make_csv.py | 89 +++++++++++++-------------- make_html.py | 10 +-- summary_stats.py | 18 +++--- templates/comprehensiveness_base.html | 4 +- templates/coverage.html | 10 +-- templates/forwardlooking.html | 4 +- templates/summary_stats.html | 4 +- templates/timeliness.html | 4 +- templates/timeliness_base.html | 2 +- templates/timeliness_timelag.html | 4 +- text.py | 5 +- timeliness.py | 86 ++++++++++++++------------ 14 files changed, 151 insertions(+), 147 deletions(-) diff --git a/comprehensiveness.py b/comprehensiveness.py index 962e6d567e..8da2b7f59f 100644 --- a/comprehensiveness.py +++ b/comprehensiveness.py @@ -9,7 +9,7 @@ ('core_average', 'Core Average', 2), ('financials_average', 'Financials Average', 1), ('valueadded_average', 'Value Added Average', 1), - ('summary_average', 'Weighted Average', 0), # i.e. 
don't include the average within the calculation of the average + ('summary_average', 'Weighted Average', 0), # i.e. don't include the average within the calculation of the average ], 'core': [ ('version', 'Version', 1), @@ -22,16 +22,16 @@ ('activity-date', 'Activity Date', 1), ('sector', 'Sector', 1), ('country_or_region', 'Country or Region', 1), - ('core_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + ('core_average', 'Average', 0), # i.e. don't include the average within the calculation of the average ], 'financials': [ ('transaction_commitment', 'Transaction - Commitment', 1, 'first_hierarchy_with_commitments'), ('transaction_spend', 'Transaction - Disbursement or Expenditure', 1, 'bottom_hierarchy'), ('transaction_traceability', 'Transaction - Traceability', 1, 'bottom_hierarchy'), ('budget', 'Budget', 1, 'hierarchy_with_most_budgets'), - ('financials_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + ('financials_average', 'Average', 0), # i.e. don't include the average within the calculation of the average ], - 'valueadded':[ + 'valueadded': [ ('contact-info', 'Contacts', 1), ('location', 'Location Details', 1), ('location_point_pos', 'Geographic Coordinates', 1), @@ -41,12 +41,12 @@ ('aid_type', 'Aid Type', 1), ('recipient_language', 'Recipient Language', 1), ('result_indicator', 'Result/ Indicator', 1), - ('valueadded_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + ('valueadded_average', 'Average', 0), # i.e. 
don't include the average within the calculation of the average ]} # Build dictionaries for all the column_headers and column_slugs defined above -column_headers = {tabname:[x[1] for x in values] for tabname, values in columns.items()} -column_slugs = {tabname:[x[0] for x in values] for tabname, values in columns.items()} +column_headers = {tabname: [x[1] for x in values] for tabname, values in columns.items()} +column_slugs = {tabname: [x[0] for x in values] for tabname, values in columns.items()} # Build directory to lookup the hierarchy which should be used in the numerator # e.g. {'activity-date': 'all', 'activity-status': 'all', [...] budget': 'hierarchy_with_most_budgets', [etc]} @@ -54,7 +54,7 @@ col[0]: col[3] if len(col) > 3 else 'all' for col_group, col_components in columns.items() for col in col_components - } +} def denominator(key, stats): @@ -86,10 +86,10 @@ def get_hierarchy_with_most_budgets(stats): try: # Get the key with the largest number of budgets - budgets = max(stats['by_hierarchy'], key=(lambda x: - stats['by_hierarchy'][x]['comprehensiveness'].get('budget', 0) + - stats['by_hierarchy'][x]['comprehensiveness'].get('budget_not_provided', 0) - if stats['by_hierarchy'][x]['comprehensiveness_denominator_default'] > 0 else None) + budgets = max(stats['by_hierarchy'], key=( + lambda x: + stats['by_hierarchy'][x]['comprehensiveness'].get('budget', 0) + stats['by_hierarchy'][x]['comprehensiveness'].get('budget_not_provided', 0) + if stats['by_hierarchy'][x]['comprehensiveness_denominator_default'] > 0 else -1) ) return budgets except KeyError: @@ -107,8 +107,8 @@ def get_first_hierarchy_with_commitments(stats): Number of first hierarchy with commitments or None if no commitments in any hierarchy """ hierarchies_with_commitments = {x: y['comprehensiveness']['transaction_commitment'] - for x,y in stats.get('by_hierarchy',{}).iteritems() - if y['comprehensiveness'].get('transaction_commitment', 0) > 0} + for x, y in stats.get('by_hierarchy', 
{}).items() + if y['comprehensiveness'].get('transaction_commitment', 0) > 0} return min(hierarchies_with_commitments) if len(hierarchies_with_commitments) else None @@ -123,7 +123,6 @@ def generate_row(publisher): row['publisher'] = publisher row['publisher_title'] = publisher_name[publisher] - # Calculate percentages for publisher data populated with any data for slug in column_slugs['core'] + column_slugs['financials'] + column_slugs['valueadded']: @@ -158,22 +157,22 @@ def generate_row(publisher): if denominator(slug, publisher_base) != 0: # Populate the row with the %age row[slug] = int(round( - float(numerator_all)/denominator(slug, publisher_base)*100 - )) - row[slug+'_valid'] = int(round( - float(numerator_valid)/denominator(slug, publisher_base)*100 - )) + float(numerator_all) / denominator(slug, publisher_base) * 100 + )) + row[slug + '_valid'] = int(round( + float(numerator_valid) / denominator(slug, publisher_base) * 100 + )) # Loop for averages # Calculate the average for each grouping, and the overall 'summary' average for page in ['core', 'financials', 'valueadded', 'summary']: # Note that the summary must be last, so that it can use the average calculations from the other groupings - row[page+'_average'] = int(round( - sum((row.get(x[0]) or 0)*x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) - )) - row[page+'_average_valid'] = int(round( - sum((row.get(x[0]+'_valid') or 0)*x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) - )) + row[page + '_average'] = int(round( + sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + )) + row[page + '_average_valid'] = int(round( + sum((row.get(x[0] + '_valid') or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + )) return row diff --git a/forwardlooking.py b/forwardlooking.py index e843ec0c2d..c444f576f9 100644 --- a/forwardlooking.py +++ b/forwardlooking.py @@ -7,7 +7,7 @@ this_year = 
datetime.date.today().year # Create a list containing three years: the current year and two following -years = map(str, range(this_year, this_year + 3)) +years = list(map(str, range(this_year, this_year + 3))) # Set column groupings, to be displayed in the user output column_headers = [ @@ -16,6 +16,7 @@ 'Percentage of current activities with budgets' ] + def generate_row(publisher): """Generate forward-looking table data for a given publisher """ @@ -27,7 +28,7 @@ def generate_row(publisher): row = {} row['publisher'] = publisher row['publisher_title'] = publisher_name[publisher] - row['year_columns'] = [{},{},{}] + row['year_columns'] = [{}, {}, {}] row['budget_not_provided'] = False # Work with hierarchies by_hierarchy = publisher_stats['by_hierarchy'] @@ -66,7 +67,7 @@ def generate_row(publisher): if not int(row['year_columns'][0][year]): row['year_columns'][2][year] = '-' else: - row['year_columns'][2][year] = int(round(float(row['year_columns'][1][year])/float(row['year_columns'][0][year])*100)) + row['year_columns'][2][year] = int(round(float(row['year_columns'][1][year]) / float(row['year_columns'][0][year]) * 100)) else: # Else if either 'forwardlooking_activities_current' or 'forwardlooking_activities_with_budgets' are not in the bottom hierarchy, set data zero # This should only occur if a publisher has 0 activities diff --git a/make_csv.py b/make_csv.py index b3c9026fc0..8d72ef687b 100644 --- a/make_csv.py +++ b/make_csv.py @@ -75,16 +75,17 @@ def publisher_dicts(): # Timeliness CSV files (frequency and timelag) import timeliness + previous_months = timeliness.previous_months_reversed for fname, f, assessment_label in ( ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') - ): +): with open(os.path.join('out', fname), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + 
[assessment_label]) - for publisher, publisher_title, per_month,assessment in f(): + for publisher, publisher_title, per_month, assessment in f(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) @@ -93,9 +94,9 @@ def publisher_dicts(): with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [ '{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) for row in forwardlooking.table(): - writer.writerow([row['publisher_title'], row['publisher']] + [ year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) + writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) # Comprehensiveness CSV files ('summary', 'core', 'financials' and 'valueadded') @@ -104,49 +105,45 @@ def publisher_dicts(): for tab in comprehensiveness.columns.keys(): with open(os.path.join('out', 'comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [ x+' (with valid data)' for x in comprehensiveness.column_headers[tab] ] + - [ x+' (with any data)' for x in comprehensiveness.column_headers[tab] ]) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]]) for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] - + [ row[slug+'_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ] - + [ 
row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ]) - - -# Coverage CSV file -import coverage - -with open(os.path.join('out', 'coverage.csv'), 'w') as fp: - writer = csv.writer(fp) - # Add column headers - writer.writerow([ - 'Publisher Name', - 'Publisher Registry Id', - '2014 IATI Spend (US $m)', - '2015 IATI Spend (US $m)', - '2014 Reference Spend (US $m)', - '2015 Reference Spend (US $m)', - '2015 Official Forecast (US $m)', - 'Spend Ratio (%)', - 'No reference data available (Historic publishers)', - 'No reference data available (New publishers)', - 'Data quality issue reported' - ]) - for row in coverage.table(): - # Write each row - writer.writerow([ - row['publisher_title'], - row['publisher'], - row['iati_spend_2014'], - row['iati_spend_2015'], - row['reference_spend_2014'], - row['reference_spend_2015'], - row['official_forecast_2015'], - row['spend_ratio'], - row['no_data_flag_red'], - row['no_data_flag_amber'], - row['spend_data_error_reported_flag'] - ]) + writer.writerow([row['publisher_title'], row['publisher']] + [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) + + +# # Coverage CSV file +# import coverage + +# with open(os.path.join('out', 'coverage.csv'), 'w') as fp: +# writer = csv.writer(fp) +# # Add column headers +# writer.writerow([ +# 'Publisher Name', +# 'Publisher Registry Id', +# '2014 IATI Spend (US $m)', +# '2015 IATI Spend (US $m)', +# '2014 Reference Spend (US $m)', +# '2015 Reference Spend (US $m)', +# '2015 Official Forecast (US $m)', +# 'Spend Ratio (%)', +# 'No reference data available (Historic publishers)', +# 'No reference data available (New publishers)', +# 'Data quality issue reported' +# ]) +# for row in coverage.table(): +# # Write each row +# writer.writerow([ +# row['publisher_title'], +# row['publisher'], +# row['iati_spend_2014'], +# 
row['iati_spend_2015'], +# row['reference_spend_2014'], +# row['reference_spend_2015'], +# row['official_forecast_2015'], +# row['spend_ratio'], +# row['no_data_flag_red'], +# row['no_data_flag_amber'], +# row['spend_data_error_reported_flag'] +# ]) # Summary Stats CSV file diff --git a/make_html.py b/make_html.py index c5e7fff91c..81fa0772aa 100644 --- a/make_html.py +++ b/make_html.py @@ -15,7 +15,7 @@ import timeliness import forwardlooking import comprehensiveness -import coverage +# import coverage import summary_stats import humanitarian from vars import expected_versions @@ -195,7 +195,7 @@ def basic_page(page_name): kwargs['comprehensiveness'] = comprehensiveness parent_page_name = 'comprehensiveness' elif page_name.startswith('coverage'): - kwargs['coverage'] = coverage + # kwargs['coverage'] = coverage parent_page_name = 'coverage' elif page_name.startswith('summary_stats'): kwargs['summary_stats'] = summary_stats @@ -205,7 +205,7 @@ def basic_page(page_name): parent_page_name = 'humanitarian' else: parent_page_name = page_name - return render_template(page_name+'.html', page=parent_page_name, **kwargs) + return render_template(page_name + '.html', page=parent_page_name, **kwargs) else: abort(404) @@ -238,7 +238,7 @@ def publisher(publisher): 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((x.keys() for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) + } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), 
[]))) ] failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) return render_template('publisher.html', @@ -310,7 +310,7 @@ def csv_development(name): @app.route('/publisher_imgs/.png') def image_development_publisher(image): print(image) - return Response(open(os.path.join('out', 'publisher_imgs', image+'.png')).read(), mimetype='image/png') + return Response(open(os.path.join('out', 'publisher_imgs', image + '.png')).read(), mimetype='image/png') if __name__ == '__main__': diff --git a/summary_stats.py b/summary_stats.py index 31301edc2a..bbcce98074 100644 --- a/summary_stats.py +++ b/summary_stats.py @@ -15,7 +15,7 @@ ('forwardlooking', 'Forward looking'), ('comprehensive', 'Comprehensive'), ('score', 'Score') - ] +] def is_number(s): @@ -27,6 +27,7 @@ def is_number(s): except ValueError: return False + def convert_to_int(x): """ @todo Document this function """ @@ -73,7 +74,7 @@ def table(): frequency_score = 2 elif frequency_assessment == 'Annual': frequency_score = 1 - else: # frequency_assessment == 'Less than Annual' or something else! + else: # frequency_assessment == 'Less than Annual' or something else! frequency_score = 0 # Assign timelag score @@ -88,24 +89,22 @@ def table(): timelag_score = 2 elif timelag_assessment == 'One year': timelag_score = 1 - else: # timelag_assessment == 'More than one year' or something else! + else: # timelag_assessment == 'More than one year' or something else! 
timelag_score = 0 # Compute the percentage - row['timeliness'] = int( round((float(frequency_score + timelag_score) / 8) * 100)) - + row['timeliness'] = int(round((float(frequency_score + timelag_score) / 8) * 100)) # Compute forward-looking statistic # Get the forward-looking data for this publisher publisher_forwardlooking_data = forwardlooking.generate_row(publisher) # Convert the data for this publishers 'Percentage of current activities with budgets' fields into integers - numbers = [ int(x) for x in publisher_forwardlooking_data['year_columns'][2].itervalues() if is_number(x) ] - + numbers = [int(x) for x in publisher_forwardlooking_data['year_columns'][2].values() if is_number(x)] + # Compute and store the mean average for these fields row['forwardlooking'] = sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data['year_columns'][2]) - # Compute comprehensive statistic # Get the comprehensiveness data for this publisher publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) @@ -113,9 +112,8 @@ def table(): # Set the comprehensive value to be the summary average for valid data row['comprehensive'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid']) - # Compute score - row['score'] = int( round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3 )) + row['score'] = int(round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3)) # Return a generator object yield row diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index dcccc034cf..6da713704e 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block container %} @@ -40,7 +40,7 @@

    {% block table_title %}Table of Comprehensiveness values {% if self.heading_detail() %}
    {% block heading_detail %}{% endblock %} - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %}
    {% endif %} diff --git a/templates/coverage.html b/templates/coverage.html index b357228ace..cc3e91ddfa 100644 --- a/templates/coverage.html +++ b/templates/coverage.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block content %} {% endblock %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index f33fe85b57..eb24c6876a 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block content %}
    • Table
    • @@ -40,7 +40,7 @@

      Activities with Forward Looking Budget Allocations

      Red flag: Publisher currently publishing forward looking budgets at more than one hierarchical level.
      Yellow flag: Publisher currently publishing the 'budget not provided' attribute for some or all activities.

      - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 0f3615b63c..2e2f4e05e4 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block content %}
        @@ -24,7 +24,7 @@

        Summary Statistics

        {Score} = ( {Timeliness} + {Forward looking} + {Comprehensive} ) / 3  

        - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %}
      diff --git a/templates/timeliness.html b/templates/timeliness.html index 657ea59561..c8c69c29fc 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -1,5 +1,5 @@ {% extends 'timeliness_base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block frequency_li %} class="active"{% endblock %} @@ -23,7 +23,7 @@

      Table of Frequency assessments

      Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

      You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum

      - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %}
      diff --git a/templates/timeliness_base.html b/templates/timeliness_base.html index d0618b9a2e..9ce1f3471a 100644 --- a/templates/timeliness_base.html +++ b/templates/timeliness_base.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block container %} diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 191b8401bc..0cf0d0f490 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -1,5 +1,5 @@ {% extends 'timeliness_base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes %} {% block timelag_li %} class="active"{% endblock %} @@ -21,7 +21,7 @@

      Table of Time lag assessments

      You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum.

      - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %} diff --git a/text.py b/text.py index c75c1abc92..e63d91ebc0 100644 --- a/text.py +++ b/text.py @@ -4,6 +4,7 @@ 'index': 'Home', 'headlines': 'Headlines', 'data_quality': 'Data Quality', + 'publishing_stats': 'Publishing Statistics', 'exploring_data': 'Exploring Data', 'faq': 'FAQ' } @@ -92,9 +93,9 @@ 'identifiers': 'Duplicate Identifiers', }) -top_navigation = ['index', 'headlines', 'data_quality', 'exploring_data', 'faq'] +top_navigation = ['index', 'headlines', 'data_quality', 'publishing_stats', 'exploring_data', 'faq'] navigation = { - 'headlines': [ 'publishers', 'files', 'activities'], + 'headlines': ['publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], 'exploring_data': ['elements', 'codelists', 'booleans', 'dates'], 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'coverage', 'summary_stats', 'humanitarian'] diff --git a/timeliness.py b/timeliness.py index 4775bfeb3e..1d102b8274 100644 --- a/timeliness.py +++ b/timeliness.py @@ -1,6 +1,5 @@ # This file converts raw timeliness data into the associated Dashboard assessments -from __future__ import print_function from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches import datetime from dateutil.relativedelta import relativedelta @@ -29,16 +28,17 @@ def previous_months_generator(d): """ year = d.year month = d.month - for i in range(0,12): + for i in range(0, 12): month -= 1 if month <= 0: year -= 1 month = 12 - yield year,month + yield year, month + # Store lists of previous months -previous_months = ['{}-{}'.format(year,str(month).zfill(2)) for year,month in previous_months_generator(datetime.date.today())] -previous_months_reversed=list(reversed(previous_months)) +previous_months = ['{}-{}'.format(year, str(month).zfill(2)) for year, 
month in previous_months_generator(datetime.date.today())] +previous_months_reversed = list(reversed(previous_months)) # Store the current month as a string today = datetime.date.today() @@ -58,12 +58,12 @@ def publisher_frequency(): # Load all the data from 'gitaggregate-publisher-dated' into memory gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') - + # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' for publisher, agg in gitaggregate_publisher.items(): - + # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher - if not 'most_recent_transaction_date' in agg: + if 'most_recent_transaction_date' not in agg: continue # Skip if this publisher appears in the list of publishers who have since changed their Registry ID @@ -71,53 +71,53 @@ def publisher_frequency(): continue updates_per_month = defaultdict(int) - previous_transaction_date = datetime.date(1,1,1) - + previous_transaction_date = datetime.date(1, 1, 1) + # Find the most recent transaction date and parse into a datetime object for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()): transaction_date = parse_iso_date(transaction_date_str) - # If transaction date has increased + # If transaction date has increased if transaction_date is not None and transaction_date > previous_transaction_date: previous_transaction_date = transaction_date updates_per_month[gitdate[:7]] += 1 - + # Find the first date that this publisher made data available, and parse into a datetime object first_published_string = sorted(agg['most_recent_transaction_date'])[0] first_published = parse_iso_date(first_published_string) - + # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment if first_published >= previous_month_days[2]: # This is a publisher of less than 3 months - #if True in [ x in updates_per_month for x in previous_months[:3] ]: + 
# if True in [ x in updates_per_month for x in previous_months[:3] ]: frequency = 'Annual' elif first_published >= previous_month_days[5]: # This is a publisher of less than 6 months - if all([ x in updates_per_month for x in previous_months[:3] ]): + if all([x in updates_per_month for x in previous_months[:3]]): frequency = 'Monthly' else: frequency = 'Annual' elif first_published >= previous_month_days[11]: # This is a publisher of less than 12 months - if [ x in updates_per_month for x in previous_months[:6] ].count(True) >= 4: + if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4: frequency = 'Monthly' - elif any([ x in updates_per_month for x in previous_months[:3] ]) and any([ x in updates_per_month for x in previous_months[3:6] ]): + elif any([x in updates_per_month for x in previous_months[:3]]) and any([x in updates_per_month for x in previous_months[3:6]]): frequency = 'Quarterly' else: frequency = 'Annual' else: # This is a publisher of 1 year or more - if ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 7) and ([ x in updates_per_month for x in previous_months[:2] ].count(True) >= 1): - # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. + if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and ([x in updates_per_month for x in previous_months[:2]].count(True) >= 1): + # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. frequency = 'Monthly' - elif ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 3) and ([ x in updates_per_month for x in previous_months[:4] ].count(True) >= 1): + elif ([x in updates_per_month for x in previous_months[:12]].count(True) >= 3) and ([x in updates_per_month for x in previous_months[:4]].count(True) >= 1): # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months. 
frequency = 'Quarterly' - elif any([ x in updates_per_month for x in previous_months[:6] ]) and any([ x in updates_per_month for x in previous_months[6:12] ]): + elif any([x in updates_per_month for x in previous_months[:6]]) and any([x in updates_per_month for x in previous_months[6:12]]): # There has been an update in 2 of the last 6 month periods frequency = 'Six-Monthly' - elif any([ x in updates_per_month for x in previous_months[:12] ]): + elif any([x in updates_per_month for x in previous_months[:12]]): # There has been an update in 1 of the last 12 months frequency = 'Annual' else: @@ -125,51 +125,58 @@ def publisher_frequency(): frequency = 'Less than Annual' # If the publisher is in the list of current publishers, return a generator object - if publisher in publisher_name: + if publisher in publisher_name: yield publisher, publisher_name.get(publisher), updates_per_month, frequency def frequency_index(frequency): return ['Monthly', 'Quarterly', 'Six-Monthly', 'Annual', 'Less than Annual'].index(frequency) + def publisher_frequency_sorted(): - return sorted(publisher_frequency(), key=lambda (publisher, publisher_title , _, frequency): ( - frequency_index(frequency), - publisher_title - )) + return sorted( + publisher_frequency(), + key=lambda tup: (frequency_index(tup[3]), tup[1])) + def publisher_frequency_dict(): - publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0] ) + publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0]) data = {} - for v in publisher_data_list: + for v in publisher_data_list: data[v[0]] = v return data + def publisher_frequency_summary(): - return Counter(frequency for _,_,_,frequency in publisher_frequency()) + return Counter(frequency for _, _, _, frequency in publisher_frequency()) + def timelag_index(timelag): return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) + def publisher_timelag_sorted(): - publisher_timelags = [ 
(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] - return sorted(publisher_timelags, key=lambda (publisher, publisher_title, _, timelag): ( - timelag_index(timelag), - publisher_title - )) + publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] + return sorted( + publisher_timelags, + key=lambda tup: (timelag_index(tup[3]), tup[1])) + def publisher_timelag_dict(): - publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] + publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] data = {} - for v in publisher_timelags: + for v in publisher_timelags: data[v[0]] = v return data + def publisher_timelag_summary(): - return Counter(timelag for _,_,_,timelag in publisher_timelag_sorted()) + return Counter(timelag for _, _, _, timelag in publisher_timelag_sorted()) + blacklist_publisher = JSONDir('./stats-blacklist/gitaggregate-publisher-dated') + def has_future_transactions(publisher): """ returns 0, 1 or 2 @@ -188,11 +195,12 @@ def has_future_transactions(publisher): if publisher not in blacklist_publisher: return -1 today = datetime.date.today() - mindate = datetime.date(today.year-1, today.month, 1) + mindate = datetime.date(today.year - 1, today.month, 1) for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items(): if parse_iso_date(date) >= mindate and activity_blacklist: return 1 return 0 + def sort_first(list_, key): return 
sorted(list_, key=lambda x: key(x[0])) From 1442ac4ae7c1d8ce2dcad630cbc004d73c3c1006 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 6 Apr 2021 12:51:50 +0100 Subject: [PATCH 120/375] Run 3 hours later --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 21896e13d0..15ec97dc0b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,7 +1,7 @@ name: Build on: schedule: - - cron: '0 5 * * *' + - cron: '0 8 * * *' jobs: build: runs-on: ubuntu-latest From de7ed60c8fb78eb82a282506557a19ed3d4bc8b5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 11 Apr 2021 23:34:17 +0100 Subject: [PATCH 121/375] Copy --- text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text.py b/text.py index 1263decad5..86b2894d36 100644 --- a/text.py +++ b/text.py @@ -13,7 +13,7 @@ 'headlines': 'Headlines', 'data_quality': 'Data Quality', 'exploring_data': 'Exploring Data', - 'faq': 'Code for IATI Dashboard Frequently Asked Questions', + 'faq': 'Frequently Asked Questions', 'publishers': 'IATI Publishers', 'files': 'IATI Files', 'activities': 'IATI Activities', From 5b142c8bf1a5d0b6f50b94e16a4d323f77e6c655 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 16 Apr 2021 14:32:58 +0100 Subject: [PATCH 122/375] Switch back to source for codelist mappings --- fetch_v2_codelists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch_v2_codelists.py b/fetch_v2_codelists.py index 95a325f437..3fed82fcec 100644 --- a/fetch_v2_codelists.py +++ b/fetch_v2_codelists.py @@ -29,7 +29,7 @@ def mapping_to_json(mappings): mapping_urls = [ - 'https://raw.githubusercontent.com/andylolz/IATI-Codelists/version-2.03/mapping.xml', + 'https://raw.githubusercontent.com/IATI/IATI-Codelists/version-2.03/mapping.xml', 'https://raw.githubusercontent.com/codeforIATI/Unofficial-Codelists/master/mapping.xml'] mappings = [] for mapping_url in 
mapping_urls: From 240272d34bf8e2333389b8e7cf6dd8fecdea1696 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 28 Apr 2021 22:36:54 +0100 Subject: [PATCH 123/375] Push gh-pages to dashboard-public --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 15ec97dc0b..8ca22f9d1f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,6 +44,7 @@ jobs: GIT_CONFIG_NAME: Code for IATI bot GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com GITHUB_TOKEN: ${{ secrets.TOKEN }} + REPOSITORY_NAME: codeforIATI/dashboard-public BRANCH: gh-pages FOLDER: out CLEAN: true From 3864ee69be8cd7b477b05b3eab5f6568956584ec Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 29 Apr 2021 09:42:40 +0100 Subject: [PATCH 124/375] Update some URLs --- .github/workflows/build.yml | 6 +++--- make_html.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ca22f9d1f..356e13701a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,9 +27,9 @@ jobs: - name: Fetch stats run: | mkdir out - git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats stats-calculated - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/ckan.json" > stats-calculated/ckan.json - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats/gh-pages/metadata.json" > stats-calculated/metadata.json + git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/ckan.json" > stats-calculated/ckan.json + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/metadata.json" > stats-calculated/metadata.json - name: Make plots run: python plots.py - name: 
Build CSV output diff --git a/make_html.py b/make_html.py index c42f6c2250..74cc7e64b5 100644 --- a/make_html.py +++ b/make_html.py @@ -107,7 +107,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') app.jinja_env.globals['stats_url'] = 'https://dashboard-stats.codeforiati.org' -app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats/tree/gh-pages' +app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' app.jinja_env.globals['path_exists'] = path_exists app.jinja_env.globals['sorted'] = sorted app.jinja_env.globals['enumerate'] = enumerate From 72e10c51b4482fdde2189f0e705eadc47610b391 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 30 Apr 2021 14:51:39 +0100 Subject: [PATCH 125/375] Fix dataset_to_publisher lookup --- data.py | 5 +++++ make_html.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/data.py b/data.py index 4ef31664af..e87061c053 100644 --- a/data.py +++ b/data.py @@ -182,6 +182,11 @@ def deep_merge(obj1, obj2): } ckan_publishers = JSONDir('./data/ckan_publishers') ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) +dataset_to_publisher_dict = { + dataset: publisher + for publisher, publisher_dict in ckan.items() + for dataset in publisher_dict.keys() +} metadata = json.load(open('./stats-calculated/metadata.json'), object_pairs_hook=OrderedDict) with open('./data/downloads/errors') as fp: for line in fp: diff --git a/make_html.py b/make_html.py index 74cc7e64b5..e3bb212a22 100644 --- a/make_html.py +++ b/make_html.py @@ -24,6 +24,7 @@ codelist_mapping, codelist_sets, current_stats, + dataset_to_publisher_dict, get_publisher_stats, MAJOR_VERSIONS, metadata, @@ -57,7 +58,7 @@ def nested_dictinvert(d): def 
dataset_to_publisher(publisher_slug): """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher slug e.g. dfid """ - return publisher_slug.rsplit('-', 1)[0] + return dataset_to_publisher_dict.get(publisher_slug, '') def firstint(s): From 7bcfae5b8c778dcae0a78880e3cedb67a0f49a4d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 30 Apr 2021 14:51:50 +0100 Subject: [PATCH 126/375] =?UTF-8?q?Don=E2=80=99t=20print=20stuff=20when=20?= =?UTF-8?q?imported=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- make_html.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/make_html.py b/make_html.py index e3bb212a22..72058b7e5d 100644 --- a/make_html.py +++ b/make_html.py @@ -16,8 +16,6 @@ import text from datetime import datetime from dateutil import parser - -print('Doing initial data import') from data import ( ckan, ckan_publishers, @@ -31,7 +29,6 @@ publisher_name, publishers_ordered_by_title, slugs) -print('Initial data import finished') app = Flask(__name__) From 6d750d21b4ac43df9643a63f50d6f813d3c62776 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 30 Apr 2021 14:51:39 +0100 Subject: [PATCH 127/375] Fix dataset_to_publisher lookup --- data.py | 5 +++++ make_html.py | 5 +++-- templates/files.html | 2 +- templates/xml.html | 4 ++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/data.py b/data.py index 4ef31664af..e87061c053 100644 --- a/data.py +++ b/data.py @@ -182,6 +182,11 @@ def deep_merge(obj1, obj2): } ckan_publishers = JSONDir('./data/ckan_publishers') ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) +dataset_to_publisher_dict = { + dataset: publisher + for publisher, publisher_dict in ckan.items() + for dataset in publisher_dict.keys() +} metadata = json.load(open('./stats-calculated/metadata.json'), object_pairs_hook=OrderedDict) with open('./data/downloads/errors') as fp: for line in fp: diff --git a/make_html.py 
b/make_html.py index 74cc7e64b5..29e3bfd06d 100644 --- a/make_html.py +++ b/make_html.py @@ -24,6 +24,7 @@ codelist_mapping, codelist_sets, current_stats, + dataset_to_publisher_dict, get_publisher_stats, MAJOR_VERSIONS, metadata, @@ -54,10 +55,10 @@ def nested_dictinvert(d): return inv -def dataset_to_publisher(publisher_slug): +def dataset_to_publisher(dataset_slug): """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher slug e.g. dfid """ - return publisher_slug.rsplit('-', 1)[0] + return dataset_to_publisher_dict.get(dataset_slug, '') def firstint(s): diff --git a/templates/files.html b/templates/files.html index 14236317e2..665e080b5d 100644 --- a/templates/files.html +++ b/templates/files.html @@ -46,7 +46,7 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - + diff --git a/templates/xml.html b/templates/xml.html index 7624921aa4..7a54004ec5 100644 --- a/templates/xml.html +++ b/templates/xml.html @@ -34,7 +34,7 @@

      Files where XML is not well-formed

      {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %} - + {% endif %} @@ -61,7 +61,7 @@

      Files with non-standard roots

      {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - + {% endif %} From d2245e6cbd89e5b8de3aad4ff0cf8dd3de626ed8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 30 Apr 2021 14:51:50 +0100 Subject: [PATCH 128/375] =?UTF-8?q?Don=E2=80=99t=20print=20stuff=20when=20?= =?UTF-8?q?imported=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- make_html.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/make_html.py b/make_html.py index 29e3bfd06d..ffb77d8d58 100644 --- a/make_html.py +++ b/make_html.py @@ -16,8 +16,6 @@ import text from datetime import datetime from dateutil import parser - -print('Doing initial data import') from data import ( ckan, ckan_publishers, @@ -31,7 +29,6 @@ publisher_name, publishers_ordered_by_title, slugs) -print('Initial data import finished') app = Flask(__name__) From 7149e5bb7582173ed96106f6d92b06148e0b60bb Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 30 Apr 2021 19:01:54 +0100 Subject: [PATCH 129/375] Blitz dashboard-public history every time --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 356e13701a..b37debbdbd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,3 +48,5 @@ jobs: BRANCH: gh-pages FOLDER: out CLEAN: true + SILENT: true + SINGLE_COMMIT: true From 2c556309e7e463067893072032435d618ba8b1a0 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 1 May 2021 23:41:09 +0100 Subject: [PATCH 130/375] =?UTF-8?q?Temporarily=20comment=20out=20bits=20th?= =?UTF-8?q?at=20don=E2=80=99t=20work=20yet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- make_csv.py | 42 +++++++++++++++++++++--------------------- make_html.py | 8 ++++---- text.py | 2 +- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/make_csv.py b/make_csv.py 
index 8d72ef687b..10618c0708 100644 --- a/make_csv.py +++ b/make_csv.py @@ -73,20 +73,20 @@ def publisher_dicts(): writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) -# Timeliness CSV files (frequency and timelag) -import timeliness +# # Timeliness CSV files (frequency and timelag) +# import timeliness -previous_months = timeliness.previous_months_reversed +# previous_months = timeliness.previous_months_reversed -for fname, f, assessment_label in ( - ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), - ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') -): - with open(os.path.join('out', fname), 'w') as fp: - writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) - for publisher, publisher_title, per_month, assessment in f(): - writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) +# for fname, f, assessment_label in ( +# ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), +# ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') +# ): +# with open(os.path.join('out', fname), 'w') as fp: +# writer = csv.writer(fp) +# writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) +# for publisher, publisher_title, per_month, assessment in f(): +# writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) # Forward-looking CSV file @@ -146,16 +146,16 @@ def publisher_dicts(): # ]) -# Summary Stats CSV file -import summary_stats +# # Summary Stats CSV file +# import summary_stats -with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: - writer = csv.writer(fp) - # Add column headers - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) - for row in 
summary_stats.table(): - # Write each row - writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) +# with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: +# writer = csv.writer(fp) +# # Add column headers +# writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) +# for row in summary_stats.table(): +# # Write each row +# writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) # Humanitarian CSV file diff --git a/make_html.py b/make_html.py index 33c302efcb..fae4919264 100644 --- a/make_html.py +++ b/make_html.py @@ -152,15 +152,15 @@ def get_codelist_values(codelist_values_for_element): 'exploring_data', 'publishers', 'publishing_stats', - 'timeliness', - 'timeliness_timelag', + # 'timeliness', + # 'timeliness_timelag', 'forwardlooking', 'comprehensiveness', 'comprehensiveness_core', 'comprehensiveness_financials', 'comprehensiveness_valueadded', - 'coverage', - 'summary_stats', + # 'coverage', + # 'summary_stats', 'humanitarian', 'files', 'activities', diff --git a/text.py b/text.py index 685bc1a78d..891c846649 100644 --- a/text.py +++ b/text.py @@ -98,5 +98,5 @@ 'headlines': ['publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], 'exploring_data': ['elements', 'codelists', 'booleans', 'dates'], - 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'coverage', 'summary_stats', 'humanitarian'] + 'publishing_stats': ['forwardlooking', 'comprehensiveness', 'humanitarian'] } From 597e6ee1d1bbbf43961863e78e89e2248c467675 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 2 May 2021 15:40:54 +0100 Subject: [PATCH 131/375] Footer copy tweak --- templates/base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/base.html 
b/templates/base.html index fa0852382f..0bddf466bc 100644 --- a/templates/base.html +++ b/templates/base.html @@ -139,7 +139,7 @@

      {{page_titles[page]}}

      (NB This is the time the download task started. Any changes made after this time may not be reflected).
      - For details on how often these updates are applied, see Code for IATI Dashboard update FAQ. + For details on how often these updates are applied, see the Code for IATI Dashboard FAQ. From 66c70d6545ca661d400aa4a2acb3432ea9865e0b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 12:22:34 +0100 Subject: [PATCH 132/375] Remove base template from static --- static/templates/base.html | 219 ------------------------------------- 1 file changed, 219 deletions(-) delete mode 100644 static/templates/base.html diff --git a/static/templates/base.html b/static/templates/base.html deleted file mode 100644 index 8581a28b0b..0000000000 --- a/static/templates/base.html +++ /dev/null @@ -1,219 +0,0 @@ - - - - - - - - - - {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} - - - - - - - {% block extrahead %}{% endblock %} - - -
      - - - - -
      - {% block container %} - {% block page_header_div %} - - {% endblock %} - - {% block content %} - {% endblock %} - - {% endblock %} -
      - -
      - - - - - - - -{% block tablesorterscript %}{% endblock %} - - - {% block extrafooter %}{% endblock %} - - From 19b5b009e5aadcb01686c1cd6b7c13353eabdbf3 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 12:32:38 +0100 Subject: [PATCH 133/375] Tidy up CSS --- templates/base.html | 129 ++++++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 57 deletions(-) diff --git a/templates/base.html b/templates/base.html index 0bddf466bc..49af920243 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,64 +5,77 @@ - - {% block title %}Code for IATI Dashboard - {{page_titles[page]}}{% endblock %} + + {% block title %}Code for IATI Dashboard - {{page_titles[page]}}{% endblock %} {% block extrahead %}{% endblock %} From 2da38832065b8034921a1b1700ffbc9e7daf296e Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 12:36:47 +0100 Subject: [PATCH 134/375] Fix variable name typo --- templates/forwardlooking.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index eb24c6876a..b15ce67570 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -72,7 +72,7 @@

      Activities with Forward Looking Budget Allocations

      {% endfor %} {% endfor %} -
      + {% endfor %} From 5e3c5a5d20c11fcdefa6aa9cc5e3731e15a9c50f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 17:07:44 +0100 Subject: [PATCH 135/375] Fix sorting by flag column on forward-looking page --- templates/forwardlooking.html | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index b15ce67570..c730a45be0 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -62,7 +62,7 @@

      Activities with Forward Looking Budget Allocations

      {% for row in forwardlooking.table() %} - + {% for column in row.year_columns %} @@ -72,7 +72,17 @@

      Activities with Forward Looking Budget Allocations

      {% endfor %} {% endfor %} - + {% endfor %} @@ -232,8 +242,10 @@

      Pseudocode

      9: function(node,table,cellIndex) { if ($(node).text().indexOf('-') > 0) return '0'; else return $(node).text(); + }, + 10: function(node,table,cellIndex) { + return $(node).attr('data-severity'); } } }{% endblock %} {% block tablesortertarget %}table#main_table{% endblock %} - From e02236d392dee952162266d2b18443300f42ed30 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 17:23:07 +0100 Subject: [PATCH 136/375] Revert "Remove travis.yml and test_comprehensiveness.py" This reverts commit 675855727801ce40079986caf6a307d3ad75febc. --- .travis.yml | 9 +++++++++ tests/test_comprehensiveness.py | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 .travis.yml create mode 100644 tests/test_comprehensiveness.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..8dc95b2928 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,9 @@ +language: python +python: + - "2.7" +install: + - pip install -r requirements_dev.txt + - ./get_stats.sh + - travis_wait 20 ./fetch_data.sh +script: py.test --cov . 
+after_success: coveralls diff --git a/tests/test_comprehensiveness.py b/tests/test_comprehensiveness.py new file mode 100644 index 0000000000..2b0054b90b --- /dev/null +++ b/tests/test_comprehensiveness.py @@ -0,0 +1,26 @@ +import sys +from os import path +sys.path.append( path.dirname( path.dirname( path.abspath(__file__) ) ) ) + +import comprehensiveness + +mock_stats = { + 'comprehensiveness': { + 'activity-date': 2, + 'activity-status': 2, + 'recipient_language': 0, + 'transaction_spend': 1, + }, + 'comprehensiveness_denominator_default': 2, + 'comprehensiveness_denominators': { + 'recipient_language': 0, + 'transaction_spend': 1, + 'transaction_traceability': 1 + } + } + +def test_denominator(): + assert comprehensiveness.denominator('activity-date', mock_stats) == 2 + assert comprehensiveness.denominator('transaction_spend', mock_stats) == 1 + assert comprehensiveness.denominator('non_existant_key', mock_stats) == 2 # Passing a non existant key will return the default denominator + assert comprehensiveness.denominator('activity-date', None) == 0 # Passing a 'Falsey' value as the stats param will return 0 From ea713003846aa3e373ba293c930bcee5f66e7055 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 17:38:10 +0100 Subject: [PATCH 137/375] Reinstate tests --- .github/workflows/build.yml | 2 +- .github/workflows/ci.yml | 39 +++++++++++++++++++++++++++++++++ .travis.yml | 9 -------- requirements_dev.txt | 5 +++++ tests/test_comprehensiveness.py | 22 ++++++++++--------- 5 files changed, 57 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml create mode 100644 requirements_dev.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b37debbdbd..8512b4afd6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: name: Cache dependencies with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_dev.txt') }} 
+ key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - name: Install dependencies diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..05a1da733a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,39 @@ +name: CI +on: [push, pull_request] +jobs: + ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + - uses: actions/cache@v2 + name: Cache dependencies + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + - name: Lint + run: flake8 + - name: Fetch data + run: ./fetch_data.sh + - name: Fetch stats + run: | + mkdir out + git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/ckan.json" > stats-calculated/ckan.json + curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/metadata.json" > stats-calculated/metadata.json + - name: Run tests + run: pytest --cov . + # - name: Coveralls + # run: coveralls --service=github-actions + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8dc95b2928..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: python -python: - - "2.7" -install: - - pip install -r requirements_dev.txt - - ./get_stats.sh - - travis_wait 20 ./fetch_data.sh -script: py.test --cov . 
-after_success: coveralls diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000000..91e78d4319 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt +pytest<6.1.0 +pytest-cov==2.11.1 +coveralls==3.0.1 +flake8==3.7.7 diff --git a/tests/test_comprehensiveness.py b/tests/test_comprehensiveness.py index 2b0054b90b..88a98e64a7 100644 --- a/tests/test_comprehensiveness.py +++ b/tests/test_comprehensiveness.py @@ -1,23 +1,25 @@ import sys from os import path -sys.path.append( path.dirname( path.dirname( path.abspath(__file__) ) ) ) + +sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) import comprehensiveness mock_stats = { 'comprehensiveness': { - 'activity-date': 2, - 'activity-status': 2, - 'recipient_language': 0, - 'transaction_spend': 1, - }, + 'activity-date': 2, + 'activity-status': 2, + 'recipient_language': 0, + 'transaction_spend': 1, + }, 'comprehensiveness_denominator_default': 2, 'comprehensiveness_denominators': { - 'recipient_language': 0, - 'transaction_spend': 1, - 'transaction_traceability': 1 - } + 'recipient_language': 0, + 'transaction_spend': 1, + 'transaction_traceability': 1 } +} + def test_denominator(): assert comprehensiveness.denominator('activity-date', mock_stats) == 2 From 60b6178b79a8a8c38264e4073a697959945f5b5d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 19:16:34 +0100 Subject: [PATCH 138/375] Linting --- .github/workflows/ci.yml | 10 ++++---- coverage.py | 4 ---- humanitarian.py | 14 +++++------ make_csv.py | 42 ++++++++++++++------------------- plots.py | 15 ++++++------ summary_stats.py | 3 +-- tests/test_comprehensiveness.py | 2 +- 7 files changed, 39 insertions(+), 51 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 05a1da733a..5487d37ed4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,8 +32,8 @@ jobs: curl --compressed 
"https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/metadata.json" > stats-calculated/metadata.json - name: Run tests run: pytest --cov . - # - name: Coveralls - # run: coveralls --service=github-actions - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} + - name: Coveralls + run: coveralls --service=github-actions + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/coverage.py b/coverage.py index 6c6029e84b..69fe8de47e 100644 --- a/coverage.py +++ b/coverage.py @@ -169,10 +169,6 @@ def table(): # Loop over each publisher for publisher_title, publisher in publishers_ordered_by_title: - - # Store the data for this publisher as new variables - publisher_stats = get_publisher_stats(publisher) - # Skip if all activities from this publisher are secondary reported if publisher in secondary_publishers: continue diff --git a/humanitarian.py b/humanitarian.py index cf29d01203..4bd1de4480 100644 --- a/humanitarian.py +++ b/humanitarian.py @@ -13,7 +13,7 @@ ('appeal_emergency', 'Appeal or Emergency Details'), ('clusters', 'Clusters'), ('average', 'Average') - ] +] def table(): @@ -38,20 +38,20 @@ def table(): # Calculate percentage of all humanitarian activities that are defined using the @humanitarian attribute row['humanitarian_attrib'] = ( publisher_stats.get('humanitarian', {}).get('is_humanitarian_by_attrib', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0 - ) * 100 + if int(row['num_activities']) > 0 else 0 + ) * 100 # Calculate percentage of all humanitarian activities that use the element to define an appeal or emergency row['appeal_emergency'] = ( publisher_stats.get('humanitarian', {}).get('contains_humanitarian_scope', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0 - ) * 100 + if int(row['num_activities']) > 0 else 0 + ) * 100 # Calculate 
percentage of all humanitarian activities that use clusters row['clusters'] = ( publisher_stats.get('humanitarian', {}).get('uses_humanitarian_clusters_vocab', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0 - ) * 100 + if int(row['num_activities']) > 0 else 0 + ) * 100 # Calculate the mean average row['average'] = (row['publishing_humanitarian'] + row['humanitarian_attrib'] + row['appeal_emergency'] + row['clusters']) / float(4) diff --git a/make_csv.py b/make_csv.py index 10618c0708..2874b33026 100644 --- a/make_csv.py +++ b/make_csv.py @@ -4,6 +4,24 @@ import os import data +# # Timeliness CSV files (frequency and timelag) +# import timeliness + +# Forward-looking CSV file +import forwardlooking + +# Comprehensiveness CSV files ('summary', 'core', 'financials' and 'valueadded') +import comprehensiveness + +# # Coverage CSV file +# import coverage + +# # Summary Stats CSV file +# import summary_stats + +# Humanitarian CSV file +import humanitarian + publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} @@ -72,10 +90,6 @@ def publisher_dicts(): for publisher_json in data.ckan_publishers.values(): writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) - -# # Timeliness CSV files (frequency and timelag) -# import timeliness - # previous_months = timeliness.previous_months_reversed # for fname, f, assessment_label in ( @@ -88,20 +102,12 @@ def publisher_dicts(): # for publisher, publisher_title, per_month, assessment in f(): # writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) - -# Forward-looking CSV file -import forwardlooking - with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) 
for row in forwardlooking.table(): writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) - -# Comprehensiveness CSV files ('summary', 'core', 'financials' and 'valueadded') -import comprehensiveness - for tab in comprehensiveness.columns.keys(): with open(os.path.join('out', 'comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = csv.writer(fp) @@ -109,10 +115,6 @@ def publisher_dicts(): for row in comprehensiveness.table(): writer.writerow([row['publisher_title'], row['publisher']] + [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) - -# # Coverage CSV file -# import coverage - # with open(os.path.join('out', 'coverage.csv'), 'w') as fp: # writer = csv.writer(fp) # # Add column headers @@ -145,10 +147,6 @@ def publisher_dicts(): # row['spend_data_error_reported_flag'] # ]) - -# # Summary Stats CSV file -# import summary_stats - # with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: # writer = csv.writer(fp) # # Add column headers @@ -157,10 +155,6 @@ def publisher_dicts(): # # Write each row # writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) - -# Humanitarian CSV file -import humanitarian - with open(os.path.join('out', 'humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers diff --git a/plots.py b/plots.py index 01b46da054..2ed0452839 100644 --- a/plots.py +++ b/plots.py @@ -12,19 +12,18 @@ """ import datetime -import numpy as np -import matplotlib as mpl -mpl.use('Agg') -import matplotlib.pyplot as plt -import matplotlib.dates as mdates +import numpy as np # noqa: F401 from collections import defaultdict -import os +import os # noqa: F401 import csv import common import data -from vars import expected_versions - +from vars import 
expected_versions # noqa: F401 +import matplotlib as mpl mpl.use('Agg') +import matplotlib.pyplot as plt # noqa: E402 +import matplotlib.dates as mdates # noqa: E402 + # Import failed_downloads as a global failed_downloads = csv.reader(open('data/downloads/history.csv')) diff --git a/summary_stats.py b/summary_stats.py index bbcce98074..f80818ed46 100644 --- a/summary_stats.py +++ b/summary_stats.py @@ -5,7 +5,6 @@ import timeliness import forwardlooking import comprehensiveness -import coverage # Set column groupings, to be displayed in the user output columns = [ @@ -49,7 +48,7 @@ def table(): for publisher_title, publisher in publishers_ordered_by_title: # Store the data for this publisher as a new variable - publisher_stats = get_publisher_stats(publisher) + get_publisher_stats(publisher) # Skip if all activities from this publisher are secondary reported if publisher in secondary_publishers: diff --git a/tests/test_comprehensiveness.py b/tests/test_comprehensiveness.py index 88a98e64a7..5b4c4ae937 100644 --- a/tests/test_comprehensiveness.py +++ b/tests/test_comprehensiveness.py @@ -3,7 +3,7 @@ sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) -import comprehensiveness +import comprehensiveness # noqa: E402 mock_stats = { 'comprehensiveness': { From c547f1a6270c3077b3bd1887cd26cee2ec228f47 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 22:13:58 +0100 Subject: [PATCH 139/375] Fix link to boolean values JSON --- templates/booleans.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/booleans.html b/templates/booleans.html index 0c3eca46f0..b368804228 100644 --- a/templates/booleans.html +++ b/templates/booleans.html @@ -4,7 +4,7 @@ {% block content %}
      -

      (This table as JSON)

      +

      (This table as JSON)

      From 75429ac8a32fbe8f5be6f94011057fca1bcc4c1b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 22:15:26 +0100 Subject: [PATCH 140/375] Whitespace --- templates/booleans.html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/booleans.html b/templates/booleans.html index b368804228..66190c7cae 100644 --- a/templates/booleans.html +++ b/templates/booleans.html @@ -4,7 +4,7 @@ {% block content %}
      -

      (This table as JSON)

      +

      (This table as JSON)

      @@ -15,11 +15,11 @@
      {% for boolean, values in current_stats.inverted_publisher.boolean_values.items() %} - {% for value, publishers in values.items()%} + {% for value, publishers in values.items() %} - - - + + + {% endfor %} {% endfor %} From e8b891cacc31068c1e4be65ee4fbf0b37c957d3d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 22:20:29 +0100 Subject: [PATCH 141/375] =?UTF-8?q?Don=E2=80=99t=20includes=20tests/=20in?= =?UTF-8?q?=20coverage=20stats?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .coveragerc | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..a7cba45f3d --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = + tests/* From 8406d946e967de0203821008c5a3abc94ec9ad6c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 3 May 2021 22:21:43 +0100 Subject: [PATCH 142/375] Add coveralls badge to README --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 7bdcbffd6d..ad56383ca3 100644 --- a/README.rst +++ b/README.rst @@ -4,6 +4,9 @@ Code for IATI Dashboard .. image:: https://img.shields.io/badge/license-GPLv3-blue.svg :target: https://github.com/codeforIATI/dashboard/blob/main/LICENSE.md +.. 
image:: https://coveralls.io/repos/github/codeforIATI/dashboard/badge.svg?branch=main + :target: https://coveralls.io/github/codeforIATI/dashboard?branch=main + Introduction ------------ From 1396ee819e498b0cc5c8b23eb37a235341187fad Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 4 May 2021 10:06:08 +0100 Subject: [PATCH 143/375] Refactor --- data.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/data.py b/data.py index e87061c053..547fea63f4 100644 --- a/data.py +++ b/data.py @@ -135,12 +135,13 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - reader = csv.DictReader(open('registry_id_relationships.csv', 'rU'), delimiter=',') - - # Load this data into a dictonary - registry_matches = {} - for row in reader: - registry_matches[row['previous_registry_id']] = row['current_registry_id'] + with open('registry_id_relationships.csv') as f: + reader = csv.DictReader(f) + # Load this data into a dictonary + registry_matches = { + row['previous_registry_id']: row['current_registry_id'] + for row in reader + } return registry_matches From b40d3863cfcff2d2d42f0c17ef3e3a6023dbfdc4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 4 May 2021 10:06:20 +0100 Subject: [PATCH 144/375] Refactor --- data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data.py b/data.py index 547fea63f4..23e2be8a6a 100644 --- a/data.py +++ b/data.py @@ -1,4 +1,5 @@ -from collections import OrderedDict, MutableMapping +from collections import OrderedDict +from collections.abc import MutableMapping import json import os import re From 52585e677e6358917e73bab734e92139198cfe16 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 4 May 2021 08:11:13 +0100 Subject: [PATCH 145/375] Add github issues from iati-data-bugtracker --- data.py | 17 ++++++++------ fetch_data.py | 10 ++++----- fetch_data.sh | 6 ++++- fetch_github_issues.py | 45 +++++++++++++++++++++++++++++++++++++ 
make_html.py | 6 +++-- templates/publisher.html | 48 +++++++++++++++++++++++++++++++++++----- 6 files changed, 110 insertions(+), 22 deletions(-) create mode 100644 fetch_github_issues.py diff --git a/data.py b/data.py index e87061c053..5b8998da5a 100644 --- a/data.py +++ b/data.py @@ -1,4 +1,5 @@ -from collections import OrderedDict, MutableMapping +from collections import OrderedDict +from collections.abc import MutableMapping import json import os import re @@ -135,12 +136,13 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - reader = csv.DictReader(open('registry_id_relationships.csv', 'rU'), delimiter=',') - - # Load this data into a dictonary - registry_matches = {} - for row in reader: - registry_matches[row['previous_registry_id']] = row['current_registry_id'] + with open('registry_id_relationships.csv') as f: + reader = csv.DictReader(f) + # Load this data into a dictonary + registry_matches = { + row['previous_registry_id']: row['current_registry_id'] + for row in reader + } return registry_matches @@ -181,6 +183,7 @@ def deep_merge(obj1, obj2): 'download_errors': [] } ckan_publishers = JSONDir('./data/ckan_publishers') +github_issues = JSONDir('./data/github/publishers') ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) dataset_to_publisher_dict = { dataset: publisher diff --git a/fetch_data.py b/fetch_data.py index 33b0b23e57..0fabbe0eb2 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -7,16 +7,14 @@ We're particulary looking for information such as name, organisation type, and the link back to the registry """ - -import requests +from pathlib import Path import os import json +import requests + # Make a directory to save the data about each publisher -try: - os.makedirs(os.path.join('data', 'ckan_publishers')) -except OSError: - pass +os.makedirs(Path('data/ckan_publishers'), exist_ok=True) page_size = 50 url = 
'https://iatiregistry.org/api/3/action/organization_list' diff --git a/fetch_data.sh b/fetch_data.sh index 89538e06cb..12ebec4295 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -4,10 +4,14 @@ mkdir -p data/downloads/ wget "https://gist.githubusercontent.com/codeforIATIbot/f117c9be138aa94c9762d57affc51a64/raw/errors" -O data/downloads/errors -# Get CKAN (IATI Registry) data +# Get CKAN (IATI Registry) rm -rf data/ckan_publishers/ python fetch_data.py +# Get GitHub data +rm -rf data/github/ +python fetch_github_issues.py + # Generate a csv file with the number of download errors logged since 2013 cd data/downloads echo "cloning download errors" diff --git a/fetch_github_issues.py b/fetch_github_issues.py new file mode 100644 index 0000000000..121262c9d0 --- /dev/null +++ b/fetch_github_issues.py @@ -0,0 +1,45 @@ +""" +Fetch codeforIATI/iati-data-bugtracker github issues +""" +from pathlib import Path +from collections import defaultdict +import os +import json + +import requests + +# Make a directory to save github issue data +os.makedirs(Path('data/github/publishers'), exist_ok=True) + +j = requests.get( + 'https://api.github.com/repos/codeforIATI/iati-data-bugtracker/issues', + params={'per_page': 100, 'state': 'open'}).json() +with open('data/github/issues.json', 'w') as fp: + json.dump(j, fp) + +publishers = defaultdict(list) +with open(Path('data/github/issues.json')) as f: + issues = json.load(f) +for issue in issues: + awaiting_triage = [ + l for l in issue['labels'] + if l['name'] == 'awaiting triage'] + if awaiting_triage: + # ignore these + continue + pub_ids = [ + x['name'].split(': ', 1)[1] + for x in issue['labels'] + if x['name'].startswith('publisher: ')] + for pub_id in pub_ids: + publishers[pub_id].append({ + 'title': issue['title'], + 'html_url': issue['html_url'], + 'created_at': issue['created_at'], + 'updated_at': issue['updated_at'], + 'state': issue['state'], + 'labels': [l for l in issue['labels'] if not l['name'].startswith('publisher: 
')], + }) +for pub_id, issues in publishers.items(): + with open(Path(f'data/github/publishers/{pub_id}.json'), 'w') as f: + json.dump(issues, f) diff --git a/make_html.py b/make_html.py index e48b9c487a..4b37a7fe1e 100644 --- a/make_html.py +++ b/make_html.py @@ -9,8 +9,8 @@ from collections import defaultdict from flask import Flask, render_template, redirect, abort, Response - import pytz + import licenses import timeliness import forwardlooking @@ -29,6 +29,7 @@ codelist_sets, current_stats, dataset_to_publisher_dict, + github_issues, get_publisher_stats, MAJOR_VERSIONS, metadata, @@ -94,7 +95,7 @@ def get_codelist_values(codelist_values_for_element): Input: Set of codelist values for a given element (listed by publisher), for example: current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] """ - return list(set([y for x in codelist_values_for_element.items() for y in list(x[1])])) + return list(set([y for x in codelist_values_for_element.items() for y in list(x[1].keys())])) # Store data processing times @@ -128,6 +129,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['current_stats'] = current_stats app.jinja_env.globals['ckan'] = ckan app.jinja_env.globals['ckan_publishers'] = ckan_publishers +app.jinja_env.globals['github_issues'] = github_issues app.jinja_env.globals['publisher_name'] = publisher_name app.jinja_env.globals['publishers_ordered_by_title'] = publishers_ordered_by_title app.jinja_env.globals['get_publisher_stats'] = get_publisher_stats diff --git a/templates/publisher.html b/templates/publisher.html index 043d9b1364..a0e3b54f4d 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -138,8 +138,11 @@

      Headlines

      Data Quality

      - {% if current_stats.inverted_file_publisher[publisher].validation.fail %} + {% set data_quality_issue = false %} +
      + {% if current_stats.inverted_file_publisher[publisher].validation.fail %} + {% set data_quality_issue = true %}
      @@ -174,14 +177,10 @@

      Data Quality

      - {% else %} - -

      No issues were found.

      - -
      {% endif %} {% if 1 in publisher_inverted.invalidxml.values() %} + {% set data_quality_issue = true %}
      @@ -209,6 +208,7 @@

      Files where XML is not well-formed

      {% endif %} {% if 1 in publisher_inverted.nonstandardroots.values() %} + {% set data_quality_issue = true %}
      {{publisher_name[package|dataset_to_publisher]}}{{publisher_name[package[:-4]|dataset_to_publisher]}} {{package[:-4]}} {{activities}} {{current_stats.inverted_file.organisations.get(package)}}
      {{dataset|dataset_to_publisher}}{{dataset[:-4]|dataset_to_publisher}} {{dataset[:-4]}}
      {{dataset|dataset_to_publisher}}{{dataset[:-4]|dataset_to_publisher}} {{dataset[:-4]}}
      *{% elif row['budget_not_provided'] %};background-color: #fcf8aa" data-severity="{{row['budget-not-provided']}}">{% elif row['flag'] %};background-color: #f2aaaa" data-severity="{{row['flag']}}">{% else %}">{% endif %}*{% elif row['budget_not_provided'] %};background-color: #fcf8aa" data-severity="{{row['budget_not_provided']}}">{% elif row['flag'] %};background-color: #f2aaaa" data-severity="{{row['flag']}}">{% else %}">{% endif %}
      {{row.publisher_title}} *{% elif row['budget_not_provided'] %};background-color: #fcf8aa" data-severity="{{row['budget_not_provided']}}">{% elif row['flag'] %};background-color: #f2aaaa" data-severity="{{row['flag']}}">{% else %}">{% endif %}* + {%- elif row['budget_not_provided'] -%} + background-color: #fcf8aa" data-severity="1">* + {%- elif row['flag'] -%} + background-color: #f2aaaa" data-severity="2">* + {%- else -%} + " data-severity="0"> + {%- endif -%} +
      ElementValuesPublishers
      {{boolean}}{{value}}{%for publisher in publishers%}{{publisher}} {%endfor%}{{ boolean }}{{ value }}{% for publisher in publishers %}{{ publisher }} {% endfor %}
      @@ -238,6 +238,42 @@

      Files with non-standard roots

      + {% if github_issues and publisher in github_issues %} + {% set data_quality_issue = true %} +
      +
      +

      Issues Raised

      +
      +
      +

      Data and metadata issues are raised on this Github issue tracker. If you spot a problem with IATI data or metadata, please raise an issue.

      +
      +
      + + + + + + + + + + {% for issue in github_issues[publisher] %} + + + + + + + {% endfor %} + +
      TitleLabelsCreatedLast updated
      {{ issue.title }}{% for label in issue.labels %}{{ label.name }} {% endfor %}{{ issue.created_at[:10] }}{{ issue.updated_at[:10] }}
      + + {% endif %} + + {% if not data_quality_issue %} +

      No issues were found.

      + {% endif %} +

      Financial

      {% macro currency_value(d) %} From 259fcbf8f32938cb6424f09c4c4cf877542608f5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 8 May 2021 22:15:42 +0100 Subject: [PATCH 146/375] Get most graphs working again --- plots.py | 181 +++++++++++++++++++++++++++-------------------- requirements.txt | 1 + 2 files changed, 107 insertions(+), 75 deletions(-) diff --git a/plots.py b/plots.py index 2ed0452839..f60af23238 100644 --- a/plots.py +++ b/plots.py @@ -13,49 +13,83 @@ """ import datetime import numpy as np # noqa: F401 -from collections import defaultdict -import os # noqa: F401 +import os import csv -import common +import json +# import common import data -from vars import expected_versions # noqa: F401 +from vars import expected_versions +from git import Repo import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 -# Import failed_downloads as a global -failed_downloads = csv.reader(open('data/downloads/history.csv')) -gitaggregate_publisher = data.JSONDir('./stats-calculated/gitaggregate-publisher-dated') +class GitJSONDir(data.JSONDir): + def __init__(self, folder, repo_path): + self.repo_path = repo_path + self.folder = folder + self.repo = Repo(repo_path) + metadata_file = 'metadata.json' + commits = self.repo.git.log( + '--format=%h', + '--', + metadata_file).split('\n') + dates = [] + for commit in commits: + content = self.get_contents(commit, metadata_file) + dates.append(json.loads(content)['updated_at']) + self.lookup = list(zip(commits, dates)) + + def get_contents(self, commit, path): + blob = self.repo.git.ls_tree(commit, path).split()[2] + return self.repo.git.cat_file('blob', blob) + + def keys(self): + return [x for x in os.listdir(self.repo_path + self.folder)] - -class AugmentedJSONDir(data.JSONDir): def __getitem__(self, key): if key == 'failed_downloads': - return dict((row[0], row[1]) for row in failed_downloads) - elif key == 'publisher_types': - out = 
defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.items(): - if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring, count in publisher_data['activities'].items(): - out[datestring][organization_type] += 1 - else: - print('Publisher not matched:', publisher) - return out - elif key == 'activities_per_publisher_type': - out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.items(): - if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring, count in publisher_data['activities'].items(): - out[datestring][organization_type] += count - else: - print('Publisher not matched:', publisher) - return out + with open('data/downloads/history.csv') as f: + return dict((row[0], row[1]) for row in csv.reader(f)) else: - return super(AugmentedJSONDir, self).__getitem__(key) + items = {} + for commit, date in self.lookup: + try: + item = self.get_contents( + commit, + self.folder + key + '.json') + except IndexError: + continue + items[date] = json.loads(item) + return items + + # if key == 'publisher_types': + # out = defaultdict(lambda: defaultdict(int)) + # for publisher, publisher_data in gitaggregate_publisher.items(): + # if publisher in data.ckan_publishers: + # organization_type = common.get_publisher_type(publisher)['name'] + # for datestring, count in publisher_data['activities'].items(): + # out[datestring][organization_type] += 1 + # else: + # print('Publisher not matched:', publisher) + # return out + # elif key == 'activities_per_publisher_type': + # out = defaultdict(lambda: defaultdict(int)) + # for publisher, publisher_data in gitaggregate_publisher.items(): + # if publisher in data.ckan_publishers: + # organization_type = common.get_publisher_type(publisher)['name'] + # for datestring, count in publisher_data['activities'].items(): + # 
out[datestring][organization_type] += count + # else: + # print('Publisher not matched:', publisher) + # return out + # else: + # return super().__getitem__(key) + + +gitjsondir = GitJSONDir('current/aggregated/', './stats-calculated/') def make_plot(stat_path, git_stats, img_prefix=''): @@ -141,52 +175,49 @@ def make_plot(stat_path, git_stats, img_prefix=''): del writer -# Load aggregated stats for all data -print("All data") -git_stats = AugmentedJSONDir('./stats-calculated/gitaggregate-dated') - for stat_path in [ - # 'activities', - # 'publishers', - # 'activity_files', - # 'organisation_files', - # 'file_size', + 'activities', + 'publishers', + 'activity_files', + 'organisation_files', + 'file_size', 'failed_downloads', - # 'invalidxml', - # 'nonstandardroots', - # 'unique_identifiers', - # ('validation', lambda x: x == 'fail', ''), - # ('publishers_validation', lambda x: x == 'fail', ''), - # ('publisher_has_org_file', lambda x: x == 'no', ''), - # ('versions', lambda x: x in expected_versions, '_expected'), - # ('versions', lambda x: x not in expected_versions, '_other'), - # ('publishers_per_version', lambda x: x in expected_versions, '_expected'), - # ('publishers_per_version', lambda x: x not in expected_versions, '_other'), - # ('file_size_bins', lambda x: True, ''), + 'invalidxml', + 'nonstandardroots', + 'unique_identifiers', + ('validation', lambda x: x == 'fail', ''), + ('publishers_validation', lambda x: x == 'fail', ''), + ('publisher_has_org_file', lambda x: x == 'no', ''), + ('versions', lambda x: x in expected_versions, '_expected'), + ('versions', lambda x: x not in expected_versions, '_other'), + ('publishers_per_version', lambda x: x in expected_versions, '_expected'), + ('publishers_per_version', lambda x: x not in expected_versions, '_other'), + ('file_size_bins', lambda x: True, ''), # ('publisher_types', lambda x: True, ''), # ('activities_per_publisher_type', lambda x: True, '') ]: - make_plot(stat_path, git_stats) - -# Delete git_stats 
variable to save memory -del git_stats - -# try: -# os.makedirs('out/publisher_imgs') -# except OSError: -# pass - -# git_stats_publishers = AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') -# for publisher, git_stats_publisher in git_stats_publishers.items(): -# for stat_path in [ -# 'activities', -# 'activity_files', -# 'organisation_files', -# 'file_size', -# 'invalidxml', -# 'nonstandardroots', -# 'publisher_unique_identifiers', -# ('validation', lambda x: x == 'fail', ''), -# ('versions', lambda x: True, ''), -# ]: -# make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) + make_plot(stat_path, gitjsondir) + +# Delete gitjsondir variable to save memory +del gitjsondir + +try: + os.makedirs('out/publisher_imgs') +except OSError: + pass + +git_stats_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') +for publisher in git_stats_publisher.keys(): + for stat_path in [ + 'activities', + 'activity_files', + 'organisation_files', + 'file_size', + 'invalidxml', + 'nonstandardroots', + 'publisher_unique_identifiers', + ('validation', lambda x: x == 'fail', ''), + ('versions', lambda x: True, ''), + ]: + git_stats_publisher.folder = 'current/aggregated-publisher/' + publisher + '/' + make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) diff --git a/requirements.txt b/requirements.txt index c73730f709..8899e91ed6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ Flask==0.12.3 Frozen-Flask==0.15 +GitPython==3.1.14 Jinja2==2.11.3 lxml python-dateutil==2.8.1 From 46645f6b6f6ebf91a96375f779c8cca07f09742f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 8 May 2021 22:34:14 +0100 Subject: [PATCH 147/375] Build on push --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8512b4afd6..ad28498f55 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -1,5 +1,6 @@ name: Build on: + push: schedule: - cron: '0 8 * * *' jobs: From e83b49999c4754a362867ad08c4791dd980d6ce8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 8 May 2021 23:33:03 +0100 Subject: [PATCH 148/375] Get the two remaining plots working --- plots.py | 94 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/plots.py b/plots.py index f60af23238..9b59ff4ba0 100644 --- a/plots.py +++ b/plots.py @@ -12,11 +12,12 @@ """ import datetime -import numpy as np # noqa: F401 import os +from os.path import exists import csv +from collections import defaultdict import json -# import common +import common import data from vars import expected_versions from git import Repo @@ -27,20 +28,23 @@ class GitJSONDir(data.JSONDir): - def __init__(self, folder, repo_path): + def __init__(self, folder, repo_path, lookup=None): self.repo_path = repo_path self.folder = folder self.repo = Repo(repo_path) - metadata_file = 'metadata.json' - commits = self.repo.git.log( - '--format=%h', - '--', - metadata_file).split('\n') - dates = [] - for commit in commits: - content = self.get_contents(commit, metadata_file) - dates.append(json.loads(content)['updated_at']) - self.lookup = list(zip(commits, dates)) + if lookup: + self.lookup = lookup + else: + metadata_file = 'metadata.json' + commits = self.repo.git.log( + '--format=%h', + '--', + metadata_file).split('\n') + dates = [] + for commit in commits: + content = self.get_contents(commit, metadata_file) + dates.append(json.loads(content)['updated_at']) + self.lookup = list(zip(commits, dates)) def get_contents(self, commit, path): blob = self.repo.git.ls_tree(commit, path).split()[2] @@ -53,41 +57,42 @@ def __getitem__(self, key): if key == 'failed_downloads': with open('data/downloads/history.csv') as f: return dict((row[0], row[1]) for row in csv.reader(f)) + if key == 'publisher_types': + out = defaultdict(lambda: 
defaultdict(int)) + gitaggregate_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') + for publisher, publisher_data in gitaggregate_publisher.items(): + if publisher in data.ckan_publishers: + organization_type = common.get_publisher_type(publisher)['name'] + for datestring, count in publisher_data['activities'].items(): + out[datestring][organization_type] += 1 + else: + print('Publisher not matched:', publisher) + return out + elif key == 'activities_per_publisher_type': + out = defaultdict(lambda: defaultdict(int)) + gitaggregate_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') + for publisher, publisher_data in gitaggregate_publisher.items(): + if publisher in data.ckan_publishers: + organization_type = common.get_publisher_type(publisher)['name'] + for datestring, count in publisher_data['activities'].items(): + out[datestring][organization_type] += count + else: + print('Publisher not matched:', publisher) + return out + elif exists(self.repo_path + self.folder + key): + return GitJSONDir(self.folder + key + '/', self.repo_path, self.lookup) else: items = {} for commit, date in self.lookup: try: - item = self.get_contents( - commit, - self.folder + key + '.json') + items[date] = json.loads( + self.get_contents( + commit, + self.folder + key + '.json')) except IndexError: continue - items[date] = json.loads(item) return items - # if key == 'publisher_types': - # out = defaultdict(lambda: defaultdict(int)) - # for publisher, publisher_data in gitaggregate_publisher.items(): - # if publisher in data.ckan_publishers: - # organization_type = common.get_publisher_type(publisher)['name'] - # for datestring, count in publisher_data['activities'].items(): - # out[datestring][organization_type] += 1 - # else: - # print('Publisher not matched:', publisher) - # return out - # elif key == 'activities_per_publisher_type': - # out = defaultdict(lambda: defaultdict(int)) - # for publisher, publisher_data in 
gitaggregate_publisher.items(): - # if publisher in data.ckan_publishers: - # organization_type = common.get_publisher_type(publisher)['name'] - # for datestring, count in publisher_data['activities'].items(): - # out[datestring][organization_type] += count - # else: - # print('Publisher not matched:', publisher) - # return out - # else: - # return super().__getitem__(key) - gitjsondir = GitJSONDir('current/aggregated/', './stats-calculated/') @@ -193,8 +198,8 @@ def make_plot(stat_path, git_stats, img_prefix=''): ('publishers_per_version', lambda x: x in expected_versions, '_expected'), ('publishers_per_version', lambda x: x not in expected_versions, '_other'), ('file_size_bins', lambda x: True, ''), - # ('publisher_types', lambda x: True, ''), - # ('activities_per_publisher_type', lambda x: True, '') + ('publisher_types', lambda x: True, ''), + ('activities_per_publisher_type', lambda x: True, '') ]: make_plot(stat_path, gitjsondir) @@ -206,8 +211,8 @@ def make_plot(stat_path, git_stats, img_prefix=''): except OSError: pass -git_stats_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') -for publisher in git_stats_publisher.keys(): +git_stats_publishers = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') +for publisher, git_stats_publisher in git_stats_publishers.items(): for stat_path in [ 'activities', 'activity_files', @@ -219,5 +224,4 @@ def make_plot(stat_path, git_stats, img_prefix=''): ('validation', lambda x: x == 'fail', ''), ('versions', lambda x: True, ''), ]: - git_stats_publisher.folder = 'current/aggregated-publisher/' + publisher + '/' make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) From 3cde7b9ec05ddc24f556f108ba8459ead413d552 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 9 May 2021 09:43:11 +0100 Subject: [PATCH 149/375] Only build on push to main --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index ad28498f55..30c9bf729b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,8 @@ name: Build on: push: + branches: + - main schedule: - cron: '0 8 * * *' jobs: From 7c665e89003ca7d53d8e0b25eeb63c1f94ccff15 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 9 May 2021 09:46:01 +0100 Subject: [PATCH 150/375] Remove path_exists check --- templates/_partials/boxes.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 099faa4a79..021c0dc868 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -12,7 +12,6 @@

      {% endif %}
      - {% if path_exists(image) %}

      {{description|safe}}

      @@ -20,7 +19,6 @@

      {% endif %}

      - {% endif %} {% endmacro %} From 2d66fd4548a599e62e1bf6407fb2a8e3974f6194 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 9 May 2021 10:31:51 +0100 Subject: [PATCH 151/375] Rename "Analytics" --- .github/workflows/build.yml | 2 +- CONTRIBUTING.rst | 2 +- README.rst | 34 +++++++++++++-------------- make_html.py | 2 +- static/CNAME | 2 +- templates/base.html | 8 +++---- templates/coverage.html | 6 ++--- templates/data_quality.html | 4 ++-- templates/faq.html | 39 +++++++++++++++---------------- templates/headlines.html | 2 +- templates/index.html | 6 ++--- templates/publishing_stats.html | 4 +--- templates/timeliness.html | 6 ++--- templates/timeliness_timelag.html | 2 +- text.py | 12 +++++----- timeliness.py | 4 ++-- 16 files changed, 66 insertions(+), 69 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 30c9bf729b..e02ede11f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,7 +47,7 @@ jobs: GIT_CONFIG_NAME: Code for IATI bot GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com GITHUB_TOKEN: ${{ secrets.TOKEN }} - REPOSITORY_NAME: codeforIATI/dashboard-public + REPOSITORY_NAME: codeforIATI/analytics-public BRANCH: gh-pages FOLDER: out CLEAN: true diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6dc74c032a..85105c13ea 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,4 +1,4 @@ -If you would like to contribute to the IATI Dashboard project, you can.... +If you would like to contribute to the Code for IATI Analytics project, you can.... * Send us feedback about your user experience. Contact details at: https://github.com/codeforIATI * Report bugs diff --git a/README.rst b/README.rst index ad56383ca3..d21d9272ac 100644 --- a/README.rst +++ b/README.rst @@ -1,27 +1,27 @@ -Code for IATI Dashboard +Code for IATI Analytics ======================= .. 
image:: https://img.shields.io/badge/license-GPLv3-blue.svg - :target: https://github.com/codeforIATI/dashboard/blob/main/LICENSE.md + :target: https://github.com/codeforIATI/analytics/blob/main/LICENSE.md -.. image:: https://coveralls.io/repos/github/codeforIATI/dashboard/badge.svg?branch=main - :target: https://coveralls.io/github/codeforIATI/dashboard?branch=main +.. image:: https://coveralls.io/repos/github/codeforIATI/analytics/badge.svg?branch=main + :target: https://coveralls.io/github/codeforIATI/analytics?branch=main Introduction ------------ -The Code for IATI dashboard displays key numbers and graphs about the data on the `IATI registry `__. +Code for IATI Analytics displays key numbers and graphs about the data on the `IATI registry `__. -See the Dashboard in action at https://dashboard.codeforiati.org +See Analytics in action at https://analytics.codeforiati.org -The Dashboard is in beta, all contents/urls/machine readable downloads are subject to change. +Analytics is in beta. All contents / URLs / machine-readable downloads are subject to change. -This repository is the code for the Dashboard frontend. Stats are generated from the Registry by stats code in a separate repository - https://github.com/codeforIATI/IATI-Stats +This repository is the code for Analytics frontend. Statistics are generated from the Registry by code in a separate repository - https://github.com/codeforIATI/IATI-Stats Technology Overview ^^^^^^^^^^^^^^^^^^^ -The dashboard mostly in Python, with some helper Bash scripts. +Analytics is mostly written in Python, with some helper Bash scripts. Python scripts: @@ -31,8 +31,8 @@ Python scripts: Bash helper scripts: -* The main source of data for the Dashboard is stats generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. 
(or see calculating your own stats section below) -* The Dashboard also uses various other data from online sources (including GitHub). These can be fetched using ``fetch_data.sh``. +* The main source of data are the statistics generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. (or see calculating your own stats section below) +* Analytics also uses various other data from online sources (including GitHub). These can be fetched using ``fetch_data.sh``. * ``git.sh`` runs all the above commands, see Usage below. Installation @@ -56,8 +56,8 @@ To install: .. code-block:: bash ## Get the code - git clone https://github.com/codeforIATI/dashboard.git - cd dashboard + git clone https://github.com/codeforIATI/analytics.git + cd analytics ## Set up a virtual environment (recommended) # Create a virtual environment @@ -76,7 +76,7 @@ To install: Usage ^^^^^ -The following steps are performed routinely on our `deployed Dashboard `__. +The following steps are performed routinely: .. code-block:: bash @@ -105,10 +105,10 @@ For development, you can use the live Flask development server, instead of Froze Using the live development server is highly recommended, because it displays full bracktraces for 500 errors, whereas frozen flask does not. -Calculating your own stats for the dashboard -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Calculating your own statistics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/codeforIATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. 
+Analytics requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/codeforIATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the stats-calculated/current directory with the out directory produced by running the `loop, aggregate and invert commands individually `__. diff --git a/make_html.py b/make_html.py index 4b37a7fe1e..9f4dce81b5 100644 --- a/make_html.py +++ b/make_html.py @@ -112,7 +112,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') -app.jinja_env.globals['stats_url'] = 'https://dashboard-stats.codeforiati.org' +app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' app.jinja_env.globals['path_exists'] = path_exists app.jinja_env.globals['sorted'] = sorted diff --git a/static/CNAME b/static/CNAME index adca41cf97..f5afd29b75 100644 --- a/static/CNAME +++ b/static/CNAME @@ -1 +1 @@ -dashboard.codeforiati.org +analytics.codeforiati.org diff --git a/templates/base.html b/templates/base.html index 49af920243..945e4b96a9 100644 --- a/templates/base.html +++ b/templates/base.html @@ -90,7 +90,7 @@ } - {% block title %}Code for IATI Dashboard - {{page_titles[page]}}{% endblock %} + {% block title %}Code for IATI Analytics - {{page_titles[page]}}{% endblock %} {% block extrahead %}{% endblock %} @@ -146,15 +146,15 
@@

      {{page_titles[page]}}

      diff --git a/templates/coverage.html b/templates/coverage.html index cc3e91ddfa..fa821b8de3 100644 --- a/templates/coverage.html +++ b/templates/coverage.html @@ -10,9 +10,9 @@

      The coverage stats page is being rebuilt.

      Previously, the IATI technical team followed a manual process of contacting IATI publishers and requesting total operational spend values via email. - Results were stored in a public Google sheet. Data was collected for the years 2014 and 2015 and the values were used to calculate a coverage-adjusted score in the Summary Statistics page. + Results were stored in a public Google sheet. Data was collected for the years 2014 and 2015 and the values were used to calculate a coverage-adjusted score in the Summary Statistics page. As this was a very time consuming exercise (compounded by the increase in the number of publishers) coverage data collection has not been done since 2016, resulting in the coverage-adjusted scores in the summary statistics being out of date for the majority of publishers. - As a result, in September 2018 the technical team took the decision to remove the coverage-adjusted values from the Dashboard. + As a result, in September 2018 the technical team took the decision to remove the coverage-adjusted values.


      @@ -25,7 +25,7 @@

      The coverage stats page is being rebuilt.

      In future, we plan to use an automated system that will calculate the coverage statistic for an organisation by using its IATI organisation file. We are exploring a methodology that uses the <total-expenditure> element in a publisher’s organisation file, and compares this to the total spend for a given year in their activity files. - Completing this work requires an update to the dashboard methodology. This is in our job queue but we don’t have an estimated time for completing the work. + Completing this work requires an update to the methodology. This is in our job queue but we don’t have an estimated time for completing the work. Please look out for updates via IATI Discuss and our website.


      diff --git a/templates/data_quality.html b/templates/data_quality.html index 5d2ff0f6d1..81be6c67a3 100644 --- a/templates/data_quality.html +++ b/templates/data_quality.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      This section of the Code for IATI Dashboard tracks published IATI data around a series of quality measures.

      -

      These are specifically technical measures - schema validation, download issues, XML formatting, etc - that can be easily rectified by publishers. No attempt is made to evaluate the actual content of the data - the dashboard should be used for sense checking and technical fixes.

      +

      This section tracks published IATI data around a series of quality measures.

      +

      These are specifically technical measures - schema validation, download issues, XML formatting, etc - that can be easily rectified by publishers. No attempt is made to evaluate the actual content of the data - it should be used for sense checking and technical fixes.

      {% endblock about %} {% block publisher_page_hash %}#h_dataquality{% endblock %} diff --git a/templates/faq.html b/templates/faq.html index bc0b022a73..f0c095cad5 100644 --- a/templates/faq.html +++ b/templates/faq.html @@ -4,53 +4,52 @@
      -

      Who provides the Code for IATI Dashboard?

      -

      This fork of the original IATI Dashboard is maintained by Code for IATI volunteers. The Dashboard grew out of various projects to track and provide metrics for how IATI data is published and how the IATI Standard is used in practice.

      +

      Who provides Code for IATI Analytics?

      +

      This fork of the original IATI Dashboard is maintained by Code for IATI volunteers. The IATI Dashboard grew out of various projects to track and provide metrics for how IATI data is published and how the IATI Standard is used in practice.

      -

      When is the Dashboard updated?

      -

      The Dashboard is generated daily.

      +

      When is this site updated updated?

      +

      Code for IATI Analytics is generated daily.

      This is a two step process:

        -
      1. The Dashboard first downloads the data that is linked to from the IATI Registry - see “data downloaded” date time at the footer of each page.
      2. -
      3. From this dataset, the relevant statistics are calculated, and the Dashboard is then updated - see “generated” date/time at footer.
      4. +
      5. First the data that is linked to from the IATI Registry is downloaded - see the “data downloaded” timestamp in the footer of each page.
      6. +
      7. From this dataset, the relevant statistics are calculated, and this site is then updated - see the “generated” timestamp in footer.
      -

      Why is there a difference between the data download and Dashboard generation time?

      -

      The data is downloaded, and then there is a period of computing time to generate the statistics that inform the Dashboard.

      -

      Usually, there is a small period of time between the two. However, we do track this as sometimes things break - and the site fails to regenerate. If you spot something, please also let us know by creating an issue on github.

      +

      Why is there a difference between the data download and generation time?

      +

      The data is downloaded, and then there is a period of computing time to generate the statistics.

      +

      Usually, there is a short period of time between the two. However, we do track this as sometimes the site may fail to regenerate. If you spot something, please also let us know by creating an issue on github.

      Does a graph going up or down mean something good?

      -

      No. There may be a number of reasons why a certain graph or number rises or falls.

      -

      In some cases, a fall in the graph may well be down to the fact that the Dashboard script failed to download the relevant data. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

      -

      Therefore, we stress to treat the graphs, numbers and statistics on the Dashboard with caution, context and consideration.

      -

      Should you think something is really wrong, please let us know by creating an issue on github.

      +

      No. There may be a number of reasons why a certain graph or number rises or falls. It is not necessarily anything to do with the setup of the IATI publisher.

      +

      Therefore, we stress to treat the graphs, numbers and statistics on this site with caution, context and consideration.

      +

      Should you think something is really wrong, please let us know by creating an issue on github.

      -

      Can I build my own version of this Dashboard?

      -

      Yes - the source code is all open source. This copy is itself a fork (plus various bugfixes and upgrades of the Dashboard maintained by the IATI Secretariat:

      +

      Can I build my own version of this site?

      +

      Yes - the source code is all open source:

      1. https://github.com/codeforIATI/IATI-Stats.
      2. -
      3. https://github.com/codeforIATI/IATI-Dashboard.
      4. +
      5. https://github.com/codeforIATI/analytics.

      We advise you to check through the technical specifications.

      How can I suggest a new function?

      -

      Ideally, we’d suggest to check through the list of issues we have logged in our Github repositories.

      +

      Ideally, we’d suggest to check through the list of issues we have logged in our Github repositories.

      We’ve published/updated our IATI data, but the numbers haven’t updated.

      We’d suggest two initial checks:

      1. Is the data registered on the IATI Registry?
      2. -
      3. Has the dashboard updated since you published (check the times at the footer of this page)?
      4. +
      5. Has this site updated since you published (check the timestamps in the footer of this page)?
      -

      Should you still believe that data is missing from the Dashboard, we’d love to hear from you - please contact us by creating an issue on github.

      +

      Should you still believe that data is missing, we’d love to hear from you - please contact us by creating an issue on github.

      I want to get to the raw data of a publisher - how can I do that?

      Two ways:

        -
      1. Visit the IATI Registry and access the relevant links to the XML files - these links are often found on the relevant Dashboard page.
      2. +
      3. Visit the IATI Registry and access the relevant links to the XML files - these links are often found on the relevant page.
      4. Try a query via the IATI Datastore Classic
      diff --git a/templates/headlines.html b/templates/headlines.html index f592d48ad6..efc23b667f 100644 --- a/templates/headlines.html +++ b/templates/headlines.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      This section of the Code for IATI dashboard tracks published IATI data and provides headline information.

      +

      This section tracks published IATI data and provides headline information.

      This is useful to gain a quick insight into published IATI data - and can also be used as a starting point to explore specific focal points around IATI.

      {% endblock about %} diff --git a/templates/index.html b/templates/index.html index c245885258..1203c349bc 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,8 +1,8 @@ {% extends 'section_index.html' %} {% block about %} -

      The Dashboard is generated daily, with the last update based on data in the IATI Registry on {{datetime_data_homepage}}. For more information, see the FAQ.

      -

      Many of the tables in the Dashboard are sortable by clicking on the headers.

      -

      Many of the datasets in the Dashboard are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      +

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data_homepage }}. For more information, see the FAQ.

      +

      Many of the tables are sortable by clicking on the headers.

      +

      Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      {% endblock %} {% block lhs_column %} {{ super () }} diff --git a/templates/publishing_stats.html b/templates/publishing_stats.html index c8f2aa1a67..26b676cb26 100644 --- a/templates/publishing_stats.html +++ b/templates/publishing_stats.html @@ -1,8 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      The statistics on IATI data that are calculated routinely and displayed on this dashboard are now a central part of the service that the Technical Team provides to publishers and users of IATI data alike: for publishers to better understand how to improve their data; for users to assess which data is likely to meet their particular needs; and for the technical team itself to prioritise its commitments to data quality.

      - -

      It is also important that a consistent approach is developed in the presentation of statistics in IATI's annual report and the Dashboard's own calculations, as well as when IATI is asked to contribute to monitoring reports. This section of the Dashboard is focused to do just that.

      +

      The statistics on IATI data that are calculated routinely and displayed here allow publishers to better understand how to improve their data, and allow users to assess which data is likely to meet their particular needs.

      Each of the pages in this section contains:

      diff --git a/templates/timeliness.html b/templates/timeliness.html index c8c69c29fc..141570872b 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -16,7 +16,7 @@

      Table of Frequency assessments

      This table seeks to measure how often a publisher updates their data. There is no simple answer as to what constitutes an update. Can any trivial edit be regarded as an update? As transactions are the most numerous element reported in IATI the adopted methodology assumes that a publisher has updated their data if a transaction with a more recent transaction date than previously published is detected across the publisher's entire portfolio.

      -

      The table records the number of days in each of the last twelve months on which the most recently recorded transaction date was observed by the Dashboard to have changed. (The current month is also displayed for informational purposes, but is not used in the assessment.)

      +

      The table records the number of days in each of the last twelve months on which the most recently recorded transaction date was observed to have changed. (The current month is also displayed for informational purposes, but is not used in the assessment.)

      Key:
      Red flag: Publisher currently publishing future transaction dates.
      @@ -99,7 +99,7 @@

      Narrative

      For the purposes of these statistics an update is assumed to have taken place on any given day when the most recently recorded transaction date across a publisher's entire portfolio is observed to have changed to a more recent date. This approach has been adopted as transactions are the most numerous and most frequently updated elements in the reporting of activities.

      -

      The table of statistics records the number of days in each of the last twelve calendar months (the current month is also displayed for informational purposes, but is not used in the assessment) on which the most recently recorded transaction date was observed by the Dashboard to have changed. The Dashboard maintains a statistical snapshot of each day, which allows for this data to be recalculated using historical recordings.

      +

      The table of statistics records the number of days in each of the last twelve calendar months (the current month is also displayed for informational purposes, but is not used in the assessment) on which the most recently recorded transaction date was observed to have changed. A daily statistical snapshot is maintained, which allows for this data to be recalculated using historical recordings.

      @@ -218,7 +218,7 @@

      Exceptions

      Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

      -

      In our further work on this dashboard page, we plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

      +

      We plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

      diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 0cf0d0f490..6ff2ba1353 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -160,7 +160,7 @@

      Exceptions

      Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

      -

      In our further work on this dashboard page, we plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

      +

      We plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

      diff --git a/text.py b/text.py index 891c846649..6fec91cb5b 100644 --- a/text.py +++ b/text.py @@ -10,7 +10,7 @@ } page_titles = { - 'index': 'Code for IATI Dashboard', + 'index': 'Code for IATI Analytics', 'headlines': 'Headlines', 'data_quality': 'Data Quality', 'exploring_data': 'Exploring Data', @@ -43,7 +43,7 @@ } page_leads = { - 'index': 'The Code for IATI Dashboard provides statistics, charts and metrics on data accessed via the IATI Registry.', + 'index': 'Statistics, charts and metrics on data accessed via the IATI Registry.', 'data_quality': 'What needs fixing in IATI data?', 'exploring_data': 'Which parts of the IATI Standard are being used?', 'headlines': 'What is the size, scope and scale of published IATI data?', @@ -68,13 +68,13 @@ 'publishers': 'Publishers represent organisation accounts in the IATI Registry.', 'files': 'Files are logged on the IATI Registry by publishers The files contain data on activities and the organisation. A publisher may have multiple files, which can contain multiple activities.', 'activities': 'Activities are the individual projects found in files. A file can contain one or many activities, from a publisher.', - 'download': 'Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher\'s server returns an error message (e.g. because there is no file at that location). Since the dashboard\'s download occurs routinely, some files that failed to download may now be available.', - 'xml': 'This page shows files that are not well-formed XML, accessed via the IATI Registry. ', + 'download': 'Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher\'s server returns an error message (e.g. because there is no file at that location). 
Some files that failed to download when last checked may since have become available.', + 'xml': 'This page shows files that are not well-formed XML, accessed via the IATI Registry.', 'validation': 'IATI files are validated against the appropriate IATI Schema. Note: this is based on the version declared in the file and whether it\'s an activity/organisation file.', 'versions': 'Files are reported against a specific version of the IATI Standard, using the version attribute in the iati-activities element.', 'rulesets': 'The IATI Ruleset describe constraints, conditions and logics that are additional to the IATI schema. Note: Currently, on the IATI Standard Ruleset is tested.', - 'licenses': 'Licences are applied to files by publishers on the IATI Registry, and explain how data can be used. ', - 'organisation': 'Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain. ', + 'licenses': 'Licences are applied to files by publishers on the IATI Registry, and explain how data can be used.', + 'organisation': 'Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain.', 'identifiers': 'Checking the iati-identifier element for duplicate values per publisher. 
A duplicate appears if a publisher creates two activities with the same identifier.', 'reporting_orgs': 'Checking the reporting-org identifiers in IATI data.', 'elements': 'Checking usage of all elements within the IATI Standard.', diff --git a/timeliness.py b/timeliness.py index 1d102b8274..681d55a031 100644 --- a/timeliness.py +++ b/timeliness.py @@ -1,4 +1,4 @@ -# This file converts raw timeliness data into the associated Dashboard assessments +# This file converts raw timeliness data into the associated Publishing Statistics assessments from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches import datetime @@ -86,7 +86,7 @@ def publisher_frequency(): first_published_string = sorted(agg['most_recent_transaction_date'])[0] first_published = parse_iso_date(first_published_string) - # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment + # Implement the assessment logic on https://analytics.codeforiati.org/timeliness.html#h_assesment if first_published >= previous_month_days[2]: # This is a publisher of less than 3 months From 926143231bc538e297423032d94c7f9a9f265cbb Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 13:47:58 +0100 Subject: [PATCH 152/375] Add validate links back in --- templates/publisher.html | 2 +- templates/validation.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/publisher.html b/templates/publisher.html index a0e3b54f4d..4f02ecfbf7 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -167,7 +167,7 @@

      Data Quality

      {% if publisher in ckan and dataset_name in ckan[publisher] %} - + validator {% endif %}
      diff --git a/templates/validation.html b/templates/validation.html index 787a4a4928..4a662c5ad6 100644 --- a/templates/validation.html +++ b/templates/validation.html @@ -45,7 +45,7 @@

      List of files that fail validation, grouped by publisher

      {% if publisher in ckan and dataset_name in ckan[publisher] %} - + validator {% endif %}
      From 2c4ab9d7f9ece376beaede273522d650ebabfd1d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 16:03:14 +0100 Subject: [PATCH 153/375] Remove path_exists (it was wrong anyway) --- make_html.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/make_html.py b/make_html.py index 9f4dce81b5..4d15e9e980 100644 --- a/make_html.py +++ b/make_html.py @@ -40,10 +40,6 @@ app = Flask(__name__) -def path_exists(path): - return os.path.exists(os.path.join('out', path)) - - def dictinvert(d): inv = defaultdict(list) for k, v in d.items(): @@ -114,7 +110,6 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' -app.jinja_env.globals['path_exists'] = path_exists app.jinja_env.globals['sorted'] = sorted app.jinja_env.globals['enumerate'] = enumerate app.jinja_env.globals['top_titles'] = text.top_titles From 7b6814ee54618ba119ab6c5b3c30b264f1fdaf8d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 22:08:09 +0100 Subject: [PATCH 154/375] =?UTF-8?q?Don=E2=80=99t=20build=20on=20push?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e02ede11f1..b7de803a11 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,8 +1,5 @@ name: Build on: - push: - branches: - - main schedule: - cron: '0 8 * * *' jobs: From df7bd028818e6d5efe9099cc6fbcdd043fc43f6f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 22:08:21 +0100 Subject: [PATCH 155/375] Add github action badge to README --- README.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff 
--git a/README.rst b/README.rst index d21d9272ac..793c29febf 100644 --- a/README.rst +++ b/README.rst @@ -1,11 +1,12 @@ Code for IATI Analytics ======================= -.. image:: https://img.shields.io/badge/license-GPLv3-blue.svg - :target: https://github.com/codeforIATI/analytics/blob/main/LICENSE.md - +.. image:: https://github.com/codeforIATI/analytics/actions/workflows/ci.yml/badge.svg?branch=main + :target: https://github.com/codeforIATI/analytics/actions/workflows/ci.yml .. image:: https://coveralls.io/repos/github/codeforIATI/analytics/badge.svg?branch=main :target: https://coveralls.io/github/codeforIATI/analytics?branch=main +.. image:: https://img.shields.io/badge/license-GPLv3-blue.svg + :target: https://github.com/codeforIATI/analytics/blob/main/LICENSE.md Introduction ------------ From aa88556f8f82b2551c676109bb017c74035e4e49 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 22:08:32 +0100 Subject: [PATCH 156/375] Remove cruft --- git.sh | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100755 git.sh diff --git a/git.sh b/git.sh deleted file mode 100755 index 06f77926c3..0000000000 --- a/git.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Starting Dashboard generation" - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Removing 'out' directory and creating a new one" -rm -rf out -mkdir out - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Fetching data" -./fetch_data.sh &> fetch_data.log || exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running plots.py" -python plots.py || exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_csv.py" -python make_csv.py || exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" -python speakers_kit.py || exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_html.py" -python make_html.py || exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" -cp -r static/* out/ - -echo 
"LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" -rsync -a --delete web web.bk -mv web web.1 -mv out web -rm -rf web.1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Dashboard generation complete" From 44f4eafad5bfe9faaa19f0f1bf4f874e0507c9f6 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 10 May 2021 23:44:05 +0100 Subject: [PATCH 157/375] No .xml here, so no need to remove it --- templates/download.html | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/download.html b/templates/download.html index c10eff53cf..45a92752b3 100644 --- a/templates/download.html +++ b/templates/download.html @@ -29,10 +29,10 @@ {% for code, publisher, dataset, url in current_stats.download_errors %} - {{publisher}} - {{dataset[:-4]}} - {{url|url_to_filename}} - {{code}} + {{ publisher }} + {{ dataset }} + {{ url|url_to_filename }} + {{ code }} {% endfor %} From 1387c1f756c187e2d62e0f30a08cca3a97371fd6 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 11 May 2021 23:56:25 +0100 Subject: [PATCH 158/375] Fix bug with links on files page --- templates/files.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/files.html b/templates/files.html index 665e080b5d..95ed207e4c 100644 --- a/templates/files.html +++ b/templates/files.html @@ -46,7 +46,7 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - {{publisher_name[package[:-4]|dataset_to_publisher]}} + {{publisher_name[package[:-4]|dataset_to_publisher]}} {{package[:-4]}} {{activities}} {{current_stats.inverted_file.organisations.get(package)}} From 3170dc59a1ae2afbee0abf1f80d51e19c2d1e033 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 00:00:58 +0100 Subject: [PATCH 159/375] Comment out currently unused code --- data.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/data.py b/data.py index 5b8998da5a..bb88f73053 100644 --- a/data.py +++ b/data.py @@ -67,21 +67,21 @@ def __getitem__(self, key): with open(os.path.join(self.folder, key + '.json')) as fp: data = json.load(fp, object_pairs_hook=OrderedDict) - # Deal with publishers who had an old registry ID - # If this publisher had at least one old ID in the past - if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): - # Perform the merging - # Look over the set of changed registry IDs - for previous_id, current_id in get_registry_id_matches().items(): - folder = self.folder - previous_path = os.path.join(folder.replace(current_id, previous_id), key + '.json') - # If this publisher has had an old ID and there is data for it - if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): - # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher - with open(previous_path) as old_fp: - old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) - deep_merge(data, old_pub_data) - # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? 
+ # # Deal with publishers who had an old registry ID + # # If this publisher had at least one old ID in the past + # if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): + # # Perform the merging + # # Look over the set of changed registry IDs + # for previous_id, current_id in get_registry_id_matches().items(): + # folder = self.folder + # previous_path = os.path.join(folder.replace(current_id, previous_id), key + '.json') + # # If this publisher has had an old ID and there is data for it + # if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): + # # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher + # with open(previous_path) as old_fp: + # old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) + # deep_merge(data, old_pub_data) + # # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? else: # No value found as either a folder or json file raise KeyError(key) From f409e8d608a655d095027825a626d431eef75f06 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 00:01:11 +0100 Subject: [PATCH 160/375] =?UTF-8?q?Don=E2=80=99t=20output=20CSV=20files=20?= =?UTF-8?q?here?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don’t think these CSV files are used, so we can skip this. 
--- plots.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/plots.py b/plots.py index 9b59ff4ba0..5b96caa576 100644 --- a/plots.py +++ b/plots.py @@ -164,20 +164,20 @@ def make_plot(stat_path, git_stats, img_prefix=''): fig.savefig('out/{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) == tuple else ''), dpi=dpi) plt.close('all') - fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) - with open(fn, 'w') as fp: - writer = csv.writer(fp) - if keys: - sorted_keys = sorted(list(keys)) - writer.writerow(['date'] + sorted_keys) - else: - writer.writerow(['date', 'value']) - for k, v in items: - if keys: - writer.writerow([k] + [v.get(key) for key in sorted_keys]) - else: - writer.writerow([k, v]) - del writer + # fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) + # with open(fn, 'w') as fp: + # writer = csv.writer(fp) + # if keys: + # sorted_keys = sorted(list(keys)) + # writer.writerow(['date'] + sorted_keys) + # else: + # writer.writerow(['date', 'value']) + # for k, v in items: + # if keys: + # writer.writerow([k] + [v.get(key) for key in sorted_keys]) + # else: + # writer.writerow([k, v]) + # del writer for stat_path in [ From 421d80d164007af78c61554f79e01bd78c77df22 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 02:55:16 +0100 Subject: [PATCH 161/375] Add historical data to plots --- .github/workflows/build.yml | 4 ++- data.py | 42 +++++++++++++++++++++++ plots.py | 66 ++++++++++--------------------------- 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b7de803a11..67b4ede6c5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,9 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt - name: Fetch data - run: ./fetch_data.sh + run: | + ./fetch_data.sh + git clone --quiet https://github.com/codeforIATI/IATI-Stats-historical 
data/historical - name: Fetch stats run: | mkdir out diff --git a/data.py b/data.py index bb88f73053..ffdcb3220a 100644 --- a/data.py +++ b/data.py @@ -5,6 +5,7 @@ import re import csv from decimal import Decimal +from git import Repo # Modified from: @@ -118,6 +119,47 @@ def get_publisher_name(self): return None +class GitJSONDir(JSONDir): + def __init__(self, folder, repo_path, lookup=None): + self.repo_path = repo_path + self.folder = folder + self.repo = Repo(repo_path) + if lookup: + self.lookup = lookup + else: + metadata_file = 'metadata.json' + commits = self.repo.git.log( + '--format=%h', + '--', + metadata_file).split('\n') + dates = [] + for commit in commits: + content = self.get_contents(commit, metadata_file) + dates.append(json.loads(content)['updated_at']) + self.lookup = list(zip(commits, dates)) + + def get_contents(self, commit, path): + blob = self.repo.git.ls_tree(commit, path).split()[2] + return self.repo.git.cat_file('blob', blob) + + def keys(self): + return [x for x in os.listdir(self.repo_path + self.folder)] + + def __getitem__(self, key): + if os.path.exists(self.repo_path + self.folder + key): + return GitJSONDir(self.folder + key + '/', self.repo_path, self.lookup) + items = {} + for commit, date in self.lookup: + try: + items[date] = json.loads( + self.get_contents( + commit, + self.folder + key + '.json')) + except IndexError: + continue + return items + + def get_publisher_stats(publisher, stats_type='aggregated'): """Function to obtain current data for a given publisher. 
Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher diff --git a/plots.py b/plots.py index 5b96caa576..e79316d0f1 100644 --- a/plots.py +++ b/plots.py @@ -13,53 +13,25 @@ """ import datetime import os -from os.path import exists import csv from collections import defaultdict -import json import common import data from vars import expected_versions -from git import Repo import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 -class GitJSONDir(data.JSONDir): - def __init__(self, folder, repo_path, lookup=None): - self.repo_path = repo_path - self.folder = folder - self.repo = Repo(repo_path) - if lookup: - self.lookup = lookup - else: - metadata_file = 'metadata.json' - commits = self.repo.git.log( - '--format=%h', - '--', - metadata_file).split('\n') - dates = [] - for commit in commits: - content = self.get_contents(commit, metadata_file) - dates.append(json.loads(content)['updated_at']) - self.lookup = list(zip(commits, dates)) - - def get_contents(self, commit, path): - blob = self.repo.git.ls_tree(commit, path).split()[2] - return self.repo.git.cat_file('blob', blob) - - def keys(self): - return [x for x in os.listdir(self.repo_path + self.folder)] - +class AugmentedGitJSONDir(data.GitJSONDir): def __getitem__(self, key): if key == 'failed_downloads': with open('data/downloads/history.csv') as f: return dict((row[0], row[1]) for row in csv.reader(f)) if key == 'publisher_types': out = defaultdict(lambda: defaultdict(int)) - gitaggregate_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') + gitaggregate_publisher = data.GitJSONDir('current/aggregated-publisher/', './stats-calculated/') for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] @@ -70,7 +42,7 @@ def __getitem__(self, key): return out elif key == 
'activities_per_publisher_type': out = defaultdict(lambda: defaultdict(int)) - gitaggregate_publisher = GitJSONDir('current/aggregated-publisher/', './stats-calculated/') + gitaggregate_publisher = data.GitJSONDir('current/aggregated-publisher/', './stats-calculated/') for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] @@ -79,31 +51,24 @@ def __getitem__(self, key): else: print('Publisher not matched:', publisher) return out - elif exists(self.repo_path + self.folder + key): - return GitJSONDir(self.folder + key + '/', self.repo_path, self.lookup) else: - items = {} - for commit, date in self.lookup: - try: - items[date] = json.loads( - self.get_contents( - commit, - self.folder + key + '.json')) - except IndexError: - continue - return items + return super().__getitem__(key) -gitjsondir = GitJSONDir('current/aggregated/', './stats-calculated/') +historical = data.JSONDir( + './data/historical/gitaggregate-dated') +gitjsondir = AugmentedGitJSONDir( + 'current/aggregated/', './stats-calculated/') -def make_plot(stat_path, git_stats, img_prefix=''): +def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): if type(stat_path) == tuple: stat_name = stat_path[0] else: stat_name = stat_path - stat_dict = git_stats.get(stat_name) + stat_dict = git_stats.get(stat_name, {}) + stat_dict = {**historical_stats.get(stat_name, {}), **stat_dict} if not stat_dict: return items = sorted(stat_dict.items()) @@ -201,7 +166,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): ('publisher_types', lambda x: True, ''), ('activities_per_publisher_type', lambda x: True, '') ]: - make_plot(stat_path, gitjsondir) + make_plot(stat_path, historical, gitjsondir) # Delete gitjsondir variable to save memory del gitjsondir @@ -211,7 +176,10 @@ def make_plot(stat_path, git_stats, img_prefix=''): except OSError: pass -git_stats_publishers = 
GitJSONDir('current/aggregated-publisher/', './stats-calculated/') +historical_publishers = data.JSONDir( + './data/historical/gitaggregate-publisher-dated') +git_stats_publishers = AugmentedGitJSONDir( + 'current/aggregated-publisher/', './stats-calculated/') for publisher, git_stats_publisher in git_stats_publishers.items(): for stat_path in [ 'activities', @@ -224,4 +192,4 @@ def make_plot(stat_path, git_stats, img_prefix=''): ('validation', lambda x: x == 'fail', ''), ('versions', lambda x: True, ''), ]: - make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) + make_plot(stat_path, historical_publishers[publisher], git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) From 2af899c686091d1ba148defebf5d43e2b6dd4526 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 09:11:57 +0100 Subject: [PATCH 162/375] Add dev deployment --- .github/workflows/build.yml | 25 ++++++++++++++++++++++++- CNAME | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 CNAME diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 67b4ede6c5..dffd4f201d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,8 @@ name: Build on: + push: + branches: + - dev schedule: - cron: '0 8 * * *' jobs: @@ -40,7 +43,11 @@ jobs: run: python make_html.py - name: Add static files to output run: cp -r static/* out - - name: Deploy 🚀 + - name: Set CNAME (production) + if: github.ref == 'refs/heads/main' + run: echo "analytics.codeforiati.org" > out/CNAME + - name: Deploy (production) 🚀 + if: github.ref == 'refs/heads/main' uses: JamesIves/github-pages-deploy-action@3.7.1 with: GIT_CONFIG_NAME: Code for IATI bot @@ -52,3 +59,19 @@ jobs: CLEAN: true SILENT: true SINGLE_COMMIT: true + - name: Set CNAME (dev) + if: github.ref == 'refs/heads/dev' + run: echo "dev.analytics.codeforiati.org" > out/CNAME + - name: Deploy (dev) 🚀 + if: github.ref == 'refs/heads/dev' + uses: 
JamesIves/github-pages-deploy-action@3.7.1 + with: + GIT_CONFIG_NAME: Code for IATI bot + GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com + GITHUB_TOKEN: ${{ secrets.TOKEN }} + REPOSITORY_NAME: codeforIATI/analytics-dev + BRANCH: gh-pages + FOLDER: out + CLEAN: true + SILENT: true + SINGLE_COMMIT: true diff --git a/CNAME b/CNAME new file mode 100644 index 0000000000..f5afd29b75 --- /dev/null +++ b/CNAME @@ -0,0 +1 @@ +analytics.codeforiati.org From 2da573c23459f1bb5b619263c6b8065a0adc67ba Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 09:23:43 +0100 Subject: [PATCH 163/375] Remove this (added in error) --- CNAME | 1 - 1 file changed, 1 deletion(-) delete mode 100644 CNAME diff --git a/CNAME b/CNAME deleted file mode 100644 index f5afd29b75..0000000000 --- a/CNAME +++ /dev/null @@ -1 +0,0 @@ -analytics.codeforiati.org From 1cd85bab30b08e1eed73f41aa5011c2a256e87f5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 12 May 2021 11:22:52 +0100 Subject: [PATCH 164/375] =?UTF-8?q?Don=E2=80=99t=20assume=20publisher=20ex?= =?UTF-8?q?ists=20in=20historical=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- plots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plots.py b/plots.py index e79316d0f1..9bf651fe93 100644 --- a/plots.py +++ b/plots.py @@ -192,4 +192,4 @@ def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): ('validation', lambda x: x == 'fail', ''), ('versions', lambda x: True, ''), ]: - make_plot(stat_path, historical_publishers[publisher], git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) + make_plot(stat_path, historical_publishers.get(publisher, {}), git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) From 2835074b2e882c275136a4430f8822766a54ecf0 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 00:31:00 +0100 Subject: [PATCH 165/375] Go back to using gitaggregate --- data.py | 72 
+++++++++---------------------------------- make_csv.py | 42 ++++++++++++------------- make_html.py | 6 ++-- plots.py | 79 +++++++++++++++++++++++------------------------- requirements.txt | 1 - text.py | 3 +- 6 files changed, 78 insertions(+), 125 deletions(-) diff --git a/data.py b/data.py index ffdcb3220a..5b8998da5a 100644 --- a/data.py +++ b/data.py @@ -5,7 +5,6 @@ import re import csv from decimal import Decimal -from git import Repo # Modified from: @@ -68,21 +67,21 @@ def __getitem__(self, key): with open(os.path.join(self.folder, key + '.json')) as fp: data = json.load(fp, object_pairs_hook=OrderedDict) - # # Deal with publishers who had an old registry ID - # # If this publisher had at least one old ID in the past - # if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): - # # Perform the merging - # # Look over the set of changed registry IDs - # for previous_id, current_id in get_registry_id_matches().items(): - # folder = self.folder - # previous_path = os.path.join(folder.replace(current_id, previous_id), key + '.json') - # # If this publisher has had an old ID and there is data for it - # if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): - # # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher - # with open(previous_path) as old_fp: - # old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) - # deep_merge(data, old_pub_data) - # # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? 
+ # Deal with publishers who had an old registry ID + # If this publisher had at least one old ID in the past + if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): + # Perform the merging + # Look over the set of changed registry IDs + for previous_id, current_id in get_registry_id_matches().items(): + folder = self.folder + previous_path = os.path.join(folder.replace(current_id, previous_id), key + '.json') + # If this publisher has had an old ID and there is data for it + if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): + # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher + with open(previous_path) as old_fp: + old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) + deep_merge(data, old_pub_data) + # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? else: # No value found as either a folder or json file raise KeyError(key) @@ -119,47 +118,6 @@ def get_publisher_name(self): return None -class GitJSONDir(JSONDir): - def __init__(self, folder, repo_path, lookup=None): - self.repo_path = repo_path - self.folder = folder - self.repo = Repo(repo_path) - if lookup: - self.lookup = lookup - else: - metadata_file = 'metadata.json' - commits = self.repo.git.log( - '--format=%h', - '--', - metadata_file).split('\n') - dates = [] - for commit in commits: - content = self.get_contents(commit, metadata_file) - dates.append(json.loads(content)['updated_at']) - self.lookup = list(zip(commits, dates)) - - def get_contents(self, commit, path): - blob = self.repo.git.ls_tree(commit, path).split()[2] - return self.repo.git.cat_file('blob', blob) - - def keys(self): - return [x for x in os.listdir(self.repo_path + self.folder)] - - def __getitem__(self, key): - if os.path.exists(self.repo_path + self.folder + key): - return GitJSONDir(self.folder + key 
+ '/', self.repo_path, self.lookup) - items = {} - for commit, date in self.lookup: - try: - items[date] = json.loads( - self.get_contents( - commit, - self.folder + key + '.json')) - except IndexError: - continue - return items - - def get_publisher_stats(publisher, stats_type='aggregated'): """Function to obtain current data for a given publisher. Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher diff --git a/make_csv.py b/make_csv.py index 2874b33026..4b02669f25 100644 --- a/make_csv.py +++ b/make_csv.py @@ -4,8 +4,8 @@ import os import data -# # Timeliness CSV files (frequency and timelag) -# import timeliness +# Timeliness CSV files (frequency and timelag) +import timeliness # Forward-looking CSV file import forwardlooking @@ -16,8 +16,8 @@ # # Coverage CSV file # import coverage -# # Summary Stats CSV file -# import summary_stats +# Summary Stats CSV file +import summary_stats # Humanitarian CSV file import humanitarian @@ -90,17 +90,17 @@ def publisher_dicts(): for publisher_json in data.ckan_publishers.values(): writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) -# previous_months = timeliness.previous_months_reversed +previous_months = timeliness.previous_months_reversed -# for fname, f, assessment_label in ( -# ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), -# ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') -# ): -# with open(os.path.join('out', fname), 'w') as fp: -# writer = csv.writer(fp) -# writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) -# for publisher, publisher_title, per_month, assessment in f(): -# writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) +for fname, f, assessment_label in ( + ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), + ('timeliness_timelag.csv', 
timeliness.publisher_timelag_sorted, 'Time lag') +): + with open(os.path.join('out', fname), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) + for publisher, publisher_title, per_month, assessment in f(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) @@ -147,13 +147,13 @@ def publisher_dicts(): # row['spend_data_error_reported_flag'] # ]) -# with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: -# writer = csv.writer(fp) -# # Add column headers -# writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) -# for row in summary_stats.table(): -# # Write each row -# writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) +with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) + for row in summary_stats.table(): + # Write each row + writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) with open(os.path.join('out', 'humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) diff --git a/make_html.py b/make_html.py index 4d15e9e980..b5e25b6c32 100644 --- a/make_html.py +++ b/make_html.py @@ -149,15 +149,15 @@ def get_codelist_values(codelist_values_for_element): 'exploring_data', 'publishers', 'publishing_stats', - # 'timeliness', - # 'timeliness_timelag', + 'timeliness', + 'timeliness_timelag', 'forwardlooking', 'comprehensiveness', 'comprehensiveness_core', 'comprehensiveness_financials', 'comprehensiveness_valueadded', # 'coverage', - # 'summary_stats', + 
'summary_stats', 'humanitarian', 'files', 'activities', diff --git a/plots.py b/plots.py index 9bf651fe93..f5ac9203e6 100644 --- a/plots.py +++ b/plots.py @@ -12,26 +12,30 @@ """ import datetime -import os -import csv +import numpy as np # noqa: F401 from collections import defaultdict +import os # noqa: F401 +import csv import common import data -from vars import expected_versions +from vars import expected_versions # noqa: F401 import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 +# Import failed_downloads as a global +failed_downloads = csv.reader(open('data/downloads/history.csv')) + +gitaggregate_publisher = data.JSONDir('./stats-calculated/gitaggregate-publisher-dated') + -class AugmentedGitJSONDir(data.GitJSONDir): +class AugmentedJSONDir(data.JSONDir): def __getitem__(self, key): if key == 'failed_downloads': - with open('data/downloads/history.csv') as f: - return dict((row[0], row[1]) for row in csv.reader(f)) - if key == 'publisher_types': + return dict((row[0], row[1]) for row in failed_downloads) + elif key == 'publisher_types': out = defaultdict(lambda: defaultdict(int)) - gitaggregate_publisher = data.GitJSONDir('current/aggregated-publisher/', './stats-calculated/') for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] @@ -42,7 +46,6 @@ def __getitem__(self, key): return out elif key == 'activities_per_publisher_type': out = defaultdict(lambda: defaultdict(int)) - gitaggregate_publisher = data.GitJSONDir('current/aggregated-publisher/', './stats-calculated/') for publisher, publisher_data in gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] @@ -52,23 +55,16 @@ def __getitem__(self, key): print('Publisher not matched:', publisher) return out else: - return 
super().__getitem__(key) + return super(AugmentedJSONDir, self).__getitem__(key) -historical = data.JSONDir( - './data/historical/gitaggregate-dated') -gitjsondir = AugmentedGitJSONDir( - 'current/aggregated/', './stats-calculated/') - - -def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): +def make_plot(stat_path, git_stats, img_prefix=''): if type(stat_path) == tuple: stat_name = stat_path[0] else: stat_name = stat_path - stat_dict = git_stats.get(stat_name, {}) - stat_dict = {**historical_stats.get(stat_name, {}), **stat_dict} + stat_dict = git_stats.get(stat_name) if not stat_dict: return items = sorted(stat_dict.items()) @@ -129,21 +125,25 @@ def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): fig.savefig('out/{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) == tuple else ''), dpi=dpi) plt.close('all') - # fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) - # with open(fn, 'w') as fp: - # writer = csv.writer(fp) - # if keys: - # sorted_keys = sorted(list(keys)) - # writer.writerow(['date'] + sorted_keys) - # else: - # writer.writerow(['date', 'value']) - # for k, v in items: - # if keys: - # writer.writerow([k] + [v.get(key) for key in sorted_keys]) - # else: - # writer.writerow([k, v]) - # del writer + fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) + with open(fn, 'w') as fp: + writer = csv.writer(fp) + if keys: + sorted_keys = sorted(list(keys)) + writer.writerow(['date'] + sorted_keys) + else: + writer.writerow(['date', 'value']) + for k, v in items: + if keys: + writer.writerow([k] + [v.get(key) for key in sorted_keys]) + else: + writer.writerow([k, v]) + del writer + +# Load aggregated stats for all data +print("All data") +git_stats = AugmentedJSONDir('./stats-calculated/gitaggregate-dated') for stat_path in [ 'activities', @@ -166,20 +166,17 @@ def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): ('publisher_types', lambda x: True, ''), 
('activities_per_publisher_type', lambda x: True, '') ]: - make_plot(stat_path, historical, gitjsondir) + make_plot(stat_path, git_stats) -# Delete gitjsondir variable to save memory -del gitjsondir +# Delete git_stats variable to save memory +del git_stats try: os.makedirs('out/publisher_imgs') except OSError: pass -historical_publishers = data.JSONDir( - './data/historical/gitaggregate-publisher-dated') -git_stats_publishers = AugmentedGitJSONDir( - 'current/aggregated-publisher/', './stats-calculated/') +git_stats_publishers = AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') for publisher, git_stats_publisher in git_stats_publishers.items(): for stat_path in [ 'activities', @@ -192,4 +189,4 @@ def make_plot(stat_path, historical_stats, git_stats, img_prefix=''): ('validation', lambda x: x == 'fail', ''), ('versions', lambda x: True, ''), ]: - make_plot(stat_path, historical_publishers.get(publisher, {}), git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) + make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) diff --git a/requirements.txt b/requirements.txt index 8899e91ed6..c73730f709 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ Flask==0.12.3 Frozen-Flask==0.15 -GitPython==3.1.14 Jinja2==2.11.3 lxml python-dateutil==2.8.1 diff --git a/text.py b/text.py index 6fec91cb5b..bee12c7be5 100644 --- a/text.py +++ b/text.py @@ -37,7 +37,6 @@ 'timeliness': 'Timeliness', 'forwardlooking': 'Forward Looking', 'comprehensiveness': 'Comprehensiveness', - 'coverage': 'Coverage', 'summary_stats': 'Summary Statistics', 'humanitarian': 'Humanitarian Reporting' } @@ -98,5 +97,5 @@ 'headlines': ['publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], 'exploring_data': ['elements', 'codelists', 'booleans', 'dates'], - 'publishing_stats': ['forwardlooking', 'comprehensiveness', 'humanitarian'] + 
'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'summary_stats', 'humanitarian'] } From 198277dce5fbcdb77c252d9a977990562f145572 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 00:38:33 +0100 Subject: [PATCH 166/375] =?UTF-8?q?Revert=20"Comment=20this=20out=20for=20?= =?UTF-8?q?now=20(since=20it=20won=E2=80=99t=20work)"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8e856ed81ffc13f7c3a217d235bf4f19cc064e14. --- templates/_partials/boxes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 021c0dc868..c08ae6b237 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -8,7 +8,7 @@

      {{title}}

      {% if json %} - + (J) {% endif %}
      From 30cf2b71ea59d47e9621a60954de5eda5a7ae82c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 08:24:14 +0100 Subject: [PATCH 167/375] Revert "Wordbreak all table cells" This reverts commit 127698a391a8d98e063bdf094b634a6b41e1f636. --- templates/base.html | 2 +- templates/elements.html | 2 +- templates/files.html | 4 ++-- templates/publisher.html | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/templates/base.html b/templates/base.html index 945e4b96a9..9a1d8428d2 100644 --- a/templates/base.html +++ b/templates/base.html @@ -23,7 +23,7 @@ .panel-heading { clear: both; } - td, .break { + .break { word-break:break-all; } .panel-body h4, .panel-body h5 { diff --git a/templates/elements.html b/templates/elements.html index dd7587e8e1..3a930c520a 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -37,7 +37,7 @@ {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} - {{element}} + {{element}} {{publishers|length}} {{current_stats.aggregated.elements[element]}} {{current_stats.aggregated.elements_total[element]}} diff --git a/templates/files.html b/templates/files.html index 95ed207e4c..7f8b6d0be1 100644 --- a/templates/files.html +++ b/templates/files.html @@ -46,8 +46,8 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - {{publisher_name[package[:-4]|dataset_to_publisher]}} - {{package[:-4]}} + {{publisher_name[package[:-4]|dataset_to_publisher]}} + {{package[:-4]}} {{activities}} {{current_stats.inverted_file.organisations.get(package)}} {{current_stats.inverted_file.file_size.get(package)|filesizeformat}} diff --git a/templates/publisher.html b/templates/publisher.html index 4f02ecfbf7..df5a43e934 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -341,7 +341,7 @@

      Files

      {% for package, activities in publisher_inverted.activities.items() %} - {{package[:-4]}} + {{package[:-4]}} {{activities}} {{current_stats.inverted_file.organisations.get(package)}} {{current_stats.inverted_file.file_size.get(package)|filesizeformat}} From cd063c5eb5948650d8f6b01a780b5597878bacf4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 12:59:26 +0100 Subject: [PATCH 168/375] Fix has_future_transactions --- make_csv.py | 2 +- templates/timeliness.html | 3 +-- templates/timeliness_timelag.html | 3 +-- timeliness.py | 24 +++++++++++------------- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/make_csv.py b/make_csv.py index 4b02669f25..32f817cf9c 100644 --- a/make_csv.py +++ b/make_csv.py @@ -99,7 +99,7 @@ def publisher_dicts(): with open(os.path.join('out', fname), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + [assessment_label]) - for publisher, publisher_title, per_month, assessment in f(): + for publisher, publisher_title, per_month, assessment, hft in f(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: diff --git a/templates/timeliness.html b/templates/timeliness.html index 141570872b..a1cf9a5ba9 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -43,14 +43,13 @@

      Table of Frequency assessments

      - {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_frequency_sorted() %} + {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_frequency_sorted() %} {{publisher_title}} {% for month in timeliness.previous_months_reversed %} {{per_month[month] or 0}} {% endfor %} {{per_month[timeliness.this_month] or 0}} - {% set hft=publisher|has_future_transactions %} {% if hft %}*{% endif %} {{assessment}} diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 6ff2ba1353..8d17e97707 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -42,14 +42,13 @@

      Table of Time lag assessments

      - {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_timelag_sorted() %} + {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} {{publisher_title}} {% for month in timeliness.previous_months_reversed %} {{per_month[month] or 0}} {% endfor %} {{per_month[timeliness.this_month] or 0}} - {% set hft=publisher|has_future_transactions %} {% if hft %}*{% endif %} {{assessment}} diff --git a/timeliness.py b/timeliness.py index 681d55a031..5c208f42ff 100644 --- a/timeliness.py +++ b/timeliness.py @@ -86,6 +86,8 @@ def publisher_frequency(): first_published_string = sorted(agg['most_recent_transaction_date'])[0] first_published = parse_iso_date(first_published_string) + hft = has_future_transactions(publisher) + # Implement the assessment logic on https://analytics.codeforiati.org/timeliness.html#h_assesment if first_published >= previous_month_days[2]: @@ -126,7 +128,7 @@ def publisher_frequency(): # If the publisher is in the list of current publishers, return a generator object if publisher in publisher_name: - yield publisher, publisher_name.get(publisher), updates_per_month, frequency + yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft def frequency_index(frequency): @@ -148,7 +150,7 @@ def publisher_frequency_dict(): def publisher_frequency_summary(): - return Counter(frequency for _, _, _, frequency in publisher_frequency()) + return Counter(frequency for _, _, _, frequency, _ in publisher_frequency()) def timelag_index(timelag): @@ -156,14 +158,14 @@ def timelag_index(timelag): def publisher_timelag_sorted(): - publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] + publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], 
has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] return sorted( publisher_timelags, key=lambda tup: (timelag_index(tup[3]), tup[1])) def publisher_timelag_dict(): - publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] + publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] data = {} for v in publisher_timelags: data[v[0]] = v @@ -171,10 +173,7 @@ def publisher_timelag_dict(): def publisher_timelag_summary(): - return Counter(timelag for _, _, _, timelag in publisher_timelag_sorted()) - - -blacklist_publisher = JSONDir('./stats-blacklist/gitaggregate-publisher-dated') + return Counter(timelag for _, _, _, timelag, _ in publisher_timelag_sorted()) def has_future_transactions(publisher): @@ -182,9 +181,9 @@ def has_future_transactions(publisher): returns 0, 1 or 2 Returns 2 if the most recent data for a publisher has future transactions. Returns 1 if the publisher has ever had future transactions. - Returns -1 if the publisher has not been checked for some reason. Returns 0 otherwise. 
""" + today = datetime.date.today() publisher_stats = get_publisher_stats(publisher) if 'transaction_dates' in publisher_stats: for transaction_type, transaction_counts in publisher_stats['transaction_dates'].items(): @@ -192,11 +191,10 @@ def has_future_transactions(publisher): transaction_date = parse_iso_date(transaction_date_string) if transaction_date and transaction_date > datetime.date.today(): return 2 - if publisher not in blacklist_publisher: - return -1 - today = datetime.date.today() + + gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated').get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) - for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items(): + for date, activity_blacklist in gitaggregate_publisher.get('activities_with_future_transactions', {}).items(): if parse_iso_date(date) >= mindate and activity_blacklist: return 1 return 0 From 05b2d6f3fbbc14b136536eef83b0f38ddea4fef8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 13:52:19 +0100 Subject: [PATCH 169/375] Add "first published" to timeliness HTML & CSV --- make_csv.py | 20 +++++++++++--------- templates/timeliness.html | 23 +++++++++++++++++++++-- templates/timeliness_base.html | 10 ---------- templates/timeliness_timelag.html | 17 +++++++++++++++++ timeliness.py | 13 ++++++++++--- 5 files changed, 59 insertions(+), 24 deletions(-) diff --git a/make_csv.py b/make_csv.py index 32f817cf9c..857687eb8f 100644 --- a/make_csv.py +++ b/make_csv.py @@ -92,15 +92,17 @@ def publisher_dicts(): previous_months = timeliness.previous_months_reversed -for fname, f, assessment_label in ( - ('timeliness_frequency.csv', timeliness.publisher_frequency_sorted, 'Frequency'), - ('timeliness_timelag.csv', timeliness.publisher_timelag_sorted, 'Time lag') -): - with open(os.path.join('out', fname), 'w') as fp: - writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry 
Id'] + previous_months + [assessment_label]) - for publisher, publisher_title, per_month, assessment, hft in f(): - writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) +with open(os.path.join('out', 'timeliness_frequency.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) + for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) + +with open(os.path.join('out', 'timeliness_timelag.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) + for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) diff --git a/templates/timeliness.html b/templates/timeliness.html index a1cf9a5ba9..7a35740473 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -30,6 +30,7 @@

      Table of Frequency assessments

      Publisher Name + First published {{timeliness.this_year-1}} {{timeliness.this_year}} @@ -43,15 +44,16 @@

      Table of Frequency assessments

      - {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_frequency_sorted() %} + {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} {{publisher_title}} + {{ first_published_band }} {% for month in timeliness.previous_months_reversed %} {{per_month[month] or 0}} {% endfor %} {{per_month[timeliness.this_month] or 0}} {% if hft %}*{% endif %} - {{assessment}} + {{ assessment }} {% endfor %} @@ -269,3 +271,20 @@

      Pseudocode

      {% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { + 1: function(node, table, cellIndex) { + return $(node).attr('data-index'); + }, + 15: function(node, table, cellIndex) { + return $(node).attr('data-severity'); + }, + 16: function(node, table, cellIndex) { + return $(node).attr('data-index'); + } + } +} +{% endblock %} diff --git a/templates/timeliness_base.html b/templates/timeliness_base.html index 9ce1f3471a..5219f186e1 100644 --- a/templates/timeliness_base.html +++ b/templates/timeliness_base.html @@ -34,14 +34,4 @@ {% endblock %} {% endblock %} - - -{% block tablesorteroptions %} -{ - widgets: ['stickyHeaders'], - textExtraction: { 14: function(node,table,cellIndex) { return $(node).attr('data-severity'); }, - 15: function(node,table,cellIndex) { return $(node).attr('data-index'); } } -} -{% endblock %} {% block tablesortertarget %}table#main_table{% endblock %} - diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 8d17e97707..c03f44f5bb 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -202,3 +202,20 @@

      Pseudocode

      {% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { + 4: function(node, table, cellIndex) { + return $(node).attr('data-index'); + }, + 14: function(node, table, cellIndex) { + return $(node).attr('data-severity'); + }, + 15: function(node, table, cellIndex) { + return $(node).attr('data-index'); + } + } +} +{% endblock %} diff --git a/timeliness.py b/timeliness.py index 5c208f42ff..f174be6497 100644 --- a/timeliness.py +++ b/timeliness.py @@ -92,16 +92,18 @@ def publisher_frequency(): if first_published >= previous_month_days[2]: # This is a publisher of less than 3 months - # if True in [ x in updates_per_month for x in previous_months[:3] ]: + first_published_band = 'Less than 3 months ago' frequency = 'Annual' elif first_published >= previous_month_days[5]: # This is a publisher of less than 6 months + first_published_band = 'Less than 6 months ago' if all([x in updates_per_month for x in previous_months[:3]]): frequency = 'Monthly' else: frequency = 'Annual' elif first_published >= previous_month_days[11]: # This is a publisher of less than 12 months + first_published_band = 'Less than 1 year ago' if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4: frequency = 'Monthly' elif any([x in updates_per_month for x in previous_months[:3]]) and any([x in updates_per_month for x in previous_months[3:6]]): @@ -109,6 +111,7 @@ def publisher_frequency(): else: frequency = 'Annual' else: + first_published_band = 'More than 1 year ago' # This is a publisher of 1 year or more if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and ([x in updates_per_month for x in previous_months[:2]].count(True) >= 1): # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. 
@@ -128,7 +131,7 @@ def publisher_frequency(): # If the publisher is in the list of current publishers, return a generator object if publisher in publisher_name: - yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft + yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft, first_published_band def frequency_index(frequency): @@ -150,13 +153,17 @@ def publisher_frequency_dict(): def publisher_frequency_summary(): - return Counter(frequency for _, _, _, frequency, _ in publisher_frequency()) + return Counter(frequency for _, _, _, frequency, _, _ in publisher_frequency()) def timelag_index(timelag): return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) +def first_published_band_index(first_published_band): + return ['More than 1 year ago', 'Less than 1 year ago', 'Less than 6 months ago', 'Less than 3 months ago'].index(first_published_band) + + def publisher_timelag_sorted(): publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] return sorted( From db17e6e23be2bee6e8a83218660dda55f8e1bfb2 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 14:30:47 +0100 Subject: [PATCH 170/375] New CNAME for dev --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dffd4f201d..48ff45a2bc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -61,7 +61,7 @@ jobs: SINGLE_COMMIT: true - name: Set CNAME (dev) if: github.ref == 'refs/heads/dev' - run: echo "dev.analytics.codeforiati.org" > out/CNAME + run: echo "analytics-dev.codeforiati.org" > out/CNAME - name: Deploy (dev) 🚀 if: github.ref == 'refs/heads/dev' uses: JamesIves/github-pages-deploy-action@3.7.1 
From df6753fabfe020ad5262e65afff6345312e1d284 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 15:48:44 +0100 Subject: [PATCH 171/375] No need to set CNAME on production --- .github/workflows/build.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48ff45a2bc..0eb9229d23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,9 +43,6 @@ jobs: run: python make_html.py - name: Add static files to output run: cp -r static/* out - - name: Set CNAME (production) - if: github.ref == 'refs/heads/main' - run: echo "analytics.codeforiati.org" > out/CNAME - name: Deploy (production) 🚀 if: github.ref == 'refs/heads/main' uses: JamesIves/github-pages-deploy-action@3.7.1 From 90401cbbe8043092de1ef20246697ce8ebacb4cd Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 22:44:44 +0100 Subject: [PATCH 172/375] Make tables full-width --- templates/element.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/element.html b/templates/element.html index b24b3a51c1..15ed1314d9 100644 --- a/templates/element.html +++ b/templates/element.html @@ -20,7 +20,7 @@

      Publishers

      -
      +

      Publishing this element

      @@ -54,7 +54,7 @@

      Publishing this element

      -
      +

      Not publishing this element

      From 5c453b0389ef0031038ea07c7f8c047fb1483b13 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 22:55:03 +0100 Subject: [PATCH 173/375] Refactor --- timeliness.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/timeliness.py b/timeliness.py index f174be6497..49db8e89ab 100644 --- a/timeliness.py +++ b/timeliness.py @@ -45,7 +45,7 @@ def previous_months_generator(d): this_month = '{}-{}'.format(today.year, str(today.month).zfill(2)) # Store a list of the past 12 months from today -previous_month_days = [today - relativedelta(months=x) for x in range(1, 13)] +previous_month_days = [today - relativedelta(months=x) for x in range(13)] # Store the current month and year numbers this_month_number = datetime.datetime.today().month @@ -90,18 +90,18 @@ def publisher_frequency(): # Implement the assessment logic on https://analytics.codeforiati.org/timeliness.html#h_assesment - if first_published >= previous_month_days[2]: + if first_published >= previous_month_days[3]: # This is a publisher of less than 3 months first_published_band = 'Less than 3 months ago' frequency = 'Annual' - elif first_published >= previous_month_days[5]: + elif first_published >= previous_month_days[6]: # This is a publisher of less than 6 months first_published_band = 'Less than 6 months ago' if all([x in updates_per_month for x in previous_months[:3]]): frequency = 'Monthly' else: frequency = 'Annual' - elif first_published >= previous_month_days[11]: + elif first_published >= previous_month_days[12]: # This is a publisher of less than 12 months first_published_band = 'Less than 1 year ago' if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4: From 4512fb7eb15523db53785a413928bbf664a9b128 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 13 May 2021 22:58:59 +0100 Subject: [PATCH 174/375] Add more granular first published bands --- timeliness.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git 
a/timeliness.py b/timeliness.py index 49db8e89ab..2dc8c0e705 100644 --- a/timeliness.py +++ b/timeliness.py @@ -46,6 +46,7 @@ def previous_months_generator(d): # Store a list of the past 12 months from today previous_month_days = [today - relativedelta(months=x) for x in range(13)] +previous_year_days = [today - relativedelta(years=x) for x in range(6)] # Store the current month and year numbers this_month_number = datetime.datetime.today().month @@ -96,14 +97,14 @@ def publisher_frequency(): frequency = 'Annual' elif first_published >= previous_month_days[6]: # This is a publisher of less than 6 months - first_published_band = 'Less than 6 months ago' + first_published_band = '3-6 months ago' if all([x in updates_per_month for x in previous_months[:3]]): frequency = 'Monthly' else: frequency = 'Annual' elif first_published >= previous_month_days[12]: # This is a publisher of less than 12 months - first_published_band = 'Less than 1 year ago' + first_published_band = '6-12 months ago' if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4: frequency = 'Monthly' elif any([x in updates_per_month for x in previous_months[:3]]) and any([x in updates_per_month for x in previous_months[3:6]]): @@ -111,7 +112,12 @@ def publisher_frequency(): else: frequency = 'Annual' else: - first_published_band = 'More than 1 year ago' + if first_published >= previous_year_days[3]: + first_published_band = '1-3 years ago' + elif first_published >= previous_year_days[5]: + first_published_band = '3-5 years ago' + else: + first_published_band = 'More than 5 years ago' # This is a publisher of 1 year or more if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and ([x in updates_per_month for x in previous_months[:2]].count(True) >= 1): # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. 
@@ -161,7 +167,7 @@ def timelag_index(timelag): def first_published_band_index(first_published_band): - return ['More than 1 year ago', 'Less than 1 year ago', 'Less than 6 months ago', 'Less than 3 months ago'].index(first_published_band) + return ['More than 5 years ago', '3-5 years ago', '1-3 years ago', '6-12 months ago', '3-6 months ago', 'Less than 3 months ago'].index(first_published_band) def publisher_timelag_sorted(): From 10314a66c85c9964e36683bc3413595a660f3fbc Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 14 May 2021 10:55:40 +0100 Subject: [PATCH 175/375] Refactor timelag --- timeliness.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/timeliness.py b/timeliness.py index 2dc8c0e705..5cc1b70848 100644 --- a/timeliness.py +++ b/timeliness.py @@ -170,19 +170,18 @@ def first_published_band_index(first_published_band): return ['More than 5 years ago', '3-5 years ago', '1-3 years ago', '6-12 months ago', '3-6 months ago', 'Less than 3 months ago'].index(first_published_band) +def publisher_timelag(): + return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] + + def publisher_timelag_sorted(): - publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] return sorted( - publisher_timelags, + publisher_timelag(), key=lambda tup: (timelag_index(tup[3]), tup[1])) def publisher_timelag_dict(): - publisher_timelags = [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] - data = {} - for v in 
publisher_timelags: - data[v[0]] = v - return data + return {v[0]: v for v in publisher_timelag()} def publisher_timelag_summary(): From 98327a5e2bc37afec83b055728ddbf5403897ee5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 15 May 2021 17:47:27 +0100 Subject: [PATCH 176/375] Build on repository dispatch, instead of schedule --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0eb9229d23..5eb9159a51 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,8 +3,8 @@ on: push: branches: - dev - schedule: - - cron: '0 8 * * *' + repository_dispatch: + types: [build] jobs: build: runs-on: ubuntu-latest From ccb2033b85429a9f6957a8154b1a039fdd19108c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 10:11:21 +0100 Subject: [PATCH 177/375] Refactor --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5eb9159a51..4c3d190f76 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -4,7 +4,8 @@ on: branches: - dev repository_dispatch: - types: [build] + types: + - build jobs: build: runs-on: ubuntu-latest From 50d1792bba5e23f989827f6afb6e1d8146a68234 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 10:54:13 +0100 Subject: [PATCH 178/375] Point to C4I reference and codelist sites --- make_html.py | 6 +++--- templates/codelist.html | 2 +- templates/comprehensiveness_base.html | 14 +++++++------- templates/coverage.html | 4 ++-- templates/publisher.html | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/make_html.py b/make_html.py index b5e25b6c32..fee23ec112 100644 --- a/make_html.py +++ b/make_html.py @@ -71,11 +71,11 @@ def firstint(s): def xpath_to_url(path): path = path.strip('./') if path.startswith('iati-activity'): - return 
'http://iatistandard.org/activity-standard/iati-activities/' + path.split('@')[0] + return 'https://reference.codeforiati.org/activity-standard/iati-activities/' + path.split('@')[0] elif path.startswith('iati-organisation'): - return 'http://iatistandard.org/organisation-standard/iati-organisations/' + path.split('@')[0] + return 'https://reference.codeforiati.org/organisation-standard/iati-organisations/' + path.split('@')[0] else: - return 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] + return 'https://reference.codeforiati.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] def registration_agency(orgid): diff --git a/templates/codelist.html b/templates/codelist.html index f4cdb19c3b..5c7ec0a079 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -9,7 +9,7 @@

      Codelist values used for {{element}}

      Who uses {{codelist_mapping[major_version].get(element)}} in {{element}}?

      (for files published to version {{major_version}}.xx of the standard)

      (This page in JSON format)

      -

      Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

      +

      Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

      {% endblock %} {% block content %} diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index aabd01f9e5..c6c7a1ff1e 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -229,7 +229,7 @@

      Pseudocode

      Core Version iati-activities/@version must exist for the file the activity is in - iati-activities/@version must be on the Version codelist + iati-activities/@version must be on the Version codelist Core @@ -265,7 +265,7 @@

      Pseudocode

      Core Status Activity must contain an activity-status element - activity-status/@code must be on the ActivityStats codelist + activity-status/@code must be on the ActivityStats codelist Core @@ -304,7 +304,7 @@

      Pseudocode

      Financials Transaction - Currency All transactions must have value/@currency OR the activity must have a @default-currency attribute. All transactions must have value/@value-date - All currency values must be on the Currency codelist. value/@value-date must be valid xs:date. + All currency values must be on the Currency codelist. value/@value-date must be valid xs:date. Financials @@ -316,7 +316,7 @@

      Pseudocode

      Financials Budget Activity must have at least 1 budget element OR the activity must have the budget-not-provided attribute AND no budget elements - Each budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elements + Each budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elements {% endblock %} @@ -343,7 +343,7 @@

      Pseudocode

      Value added DAC Sectors At least 1 sector where @vocabulary is DAC or DAC-3 (1.xx) or 1 or 2 (2.xx), must be reported at activity level, unless there is no @vocabulary attribute, in which case DAC/1 is assumed. If there is no DAC sector element at activity level it must be reported within all transactions contained within that activity. - Must be valid code on the DAC or DAC-3 lists as appropriate. + Must be valid code on the DAC or DAC-3 lists as appropriate. Value added @@ -361,12 +361,12 @@

      Pseudocode

      Value added Aid Type Activity must contain either i) a value in default-aid-type/@code or ii) each transaction must contain a value in transaction/aid-type/@code. - Must be valid code on the AidType codelist. + Must be valid code on the AidType codelist. Value added Recipient Language - Only activities containing only one recipient-country are assessed. Activity must contain title and description elements containing at least one of the official languages spoken for the defined recipient-country/@code. + Only activities containing only one recipient-country are assessed. Activity must contain title and description elements containing at least one of the official languages spoken for the defined recipient-country/@code. diff --git a/templates/coverage.html b/templates/coverage.html index fa821b8de3..3fd56f256a 100644 --- a/templates/coverage.html +++ b/templates/coverage.html @@ -23,8 +23,8 @@

      The coverage stats page is being rebuilt.


      - In future, we plan to use an automated system that will calculate the coverage statistic for an organisation by using its IATI organisation file. - We are exploring a methodology that uses the <total-expenditure> element in a publisher’s organisation file, and compares this to the total spend for a given year in their activity files. + In future, we plan to use an automated system that will calculate the coverage statistic for an organisation by using its IATI organisation file. + We are exploring a methodology that uses the <total-expenditure> element in a publisher’s organisation file, and compares this to the total spend for a given year in their activity files. Completing this work requires an update to the methodology. This is in our job queue but we don’t have an estimated time for completing the work. Please look out for updates via IATI Discuss and our website.

      diff --git a/templates/publisher.html b/templates/publisher.html index df5a43e934..77ffaac761 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -291,7 +291,7 @@

      Financial

      Budgets

      -

      The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

      +

      The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

      From 477489c7203cfee10f2225c1582615c891a89dd0 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 11:00:28 +0100 Subject: [PATCH 179/375] element -> element/attribute --- templates/elements.html | 12 ++++++------ text.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/templates/elements.html b/templates/elements.html index 3a930c520a..21b234d678 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -6,8 +6,8 @@ @@ -18,11 +18,11 @@
      -

      Usage of an IATI Element by: +

      Usage of IATI elements/attributes by:

        -
      • Publisher: Total number of publishers that use this element (at least once)
      • -
      • Activities / Orgs: total number of iati-activity or iati-organisation (where applicable) that use this element.
      • -
      • Total - Count of total instances in which this element is used, across all publishers.
      • +
      • Publishers: Total number of publishers that use this element/attribute (at least once)
      • +
      • Activities/Orgs: total number of iati-activity or iati-organisation (where applicable) that use this element/attribute.
      • +
      • Total - Count of total instances in which this element/attribute is used, across all publishers.

      {% include '_partials/tablesorter_instructions.html' %} diff --git a/text.py b/text.py index bee12c7be5..62fd37b28e 100644 --- a/text.py +++ b/text.py @@ -76,7 +76,7 @@ 'organisation': 'Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain.', 'identifiers': 'Checking the iati-identifier element for duplicate values per publisher. A duplicate appears if a publisher creates two activities with the same identifier.', 'reporting_orgs': 'Checking the reporting-org identifiers in IATI data.', - 'elements': 'Checking usage of all elements within the IATI Standard.', + 'elements': 'Checking usage of all elements/attributes within the IATI Standard.', 'codelists': 'Checking usage of codelists across IATI data files.', 'booleans': 'Checking usage of booleans across IATI data files. Booleans are values that are either true or false. In XML true or 1 can be used for true and false or 0 can be used for false.', } From 655ca15ec2dbd706ac8fe9ee6fc5d79f32e60487 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 11:03:53 +0100 Subject: [PATCH 180/375] Fix link to CSV --- templates/elements.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/elements.html b/templates/elements.html index 21b234d678..9b987ae6ba 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -7,7 +7,7 @@
      From b521f050b7e26141d9d8c911f48e0878b069b466 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 11:04:47 +0100 Subject: [PATCH 181/375] Whitespace --- templates/_partials/boxes.html | 8 +- templates/activities.html | 12 +-- templates/base.html | 10 +- templates/codelist.html | 26 ++--- templates/codelists.html | 16 +-- templates/comprehensiveness_base.html | 10 +- templates/dates.html | 10 +- templates/download.html | 6 +- templates/element.html | 38 +++---- templates/elements.html | 16 +-- templates/files.html | 34 +++---- templates/forwardlooking.html | 10 +- templates/humanitarian.html | 10 +- templates/identifiers.html | 6 +- templates/index.html | 26 ++--- templates/license.html | 8 +- templates/licenses.html | 8 +- templates/organisation.html | 8 +- templates/publisher.html | 136 +++++++++++++------------- templates/publishers.html | 40 ++++---- templates/registration_agencies.html | 14 +-- templates/reporting_orgs.html | 8 +- templates/summary_stats.html | 10 +- templates/timeliness.html | 24 ++--- templates/timeliness_timelag.html | 26 ++--- templates/validation.html | 18 ++-- templates/versions.html | 18 ++-- templates/xml.html | 20 ++-- 28 files changed, 288 insertions(+), 288 deletions(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index c08ae6b237..80c8e443d0 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -4,16 +4,16 @@

      {% set title_id = title.replace(' ', '-').lower() %} - {{number}} - {{title}} + {{ number }} + {{ title }}

      {% if json %} - (J) + (J) {% endif %}
      -

      {{description|safe}}

      +

      {{ description|safe }}

      {% if legend %} diff --git a/templates/activities.html b/templates/activities.html index 76f2797d2d..55c19c10a0 100644 --- a/templates/activities.html +++ b/templates/activities.html @@ -2,13 +2,13 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Total activities', current_stats.aggregated.activities, 'activities.png', 'activities.json', + {{ boxes.box('Total activities', current_stats.aggregated.activities, 'activities.png', 'activities.json', description='Total count of activities across all publishers, over time. - Note: this includes activities with duplicate iati-identifier')}} - {{boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'unique_identifiers.png', 'unique_identifiers.json', + Note: this includes activities with duplicate iati-identifier') }} + {{ boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'unique_identifiers.png', 'unique_identifiers.json', description='Total count of unique activities across all publishers, over time - Note: this excludes counts of duplicate iati-identifier')}} - {{boxes.box('Activities by publisher type', '', 'activities_per_publisher_type.png', None, 'activities_per_publisher_type_legend.png', - description='Count of all activities, aggregated by publisher type, over time.')}} + Note: this excludes counts of duplicate iati-identifier') }} + {{ boxes.box('Activities by publisher type', '', 'activities_per_publisher_type.png', None, 'activities_per_publisher_type_legend.png', + description='Count of all activities, aggregated by publisher type, over time.') }}
      {% endblock %} diff --git a/templates/base.html b/templates/base.html index 9a1d8428d2..b9829462dd 100644 --- a/templates/base.html +++ b/templates/base.html @@ -90,7 +90,7 @@ } - {% block title %}Code for IATI Analytics - {{page_titles[page]}}{% endblock %} + {% block title %}Code for IATI Analytics - {{ page_titles[page] }}{% endblock %} {% block extrahead %}{% endblock %} @@ -117,7 +117,7 @@ @@ -128,9 +128,9 @@ {% block page_header_div %} {% endblock %} diff --git a/templates/codelist.html b/templates/codelist.html index 5c7ec0a079..2c37fbedaa 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -5,11 +5,11 @@ {% endblock %} {% block page_header %} -

      Codelist values used for {{element}}

      -

      Who uses {{codelist_mapping[major_version].get(element)}} in {{element}}?

      -

      (for files published to version {{major_version}}.xx of the standard)

      -

      (This page in JSON format)

      -

      Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

      +

      Codelist values used for {{ element }}

      +

      Who uses {{ codelist_mapping[major_version].get(element) }} in {{ element }}?

      +

      (for files published to version {{ major_version }}.xx of the standard)

      +

      (This page in JSON format)

      +

      Values should be on the {{ codelist_mapping[major_version].get(element) }} codelist.

      {% endblock %} {% block content %} @@ -20,9 +20,9 @@

      Codelist values used for {{element}}

      {% if elements|count > 1 %} Other elements/attributes on this codelist:
        - {% for el in elements%} + {% for el in elements %} {% if el in current_stats.inverted_publisher.codelist_values[major_version].keys() %} - {% if el!=element%}
      • {{el}}
      • {% endif %} + {% if el!=element %}
      • {{ el }}
      • {% endif %} {% endif %} {% endfor %}
      @@ -38,15 +38,15 @@

      Codelist values used for {{element}}

      On Codelist

      -

      Codes that are on the {{codelist_mapping[major_version].get(element)}} codelist.

      +

      Codes that are on the {{ codelist_mapping[major_version].get(element) }} codelist.

      {% for value, publishers in values.items() %} {% if value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - {% endif %} {% endfor %} @@ -61,15 +61,15 @@

      On Codelist

      Not On Codelist

      -

      Codes that are not on the {{codelist_mapping[major_version].get(element)}} codelist.

      +

      Codes that are not on the {{ codelist_mapping[major_version].get(element) }} codelist.

      ValuePublishers
      {{value}} - {{publishers|length}} +
      {{ value }} + {{ publishers|length }}
      {% for value, publishers in values.items() %} {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - {% endif %} {% endfor %} diff --git a/templates/codelists.html b/templates/codelists.html index c3096d9ea7..cf687bf23f 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -10,7 +10,7 @@ {% for major_version in MAJOR_VERSIONS %}
      -

      Codelists for version {{major_version}}.xx

      +

      Codelists for version {{ major_version }}.xx

      {% if major_version not in current_stats.inverted_publisher.codelist_values_by_major_version %} There are no publishers using {{ major_version }}.xx codelists yet. @@ -19,7 +19,7 @@

      Codelists for version {{major_version}}.xx

      - + @@ -27,15 +27,15 @@

      Codelists for version {{major_version}}.xx

      {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - - - - + + + + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - + {% endwith %} {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - + {% endwith %} {% endfor %} diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index c6c7a1ff1e..9c9e787d0c 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -33,7 +33,7 @@ {% block content %}
      - (This table as CSV) + (This table as CSV)

      {% block table_title %}Table of Comprehensiveness values{% endblock %}

      @@ -49,19 +49,19 @@

      {% block table_title %}Table of Comprehensiveness values

      {% for column_header in comprehensiveness.column_headers[tab] %} - + {% endfor %} {% for row in comprehensiveness.table() %} - + {% for column_slug in comprehensiveness.column_slugs[tab] %} + ({{ row[column_slug] }}) {% endif %} {% else %}-{% endif %} {% endfor %} diff --git a/templates/dates.html b/templates/dates.html index 0b90051933..ba81fd3adb 100644 --- a/templates/dates.html +++ b/templates/dates.html @@ -19,11 +19,11 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = get_publisher_stats(publisher) %} - - - - - + + + + + {% endfor %} diff --git a/templates/download.html b/templates/download.html index 45a92752b3..fd3a20a1ba 100644 --- a/templates/download.html +++ b/templates/download.html @@ -2,15 +2,15 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Files that fail to download', current_stats.download_errors|length, 'failed_downloads.png', - description='Count of files that fail to download, over time.')}} + {{ boxes.box('Files that fail to download', current_stats.download_errors|length, 'failed_downloads.png', + description='Count of files that fail to download, over time.') }}

      History of Download Errors

      -

      This table as JSON

      +

      This table as JSON

      diff --git a/templates/element.html b/templates/element.html index 15ed1314d9..5f5b923d2f 100644 --- a/templates/element.html +++ b/templates/element.html @@ -5,9 +5,9 @@ {% endblock %} {% block page_header %} -

      Usage of {{element}}

      -

      Who uses {{element}}?

      -

      Checking usage of {{element}} across publishers, files and activities.

      +

      Usage of {{ element }}

      +

      Who uses {{ element }}?

      +

      Checking usage of {{ element }} across publishers, files and activities.

      {% endblock %} {% block content %} @@ -15,7 +15,7 @@

      Usage of {{element}}

      Publishers

      -

      (In JSON format)

      +

      (In JSON format)

      @@ -37,18 +37,18 @@

      Publishing this element

      {% for publisher in sorted(publishers) %} - - {% with publisher_inverted=get_publisher_stats(publisher, 'inverted-file')%} - + + {% with publisher_inverted=get_publisher_stats(publisher, 'inverted-file') %} + {% endwith %} - - {% with publisher_stats=get_publisher_stats(publisher)%} - - + + {% with publisher_stats=get_publisher_stats(publisher) %} + + {% endwith %} - + - {% endfor%} + {% endfor %}
      ValuePublishers
      {{value}} - {{publishers|length}} +
      {{ value }} + {{ publishers|length }}
      Element/Attribute on codelist CodelistTotal Values Used (J)Total Values Used (J) Total Values on Codelist Values used, on Codelist Values used, not on Codelist
      {{element}}{{codelist_mapping[major_version].get(element)}}{{values|length}}{{codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length}}{{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{codes|length}}{{ codes|length }}{{codes|length}}{{ codes|length }}
      Publisher Name{{column_header}}{{ column_header }}
      {{row.publisher_title}}{{ row.publisher_title }}{% if column_slug in row %} - {{row[column_slug+'_valid']}} + {{ row[column_slug+'_valid'] }} {% if row[column_slug+'_valid'] != row[column_slug] %} - ({{row[column_slug]}})
      {{publisher_title}}{% if publisher_stats.date_extremes.min.overall %}{{publisher_stats.date_extremes.min.overall}}{% endif %}{% if publisher_stats.date_extremes.max.overall %}{{publisher_stats.date_extremes.max.overall}}{% endif %}{% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['end-actual'] %}{{publisher_stats.date_extremes.max.by_type['end-actual'] }}{% endif %}{{ publisher_title }}{% if publisher_stats.date_extremes.min.overall %}{{ publisher_stats.date_extremes.min.overall }}{% endif %}{% if publisher_stats.date_extremes.max.overall %}{{ publisher_stats.date_extremes.max.overall }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{ publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['end-actual'] %}{{ publisher_stats.date_extremes.max.by_type['end-actual'] }}{% endif %}
      {{publisher}}{% if 'elements' in publisher_inverted %}{{publisher_inverted.elements[element]|count}}{% endif %}{{ publisher }}{% if 'elements' in publisher_inverted %}{{ publisher_inverted.elements[element]|count }}{% endif %}{{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}}{{publisher_stats.elements[element]}}{{publisher_stats.elements_total[element]}}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ publisher_stats.elements[element] }}{{ publisher_stats.elements_total[element] }}{{current_stats.inverted_publisher.activities[publisher]}}{{ current_stats.inverted_publisher.activities[publisher] }}
      @@ -70,13 +70,13 @@

      Not publishing this element

      {% for publisher in current_stats.inverted_publisher.publishers %} {% if publisher not in publishers %} - {{publisher}} - {{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}} - {{current_stats.inverted_publisher.activities[publisher]}} - {{current_stats.inverted_publisher.organisations[publisher]}} + {{ publisher }} + {{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }} + {{ current_stats.inverted_publisher.activities[publisher] }} + {{ current_stats.inverted_publisher.organisations[publisher] }} {% endif %} - {% endfor%} + {% endfor %}
      @@ -97,7 +97,7 @@

      Files

      {% for publisher in current_stats.inverted_file_publisher %} {% with datasets = current_stats.inverted_file_publisher[publisher].elements.get(element) %} {% if datasets %} - {{publisher}} + {{ publisher }} {% for dataset in datasets.keys() %} {{ dataset[:-4] }} {% endfor %} diff --git a/templates/elements.html b/templates/elements.html index 9b987ae6ba..af532be3aa 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -30,19 +30,19 @@ - - - + + + {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} - - - - + + + + - {% endfor%} + {% endfor %}
      Element/AttributePublishers (J)Activities/Orgs with element (J)Total Instances of Element (J)Publishers (J)Activities/Orgs with element (J)Total Instances of Element (J)
      {{element}}{{publishers|length}}{{current_stats.aggregated.elements[element]}}{{current_stats.aggregated.elements_total[element]}}{{ element }}{{ publishers|length }}{{ current_stats.aggregated.elements[element] }}{{ current_stats.aggregated.elements_total[element] }}
      diff --git a/templates/files.html b/templates/files.html index 7f8b6d0be1..a01b06b9f5 100644 --- a/templates/files.html +++ b/templates/files.html @@ -2,17 +2,17 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Total activity files', current_stats.aggregated.activity_files, 'activity_files.png', 'activity_files.json', - description='Count of total number of activity files over time.')}} - {{boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'organisation_files.png', 'organisation_files.json', - description='Count of total number of organisation files, over time.')}} + {{ boxes.box('Total activity files', current_stats.aggregated.activity_files, 'activity_files.png', 'activity_files.json', + description='Count of total number of activity files over time.') }} + {{ boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'organisation_files.png', 'organisation_files.json', + description='Count of total number of organisation files, over time.') }}
      - {{boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'file_size.png', 'file_size.json')}} + {{ boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'file_size.png', 'file_size.json') }}
      - (J) + (J)

      File Sizes

      {% include '_partials/tablesorter_instructions.html' %} @@ -20,8 +20,8 @@

      File Sizes

      {% for bin,freq in sorted(current_stats.aggregated.file_size_bins.items(), key=firstint) %} - {{bin}} - {{freq}} + {{ bin }} + {{ freq }} {% endfor %} @@ -38,19 +38,19 @@

      File Sizes

      Publisher Package - Activities (J) - Organisations (J) - File Size (J) + Activities (J) + Organisations (J) + File Size (J) {% for package, activities in current_stats.inverted_file.activities.items() %} - {{publisher_name[package[:-4]|dataset_to_publisher]}} - {{package[:-4]}} - {{activities}} - {{current_stats.inverted_file.organisations.get(package)}} - {{current_stats.inverted_file.file_size.get(package)|filesizeformat}} + {{ publisher_name[package[:-4]|dataset_to_publisher] }} + {{ package[:-4] }} + {{ activities }} + {{ current_stats.inverted_file.organisations.get(package) }} + {{ current_stats.inverted_file.file_size.get(package)|filesizeformat }} {% endfor %} @@ -60,5 +60,5 @@

      File Sizes

      {% endblock %} {% block tablesorteroptions %} -{textExtraction:{4: function(node,table,cellIndex) { return $(node).attr('data-bytes'); }}} +{textExtraction:{4: function(node,table,cellIndex) { return $(node).attr('data-bytes'); } }} {% endblock %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index c730a45be0..3183319f09 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -22,7 +22,7 @@
      - (This table as CSV) + (This table as CSV)

      Activities with Forward Looking Budget Allocations

      @@ -48,14 +48,14 @@

      Activities with Forward Looking Budget Allocations

      Publisher Name {% for column_header in forwardlooking.column_headers %} - {{column_header}} + {{ column_header }} {% endfor %} {% for i in range(0,3) %} {% for year in forwardlooking.years %} - {{year}} + {{ year }} {% endfor %} {% endfor %} @@ -63,12 +63,12 @@

      Activities with Forward Looking Budget Allocations

      {% for row in forwardlooking.table() %} - {{row.publisher_title}} + {{ row.publisher_title }} {% for column in row.year_columns %} {% for year in forwardlooking.years %} - {{column[year]}} + {{ column[year] }} {% endfor %} {% endfor %} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 9f2a34d9c4..663d80b1a5 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -10,7 +10,7 @@
      @@ -28,20 +28,20 @@

      Humanitarian

      Publisher Name {% for _, column_header in humanitarian.columns %} - {{column_header}} + {{ column_header }} {% endfor %} {% for row in humanitarian.table() %} - {{row.publisher_title}} + {{ row.publisher_title }} {% for column_slug, _ in humanitarian.columns %} {%- if column_slug == 'publisher_type' -%} - {{row[column_slug]}} + {{ row[column_slug] }} {%- else -%} - {{row[column_slug]|int}} + {{ row[column_slug]|int }} {%- endif -%} {% endfor %} diff --git a/templates/identifiers.html b/templates/identifiers.html index 291e6be8c7..b69afaacd9 100644 --- a/templates/identifiers.html +++ b/templates/identifiers.html @@ -28,9 +28,9 @@

      {% set publisher_stats = get_publisher_stats(publisher) %} {% if publisher_stats.publisher_duplicate_identifiers|count != 0 %} - {{publisher_title}} - {{publisher_stats.publisher_duplicate_identifiers|length}} - {{publisher_stats.publisher_duplicate_identifiers.values()|sum}} + {{ publisher_title }} + {{ publisher_stats.publisher_duplicate_identifiers|length }} + {{ publisher_stats.publisher_duplicate_identifiers.values()|sum }} {% endif %} {% endfor %} diff --git a/templates/index.html b/templates/index.html index 1203c349bc..51fee9bb8e 100644 --- a/templates/index.html +++ b/templates/index.html @@ -18,7 +18,7 @@

      - {{current_stats.aggregated.activities}} + {{ current_stats.aggregated.activities }} Activities @@ -26,7 +26,7 @@

      - {{current_stats.aggregated.iati_identifiers|count}} + {{ current_stats.aggregated.iati_identifiers|count }} Unique Activities @@ -34,7 +34,7 @@

      - {{current_stats.aggregated.publishers}} + {{ current_stats.aggregated.publishers }} Publishers @@ -42,7 +42,7 @@

      - {{current_stats.aggregated.activity_files}} + {{ current_stats.aggregated.activity_files }} Activity Files @@ -50,7 +50,7 @@

      - {{current_stats.aggregated.organisation_files}} + {{ current_stats.aggregated.organisation_files }} Organisation Files @@ -58,7 +58,7 @@

      - {{current_stats.aggregated.file_size|filesizeformat}} + {{ current_stats.aggregated.file_size|filesizeformat }} Total File Size @@ -66,7 +66,7 @@

      - {{current_stats.download_errors|length}} + {{ current_stats.download_errors|length }} Files Fail to Download @@ -74,7 +74,7 @@

      - {{current_stats.aggregated.invalidxml}} + {{ current_stats.aggregated.invalidxml }} Files where XML is not well-formed @@ -82,21 +82,21 @@

      - {{current_stats.aggregated.nonstandardroots}} + {{ current_stats.aggregated.nonstandardroots }} Files with Nonstandard Roots - {{current_stats.aggregated.toolarge}} + {{ current_stats.aggregated.toolarge }} Files that are too large to be processed - {{current_stats.aggregated.validation.fail}} + {{ current_stats.aggregated.validation.fail }} Files don't validate against the schema @@ -104,7 +104,7 @@

      - {{current_stats.aggregated.publishers_validation.fail}} + {{ current_stats.aggregated.publishers_validation.fail }} Publishers have non-validating files @@ -112,7 +112,7 @@

      - {{current_stats.aggregated.publisher_has_org_file.no}} + {{ current_stats.aggregated.publisher_has_org_file.no }} Publishers with no organisation file diff --git a/templates/license.html b/templates/license.html index 60f68f3a28..07e42b8918 100644 --- a/templates/license.html +++ b/templates/license.html @@ -1,8 +1,8 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block page_header %} -

      {{license_names[license]}}

      -

      License id: {{license}}

      +

      {{ license_names[license] }}

      +

      License id: {{ license }}

      {% endblock %} {% block content %} @@ -20,8 +20,8 @@

      {{license_names[license]}}

      {% for publisher, files in publisher_counts %} - {{publisher}} - {{files}} + {{ publisher }} + {{ files }} {% endfor %} diff --git a/templates/licenses.html b/templates/licenses.html index efa1a47c9c..ff03473513 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -20,10 +20,10 @@ {% for license, files in sorted(license_count.items()) %} - {{license_names[license]}} - {{license}} - {{files}} - {{publisher_license_count[license]}} + {{ license_names[license] }} + {{ license }} + {{ files }} + {{ publisher_license_count[license] }} {% endfor %} diff --git a/templates/organisation.html b/templates/organisation.html index 999db55c6a..be1c3439e1 100644 --- a/templates/organisation.html +++ b/templates/organisation.html @@ -2,22 +2,22 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'publisher_has_org_file.png', 'publisher_has_org_file.json', - description='Count of publishers without an organisation file, over time.')}} + {{ boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'publisher_has_org_file.png', 'publisher_has_org_file.json', + description='Count of publishers without an organisation file, over time.') }}

      List of publishers without an Organisation File

      - (J) + (J)

      The following publishers do not have an organisation file listed on the IATI Registry.

      diff --git a/templates/publisher.html b/templates/publisher.html index 77ffaac761..63bd29a5e6 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -1,11 +1,11 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block title %} -{{ super () }} Publisher: {{publisher_name[publisher]}} +{{ super () }} Publisher: {{ publisher_name[publisher] }} {% endblock %} {% block page_header %} -(Publisher Stats JSON) -

      Publisher: {{publisher_name[publisher]}}

      +(Publisher Stats JSON) +

      Publisher: {{ publisher_name[publisher] }}

      {% endblock %} @@ -53,60 +53,60 @@

      Headlines

      On the Registry - {{publisher}} + {{ publisher }} Reporting Org on Registry {% if ckan_publishers and publisher in ckan_publishers %} - {{ckan_publishers[publisher].result.publisher_iati_id}} + {{ ckan_publishers[publisher].result.publisher_iati_id }} {% endif %} Reporting Org(s) in Data - {% for org in publisher_stats.reporting_orgs%} - {{org|replace(' ', ' ')}} + {% for org in publisher_stats.reporting_orgs %} + {{ org|replace(' ', ' ') }} {% endfor %} Activity Files - {{publisher_stats.activity_files}} + {{ publisher_stats.activity_files }} Organisation Files - {{publisher_stats.organisation_files}} + {{ publisher_stats.organisation_files }} Total File Size - {{publisher_stats.file_size|filesizeformat}} + {{ publisher_stats.file_size|filesizeformat }} Activities - {{publisher_stats.activities}} + {{ publisher_stats.activities }} Unique Activities - {{publisher_stats.iati_identifiers|count}} + {{ publisher_stats.iati_identifiers|count }} Organisations - {{publisher_stats.organisations}} + {{ publisher_stats.organisations }} Versions - {%for version in publisher_stats.versions.keys() %} - {{version|replace(' ', ' ')|safe}} - {%endfor%} + {% for version in publisher_stats.versions.keys() %} + {{ version|replace(' ', ' ')|safe }} + {% endfor %} Hierarchies - {%for hierarchy in publisher_stats.hierarchies %} - {{hierarchy}} - {%endfor%} + {% for hierarchy in publisher_stats.hierarchies %} + {{ hierarchy }} + {% endfor %} Licenses - {%for license in publisher_licenses %} - {{license}} - {%endfor%} + {% for license in publisher_licenses %} + {{ license }} + {% endfor %} Files failing validation @@ -118,22 +118,22 @@

      Headlines

      - {{boxes.box('Activities', publisher_stats.activities, '../publisher_imgs/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher')}} + {{ boxes.box('Activities', publisher_stats.activities, '../publisher_imgs/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher') }}
      - {{boxes.box('Activity Files', publisher_stats.activity_files, '../publisher_imgs/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher')}} - {{boxes.box('Organisation Files', publisher_stats.organisation_files, '../publisher_imgs/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher')}} + {{ boxes.box('Activity Files', publisher_stats.activity_files, '../publisher_imgs/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher') }} + {{ boxes.box('Organisation Files', publisher_stats.organisation_files, '../publisher_imgs/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher') }}
      - {{boxes.box('Files per version', '', '../publisher_imgs/'+publisher+'_versions.png', publisher+'/versions.json', '../publisher_imgs/'+publisher+'_versions_legend.png', '-publisher')}} - {{boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, '../publisher_imgs/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher')}} + {{ boxes.box('Files per version', '', '../publisher_imgs/'+publisher+'_versions.png', publisher+'/versions.json', '../publisher_imgs/'+publisher+'_versions_legend.png', '-publisher') }} + {{ boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, '../publisher_imgs/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher') }}
      - {{boxes.box('Files failing validation', publisher_stats.validation.fail, '../publisher_imgs/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher')}} - {{boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, '../publisher_imgs/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher')}} + {{ boxes.box('Files failing validation', publisher_stats.validation.fail, '../publisher_imgs/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher') }} + {{ boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, '../publisher_imgs/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher') }}

      Data Quality

      @@ -158,11 +158,11 @@

      Data Quality

    • @@ -184,7 +184,7 @@

      Data Quality

      - (J) + (J)

      Files where XML is not well-formed

      @@ -197,7 +197,7 @@

      Files where XML is not well-formed

      {% for dataset, invalid in publisher_inverted.invalidxml.items() %} {% if invalid %} - + {% endif %} {% endfor %} @@ -213,7 +213,7 @@

      Files where XML is not well-formed

      {{dataset[:-4]}}{{ dataset[:-4] }}
      - (J) + (J)

      Files with non-standard roots

      @@ -226,7 +226,7 @@

      Files with non-standard roots

      {% for dataset, nonstandard in publisher_inverted.nonstandardroots.items() %} {% if nonstandard %} - + {% endif %} {% endfor %} @@ -280,7 +280,7 @@

      Financial

      {% if d %} {% for currency, value in d.items() %} {% if value!=None %} - {{value}} {{currency}}
      + {{ value }} {{ currency }}
      {% endif %} {% endfor %} {% endif %} @@ -308,13 +308,13 @@

      Budgets

      {% for row in budget_table %} - - - - - - - + + + + + + + {% endfor %} @@ -332,20 +332,20 @@

      Files

      - - - - + + + + {% for package, activities in publisher_inverted.activities.items() %} - - - - - + + + + + {% endfor %} @@ -357,8 +357,8 @@

      Files

      {{dataset[:-4]}}{{ dataset[:-4] }}
      {{row.year}}{% if row.count_total %}{{row.count_total}}{% endif %}{% if row.sum_total %}{{currency_value(row.sum_total)}}{% endif %}{% if row.count_original %}{{row.count_original}}{% endif %}{{currency_value(row.sum_original)}}{% if row.count_revised %}{{row.count_revised}}{% endif %}{{currency_value(row.sum_revised)}}{{ row.year }}{% if row.count_total %}{{ row.count_total }}{% endif %}{% if row.sum_total %}{{ currency_value(row.sum_total) }}{% endif %}{% if row.count_original %}{{ row.count_original }}{% endif %}{{ currency_value(row.sum_original) }}{% if row.count_revised %}{{ row.count_revised }}{% endif %}{{ currency_value(row.sum_revised) }}
      PackageActivities (J)Organisations (J)File Size (J)Version (J)Activities (J)Organisations (J)File Size (J)Version (J)
      {{package[:-4]}}{{activities}}{{current_stats.inverted_file.organisations.get(package)}}{{current_stats.inverted_file.file_size.get(package)|filesizeformat}}{{current_stats.aggregated_file[publisher][package]['versions'].keys()|first}}{{ package[:-4] }}{{ activities }}{{ current_stats.inverted_file.organisations.get(package) }}{{ current_stats.inverted_file.file_size.get(package)|filesizeformat }}{{ current_stats.aggregated_file[publisher][package]['versions'].keys()|first }}
      - (J) -

      Codelist Values (version {{major_version}}.xx)

      + (J) +

      Codelist Values (version {{ major_version }}.xx)

      @@ -374,23 +374,23 @@

      Codelist Values (version {{major_version}}.xx)

      {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %} {% with element_i=element_list.index(element) %} - - + + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% endwith %} {% with codes=sorted(set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} {% endwith %} {% endwith %} @@ -406,7 +406,7 @@

      Codelist Values (version {{major_version}}.xx)

      {{element}}{{codelist_mapping[major_version].get(element)}}{{ element }}{{ codelist_mapping[major_version].get(element) }}{% if codes|count %} - {{codes|count}} - {%else%}{{codes|count}}{%endif%} + {{ codes|count }} + {% else %}{{ codes|count }}{% endif %} {% if codes|count %} - - {{codes|count}} + + {{ codes|count }} - {%else%} - {{codes|count}} - {%endif%} + {% else %} + {{ codes|count }} + {% endif %}
      - (J) + (J)

      Elements and Attributes Published

      @@ -422,9 +422,9 @@

      Elements and Attributes Published

      {% for element, count in publisher_stats['elements'].items() %} {% with element_i=element_list.index(element) %} - - - + + + {% endwith %} {% endfor %} @@ -446,5 +446,5 @@

      Elements and Attributes Published

      {% block tablesorterscript %} - + {% endblock %} diff --git a/templates/publishers.html b/templates/publishers.html index 1129997fe8..bec18c0509 100644 --- a/templates/publishers.html +++ b/templates/publishers.html @@ -2,10 +2,10 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Publishers', current_stats.aggregated.publishers, 'publishers.png', 'publishers.json', - description='This graph shows the number of organisations publishing IATI data over time.')}} - {{boxes.box('Publishers by type', '', 'publisher_types.png', None, 'publisher_types_legend.png', - description='This graph show the various types of organisations publishing IATI data.')}} + {{ boxes.box('Publishers', current_stats.aggregated.publishers, 'publishers.png', 'publishers.json', + description='This graph shows the number of organisations publishing IATI data over time.') }} + {{ boxes.box('Publishers by type', '', 'publisher_types.png', None, 'publisher_types_legend.png', + description='This graph show the various types of organisations publishing IATI data.') }}
      @@ -13,7 +13,7 @@
      -

      (This table as CSV)

      +

      (This table as CSV)

      List of current active IATI publishers, Click on the publisher name for more details.

      {% include '_partials/tablesorter_instructions.html' %}
      @@ -22,35 +22,35 @@
      - - + + - - - + + + {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = get_publisher_stats(publisher) %} - - - - - - - - + + + + + + + + {% endfor %}
      {{element}}{{count}}{{publisher_inverted.elements[element]|count}}{{ element }}{{ count }}{{ publisher_inverted.elements[element]|count }}
      Publisher Name Publisher Registry IdActivities (J)Organisations (J)Activities (J)Organisations (J) Files (*)Total File Size (J)Hierarchies (J)Reporting Orgs (J)Total File Size (J)Hierarchies (J)Reporting Orgs (J)
      {{publisher_name[publisher]}}{{publisher}}{{current_stats.inverted_publisher.activities[publisher]}}{{publisher_stats.organisations}}{{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}}{{current_stats.inverted_publisher.file_size.get(publisher)|filesizeformat}}{{publisher_stats.hierarchies|length}}{{publisher_stats.reporting_orgs|length}}{{ publisher_name[publisher] }}{{ publisher }}{{ current_stats.inverted_publisher.activities[publisher] }}{{ publisher_stats.organisations }}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ current_stats.inverted_publisher.file_size.get(publisher)|filesizeformat }}{{ publisher_stats.hierarchies|length }}{{ publisher_stats.reporting_orgs|length }}
      -

      * Files is the sum of Activity Files (J) and Organisation Files (J).

      +

      * Files is the sum of Activity Files (J) and Organisation Files (J).

      {% endblock %} {% block tablesorteroptions %} -{textExtraction:{5: function(node,table,cellIndex) { return $(node).attr('data-bytes'); }}} +{textExtraction:{5: function(node,table,cellIndex) { return $(node).attr('data-bytes'); } }} {% endblock %} diff --git a/templates/registration_agencies.html b/templates/registration_agencies.html index 11fbd20306..ff2a68dcd3 100644 --- a/templates/registration_agencies.html +++ b/templates/registration_agencies.html @@ -21,9 +21,9 @@

      {% for registration_agency, count in sorted(registration_agencies.items()) %} - {{registration_agency}} - {{count}} - {{registration_agencies_publishers[registration_agency]|count}} + {{ registration_agency }} + {{ count }} + {{ registration_agencies_publishers[registration_agency]|count }} {% endfor %} @@ -50,10 +50,10 @@

      {% for orgid, publishers in nonmatching %} {% for publisher, count in publishers.items() %} - {{orgid|replace(' ', ' ')}} - {{publisher}} - {{publisher_name[publisher]}} - {{count}} + {{ orgid|replace(' ', ' ') }} + {{ publisher }} + {{ publisher_name[publisher] }} + {{ count }} {% endfor %} {% endfor %} diff --git a/templates/reporting_orgs.html b/templates/reporting_orgs.html index abfcfc35a0..e7103921f3 100644 --- a/templates/reporting_orgs.html +++ b/templates/reporting_orgs.html @@ -26,10 +26,10 @@

      {% set reporting_orgs_key = publisher_stats.reporting_orgs.keys()|first %} {% if publisher_stats.reporting_orgs|count != 1 or reporting_orgs_key != ckan_publishers[publisher].result.publisher_iati_id %} - {{publisher_title}} - {{ckan_publishers[publisher].result.publisher_iati_id}} - {{publisher_stats.reporting_orgs|length}} - {% for ro in publisher_stats.reporting_orgs%}{{ro}} {% endfor %} + {{ publisher_title }} + {{ ckan_publishers[publisher].result.publisher_iati_id }} + {{ publisher_stats.reporting_orgs|length }} + {% for ro in publisher_stats.reporting_orgs %}{{ ro }} {% endfor %} {% endif %} {% endfor %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 2e2f4e05e4..17c58da6bf 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -14,7 +14,7 @@
      - (This table as CSV) + (This table as CSV)

      Summary Statistics

      @@ -32,16 +32,16 @@

      Summary Statistics

      Publisher Name {% for column_slug, column_header in summary_stats.columns %} - {{column_header}} + {{ column_header }} {% endfor %} {% for row in summary_stats.table() %} - {{row.publisher_title}} + {{ row.publisher_title }} {% for column_slug, column_header in summary_stats.columns %} - {{row[column_slug]}} + {{ row[column_slug] }} {% endfor %} @@ -119,7 +119,7 @@

      Timeliness

      Forward looking

      -

      The average percentage of current activities with budgets for each of the years {{current_year}} - {{current_year + 2}}. +

      The average percentage of current activities with budgets for each of the years {{ current_year }} - {{ current_year + 2 }}. The component values and a detailed methodology are displayed on the forward looking page.

      diff --git a/templates/timeliness.html b/templates/timeliness.html index 7a35740473..fc1c01ad06 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Frequency assessments

      @@ -31,28 +31,28 @@

      Table of Frequency assessments

      Publisher Name First published - {{timeliness.this_year-1}} - {{timeliness.this_year}} + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} Frequency {% for month in timeliness.previous_months_reversed %} - {{timeliness.short_month(month)}} + {{ timeliness.short_month(month) }} {% endfor %} - {{timeliness.short_month(timeliness.this_month)}} + {{ timeliness.short_month(timeliness.this_month) }} {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} - {{publisher_title}} + {{ publisher_title }} {{ first_published_band }} {% for month in timeliness.previous_months_reversed %} - {{per_month[month] or 0}} + {{ per_month[month] or 0 }} {% endfor %} - {{per_month[timeliness.this_month] or 0}} - {% if hft %}*{% endif %} + {{ per_month[timeliness.this_month] or 0 }} + {% if hft %}*{% endif %} {{ assessment }} {% endfor %} @@ -76,15 +76,15 @@

      Summary of Publisher Performance

      {% set summary = timeliness.publisher_frequency_summary() %} {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.frequency_index) %} - {{assessment}} - {{count}} + {{ assessment }} + {{ count }} {% endfor %} Total - {{summary.values()|sum}} + {{ summary.values()|sum }}
      diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index c03f44f5bb..bdb9d251d7 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Time lag assessments

      @@ -29,28 +29,28 @@

      Table of Time lag assessments

      Publisher Name - {{timeliness.this_year-1}} - {{timeliness.this_year}} + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} Time lag {% for month in timeliness.previous_months_reversed %} - {{timeliness.short_month(month)}} + {{ timeliness.short_month(month) }} {% endfor %} - {{timeliness.short_month(timeliness.this_month)}} + {{ timeliness.short_month(timeliness.this_month) }} {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} - {{publisher_title}} + {{ publisher_title }} {% for month in timeliness.previous_months_reversed %} - {{per_month[month] or 0}} + {{ per_month[month] or 0 }} {% endfor %} - {{per_month[timeliness.this_month] or 0}} - {% if hft %}*{% endif %} - {{assessment}} + {{ per_month[timeliness.this_month] or 0 }} + {% if hft %}*{% endif %} + {{ assessment }} {% endfor %} @@ -75,15 +75,15 @@

      Summary of Publisher Performance

      {% set summary = timeliness.publisher_timelag_summary() %} {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.timelag_index) %} - {{assessment}} - {{count}} + {{ assessment }} + {{ count }} {% endfor %} Total - {{summary.values()|sum}} + {{ summary.values()|sum }}
      diff --git a/templates/validation.html b/templates/validation.html index 4a662c5ad6..e3d5d5d3b4 100644 --- a/templates/validation.html +++ b/templates/validation.html @@ -2,10 +2,10 @@ {% import '_partials/boxes.html' as boxes %} {% block content %}
      - {{boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'validation.png', 'validation.json', - description='Count of files that do not validate against the relevant schema, over time.')}} - {{boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'publishers_validation.png', 'publishers_validation.json', - description='Count of publishers that have at least one invalid file, over time')}} + {{ boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'validation.png', 'validation.json', + description='Count of files that do not validate against the relevant schema, over time.') }} + {{ boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'publishers_validation.png', 'publishers_validation.json', + description='Count of publishers that have at least one invalid file, over time') }}
      @@ -24,7 +24,7 @@

      List of files that fail validation, grouped by publisher

      {% if datasets %}
      -
      {{ publisher_name[publisher ]}} ({{ datasets|length }})
      +
      {{ publisher_name[publisher ] }} ({{ datasets|length }})
      @@ -40,7 +40,7 @@

      List of files that fail validation, grouped by publisher

      @@ -66,14 +66,14 @@

      Count of files that fail validation, per publisher.

      - - + + {% for publisher in current_stats.inverted_file_publisher %} {% if 'fail' in current_stats.inverted_file_publisher[publisher].validation %} - + {% endif %} diff --git a/templates/versions.html b/templates/versions.html index f150b96542..bbf8c93e22 100644 --- a/templates/versions.html +++ b/templates/versions.html @@ -6,10 +6,10 @@ {{ boxes.box('Files per version (other)', '', 'versions_other.png', 'versions.json', 'versions_other_legend.png', description='Count of files per other versions, over time. These values do not actually exist as IATI versions.') }}
      - {{boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', - description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.')}} - {{boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', - description='Count of publishers per other version, over time')}} + {{ boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', + description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.') }} + {{ boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', + description='Count of publishers per other version, over time') }}
      {% if 'true' in current_stats.aggregated.version_mismatch %} @@ -48,7 +48,7 @@

      Inconsistent versions

      Publishers by version

      -

      (In JSON format)

      +

      (In JSON format)

      @@ -64,11 +64,11 @@

      Expected versions

      {% for version, publishers in current_stats.inverted_publisher.versions.items() %} {% if version in expected_versions %}
      - {{version|replace(' ', ' ')|safe}} + {{ version|replace(' ', ' ')|safe }}
      Publisher (J)Failing files (J)Publisher (J)Failing files (J)
      {{ publisher_name[publisher ]}}{{ publisher_name[publisher ] }} {{ current_stats.inverted_file_publisher[publisher].validation.fail|length }}
      {% for publisher in publishers %} - + {% endfor %}
      {{publisher_name[publisher]}}{{ publisher_name[publisher] }}
      @@ -91,11 +91,11 @@

      Other versions

      {% for version, publishers in current_stats.inverted_publisher.versions.items() %} {% if version not in expected_versions %}
      - {{version|replace(' ', ' ')|safe}} + {{ version|replace(' ', ' ')|safe }} {% for publisher in publishers %} - + {% endfor %}
      {{publisher_name[publisher]}}{{ publisher_name[publisher] }}
      diff --git a/templates/xml.html b/templates/xml.html index 7a54004ec5..4ab7a10b32 100644 --- a/templates/xml.html +++ b/templates/xml.html @@ -9,10 +9,10 @@ {% block content %}
      - {{boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', - description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.')}} - {{boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'nonstandardroots.png', 'nonstandardroots.json', - description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

      ')}} + {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', + description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.') }} + {{ boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'nonstandardroots.png', 'nonstandardroots.json', + description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

      ') }}
      @@ -20,7 +20,7 @@
      - (J) + (J)

      Files where XML is not well-formed

      @@ -34,8 +34,8 @@

      Files where XML is not well-formed

      {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %} - - + + {% endif %} {% endfor %} @@ -48,7 +48,7 @@

      Files where XML is not well-formed

      {{dataset[:-4]|dataset_to_publisher}}{{dataset[:-4]}}{{ dataset[:-4]|dataset_to_publisher }}{{ dataset[:-4] }}
      - (J) + (J)

      Files with non-standard roots

      @@ -61,8 +61,8 @@

      Files with non-standard roots

      {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - - + + {% endif %} {% endfor %} From de96c3ae1dbc9701cc07cb9dc3a11729ddc8a468 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 11:33:10 +0100 Subject: [PATCH 182/375] Add an explanation about empty attributes --- make_html.py | 1 + templates/element.html | 4 ++++ templates/elements.html | 1 + 3 files changed, 6 insertions(+) diff --git a/make_html.py b/make_html.py index fee23ec112..d7ba4c6de5 100644 --- a/make_html.py +++ b/make_html.py @@ -269,6 +269,7 @@ def element(slug): element=element, publishers=publishers, url=lambda x: '../' + x, + element_or_attribute='attribute' if '@' in element else 'element', page='elements') diff --git a/templates/element.html b/templates/element.html index 5f5b923d2f..044450350e 100644 --- a/templates/element.html +++ b/templates/element.html @@ -8,6 +8,10 @@

      Usage of {{ element }}

      Who uses {{ element }}?

      Checking usage of {{ element }} across publishers, files and activities.

      + + {% if element_or_attribute == 'attribute' %} +

      Attributes containing an empty value are treated as not published.

      + {% endif %} {% endblock %} {% block content %} diff --git a/templates/elements.html b/templates/elements.html index af532be3aa..3037742494 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -25,6 +25,7 @@
    • Total - Count of total instances in which this element/attribute is used, across all publishers.
    • +

      Attributes containing an empty value are treated as not published.

      {% include '_partials/tablesorter_instructions.html' %}
      {{dataset[:-4]|dataset_to_publisher}}{{dataset[:-4]}}{{ dataset[:-4]|dataset_to_publisher }}{{ dataset[:-4] }}
      From 357cfc13253f53b29ffd59445cf6aa8790402e90 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 11:33:35 +0100 Subject: [PATCH 183/375] Distinguish elements from attributes a bit --- templates/element.html | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/templates/element.html b/templates/element.html index 044450350e..161ba4e285 100644 --- a/templates/element.html +++ b/templates/element.html @@ -27,16 +27,16 @@

      Publishers

      -

      Publishing this element

      +

      Publishing this {{ element_or_attribute }}

      - - - - - + + + + + {% for publisher in sorted(publishers) %} @@ -61,14 +61,14 @@

      Publishing this element

      -

      Not publishing this element

      +

      Not publishing this {{ element_or_attribute }}

      PublisherFiles with ElementTotal FilesActivities/Orgs with ElementTotal Instances of ElementTotal ActivitiesFiles with {{ element_or_attribute }}Total filesActivities/Orgs with {{ element_or_attribute }}Total instances of {{ element_or_attribute }}Total activities
      - - - + + + {% for publisher in current_stats.inverted_publisher.publishers %} From 475cddd76f16b86e20b7c8ae86901a0c85291689 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 14:28:52 +0100 Subject: [PATCH 184/375] Tweak wording --- templates/element.html | 2 +- templates/elements.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/element.html b/templates/element.html index 161ba4e285..bbc83a27e3 100644 --- a/templates/element.html +++ b/templates/element.html @@ -10,7 +10,7 @@

      Usage of {{ element }}

      Checking usage of {{ element }} across publishers, files and activities.

      {% if element_or_attribute == 'attribute' %} -

      Attributes containing an empty value are treated as not published.

      +

      An empty value for the attribute is treated the same as if the attribute is not present.

      {% endif %} {% endblock %} diff --git a/templates/elements.html b/templates/elements.html index 3037742494..657f6997f7 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -25,7 +25,7 @@
    • Total - Count of total instances in which this element/attribute is used, across all publishers.
    • -

      Attributes containing an empty value are treated as not published.

      +

      Empty values for attributes are treated the same as if the attribute is not present.

      {% include '_partials/tablesorter_instructions.html' %}
      PublisherTotal FilesTotal ActivitiesTotal OrganisationsTotal filesTotal activitiesTotal organisations
      From f9b8919231b7ac4765851c8199858951d1364de7 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 17 May 2021 14:39:28 +0100 Subject: [PATCH 185/375] Add #attributes to attribute reference links --- make_html.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/make_html.py b/make_html.py index d7ba4c6de5..d26ba6bceb 100644 --- a/make_html.py +++ b/make_html.py @@ -71,11 +71,14 @@ def firstint(s): def xpath_to_url(path): path = path.strip('./') if path.startswith('iati-activity'): - return 'https://reference.codeforiati.org/activity-standard/iati-activities/' + path.split('@')[0] + url = 'https://reference.codeforiati.org/activity-standard/iati-activities/' + path.split('@')[0] elif path.startswith('iati-organisation'): - return 'https://reference.codeforiati.org/organisation-standard/iati-organisations/' + path.split('@')[0] + url = 'https://reference.codeforiati.org/organisation-standard/iati-organisations/' + path.split('@')[0] else: - return 'https://reference.codeforiati.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] + url = 'https://reference.codeforiati.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] + if '@' in path: + url += '#attributes' + return url def registration_agency(orgid): From 9deae4993c21193f557b0aa7c2c05f642ea6b526 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 18 May 2021 09:01:01 +0100 Subject: [PATCH 186/375] Add codelist instance count to publisher pages --- templates/publisher.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/publisher.html b/templates/publisher.html index 63bd29a5e6..089e7577e2 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -378,14 +378,14 @@

      Codelist Values (version {{ major_version }}.xx)

      {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% endwith %} {% with codes=sorted(set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} {% endif %} {% endfor %} @@ -69,7 +69,7 @@

      Not On Codelist

      {% for value, publishers in values.items() %} {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} {% endif %} {% endfor %} From 36813795a592e264ac1d818c866c1fa6d0e6f5be Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 18 May 2021 19:10:41 +0100 Subject: [PATCH 188/375] Exclude invalid/custom elements from elements page --- .github/workflows/build.yml | 2 ++ .gitignore | 4 ++-- data.py | 25 +++++++++++++++++++++++++ fetch_data.sh | 13 +++++++++++++ make_html.py | 2 ++ pytest.ini | 4 ++++ requirements.txt | 1 + templates/elements.html | 2 ++ 8 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 pytest.ini diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4c3d190f76..ef3c985f42 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,6 +42,8 @@ jobs: run: python make_csv.py - name: Build HTML output run: python make_html.py + - name: Delete files + run: rm -rf data stats-calculated - name: Add static files to output run: cp -r static/* out - name: Deploy (production) 🚀 diff --git a/.gitignore b/.gitignore index cc8af738d9..52a1db6179 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ -stats-calculated* -data +/stats-calculated/ +/data/ out # Byte-compiled / optimized / DLL files diff --git a/data.py b/data.py index 5b8998da5a..e9f6581635 100644 --- a/data.py +++ b/data.py @@ -6,6 +6,8 @@ import csv from decimal import Decimal +from xmlschema import XMLSchema + # Modified from: # https://github.com/IATI/IATI-Stats/blob/1d20ed1e/stats/common/decorators.py#L5-L13 @@ -196,6 +198,29 @@ def deep_merge(obj1, obj2): if line != '.\n': current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) +sources105 = [ + './data/schemas/1.05/iati-activities-schema.xsd', + './data/schemas/1.05/iati-organisations-schema.xsd'] +sources203 = [ + './data/schemas/2.03/iati-activities-schema.xsd', + './data/schemas/2.03/iati-organisations-schema.xsd'] 
+schema105 = XMLSchema(sources105) +schema203 = XMLSchema(sources203) + + +def is_valid_element(path): + try: + if schema203.get_element(None, path=path): + return True + except AttributeError: + pass + try: + if schema105.get_element(None, path=path): + return True + except AttributeError: + pass + return False + def transform_codelist_mapping_keys(codelist_mapping): # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 diff --git a/fetch_data.sh b/fetch_data.sh index 12ebec4295..9d72cb8794 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -44,4 +44,17 @@ echo "Fetching Codelists-2" rm -rf data/IATI-Codelists-2 python fetch_v2_codelists.py +echo "Fetching schemas" +mkdir data/schemas +cd data/schemas +# for v in 1.01 1.02 1.03 1.04 1.05 2.01 2.02 2.03; do +for v in 1.05 2.03; do + git clone https://github.com/IATI/IATI-Schemas.git $v + cd $v + git checkout version-$v + git pull + cd .. +done +cd .. + echo "completed fetching data" diff --git a/make_html.py b/make_html.py index d26ba6bceb..e53bace115 100644 --- a/make_html.py +++ b/make_html.py @@ -35,6 +35,7 @@ metadata, publisher_name, publishers_ordered_by_title, + is_valid_element, slugs) app = Flask(__name__) @@ -144,6 +145,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['codelist_mapping'] = codelist_mapping app.jinja_env.globals['codelist_sets'] = codelist_sets app.jinja_env.globals['get_codelist_values'] = get_codelist_values +app.jinja_env.globals['is_valid_element'] = is_valid_element basic_page_names = [ 'index', diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..d85657b286 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = tests +norecursedirs = *__pycache__* *.pytest_cache* +console_output_style = count diff --git a/requirements.txt b/requirements.txt index c73730f709..c74c70d7e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ 
pytz matplotlib==2.2.5 requests Werkzeug==0.12.2 +xmlschema==1.6.2 diff --git a/templates/elements.html b/templates/elements.html index 657f6997f7..40f24fa32e 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -37,12 +37,14 @@ {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} + {% if is_valid_element(element) %} + {% endif %} {% endfor %}
      {{ codelist_mapping[major_version].get(element) }}{% if codes|count %} - {{ codes|count }} + {{ codes|count }} {% else %}{{ codes|count }}{% endif %} {% if codes|count %} - + {{ codes|count }} {% else %} From f6f4cbddf08a15b4a12311e622caaadc0b5ec490 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 18 May 2021 09:21:41 +0100 Subject: [PATCH 187/375] Also show instance count on codelist page --- templates/codelist.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/codelist.html b/templates/codelist.html index 2c37fbedaa..5658ff2530 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -46,7 +46,7 @@

      On Codelist

      {% for value, publishers in values.items() %} {% if value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %}
      {{ value }} - {{ publishers|length }} + {{ publishers|length }}
      {{ value }} - {{ publishers|length }} + {{ publishers|length }}
      {{ element }} {{ publishers|length }} {{ current_stats.aggregated.elements[element] }} {{ current_stats.aggregated.elements_total[element] }}
      From 26f01b2679bbed0a4bee230c990d182a27645bdd Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 19 May 2021 13:17:08 +0100 Subject: [PATCH 189/375] Fix popups on publisher page --- templates/publisher.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/publisher.html b/templates/publisher.html index 089e7577e2..980cfd3e26 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -378,14 +378,14 @@

      Codelist Values (version {{ major_version }}.xx)

      {{ codelist_mapping[major_version].get(element) }} {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% if codes|count %} - {{ codes|count }} + {{ codes|count }} {% else %}{{ codes|count }}{% endif %} {% endwith %} {% with codes=sorted(set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} {% if codes|count %} - + {{ codes|count }} {% else %} From 7dd5820f7a6b3576aa1931567e1d083edd783440 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 19 May 2021 13:35:32 +0100 Subject: [PATCH 190/375] Change popover link focus style --- templates/base.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/templates/base.html b/templates/base.html index b9829462dd..c0e280cb40 100644 --- a/templates/base.html +++ b/templates/base.html @@ -88,6 +88,10 @@ .navbar-brand img { height: 30px; } + + a.popover-html:focus { + outline: none; + } {% block title %}Code for IATI Analytics - {{ page_titles[page] }}{% endblock %} From e06ba00ebfc3bbdfed384bff91061043bda9e3e3 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 19 May 2021 20:31:20 +0100 Subject: [PATCH 191/375] Fix sort order of booleans --- templates/booleans.html | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/templates/booleans.html b/templates/booleans.html index 66190c7cae..bbee0df0e9 100644 --- a/templates/booleans.html +++ b/templates/booleans.html @@ -29,3 +29,13 @@
      {% endblock %} + +{% block tablesorteroptions %}{ + textExtraction:{ + 1: function(node,table,cellIndex) { + if (['1', 'true'].indexOf($(node).text()) !== -1) return 1; + if (['0', 'false'].indexOf($(node).text()) !== -1) return 0; + return -1; + } + } +}{% endblock %} From e11dab1bd3b587dc62c3b76e6984dc7736becfaf Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 19 May 2021 22:59:38 +0100 Subject: [PATCH 192/375] No need to fetch IATI-Stats-historical --- .github/workflows/build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4c3d190f76..4d25bb6915 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,9 +27,7 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt - name: Fetch data - run: | - ./fetch_data.sh - git clone --quiet https://github.com/codeforIATI/IATI-Stats-historical data/historical + run: ./fetch_data.sh - name: Fetch stats run: | mkdir out From 0d56cb021852102eee0d62a1a615bc09487e9a3a Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Wed, 19 May 2021 23:34:30 +0100 Subject: [PATCH 193/375] Comprehensive -> Comprehensiveness everywhere --- summary_stats.py | 10 +++++----- templates/summary_stats.html | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/summary_stats.py b/summary_stats.py index f80818ed46..82f5b5a01c 100644 --- a/summary_stats.py +++ b/summary_stats.py @@ -12,7 +12,7 @@ ('publisher_type', 'Publisher Type'), ('timeliness', 'Timeliness'), ('forwardlooking', 'Forward looking'), - ('comprehensive', 'Comprehensive'), + ('comprehensiveness', 'Comprehensiveness'), ('score', 'Score') ] @@ -104,15 +104,15 @@ def table(): # Compute and store the mean average for these fields row['forwardlooking'] = sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data['year_columns'][2]) - # Compute comprehensive statistic + # Compute comprehensiveness statistic # Get the 
comprehensiveness data for this publisher publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) - # Set the comprehensive value to be the summary average for valid data - row['comprehensive'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid']) + # Set the comprehensiveness value to be the summary average for valid data + row['comprehensiveness'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid']) # Compute score - row['score'] = int(round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensive']) / 3)) + row['score'] = int(round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensiveness']) / 3)) # Return a generator object yield row diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 17c58da6bf..9a88c4af78 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -22,7 +22,7 @@

      Summary Statistics

      This table assesses all IATI publishers by scoring three dimensions – Timeliness, Forward-looking and Comprehensiveness. The methodology is explained below the table and in the related Publisher Statistics pages. In summary:

      -

      {Score} = ( {Timeliness} + {Forward looking} + {Comprehensive} ) / 3  

      +

      {Score} = ( {Timeliness} + {Forward looking} + {Comprehensiveness} ) / 3  

      {% include '_partials/tablesorter_instructions.html' %}
      @@ -124,14 +124,14 @@

      Forward looking

      -

      Comprehensive

      +

      Comprehensiveness

      The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

      Score

      The mean average of the three values above.

      -

      {Score} = ( {Timeliness} + {Forward looking} + {Comprehensive} ) / 3

      +

      {Score} = ( {Timeliness} + {Forward looking} + {Comprehensiveness} ) / 3

      From 54dba88c22a05d2b71b303a9c88037cd6070f577 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 3 Jun 2021 13:25:08 +0100 Subject: [PATCH 194/375] Remove conditions from reference URLs --- make_html.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/make_html.py b/make_html.py index e53bace115..1a9325b4c2 100644 --- a/make_html.py +++ b/make_html.py @@ -71,6 +71,8 @@ def firstint(s): def xpath_to_url(path): path = path.strip('./') + # remove conditions + path = re.sub(r'\[[^]]+\]', '', path) if path.startswith('iati-activity'): url = 'https://reference.codeforiati.org/activity-standard/iati-activities/' + path.split('@')[0] elif path.startswith('iati-organisation'): From b02f51ba4c85b564f41e63b02529fac54611baf5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 8 Jun 2021 23:18:12 +0100 Subject: [PATCH 195/375] Add C4I sidebar --- templates/base.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/templates/base.html b/templates/base.html index c0e280cb40..36de8a4a27 100644 --- a/templates/base.html +++ b/templates/base.html @@ -103,7 +103,7 @@
      -

      Data and metadata issues are raised on this Github issue tracker. If you spot a problem with IATI data or metadata, please raise an issue.

      +

      Data and metadata issues are raised publicly on IATI Data Bugtracker. If you spot a problem with IATI data or metadata, please raise an issue.

      From 1f40dc92d0f6beecd13ee72d237488becd0c1cbf Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 13 Jul 2021 22:46:58 +0100 Subject: [PATCH 197/375] Add null licenses to notspecified count --- licenses.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/licenses.py b/licenses.py index 58d23d5891..5b43c6ac3f 100644 --- a/licenses.py +++ b/licenses.py @@ -87,7 +87,12 @@ with open('./stats-calculated/ckan.json') as handler: ckan = json.load(handler, object_pairs_hook=OrderedDict) -licenses = [package.get('license_id') for _, publisher in ckan.items() for _, package in publisher.items()] +licenses = [ + package['license_id'] + if package['license_id'] is not None + else 'notspecified' + for _, publisher in ckan.items() + for _, package in publisher.items()] def licenses_for_publisher(publisher_name): @@ -97,15 +102,24 @@ def licenses_for_publisher(publisher_name): return set() # Return unique licenses used - return set([package.get('license_id') for package in ckan[publisher_name].values()]) + return set([ + package['license_id'] + if package['license_id'] is not None + else 'notspecified' + for package in ckan[publisher_name].values()]) def main(): - licenses_and_publisher = set([(package.get('license_id') if package.get('license_id') else 'notspecified', publisher_name) for publisher_name, publisher in ckan.items() for package_name, package in publisher.items()]) + licenses_and_publisher = set([ + (package['license_id'] + if package['license_id'] is not None + else 'notspecified', publisher_name) + for publisher_name, publisher in ckan.items() + for package_name, package in publisher.items()]) licenses_per_publisher = [license for license, publisher in licenses_and_publisher] return render_template('licenses.html', license_names=license_names, - license_count=dict((x if x else 'notspecified', licenses.count(x)) for x in set(licenses)), + license_count=dict((x, licenses.count(x)) for x in set(licenses)), 
publisher_license_count=dict((x, licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), sorted=sorted, page='licenses', @@ -113,9 +127,12 @@ def main(): def individual_license(license): - if license == 'None': - license = None - publishers = [publisher_name for publisher_name, publisher in ckan.items() for _, package in publisher.items() if package.get('license_id') == license] + publishers = [ + publisher_name + for publisher_name, publisher in ckan.items() + for _, package in publisher.items() + if package['license_id'] == license or ( + license == 'notspecified' and package['license_id'] is None)] publisher_counts = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] return render_template('license.html', url=lambda x: '../' + x, From 1049525deac249dd35877fce49f0c073d89ab8dd Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 13 Jul 2021 23:13:58 +0100 Subject: [PATCH 198/375] Bring get_stats.sh up to date --- .github/workflows/build.yml | 4 +--- .github/workflows/ci.yml | 4 +--- get_stats.sh | 25 ++----------------------- 3 files changed, 4 insertions(+), 29 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fc6a831b12..ba28cbfedd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,9 +31,7 @@ jobs: - name: Fetch stats run: | mkdir out - git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/ckan.json" > stats-calculated/ckan.json - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/metadata.json" > stats-calculated/metadata.json + ./get_stats.sh - name: Make plots run: python plots.py - name: Build CSV output diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5487d37ed4..c0e4a11c54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ 
-27,9 +27,7 @@ jobs: - name: Fetch stats run: | mkdir out - git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/ckan.json" > stats-calculated/ckan.json - curl --compressed "https://raw.githubusercontent.com/codeforIATI/IATI-Stats-public/gh-pages/metadata.json" > stats-calculated/metadata.json + ./get_stats.sh - name: Run tests run: pytest --cov . - name: Coveralls diff --git a/get_stats.sh b/get_stats.sh index eb2601626e..0ff18b70b7 100755 --- a/get_stats.sh +++ b/get_stats.sh @@ -1,24 +1,3 @@ -# This script is the same as https://github.com/IATI/IATI-Stats/blob/master/get_stats.sh -# but with only the dated historical aggregates. -mkdir stats-calculated -for f in ckan gitdate; do - curl --compressed "http://dashboard.iatistandard.org/stats/${f}.json" > stats-calculated/${f}.json -done +#!/bin/bash -mkdir stats-blacklist -cd stats-blacklist -wget "http://dashboard.iatistandard.org/stats-blacklist/current.tar.gz" -O current.tar.gz -wget "http://dashboard.iatistandard.org/stats-blacklist/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz -wget "http://dashboard.iatistandard.org/stats-blacklist/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz -tar -xf current.tar.gz -tar -xf gitaggregate-dated.tar.gz -tar -xf gitaggregate-publisher-dated.tar.gz -cd .. 
- -cd stats-calculated -wget "http://dashboard.iatistandard.org/stats/current.tar.gz" -O current.tar.gz -wget "http://dashboard.iatistandard.org/stats/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz -wget "http://dashboard.iatistandard.org/stats/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz -tar -xf current.tar.gz -tar -xf gitaggregate-dated.tar.gz -tar -xf gitaggregate-publisher-dated.tar.gz +git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated From cb94a3503dd4c1c1084191e08f5fcd51fdad3ced Mon Sep 17 00:00:00 2001 From: notshi Date: Wed, 14 Jul 2021 18:24:59 +0100 Subject: [PATCH 199/375] [#52] Added column for url and include known urls --- licenses.py | 16 ++++++++++++++++ templates/licenses.html | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/licenses.py b/licenses.py index 5b43c6ac3f..6904ab6b66 100644 --- a/licenses.py +++ b/licenses.py @@ -83,6 +83,20 @@ 'xnet': 'OSI Approved::X.Net License', 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} + +license_url = { + 'cc-nc': 'https://licenses.opendefinition.org/licenses/CC-BY-NC-4.0.json', + 'cc-by': 'http://www.opendefinition.org/licenses/cc-by', + 'cc-by-sa': 'http://www.opendefinition.org/licenses/cc-by-sa', + 'cc-zero': 'http://www.opendefinition.org/licenses/cc-zero', + 'odc-by': 'http://www.opendefinition.org/licenses/odc-by', + 'odc-odbl': 'http://www.opendefinition.org/licenses/odc-odbl', + 'odc-pddl': 'http://www.opendefinition.org/licenses/odc-pddl', + 'other-at': 'https://licenses.opendefinition.org/licenses/other-at.json', + 'other-open': 'https://licenses.opendefinition.org/licenses/other-open.json', + 'other-pd': 'https://licenses.opendefinition.org/licenses/other-pd.json', + 'other-nc': 'https://licenses.opendefinition.org/licenses/other-nc.json', + 'uk-ogl': 'http://reference.data.gov.uk/id/open-government-licence'} with 
open('./stats-calculated/ckan.json') as handler: ckan = json.load(handler, object_pairs_hook=OrderedDict) @@ -119,6 +133,7 @@ def main(): licenses_per_publisher = [license for license, publisher in licenses_and_publisher] return render_template('licenses.html', license_names=license_names, + license_url=license_url, license_count=dict((x, licenses.count(x)) for x in set(licenses)), publisher_license_count=dict((x, licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), sorted=sorted, @@ -138,6 +153,7 @@ def individual_license(license): url=lambda x: '../' + x, license=license, license_names=license_names, + license_url=license_url, publisher_counts=publisher_counts, page='licenses', licenses=True) diff --git a/templates/licenses.html b/templates/licenses.html index ff03473513..92013088d5 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -8,10 +8,11 @@

      Count of publishers per licences in use on the IATI Registry.

      {% include '_partials/tablesorter_instructions.html' %} -
      +
      + @@ -21,6 +22,7 @@ {% for license, files in sorted(license_count.items()) %} + From e13fd560ae69e51bade0fad25df32ad466d93267 Mon Sep 17 00:00:00 2001 From: notshi Date: Wed, 14 Jul 2021 18:36:22 +0100 Subject: [PATCH 200/375] [#52] Added column for url and include known urls --- licenses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/licenses.py b/licenses.py index 6904ab6b66..384a6da80d 100644 --- a/licenses.py +++ b/licenses.py @@ -83,7 +83,7 @@ 'xnet': 'OSI Approved::X.Net License', 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} - + license_url = { 'cc-nc': 'https://licenses.opendefinition.org/licenses/CC-BY-NC-4.0.json', 'cc-by': 'http://www.opendefinition.org/licenses/cc-by', From e1e55385d62ddd05cfc1a065568e41ddf946e3e6 Mon Sep 17 00:00:00 2001 From: notshi Date: Wed, 14 Jul 2021 18:39:49 +0100 Subject: [PATCH 201/375] fix whitespace --- licenses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/licenses.py b/licenses.py index 384a6da80d..4629af0228 100644 --- a/licenses.py +++ b/licenses.py @@ -83,7 +83,7 @@ 'xnet': 'OSI Approved::X.Net License', 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} - + license_url = { 'cc-nc': 'https://licenses.opendefinition.org/licenses/CC-BY-NC-4.0.json', 'cc-by': 'http://www.opendefinition.org/licenses/cc-by', From 279d2fc5bf1c323e4332c08cf62cc87305b97250 Mon Sep 17 00:00:00 2001 From: notshi Date: Fri, 16 Jul 2021 22:38:11 +0100 Subject: [PATCH 202/375] link the license names --- templates/licenses.html | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/templates/licenses.html b/templates/licenses.html index 92013088d5..adc31a740b 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -12,7 +12,6 @@ - @@ -21,8 +20,7 @@ {% for license, files in sorted(license_count.items()) %} - - + From ee0a02d852e87edb92dc4a4b609b2367dfc73942 Mon Sep 17 00:00:00 
2001 From: Andy Lulham Date: Sun, 18 Jul 2021 12:10:59 +0100 Subject: [PATCH 203/375] Remove links to discuss --- templates/timeliness.html | 1 - templates/timeliness_timelag.html | 2 -- 2 files changed, 3 deletions(-) diff --git a/templates/timeliness.html b/templates/timeliness.html index fc1c01ad06..3feb7f17cd 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -22,7 +22,6 @@

      Table of Frequency assessments

      Red flag: Publisher currently publishing future transaction dates.
      Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

      -

      You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum

      {% include '_partials/tablesorter_instructions.html' %} diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index bdb9d251d7..bb013751ad 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -19,8 +19,6 @@

      Table of Time lag assessments

      Red flag: Publisher currently publishing future transaction dates.
      Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

      -

      You are invited to participate in the ongoing consultation on publisher statistics and the summary statistics which is taking place on the IATI Discussion Forum.

      - {% include '_partials/tablesorter_instructions.html' %} From b8229be58daf81f7b0e9b3bdcfe67839a0de4b48 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 20 Jul 2021 13:51:05 +0100 Subject: [PATCH 204/375] =?UTF-8?q?Don=E2=80=99t=20hardcode=20license=20UR?= =?UTF-8?q?Ls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- licenses.py | 17 +++-------------- templates/licenses.html | 6 +++--- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/licenses.py b/licenses.py index 4629af0228..71bf88e6fc 100644 --- a/licenses.py +++ b/licenses.py @@ -84,19 +84,8 @@ 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} -license_url = { - 'cc-nc': 'https://licenses.opendefinition.org/licenses/CC-BY-NC-4.0.json', - 'cc-by': 'http://www.opendefinition.org/licenses/cc-by', - 'cc-by-sa': 'http://www.opendefinition.org/licenses/cc-by-sa', - 'cc-zero': 'http://www.opendefinition.org/licenses/cc-zero', - 'odc-by': 'http://www.opendefinition.org/licenses/odc-by', - 'odc-odbl': 'http://www.opendefinition.org/licenses/odc-odbl', - 'odc-pddl': 'http://www.opendefinition.org/licenses/odc-pddl', - 'other-at': 'https://licenses.opendefinition.org/licenses/other-at.json', - 'other-open': 'https://licenses.opendefinition.org/licenses/other-open.json', - 'other-pd': 'https://licenses.opendefinition.org/licenses/other-pd.json', - 'other-nc': 'https://licenses.opendefinition.org/licenses/other-nc.json', - 'uk-ogl': 'http://reference.data.gov.uk/id/open-government-licence'} +with open('./stats-calculated/licenses.json') as handler: + license_urls = json.load(handler) with open('./stats-calculated/ckan.json') as handler: ckan = json.load(handler, object_pairs_hook=OrderedDict) @@ -133,7 +122,7 @@ def main(): licenses_per_publisher = [license for license, publisher in licenses_and_publisher] return render_template('licenses.html', license_names=license_names, - license_url=license_url, + 
license_urls=license_urls, license_count=dict((x, licenses.count(x)) for x in set(licenses)), publisher_license_count=dict((x, licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), sorted=sorted, diff --git a/templates/licenses.html b/templates/licenses.html index adc31a740b..f1d207ae97 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -11,8 +11,8 @@
      License NameLicense Url License Id Files Publishers
      {{ license_names[license] }}{{ license_url[license] }} {{ license }} {{ files }} {{ publisher_license_count[license] }}
      License NameLicense Url License Id Files Publishers
      {{ license_names[license] }}{{ license_url[license] }}{{ license_names[license] }} {{ license }} {{ files }} {{ publisher_license_count[license] }}
      - - + + @@ -20,7 +20,7 @@ {% for license, files in sorted(license_count.items()) %} - + From 5f2f55ce841db69b0e177614b2841d12234ee717 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 20 Jul 2021 14:05:44 +0100 Subject: [PATCH 205/375] Flip around the links here I personally find this more intuitive --- templates/licenses.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/licenses.html b/templates/licenses.html index f1d207ae97..5eb4ed3441 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -20,8 +20,8 @@ {% for license, files in sorted(license_count.items()) %} - - + + From a4224c1ac33bbe6883bddd523bf96344b89159fe Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 20 Jul 2021 14:47:07 +0100 Subject: [PATCH 206/375] Fix bug in license template; fix for new format --- licenses.py | 2 +- templates/license.html | 8 +++++++- templates/licenses.html | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/licenses.py b/licenses.py index 71bf88e6fc..f9285ee5d4 100644 --- a/licenses.py +++ b/licenses.py @@ -142,7 +142,7 @@ def individual_license(license): url=lambda x: '../' + x, license=license, license_names=license_names, - license_url=license_url, + license_urls=license_urls, publisher_counts=publisher_counts, page='licenses', licenses=True) diff --git a/templates/license.html b/templates/license.html index 07e42b8918..588411a59f 100644 --- a/templates/license.html +++ b/templates/license.html @@ -2,7 +2,13 @@ {% import '_partials/boxes.html' as boxes %} {% block page_header %}

      {{ license_names[license] }}

      -

      License id: {{ license }}

      +

      License ID: + {% if license_urls[license]['url'] %} + {{ license }} + {% else %} + {{ license }} + {% endif %} +

      {% endblock %} {% block content %} diff --git a/templates/licenses.html b/templates/licenses.html index 5eb4ed3441..0a442f242b 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -21,7 +21,7 @@ {% for license, files in sorted(license_count.items()) %}
      - + From d559d2153df8d5e5322504063311d538eb9eee35 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 22 Jul 2021 23:52:56 +0100 Subject: [PATCH 207/375] Rejig license page layout once again --- templates/license.html | 10 +++------- templates/licenses.html | 6 ++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/templates/license.html b/templates/license.html index 588411a59f..6579144862 100644 --- a/templates/license.html +++ b/templates/license.html @@ -1,13 +1,9 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block page_header %} -

      {{ license_names[license] }}

      -

      License ID: - {% if license_urls[license]['url'] %} - {{ license }} - {% else %} - {{ license }} - {% endif %} +

      {{ license_names[license] }}{% if license_urls[license]['url'] %} (source){% endif %}

      +

      + License ID: {{ license }}

      {% endblock %} diff --git a/templates/licenses.html b/templates/licenses.html index 0a442f242b..6793e2916b 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -12,7 +12,6 @@
      - @@ -20,10 +19,9 @@ {% for license, files in sorted(license_count.items()) %} - - + - + {% endfor %} From 0a738eafe9dd0106d700208034f92b7a44bc9a9f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 23 Jul 2021 07:27:23 +0100 Subject: [PATCH 208/375] Add license ID back --- templates/licenses.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/templates/licenses.html b/templates/licenses.html index 6793e2916b..6b3aa3da69 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -12,6 +12,7 @@ + @@ -20,6 +21,7 @@ {% for license, files in sorted(license_count.items()) %} + From 8abde83f00b7d8fe0b6c4c30c77eedaaba07031e Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 19 Aug 2021 23:10:09 +0100 Subject: [PATCH 209/375] If something goes wrong, raise an exception --- fetch_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fetch_data.py b/fetch_data.py index 0fabbe0eb2..22131c1c25 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -32,6 +32,7 @@ while True: params['offset'] = page_size * page res = requests.get(url, params=params).json()['result'] + res.raise_for_status() if res == []: break for publisher in res: From ac168a08e34392eb6ced6d5efb37c2a93724da55 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 20 Aug 2021 07:43:23 +0100 Subject: [PATCH 210/375] Refactor fetch_data to work around registry bug --- fetch_data.py | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/fetch_data.py b/fetch_data.py index 22131c1c25..834fb1ff90 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -1,6 +1,6 @@ """ Uses the CKAN API on the IATI Registry to fetch data about publishers -Makes a call to get a list of all organisations, then +Makes a call to get a list of all publishers, then grabs data about each individual publisher and stores the information in one file per publisher. 
@@ -10,6 +10,7 @@ from pathlib import Path import os import json +import time import requests @@ -17,27 +18,18 @@ os.makedirs(Path('data/ckan_publishers'), exist_ok=True) page_size = 50 -url = 'https://iatiregistry.org/api/3/action/organization_list' -params = { - 'all_fields': 'true', - 'include_extras': 'true', - 'include_tags': 'true', - 'include_groups': 'true', - 'include_users': 'true', - 'limit': page_size, -} +res = requests.get('https://iatiregistry.org/api/3/action/organization_list') +res.raise_for_status() +publisher_ids = res.json()['result'] +url = 'https://iatiregistry.org/api/3/action/organization_show' -# Loop through the organisation list json, saving a file of information about each publisher -page = 0 -while True: - params['offset'] = page_size * page - res = requests.get(url, params=params).json()['result'] +# Loop through the publisher list, saving a file of information about each publisher +for publisher_id in publisher_ids: + res = requests.get(url, params={'id': publisher_id}) + time.sleep(0.1) res.raise_for_status() - if res == []: - break - for publisher in res: - name = publisher.get('name') - output = {'result': publisher} - with open(os.path.join('data', 'ckan_publishers', name + '.json'), 'w') as fp: - _ = json.dump(output, fp) - page += 1 + publisher = res.json()['result'] + name = publisher.get('name') + output = {'result': publisher} + with open(os.path.join('data', 'ckan_publishers', name + '.json'), 'w') as fp: + _ = json.dump(output, fp) From fbfbf2c29acb19945b06b3cd8360038338054944 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 20 Aug 2021 21:14:22 +0100 Subject: [PATCH 211/375] Refactor to use registry.codeforiati.org --- fetch_data.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fetch_data.py b/fetch_data.py index 834fb1ff90..80d18c0e91 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -8,28 +8,23 @@ name, organisation type, and the link back to the registry """ from pathlib 
import Path -import os +from os.path import join +from os import makedirs import json -import time import requests # Make a directory to save the data about each publisher -os.makedirs(Path('data/ckan_publishers'), exist_ok=True) +output_path = Path('data/ckan_publishers') +makedirs(output_path, exist_ok=True) -page_size = 50 -res = requests.get('https://iatiregistry.org/api/3/action/organization_list') +res = requests.get('https://registry.codeforiati.org/publisher_list.json') res.raise_for_status() -publisher_ids = res.json()['result'] -url = 'https://iatiregistry.org/api/3/action/organization_show' +publishers = res.json()['result'] # Loop through the publisher list, saving a file of information about each publisher -for publisher_id in publisher_ids: - res = requests.get(url, params={'id': publisher_id}) - time.sleep(0.1) - res.raise_for_status() - publisher = res.json()['result'] +for publisher in publishers: name = publisher.get('name') output = {'result': publisher} - with open(os.path.join('data', 'ckan_publishers', name + '.json'), 'w') as fp: + with open(join(output_path, name + '.json'), 'w') as fp: _ = json.dump(output, fp) From 9ca75d6c237c1f6fe2c157140964d312ccc9d5d3 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 20 Sep 2021 12:50:26 +0100 Subject: [PATCH 212/375] Remove superfluous comma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hadn’t realised this was syntactically valid tbh. 
--- data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data.py b/data.py index e9f6581635..d6a7d57e5f 100644 --- a/data.py +++ b/data.py @@ -226,7 +226,7 @@ def transform_codelist_mapping_keys(codelist_mapping): # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 codelist_mapping = {k: v for k, v in codelist_mapping.items() if not k.startswith('//iati-organisation')} codelist_mapping = {re.sub(r'^\/\/iati-activity', './', k): v for k, v in codelist_mapping.items()} - codelist_mapping = {re.sub(r'^\/\/', './/', k): v for k, v, in codelist_mapping.items()} + codelist_mapping = {re.sub(r'^\/\/', './/', k): v for k, v in codelist_mapping.items()} return codelist_mapping From 394a6463c8dbbecb99903b370e134480347f8f9d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 19 Oct 2021 23:02:52 +0100 Subject: [PATCH 213/375] Add issues raised to nav --- templates/publisher.html | 3 +++ 1 file changed, 3 insertions(+) diff --git a/templates/publisher.html b/templates/publisher.html index 22aac4f325..1fdb6c3353 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -22,6 +22,9 @@

      Publisher: {{ publisher_name[publisher] }}

      {% if failure_count > 0 %}
    • Files Failing Validation {% endif %} + {% if github_issues and publisher in github_issues %} +
    • Issues Raised + {% endif %} {% if 1 in publisher_inverted.invalidxml.values() %}
    • Files where XML is not well-formed {% endif %} From 9ce5f110da6599ee9b76f1f60da2a16c52484620 Mon Sep 17 00:00:00 2001 From: Mark Brough Date: Tue, 29 Mar 2022 12:55:45 +0200 Subject: [PATCH 214/375] Pin markupsafe dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index c74c70d7e3..8f90cad9da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ matplotlib==2.2.5 requests Werkzeug==0.12.2 xmlschema==1.6.2 +markupsafe==2.0.1 From a532b4c861b4df9afd703d09daaae86416cdd3ef Mon Sep 17 00:00:00 2001 From: Mark Brough Date: Tue, 29 Mar 2022 15:55:46 +0200 Subject: [PATCH 215/375] Pin itsdangerous --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8f90cad9da..9b25d95e1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ requests Werkzeug==0.12.2 xmlschema==1.6.2 markupsafe==2.0.1 +itsdangerous==2.0.1 From 6ea17b03c5500c52d2463992e6036f0d77b5aeca Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 30 Aug 2022 12:52:43 +0100 Subject: [PATCH 216/375] Make codelist page a bit more defensive --- templates/codelists.html | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/templates/codelists.html b/templates/codelists.html index cf687bf23f..cc3ed7b689 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -27,16 +27,18 @@

      Codelists for version {{ major_version }}.xx

    • {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - - - - - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - - {% endwith %} - {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - - {% endwith %} + {% if codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} + + + + + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} + + {% endwith %} + {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + + {% endwith %} + {% endif %} {% endfor %} From d31d948dcfc7d1de15cc8995d7b7d6293493fd08 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 30 Aug 2022 12:52:54 +0100 Subject: [PATCH 217/375] Shallow clone stats repo --- get_stats.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/get_stats.sh b/get_stats.sh index 0ff18b70b7..c51790dfe5 100755 --- a/get_stats.sh +++ b/get_stats.sh @@ -1,3 +1,3 @@ #!/bin/bash -git clone --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated +git clone --depth=1 --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated From f69bf50fe4d236132874dbc07fe2b522ba13f29b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 30 Aug 2022 15:24:16 +0100 Subject: [PATCH 218/375] Revert "Make codelist page a bit more defensive" This reverts commit 6ea17b03c5500c52d2463992e6036f0d77b5aeca. 
--- templates/codelists.html | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/templates/codelists.html b/templates/codelists.html index cc3ed7b689..cf687bf23f 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -27,18 +27,16 @@

      Codelists for version {{ major_version }}.xx

      {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - {% if codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - - - - - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - - {% endwith %} - {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - - {% endwith %} - {% endif %} + + + + + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} + + {% endwith %} + {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + + {% endwith %} {% endfor %} From b4ecb64c1600d9335b7fe480775a5e93579b80b2 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 30 Aug 2022 15:39:54 +0100 Subject: [PATCH 219/375] Fetch all codelists from codeforIATI repos --- fetch_data.sh | 2 +- fetch_v2_codelists.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fetch_data.sh b/fetch_data.sh index 9d72cb8794..10f1605b59 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -34,7 +34,7 @@ cd ../../../ # Get codelists for versions v1.x and v2.x of the IATI Standard rm -rf data/IATI-Codelists-1 echo "cloning Codelists-1" -git clone --branch version-1.05 https://github.com/IATI/IATI-Codelists.git data/IATI-Codelists-1 +git clone --branch version-1.05 https://github.com/codeforIATI/IATI-Codelists.git data/IATI-Codelists-1 cd data/IATI-Codelists-1 echo "running gen.sh for Codelist-1" ./gen.sh diff --git a/fetch_v2_codelists.py b/fetch_v2_codelists.py index 3fed82fcec..4a445dc1a1 100644 --- a/fetch_v2_codelists.py +++ b/fetch_v2_codelists.py @@ -29,7 +29,7 @@ def mapping_to_json(mappings): mapping_urls = 
[ - 'https://raw.githubusercontent.com/IATI/IATI-Codelists/version-2.03/mapping.xml', + 'https://raw.githubusercontent.com/codeforIATI/IATI-Codelists/version-2.03/mapping.xml', 'https://raw.githubusercontent.com/codeforIATI/Unofficial-Codelists/master/mapping.xml'] mappings = [] for mapping_url in mapping_urls: From 9554add13e65832cbc9eb168afcc357308cdb576 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 8 Sep 2022 23:21:17 +0100 Subject: [PATCH 220/375] Show the names of codes --- data.py | 4 ++++ make_html.py | 2 ++ templates/codelist.html | 6 ++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/data.py b/data.py index d6a7d57e5f..f48c03ce14 100644 --- a/data.py +++ b/data.py @@ -253,6 +253,10 @@ def create_codelist_mapping(major_version): cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() } for major_version in MAJOR_VERSIONS} +codelist_lookup = { + major_version: { + cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() + } for major_version in MAJOR_VERSIONS} # Simple look up to map publisher id to a publishers given name (title) publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in ckan_publishers.items()} diff --git a/make_html.py b/make_html.py index 1a9325b4c2..2fd0c693cb 100644 --- a/make_html.py +++ b/make_html.py @@ -27,6 +27,7 @@ ckan_publishers, codelist_mapping, codelist_sets, + codelist_lookup, current_stats, dataset_to_publisher_dict, github_issues, @@ -146,6 +147,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['slugs'] = slugs app.jinja_env.globals['codelist_mapping'] = codelist_mapping app.jinja_env.globals['codelist_sets'] = codelist_sets +app.jinja_env.globals['codelist_lookup'] = codelist_lookup app.jinja_env.globals['get_codelist_values'] = 
get_codelist_values app.jinja_env.globals['is_valid_element'] = is_valid_element diff --git a/templates/codelist.html b/templates/codelist.html index 5658ff2530..3926f9f8ce 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -41,11 +41,13 @@

      On Codelist

      Codes that are on the {{ codelist_mapping[major_version].get(element) }} codelist.

      License NameLicense IdLicenseLicense ID Files Publishers
      {{ license_names[license] }}{% if license_urls.get(license) %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %} {{ license }} {{ files }} {{ publisher_license_count[license] }}
      {% if license_urls.get(license) %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %}{{ license }}{{ license_names[license] }}{% if license_urls.get(license) %}{{ license }}{% else %}{{ license }}{% endif %} {{ files }} {{ publisher_license_count[license] }}
      {{ license_names[license] }}{% if license_urls.get(license) %}{{ license }}{% else %}{{ license }}{% endif %}{% if license_urls[license]['url'] %}{{ license }}{% else %}{{ license }}{% endif %} {{ files }} {{ publisher_license_count[license] }}
      LicenseLicense ID Files Publishers
      {{ license_names[license] }}{% if license_urls[license]['url'] %}{{ license }}{% else %}{{ license }}{% endif %}{% if license_urls[license]['url'] %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %} {{ files }}{{ publisher_license_count[license] }}{{ publisher_license_count[license] }}
      LicenseLicense ID Files Publishers
      {% if license_urls[license]['url'] %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %}{{ license }} {{ files }} {{ publisher_license_count[license] }}
      {{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}{{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}
      {{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}{{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}
      - + {% for value, publishers in values.items() %} {% if value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - + + {% endif %} From 3c2b04d8199435294b651952fcb5d4afe3104190 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 10 Sep 2022 10:18:43 +0100 Subject: [PATCH 221/375] Fix timestamp --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index 2fd0c693cb..b3be65efad 100644 --- a/make_html.py +++ b/make_html.py @@ -114,7 +114,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['url'] = lambda x: x app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') -app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') +app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M)') app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' app.jinja_env.globals['sorted'] = sorted From 96286d04db41f0361f25558088855567f563ff27 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 6 Oct 2022 10:46:42 +0100 Subject: [PATCH 222/375] Nicer formatting of datestamps in footer --- make_html.py | 7 +++---- templates/base.html | 2 +- templates/index.html | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/make_html.py b/make_html.py index b3be65efad..c5849c938a 100644 --- a/make_html.py +++ b/make_html.py @@ -112,9 +112,8 @@ def get_codelist_values(codelist_values_for_element): # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x -app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%Y-%m-%d %H:%M:%S %Z') 
-app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%Y-%m-%d %H:%M:%S %Z') -app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M)') +app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%-d %B %Y (at %H:%M %Z)') +app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M %Z)') app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' app.jinja_env.globals['sorted'] = sorted @@ -138,7 +137,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['set'] = set app.jinja_env.globals['firstint'] = firstint app.jinja_env.globals['expected_versions'] = expected_versions -app.jinja_env.globals['current_year'] = datetime.now().year +app.jinja_env.globals['current_year'] = datetime.utcnow().year # Following variables set in coverage branch but not in master # app.jinja_env.globals['float'] = float # app.jinja_env.globals['dac2012'] = dac2012 diff --git a/templates/base.html b/templates/base.html index 36de8a4a27..2c6f377232 100644 --- a/templates/base.html +++ b/templates/base.html @@ -154,7 +154,7 @@

      {{ page_titles[page] }}

      Report bugs, and request features using Github issues.
      - Generated at {{ datetime_generated() }} from data downloaded at {{ datetime_data }}.
      + Generated on {{ datetime_generated() }} from data downloaded on {{ datetime_data }}.
      (NB This is the time the download task started. Any changes made after this time may not be reflected).
      diff --git a/templates/index.html b/templates/index.html index 51fee9bb8e..a1924a3c82 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data_homepage }}. For more information, see the FAQ.

      +

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      Many of the tables are sortable by clicking on the headers.

      Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      {% endblock %} From 8133fa1b18ea261831d152f17bc19020e943626e Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 18 Nov 2022 14:16:23 +0000 Subject: [PATCH 223/375] Round forward looking stats to 2dp --- forwardlooking.py | 6 +++--- summary_stats.py | 10 +++++----- templates/forwardlooking.html | 2 +- templates/summary_stats.html | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/forwardlooking.py b/forwardlooking.py index c444f576f9..b8f7524989 100644 --- a/forwardlooking.py +++ b/forwardlooking.py @@ -67,12 +67,12 @@ def generate_row(publisher): if not int(row['year_columns'][0][year]): row['year_columns'][2][year] = '-' else: - row['year_columns'][2][year] = int(round(float(row['year_columns'][1][year]) / float(row['year_columns'][0][year]) * 100)) + row['year_columns'][2][year] = float(row['year_columns'][1][year]) / float(row['year_columns'][0][year]) * 100 else: # Else if either 'forwardlooking_activities_current' or 'forwardlooking_activities_with_budgets' are not in the bottom hierarchy, set data zero # This should only occur if a publisher has 0 activities - row['year_columns'][0][year] = '0' - row['year_columns'][1][year] = '0' + row['year_columns'][0][year] = 0 + row['year_columns'][1][year] = 0 row['year_columns'][2][year] = '-' return row diff --git a/summary_stats.py b/summary_stats.py index 82f5b5a01c..9d65d1462e 100644 --- a/summary_stats.py +++ b/summary_stats.py @@ -27,11 +27,11 @@ def is_number(s): return False -def convert_to_int(x): +def convert_to_float(x): """ @todo Document this function """ if is_number(x): - return int(x) + return float(x) else: return 0 @@ -92,7 +92,7 @@ def table(): timelag_score = 0 # Compute the percentage - row['timeliness'] = int(round((float(frequency_score + timelag_score) / 8) * 100)) + row['timeliness'] = (float(frequency_score + timelag_score) / 8) * 100 # Compute forward-looking statistic # Get the forward-looking data for this publisher @@ -109,10 +109,10 @@ def table(): 
publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) # Set the comprehensiveness value to be the summary average for valid data - row['comprehensiveness'] = convert_to_int(publisher_comprehensiveness_data['summary_average_valid']) + row['comprehensiveness'] = convert_to_float(publisher_comprehensiveness_data['summary_average_valid']) # Compute score - row['score'] = int(round(float(row['timeliness'] + row['forwardlooking'] + row['comprehensiveness']) / 3)) + row['score'] = float(row['timeliness'] + row['forwardlooking'] + row['comprehensiveness']) / 3 # Return a generator object yield row diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 3183319f09..4532f787b2 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -68,7 +68,7 @@

      Activities with Forward Looking Budget Allocations

      {% for column in row.year_columns %} {% for year in forwardlooking.years %}
      {% endfor %} {% endfor %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 9a88c4af78..3026da7791 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -41,7 +41,7 @@

      Summary Statistics

      {% for column_slug, column_header in summary_stats.columns %} - From 47f2caa7af85b786ca25a256f6b45edb282855f2 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 18 Nov 2022 20:08:11 +0000 Subject: [PATCH 224/375] Add commit hash to frontend --- make_html.py | 4 ++++ templates/base.html | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index c5849c938a..055b041eb4 100644 --- a/make_html.py +++ b/make_html.py @@ -6,6 +6,7 @@ import json import os import re +import subprocess from collections import defaultdict from flask import Flask, render_template, redirect, abort, Response @@ -114,6 +115,9 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['url'] = lambda x: x app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%-d %B %Y (at %H:%M %Z)') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M %Z)') +app.jinja_env.globals['commit_hash'] = subprocess.run( + 'git show --format=%H --no-patch'.split(), + capture_output=True).stdout.decode().strip() app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' app.jinja_env.globals['sorted'] = sorted diff --git a/templates/base.html b/templates/base.html index 2c6f377232..40479ca3f4 100644 --- a/templates/base.html +++ b/templates/base.html @@ -150,7 +150,7 @@

      {{ page_titles[page] }}

      {% endfor %} From c6b89c7660763a3dcbd4f37f0ff140320124ef29 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 10:03:26 +0000 Subject: [PATCH 226/375] Show comprehensiveness stats to 2dp --- comprehensiveness.py | 21 +++++++-------------- templates/comprehensiveness_base.html | 4 ++-- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/comprehensiveness.py b/comprehensiveness.py index 8da2b7f59f..a4f960590a 100644 --- a/comprehensiveness.py +++ b/comprehensiveness.py @@ -68,11 +68,11 @@ def denominator(key, stats): # If there is a specific denominator for the given key, return this if key in stats['comprehensiveness_denominators']: - return int(stats['comprehensiveness_denominators'][key]) + return float(stats['comprehensiveness_denominators'][key]) # Otherwise, return the default denominator else: - return int(stats['comprehensiveness_denominator_default']) + return float(stats['comprehensiveness_denominator_default']) def get_hierarchy_with_most_budgets(stats): @@ -156,23 +156,16 @@ def generate_row(publisher): if denominator(slug, publisher_base) != 0: # Populate the row with the %age - row[slug] = int(round( - float(numerator_all) / denominator(slug, publisher_base) * 100 - )) - row[slug + '_valid'] = int(round( - float(numerator_valid) / denominator(slug, publisher_base) * 100 - )) + row[slug] = float(numerator_all) / denominator(slug, publisher_base) * 100 + row[slug + '_valid'] = float(numerator_valid) / denominator(slug, publisher_base) * 100 # Loop for averages # Calculate the average for each grouping, and the overall 'summary' average for page in ['core', 'financials', 'valueadded', 'summary']: # Note that the summary must be last, so that it can use the average calculations from the other groupings - row[page + '_average'] = int(round( - sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) - )) - row[page + '_average_valid'] = int(round( - sum((row.get(x[0] + '_valid') or 0) * 
x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) - )) + row[page + '_average'] = sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + + row[page + '_average_valid'] = sum((row.get(x[0] + '_valid') or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) return row diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 9c9e787d0c..7d3155c0e2 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -59,9 +59,9 @@

      {% block table_title %}Table of Comprehensiveness values

      {% for column_slug in comprehensiveness.column_slugs[tab] %} + ({{ row[column_slug] | round(2) }}) {% endif %} {% else %}-{% endif %} {% endfor %} From 03153e2c6dac77b36dcfed7a01083cb45484389d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 10:04:12 +0000 Subject: [PATCH 227/375] Slightly prettier rounding --- make_html.py | 7 +++++++ templates/comprehensiveness_base.html | 4 ++-- templates/forwardlooking.html | 2 +- templates/humanitarian.html | 2 +- templates/summary_stats.html | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/make_html.py b/make_html.py index 055b041eb4..461c81e769 100644 --- a/make_html.py +++ b/make_html.py @@ -71,6 +71,12 @@ def firstint(s): return int(m.group(0)) +def pretty_float(f, ndigits=2): + if int(f) == f: + return int(f) + return round(f, ndigits) + + def xpath_to_url(path): path = path.strip('./') # remove conditions @@ -110,6 +116,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher app.jinja_env.filters['has_future_transactions'] = timeliness.has_future_transactions +app.jinja_env.filters['pretty_float'] = pretty_float # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 7d3155c0e2..08e29a4734 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -59,9 +59,9 @@

      {% block table_title %}Table of Comprehensiveness values

      {% for column_slug in comprehensiveness.column_slugs[tab] %} + ({{ row[column_slug] | pretty_float }}) {% endif %} {% else %}-{% endif %} {% endfor %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 4532f787b2..725042b730 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -68,7 +68,7 @@

      Activities with Forward Looking Budget Allocations

      {% for column in row.year_columns %} {% for year in forwardlooking.years %}
      {% endfor %} {% endfor %} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 72aac68d68..978f1d408f 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -43,7 +43,7 @@

      Humanitarian

      {%- elif column_slug in ['num_activities', 'publishing_humanitarian'] -%} {{ row[column_slug]|int }} {%- else -%} - {{ row[column_slug]|round(2) }} + {{ row[column_slug]|pretty_float }} {%- endif -%} {% endfor %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 3026da7791..f2d9f006fa 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -41,7 +41,7 @@

      Summary Statistics

      {% for column_slug, column_header in summary_stats.columns %} - From b177f208818930626b1ab6e9d135938ff2561ee8 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 10:11:18 +0000 Subject: [PATCH 228/375] Make heading names more consistent --- comprehensiveness.py | 4 ++-- templates/comprehensiveness_base.html | 4 ++-- templates/comprehensiveness_core.html | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/comprehensiveness.py b/comprehensiveness.py index a4f960590a..5cc1a147f6 100644 --- a/comprehensiveness.py +++ b/comprehensiveness.py @@ -13,8 +13,8 @@ ], 'core': [ ('version', 'Version', 1), - ('reporting-org', 'Reporting-Org', 1), - ('iati-identifier', 'Iati-identifier', 1), + ('reporting-org', 'Reporting Organisation', 1), + ('iati-identifier', 'IATI Identifier', 1), ('participating-org', 'Participating Organisation', 1), ('title', 'Title', 1), ('description', 'Description', 1), diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 08e29a4734..97dd39c7b9 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -233,13 +233,13 @@

      Pseudocode

      - + - + diff --git a/templates/comprehensiveness_core.html b/templates/comprehensiveness_core.html index c8c00ede6c..f0417f9dae 100644 --- a/templates/comprehensiveness_core.html +++ b/templates/comprehensiveness_core.html @@ -3,7 +3,7 @@ {% block heading_detail %} -

      Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting-Organisation, IATI-identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

      +

      Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting Organisation, IATI Identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

      This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

      @@ -23,10 +23,10 @@

      Details

      Version

      Percentage of all current activities which contain a valid version number in the <iati-activities> file header element.

      -
      Reporting-Org
      +
      Reporting Organisation

      Percentage of all current activities which contain both a name and an identifier for the reporting organisation. (In future this will also check that the identifier contains a valid prefix identifying a registration agency.)

      -
      Iati-identifier
      +
      IATI Identifier

      Percentage of all current activities that contain a valid activity identifier. This MUST be prefixed with either the identifier reported for the reporting organisation, or (if publishing at v2.xx) an identifier reported in the <other-identifier> element. (In future this will also check that each identifier is globally unique.)

      Participating Organisation
      From afd96cf89ae0b2d62bca6ada0f304df89c22e0e9 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 10:40:08 +0000 Subject: [PATCH 229/375] Add a workflow dispatch trigger --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ba28cbfedd..1e4aa61e9a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,6 +3,7 @@ on: push: branches: - dev + workflow_dispatch: # for triggering builds manually repository_dispatch: types: - build From c5798949f5d5b870f18390578e9bd1c9fac52fa4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 13:29:10 +0000 Subject: [PATCH 230/375] Rename function; document --- make_html.py | 12 +++++++----- templates/comprehensiveness_base.html | 4 ++-- templates/forwardlooking.html | 2 +- templates/humanitarian.html | 4 +--- templates/summary_stats.html | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/make_html.py b/make_html.py index 461c81e769..62de6571f1 100644 --- a/make_html.py +++ b/make_html.py @@ -71,10 +71,12 @@ def firstint(s): return int(m.group(0)) -def pretty_float(f, ndigits=2): - if int(f) == f: - return int(f) - return round(f, ndigits) +def round_nicely(val, ndigits=2): + """ Round a float, but remove the trailing .0 from integers that python insists on + """ + if int(val) == float(val): + return int(val) + return round(float(val), ndigits) def xpath_to_url(path): @@ -116,7 +118,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher app.jinja_env.filters['has_future_transactions'] = timeliness.has_future_transactions -app.jinja_env.filters['pretty_float'] = pretty_float +app.jinja_env.filters['round_nicely'] = round_nicely # Custom Jinja globals app.jinja_env.globals['url'] = lambda x: x diff --git 
a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 97dd39c7b9..40bd9d6e2b 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -59,9 +59,9 @@

      {% block table_title %}Table of Comprehensiveness values

      {% for column_slug in comprehensiveness.column_slugs[tab] %} + ({{ row[column_slug] | round_nicely }}) {% endif %} {% else %}-{% endif %} {% endfor %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 725042b730..2e1084a948 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -68,7 +68,7 @@

      Activities with Forward Looking Budget Allocations

      {% for column in row.year_columns %} {% for year in forwardlooking.years %}
      {% endfor %} {% endfor %} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 978f1d408f..71bf8a0697 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -40,10 +40,8 @@

      Humanitarian

      {% endfor %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index f2d9f006fa..60554c91bb 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -41,7 +41,7 @@

      Summary Statistics

      {% for column_slug, column_header in summary_stats.columns %} - From 3a9ad6a7e42456bed24d91fd78be147d17f5083c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 14:01:59 +0000 Subject: [PATCH 231/375] Move table sorter instructions --- templates/files.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/templates/files.html b/templates/files.html index a01b06b9f5..8bca028381 100644 --- a/templates/files.html +++ b/templates/files.html @@ -15,7 +15,6 @@ (J)

      File Sizes

      - {% include '_partials/tablesorter_instructions.html' %}
      ValuePublishers
      ValueNamePublishers
      {{ value }} +
      {{ value }}{{ codelist_lookup[major_version].get(codelist_mapping[major_version].get(element))[value]['name'] }} {{ publishers|length }}
      - {{ column[year] }} + {% if column[year] != '-' %}{{ column[year] | round(2) }}{% else %}-{% endif %}
      {{ row.publisher_title }}{{ row[column_slug] }} + {% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round(2) }}{% endif %} {% endfor %}
      {%- if column_slug == 'publisher_type' -%} {{ row[column_slug] }} - {%- else -%} + {%- elif column_slug in ['num_activities', 'publishing_humanitarian'] -%} {{ row[column_slug]|int }} + {%- else -%} + {{ row[column_slug]|round(2) }} {%- endif -%} {{ row.publisher_title }}{% if column_slug in row %} - {{ row[column_slug+'_valid'] }} + {{ row[column_slug+'_valid'] | round(2) }} {% if row[column_slug+'_valid'] != row[column_slug] %} - ({{ row[column_slug] }}){{ row.publisher_title }}{% if column_slug in row %} - {{ row[column_slug+'_valid'] | round(2) }} + {{ row[column_slug+'_valid'] | pretty_float }} {% if row[column_slug+'_valid'] != row[column_slug] %} - ({{ row[column_slug] | round(2) }}) - {% if column[year] != '-' %}{{ column[year] | round(2) }}{% else %}-{% endif %} + {% if column[year] != '-' %}{{ column[year] | pretty_float }}{% else %}-{% endif %}
      {{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round(2) }}{% endif %} + {% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | pretty_float }}{% endif %} {% endfor %}
      CoreReporting-OrgReporting Organisation reporting-org/@ref and must exist and reporting-org must have textual content
      CoreIati-identifierIATI Identifier Activity must contain a nonempty iati-identifier If publishing at v2.xx, the iati-identifier must start with either a) the value in reporting-org/@ref or b) a value in other-identifier/@ref where @type="B1" (i.e. 'Previous Reporting Organisation Identifier'). No validation check is applied for v1.xx data, as it only became possible to add other-identifier/@type="B1" from v2.01.
      {{ row.publisher_title }}{% if column_slug in row %} - {{ row[column_slug+'_valid'] | pretty_float }} + {{ row[column_slug+'_valid'] | round_nicely }} {% if row[column_slug+'_valid'] != row[column_slug] %} - ({{ row[column_slug] | pretty_float }}) - {% if column[year] != '-' %}{{ column[year] | pretty_float }}{% else %}-{% endif %} + {% if column[year] != '-' %}{{ column[year] | round_nicely }}{% else %}-{% endif %} {%- if column_slug == 'publisher_type' -%} {{ row[column_slug] }} - {%- elif column_slug in ['num_activities', 'publishing_humanitarian'] -%} - {{ row[column_slug]|int }} {%- else -%} - {{ row[column_slug]|pretty_float }} + {{ row[column_slug]|round_nicely }} {%- endif -%}
      {{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | pretty_float }}{% endif %} + {% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} {% endfor %}
      {% for bin,freq in sorted(current_stats.aggregated.file_size_bins.items(), key=firstint) %} @@ -33,6 +32,9 @@

      File Sizes

      +
      + {% include '_partials/tablesorter_instructions.html' %} +
      From 9aca67db2690bb734aee69643aab71c128d4db36 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 14:37:22 +0000 Subject: [PATCH 232/375] Add an explanatory line to the table --- templates/files.html | 1 + 1 file changed, 1 insertion(+) diff --git a/templates/files.html b/templates/files.html index 8bca028381..f33674359e 100644 --- a/templates/files.html +++ b/templates/files.html @@ -33,6 +33,7 @@

      File Sizes

      +

      Total file size by publisher

      {% include '_partials/tablesorter_instructions.html' %}
      From b83504057c9edfba74058d37494b828f20287bfa Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 17:48:19 +0000 Subject: [PATCH 233/375] Commify numbers --- templates/_partials/boxes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 80c8e443d0..11f246bb00 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -4,7 +4,7 @@

      {% set title_id = title.replace(' ', '-').lower() %} - {{ number }} + {% if number %}{{ "{:,}".format(number) }}{% endif %} {{ title }}

      {% if json %} From 9f25fca5de74916161f881826d9eb0b5a86234f5 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 18:16:25 +0000 Subject: [PATCH 234/375] Add some missing legends --- templates/versions.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/versions.html b/templates/versions.html index bbf8c93e22..ae6d3804a2 100644 --- a/templates/versions.html +++ b/templates/versions.html @@ -6,9 +6,9 @@ {{ boxes.box('Files per version (other)', '', 'versions_other.png', 'versions.json', 'versions_other_legend.png', description='Count of files per other versions, over time. These values do not actually exist as IATI versions.') }}
      - {{ boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', + {{ boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', 'versions_expected_legend.png', description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.') }} - {{ boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', + {{ boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', 'versions_other_legend.png', description='Count of publishers per other version, over time') }}
      From a220bdd57953247f19cc29cb7e2b03a058f35159 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 18:17:17 +0000 Subject: [PATCH 235/375] Have a go at switching to chartjs --- templates/_partials/boxes.html | 37 +++++++++++++++++++++++++++++++--- templates/base.html | 3 +++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 80c8e443d0..0d0a41f250 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -14,9 +14,40 @@

      {{ description|safe }}

      - - {% if legend %} - + {% if json and not legend %} + + + {% else %} + + {% if legend %} + + {% endif %} {% endif %}
      diff --git a/templates/base.html b/templates/base.html index 40479ca3f4..5ba66f4cc1 100644 --- a/templates/base.html +++ b/templates/base.html @@ -97,6 +97,9 @@ {% block title %}Code for IATI Analytics - {{ page_titles[page] }}{% endblock %} {% block extrahead %}{% endblock %} + + +
      From c8a8054b8faea3e59bf2c05ba7e7b9addfa5cd83 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 19:00:05 +0000 Subject: [PATCH 236/375] Check if number is actually a number --- templates/_partials/boxes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 11f246bb00..9d300568c9 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -4,7 +4,7 @@

      {% set title_id = title.replace(' ', '-').lower() %} - {% if number %}{{ "{:,}".format(number) }}{% endif %} + {% if number is number %}{{ "{:,}".format(number) }}{% else %}{{ number }}{% endif %} {{ title }}

      {% if json %} From 0f875d09b60b475e60581b497db71a3ad5a97665 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 20 Nov 2022 19:02:04 +0000 Subject: [PATCH 237/375] Fix legend test --- templates/_partials/boxes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 0d0a41f250..d0177e722b 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -14,7 +14,7 @@

      {{ description|safe }}

      - {% if json and not legend %} + {% if json and legend == "" %} -{% block tablesorterscript %}{% endblock %} +{% block tablesorterscript %}{% endblock %} {% block extrafooter %}{% endblock %} From c14977ab9ab60d341336b628bba477df94d96a39 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 08:28:06 +0000 Subject: [PATCH 255/375] rst -> md --- CONTRIBUTING.rst => CONTRIBUTING.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename CONTRIBUTING.rst => CONTRIBUTING.md (100%) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.rst rename to CONTRIBUTING.md From 7206e67a3f56740484a27fc65bdd22ab21946c42 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 08:29:45 +0000 Subject: [PATCH 256/375] Markdownify --- CONTRIBUTING.md | 49 ++++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 85105c13ea..38ab6d9608 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,63 +5,58 @@ If you would like to contribute to the Code for IATI Analytics project, you can. * Request new features * Contribute code or documents to improve the application. See the list of specific tasks below. -How to report a bug or request a feature -======================================== +## How to report a bug or request a feature + If you are able to work with GitHub then please "Create an issue" Before creating a new issue check to see if the issue already exists. If not then please do report it. If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. 
Details at: https://github.com/codeforIATI +## How to contribute code and documents -How to contribute code and documents -==================================== - -How we use branches in this repository --------------------------------------- +### How we use branches in this repository * master represents our main development branch * live is the branch we are currently using for our deployed instance of the code * Other branches represent development work or bug fixes. -Submitting changes ------------------- +### Submitting changes * Fork this repository (if you haven't previously) * Make sure you're working on top of an up to date copy of IATI's master branch - - Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. ``42-feature-name``) + - Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. `42-feature-name`) * Do your work - - If your work addresses a specific issue, reference that issue in your commit message by starting the commit message with ``[#issue number]`` e.g. ``[#64]`` + - If your work addresses a specific issue, reference that issue in your commit message by starting the commit message with `[#issue number]` e.g. `[#64]` * Create a pull request -Specific Tasks: -=============== +## Specific Tasks: + +### Deployment -Deployment ----------- If you find any issues in deploying your own version of the code we'd love to hear about it and try to improve our deployment documentation. -User Interface --------------- +### User Interface + Can you improve the user interface? Can you make it look 'nicer'? -Documentation -------------- +### Documentation + We would welcome any improvements to how the code or the application is documented. -Statistics ----------- +### Statistics + Can you check that the generated statstics are telling us what we think they are telling us? 
Do you have other tests/statistics that we could be generating? -Unit Tests ----------- +### Unit Tests + Can you improve the unit testing to make deployment more robust? -Fix a Bitesize issue --------------------- +### Fix a Bitesize issue + We mark some of issues as 'Bitesize'. Generally these will help ease you into the code and help you find your way around. -Talk to us -========== +## Talk to us + We'd love to hear from you. Details at: https://github.com/codeforIATI From 854b98aaad5d74e57d337dceff904d7e114451dc Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 08:32:33 +0000 Subject: [PATCH 257/375] Update contributing instructions --- CONTRIBUTING.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 38ab6d9608..dc84b1fff4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,28 +7,27 @@ If you would like to contribute to the Code for IATI Analytics project, you can. ## How to report a bug or request a feature -If you are able to work with GitHub then please "Create an issue" +If you are able to work with GitHub then please [create an issue](https://github.com/codeforIATI/analytics/issues/new/choose). -Before creating a new issue check to see if the issue already exists. If not then please do report it. +Before creating a new issue check to see if the issue already exists. If not then please do create it. -If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. Details at: https://github.com/codeforIATI +If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. 
Details at: https://codeforiati.org/get-involved/ ## How to contribute code and documents ### How we use branches in this repository -* master represents our main development branch -* live is the branch we are currently using for our deployed instance of the code +* `main` represents our main development branch, and is the branch we are currently using for our deployed instance of the code * Other branches represent development work or bug fixes. ### Submitting changes * Fork this repository (if you haven't previously) -* Make sure you're working on top of an up to date copy of IATI's master branch +* Make sure you're working on top of an up to date copy of the `main` branch - Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. `42-feature-name`) * Do your work - If your work addresses a specific issue, reference that issue in your commit message by starting the commit message with `[#issue number]` e.g. `[#64]` -* Create a pull request +* Create a pull request against `main` ## Specific Tasks: @@ -59,4 +58,4 @@ We mark some of issues as 'Bitesize'. Generally these will help ease you into th ## Talk to us -We'd love to hear from you. Details at: https://github.com/codeforIATI +We'd love to hear from you. Details at: https://codeforiati.org/get-involved/ From f93a6b3a406c7bee288416c6e7585fd439001b7a Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 09:04:25 +0000 Subject: [PATCH 258/375] Fix homepage title text --- templates/index.html | 1 + 1 file changed, 1 insertion(+) diff --git a/templates/index.html b/templates/index.html index a1924a3c82..5e5fe3f608 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,4 +1,5 @@ {% extends 'section_index.html' %} +{% block title %}Code for IATI Analytics{% endblock %} {% block about %}

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      Many of the tables are sortable by clicking on the headers.

      From 31a965ea7925f9642fff9aff6fc228f41ca7f89d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 09:04:32 +0000 Subject: [PATCH 259/375] Reorder title text --- templates/base.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/base.html b/templates/base.html index 59b8c20b24..4d8b00c36e 100644 --- a/templates/base.html +++ b/templates/base.html @@ -7,7 +7,7 @@ - {% block title %}Code for IATI Analytics - {{ page_titles[page] }}{% endblock %} + {% block title %}{{ page_titles[page] }} – Code for IATI Analytics{% endblock %} {% block extrahead %}{% endblock %} From 7339b88b97e8918aacf59063bb95969f3afa0f86 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 09:23:13 +0000 Subject: [PATCH 260/375] Revert "Have a go at switching to chartjs" This reverts commit a220bdd57953247f19cc29cb7e2b03a058f35159. --- templates/_partials/boxes.html | 37 +++------------------------------- templates/base.html | 3 --- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/templates/_partials/boxes.html b/templates/_partials/boxes.html index 2344f50197..9d300568c9 100644 --- a/templates/_partials/boxes.html +++ b/templates/_partials/boxes.html @@ -14,40 +14,9 @@

      {{ description|safe }}

      - {% if json and legend == "" %} - - - {% else %} - - {% if legend %} - - {% endif %} + + {% if legend %} + {% endif %}
      diff --git a/templates/base.html b/templates/base.html index 4d8b00c36e..edea61e14e 100644 --- a/templates/base.html +++ b/templates/base.html @@ -10,9 +10,6 @@ {% block title %}{{ page_titles[page] }} – Code for IATI Analytics{% endblock %} {% block extrahead %}{% endblock %} - - -
      From 9ad20b28f1d9087621ee22f33e1a9be709a40540 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 10:14:27 +0000 Subject: [PATCH 261/375] =?UTF-8?q?Don=E2=80=99t=20redirect=20from=20/=20t?= =?UTF-8?q?o=20/index.html?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- make_html.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/make_html.py b/make_html.py index 89a1582621..ff70f8f8e7 100644 --- a/make_html.py +++ b/make_html.py @@ -8,7 +8,7 @@ import subprocess from collections import defaultdict -from flask import Flask, render_template, redirect, abort, Response, send_from_directory +from flask import Flask, render_template, abort, Response, send_from_directory import pytz import licenses @@ -231,7 +231,11 @@ def download_errors_json(): return Response(json.dumps(current_stats['download_errors'], indent=2), mimetype='application/json'), -app.add_url_rule('/', 'index_redirect', lambda: redirect('index.html')) +@app.route('/') +def homepage(): + return render_template('index.html', page='index') + + app.add_url_rule('/licenses.html', 'licenses', licenses.main) app.add_url_rule('/license/.html', 'licenses_individual_license', licenses.individual_license) From 32cb2f62baed9375219416dcf336350fa249658c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 14:26:49 +0000 Subject: [PATCH 262/375] Revert "Rejig some files" This reverts commit 13a7d107184d3988c62252dde60f96709dfec093. 
--- .github/workflows/build.yml | 4 +--- templates/_partials/tablesorter_instructions.html | 2 +- templates/base.html | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 842752e6d1..1e4aa61e9a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,9 +42,7 @@ jobs: - name: Delete files run: rm -rf data stats-calculated - name: Add static files to output - run: | - mv static/favicon.ico out - cp -r static out + run: cp -r static/* out - name: Deploy (production) 🚀 if: github.ref == 'refs/heads/main' uses: JamesIves/github-pages-deploy-action@3.7.1 diff --git a/templates/_partials/tablesorter_instructions.html b/templates/_partials/tablesorter_instructions.html index eaccab344d..348d0f3f39 100644 --- a/templates/_partials/tablesorter_instructions.html +++ b/templates/_partials/tablesorter_instructions.html @@ -1 +1 @@ -

      Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary etc) sorting by the desired column/s.

      +

      Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary etc) sorting by the desired column/s.

      \ No newline at end of file diff --git a/templates/base.html b/templates/base.html index edea61e14e..1ddce9d6b3 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,7 +5,7 @@ - + {% block title %}{{ page_titles[page] }} – Code for IATI Analytics{% endblock %} From eca2e05fe1bb443d6d211562696197eacf5605d4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 14:28:49 +0000 Subject: [PATCH 263/375] Fix links to homepage --- make_html.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/make_html.py b/make_html.py index ff70f8f8e7..6b0b085ebe 100644 --- a/make_html.py +++ b/make_html.py @@ -120,7 +120,7 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.filters['round_nicely'] = round_nicely # Custom Jinja globals -app.jinja_env.globals['url'] = lambda x: x +app.jinja_env.globals['url'] = lambda x: '/' if x == 'index.html' else x app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%-d %B %Y (at %H:%M %Z)') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M %Z)') app.jinja_env.globals['commit_hash'] = subprocess.run( @@ -163,7 +163,6 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['is_valid_element'] = is_valid_element basic_page_names = [ - 'index', 'headlines', 'data_quality', 'exploring_data', From d6d80a60d8ef4ca731bf66a9c3918ce7db6716d4 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 16:52:56 +0000 Subject: [PATCH 264/375] Serve all static files through dev server --- make_html.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/make_html.py b/make_html.py index 6b0b085ebe..355426495e 100644 --- a/make_html.py +++ b/make_html.py @@ -316,10 +316,10 @@ def registration_agencies(): nonmatching=nonmatching) -# Serve favicon through the development server (--live) -@app.route('/favicon.ico') -def favicon_development(): - return 
send_from_directory('static', 'favicon.ico') +# Serve static files through the development server (--live) +@app.route('/') +def favicon_development(filename): + return send_from_directory('static', filename) @app.route('/.csv') From 4d2852296817d2206ac86d48b28502a1ec28c644 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 16:53:10 +0000 Subject: [PATCH 265/375] Fix path to IATI-Stats-public --- make_html.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index 355426495e..01474c7d56 100644 --- a/make_html.py +++ b/make_html.py @@ -126,8 +126,11 @@ def get_codelist_values(codelist_values_for_element): app.jinja_env.globals['commit_hash'] = subprocess.run( 'git show --format=%H --no-patch'.split(), capture_output=True).stdout.decode().strip() +app.jinja_env.globals['stats_commit_hash'] = subprocess.run( + 'git -C stats-calculated show --format=%H --no-patch'.split(), + capture_output=True).stdout.decode().strip() app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' -app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/gh-pages' +app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + app.jinja_env.globals['stats_commit_hash'] app.jinja_env.globals['sorted'] = sorted app.jinja_env.globals['enumerate'] = enumerate app.jinja_env.globals['top_titles'] = text.top_titles From b052fee42d29b39167d2b213e2fe6033a44bc298 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 17:23:40 +0000 Subject: [PATCH 266/375] Fix call to send_from_directory --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index 01474c7d56..711cdbfb9a 100644 --- a/make_html.py +++ b/make_html.py @@ -332,7 +332,7 @@ def csv_development(name): @app.route('/publisher_imgs/.png') def image_development_publisher(image): - return send_from_directory('out', 'publisher_imgs', 
image + '.png') + return send_from_directory('out/publisher_imgs', image + '.png') if __name__ == '__main__': From 4a60da1623f48510f47916f020a598803b20bb4d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 17:24:05 +0000 Subject: [PATCH 267/375] Fix title formatting --- templates/codelist.html | 2 +- templates/element.html | 2 +- templates/publisher.html | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/codelist.html b/templates/codelist.html index 3926f9f8ce..6cd468f7a3 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block title %} -{{ super () }} - Codelists +Codelists {{ super () }} {% endblock %} {% block page_header %} diff --git a/templates/element.html b/templates/element.html index bbc83a27e3..d2b6961461 100644 --- a/templates/element.html +++ b/templates/element.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block title %} -{{ super () }} - Elements +Elements {{ super () }} {% endblock %} {% block page_header %} diff --git a/templates/publisher.html b/templates/publisher.html index 1fdb6c3353..8a7f98a59d 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block title %} -{{ super () }} Publisher: {{ publisher_name[publisher] }} +Publisher: {{ publisher_name[publisher] }} {{ super () }} {% endblock %} {% block page_header %} (Publisher Stats JSON) From 78ec12a881573e85096f65e978a380696248f12c Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 18:59:12 +0000 Subject: [PATCH 268/375] Remove unnecessary blocks --- templates/codelist.html | 3 --- templates/element.html | 3 --- templates/xml.html | 6 ------ 3 files changed, 12 deletions(-) diff --git a/templates/codelist.html b/templates/codelist.html index 6cd468f7a3..080e5bf132 100644 --- 
a/templates/codelist.html +++ b/templates/codelist.html @@ -1,8 +1,5 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} -{% block title %} -Codelists {{ super () }} -{% endblock %} {% block page_header %}

      Codelist values used for {{ element }}

      diff --git a/templates/element.html b/templates/element.html index d2b6961461..c59840cefb 100644 --- a/templates/element.html +++ b/templates/element.html @@ -1,8 +1,5 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} -{% block title %} -Elements {{ super () }} -{% endblock %} {% block page_header %}

      Usage of {{ element }}

      diff --git a/templates/xml.html b/templates/xml.html index 4ab7a10b32..e8e98436be 100644 --- a/templates/xml.html +++ b/templates/xml.html @@ -1,13 +1,7 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} -{% block page_header %} -{{ super () }} - -{% endblock %} - {% block content %} -
      {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.') }} From 462c3e1919532a71cd8c5a7f755ea830013e8335 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 18:59:21 +0000 Subject: [PATCH 269/375] Remove whitespace --- templates/index.html | 2 +- templates/publisher.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/index.html b/templates/index.html index 5e5fe3f608..c55691f138 100644 --- a/templates/index.html +++ b/templates/index.html @@ -6,7 +6,7 @@

      Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      {% endblock %} {% block lhs_column %} -{{ super () }} +{{ super() }}
      diff --git a/templates/publisher.html b/templates/publisher.html index 8a7f98a59d..061b54d60d 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -1,7 +1,7 @@ {% extends 'base.html' %} {% import '_partials/boxes.html' as boxes %} {% block title %} -Publisher: {{ publisher_name[publisher] }} {{ super () }} +Publisher: {{ publisher_name[publisher] }} {{ super() }} {% endblock %} {% block page_header %} (Publisher Stats JSON) From 5f62c2105e296fce52444d930bcc5dd409ed6950 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 19:18:06 +0000 Subject: [PATCH 270/375] Remove this inline CSS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s superfluous --- templates/comprehensiveness_base.html | 4 ++-- templates/forwardlooking.html | 4 ++-- templates/humanitarian.html | 4 ++-- templates/summary_stats.html | 4 ++-- templates/timeliness.html | 6 +++--- templates/timeliness_timelag.html | 4 ++-- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 40bd9d6e2b..0f4b28fbf4 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -47,9 +47,9 @@

      {% block table_title %}Table of Comprehensiveness values

      - + {% for column_header in comprehensiveness.column_headers[tab] %} - + {% endfor %} diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 2e1084a948..0e67b78e4a 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -46,9 +46,9 @@

      Activities with Forward Looking Budget Allocations

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      - + {% for column_header in forwardlooking.column_headers %} - + {% endfor %} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 71bf8a0697..be1367c747 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -26,9 +26,9 @@

      Humanitarian

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      - + {% for _, column_header in humanitarian.columns %} - + {% endfor %} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 60554c91bb..bf82b36833 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -30,9 +30,9 @@

      Summary Statistics

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      - + {% for column_slug, column_header in summary_stats.columns %} - + {% endfor %} diff --git a/templates/timeliness.html b/templates/timeliness.html index 3feb7f17cd..c5354d8cf3 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -28,12 +28,12 @@

      Table of Frequency assessments

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      - {% for month in timeliness.previous_months_reversed %} diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index bb013751ad..5056bbd00f 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -26,11 +26,11 @@

      Table of Time lag assessments

      Publisher Name - First published + Publisher Name + First published {{ timeliness.this_year-1 }} {{ timeliness.this_year }} - Frequency + Frequency
      - {% for month in timeliness.previous_months_reversed %} From 89191d80c0deeb34a5cb4e75c688620df2589459 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Thu, 1 Dec 2022 23:11:06 +0000 Subject: [PATCH 271/375] Fix all summary stat table borders --- templates/comprehensiveness_base.html | 10 +++++----- templates/forwardlooking.html | 16 +++++++-------- templates/humanitarian.html | 10 +++++----- templates/summary_stats.html | 10 +++++----- templates/timeliness.html | 28 +++++++++++++-------------- templates/timeliness_timelag.html | 24 +++++++++++------------ 6 files changed, 49 insertions(+), 49 deletions(-) diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 0f4b28fbf4..a66d55fa48 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -47,18 +47,18 @@

      {% block table_title %}Table of Comprehensiveness values

      Publisher Name + Publisher Name {{ timeliness.this_year-1 }} {{ timeliness.this_year }} - Time lag + Time lag
      - + {% for column_header in comprehensiveness.column_headers[tab] %} - + {% endfor %} {% for row in comprehensiveness.table() %} - - + + {% for column_slug in comprehensiveness.column_slugs[tab] %} - diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 0e67b78e4a..6eab272e4e 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -46,33 +46,33 @@

      Activities with Forward Looking Budget Allocations

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      {{ row.publisher_title }}
      {{ row.publisher_title }}{% if column_slug in row %} + {% if column_slug in row %} {{ row[column_slug+'_valid'] | round_nicely }} {% if row[column_slug+'_valid'] != row[column_slug] %} ({{ row[column_slug] | round_nicely }})
      - + {% for column_header in forwardlooking.column_headers %} - + {% endfor %} - {% for i in range(0,3) %} {% for year in forwardlooking.years %} - + {% endfor %} {% endfor %} {% for row in forwardlooking.table() %} - - + + {% for column in row.year_columns %} {% for year in forwardlooking.years %} - {% endfor %} {% endfor %} -
      Publisher NamePublisher Name{{ column_header }}{{ column_header }} +
      {{ year }}{{ year }}
      {{ row.publisher_title }}
      {{ row.publisher_title }} + {% if column[year] != '-' %}{{ column[year] | round_nicely }}{% else %}-{% endif %} * {%- elif row['budget_not_provided'] -%} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index be1367c747..9f00da5cd1 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -26,18 +26,18 @@

      Humanitarian

      - + {% for _, column_header in humanitarian.columns %} - + {% endfor %} {% for row in humanitarian.table() %} - - + + {% for column_slug, _ in humanitarian.columns %} -
      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      {{ row.publisher_title }}
      {{ row.publisher_title }} + {%- if column_slug == 'publisher_type' -%} {{ row[column_slug] }} {%- else -%} diff --git a/templates/summary_stats.html b/templates/summary_stats.html index bf82b36833..088a9a0793 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -30,18 +30,18 @@

      Summary Statistics

      - + {% for column_slug, column_header in summary_stats.columns %} - + {% endfor %} {% for row in summary_stats.table() %} - - + + {% for column_slug, column_header in summary_stats.columns %} - diff --git a/templates/timeliness.html b/templates/timeliness.html index c5354d8cf3..7e4ad0c91f 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -28,31 +28,31 @@

      Table of Frequency assessments

      Publisher NamePublisher Name{{ column_header }}{{ column_header }}
      {{ row.publisher_title }}
      {{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} + {% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} {% endfor %}
      - {% for month in timeliness.previous_months_reversed %} - + {% endfor %} - + {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} - - - + + + {% for month in timeliness.previous_months_reversed %} {% endfor %} - - - + + + {% endfor %} diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 5056bbd00f..b3ceeccf18 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -26,29 +26,29 @@

      Table of Time lag assessments

      Publisher Name - First published - {{ timeliness.this_year-1 }} - {{ timeliness.this_year }} - - Frequency + Publisher Name + First published + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} + + Frequency
      {{ timeliness.short_month(month) }}{{ timeliness.short_month(month) }}{{ timeliness.short_month(timeliness.this_month) }}{{ timeliness.short_month(timeliness.this_month) }}
      {{ publisher_title }}{{ first_published_band }}
      {{ publisher_title }}{{ first_published_band }}{{ per_month[month] or 0 }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}
      - {% for month in timeliness.previous_months_reversed %} - + {% endfor %} - + {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} - - + + {% for month in timeliness.previous_months_reversed %} {% endfor %} - - - + + + {% endfor %} From 12eb0b63c88e58a81d42120f5c3ebea741e0f6fd Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 2 Dec 2022 09:15:40 +0000 Subject: [PATCH 272/375] Remove links to comments (there are no comments) --- templates/comprehensiveness_base.html | 2 -- templates/forwardlooking.html | 2 -- templates/summary_stats.html | 2 -- templates/timeliness_base.html | 2 -- 4 files changed, 8 deletions(-) diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index a66d55fa48..ae46696ce9 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -25,8 +25,6 @@
    • |
    • Comparison with original Global Partnership Indicator methodology
    • |
    • -
    • Comment
    • -
    • |
    • Pseudocode
    • diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 6eab272e4e..d1a03109d6 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -14,8 +14,6 @@
    • |
    • Comparison with original Global Partnership Indicator methodology
    • |
    • -
    • Comment
    • -
    • |
    • Pseudocode
    • diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 088a9a0793..3be45c9971 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -8,8 +8,6 @@
    • Narrative
    • |
    • Exceptions
    • -
    • |
    • -
    • Comment
    • diff --git a/templates/timeliness_base.html b/templates/timeliness_base.html index 5219f186e1..ab38af7f21 100644 --- a/templates/timeliness_base.html +++ b/templates/timeliness_base.html @@ -25,8 +25,6 @@
    • |
    • Comparison with original Global Partnership Indicator methodology
    • |
    • -
    • Comment
    • -
    • |
    • Pseudocode
    • From e6b71e008993825b87d13e0e2ea1fe8220a00008 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 2 Dec 2022 09:16:54 +0000 Subject: [PATCH 273/375] Smooth scroll --- static/style.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/static/style.css b/static/style.css index fe3f78cdc7..231cfb2f4b 100644 --- a/static/style.css +++ b/static/style.css @@ -1,3 +1,7 @@ +html { + scroll-behavior: smooth; +} + em { font-style: italic; } From 95d7fdfc4671f1c4f1712035aad03173cc124c38 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 2 Dec 2022 13:15:00 +0000 Subject: [PATCH 274/375] Use `url_for` everywhere we can --- templates/base.html | 9 +++++---- templates/element.html | 6 +++--- templates/publisher.html | 6 +++--- templates/section_index.html | 10 ++++------ templates/versions.html | 2 +- text.py | 2 +- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/templates/base.html b/templates/base.html index 1ddce9d6b3..9e2bc589c1 100644 --- a/templates/base.html +++ b/templates/base.html @@ -26,15 +26,16 @@
      diff --git a/templates/element.html b/templates/element.html index c59840cefb..b8fe888aac 100644 --- a/templates/element.html +++ b/templates/element.html @@ -38,7 +38,7 @@

      Publishing this {{ element_or_attribute }}

      {% for publisher in sorted(publishers) %} - + {% with publisher_inverted=get_publisher_stats(publisher, 'inverted-file') %} {% endwith %} @@ -71,7 +71,7 @@

      Not publishing this {{ element_or_attribute }}

      {% for publisher in current_stats.inverted_publisher.publishers %} {% if publisher not in publishers %} - + @@ -98,7 +98,7 @@

      Files

      {% for publisher in current_stats.inverted_file_publisher %} {% with datasets = current_stats.inverted_file_publisher[publisher].elements.get(element) %} {% if datasets %} - {% with element_i=element_list.index(element) %} - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% with element_i=element_list.index(element) %} - + - + {% endwith %} {% endfor %} diff --git a/templates/section_index.html b/templates/section_index.html index 1abcf289bc..16fea03866 100644 --- a/templates/section_index.html +++ b/templates/section_index.html @@ -27,12 +27,10 @@

        {% for item in (top_navigation if page=='index' else navigation[navigation_reverse[page]]) %} - {% if item!='index' %} -
      • -

        {{ page_titles[item] }}

        -

        {{ page_leads[item]|safe }}

        -
      • - {% endif %} +
      • +

        {{ page_titles[item] }}

        +

        {{ page_leads[item]|safe }}

        +
      • {% endfor %}
      diff --git a/templates/versions.html b/templates/versions.html index ae6d3804a2..79fb906c1d 100644 --- a/templates/versions.html +++ b/templates/versions.html @@ -29,7 +29,7 @@

      Inconsistent versions

      {% for publisher in current_stats.inverted_file_publisher %} {% with datasets = current_stats.inverted_file_publisher[publisher].version_mismatch.get('true', {}) %} {% if datasets %} - {% endif %} {% endfor %} @@ -68,7 +68,7 @@

      Not On Codelist

      {% for value, publishers in values.items() %} {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} {% endif %} {% endfor %} diff --git a/templates/codelists.html b/templates/codelists.html index cf687bf23f..9a408891bc 100644 --- a/templates/codelists.html +++ b/templates/codelists.html @@ -27,15 +27,15 @@

      Codelists for version {{ major_version }}.xx

      {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - + - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - + {% endwith %} {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - + {% endwith %} {% endfor %} diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index 2202991f08..c711e4c887 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -8,10 +8,10 @@ {% endblock %}
        @@ -54,7 +54,7 @@

        {% block table_title %}Table of Comprehensiveness values

      {% for row in comprehensiveness.table() %} - + {% for column_slug in comprehensiveness.column_slugs[tab] %} - + diff --git a/templates/download.html b/templates/download.html index 1d4c72589f..4f9f66e695 100644 --- a/templates/download.html +++ b/templates/download.html @@ -29,7 +29,7 @@ {% for code, publisher, dataset, url in current_stats.download_errors %} - + diff --git a/templates/elements.html b/templates/elements.html index 40f24fa32e..d027314142 100644 --- a/templates/elements.html +++ b/templates/elements.html @@ -6,8 +6,8 @@ @@ -39,8 +39,8 @@ {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} {% if is_valid_element(element) %} - - + + diff --git a/templates/files.html b/templates/files.html index f33674359e..e3a1e05cfa 100644 --- a/templates/files.html +++ b/templates/files.html @@ -49,7 +49,7 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - + diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index 28764d2950..fa1145b5c5 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -61,7 +61,7 @@

      Activities with Forward Looking Budget Allocations

      {% for row in forwardlooking.table() %} - + {% for column in row.year_columns %} {% for year in forwardlooking.years %} diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 026b17e8d8..465d855cb2 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -18,7 +18,7 @@

      Humanitarian

      This table assesses the extent to which IATI publishers are reporting on humanitarian attributes.

      -

      The statistics on this page do not form part of the Summary Statstics.

      +

      The statistics on this page do not form part of the Summary Statstics.

      {% include '_partials/tablesorter_instructions.html' %}
      @@ -35,7 +35,7 @@

      Humanitarian

      {% for row in humanitarian.table() %} - + {% for column_slug, _ in humanitarian.columns %} - + diff --git a/templates/index.html b/templates/index.html index c55691f138..230be34e80 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,7 +1,7 @@ {% extends 'section_index.html' %} {% block title %}Code for IATI Analytics{% endblock %} {% block about %} -

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      +

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      Many of the tables are sortable by clicking on the headers.

      Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      {% endblock %} @@ -18,7 +18,7 @@

      @@ -26,7 +26,7 @@

      @@ -34,7 +34,7 @@

      @@ -42,7 +42,7 @@

      @@ -50,7 +50,7 @@

      @@ -58,7 +58,7 @@

      @@ -66,7 +66,7 @@

      @@ -74,7 +74,7 @@

      @@ -82,7 +82,7 @@

      @@ -96,7 +96,7 @@

      @@ -104,7 +104,7 @@

      @@ -112,7 +112,7 @@

      diff --git a/templates/license.html b/templates/license.html index 6579144862..7e170572e0 100644 --- a/templates/license.html +++ b/templates/license.html @@ -22,7 +22,7 @@

      {{ license_names[license] }}{% if license_urls[license]['url'] %} {{ publisher }} +

      {% endfor %} diff --git a/templates/licenses.html b/templates/licenses.html index 6b3aa3da69..2d148c56c6 100644 --- a/templates/licenses.html +++ b/templates/licenses.html @@ -23,7 +23,7 @@ - + {% endfor %} diff --git a/templates/organisation.html b/templates/organisation.html index be1c3439e1..0692d5fd9e 100644 --- a/templates/organisation.html +++ b/templates/organisation.html @@ -17,7 +17,7 @@

      The following publishers do not have an organisation file listed on the IATI Registry.

      diff --git a/templates/publisher.html b/templates/publisher.html index a965f79075..e6b4636f00 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -108,7 +108,7 @@

      Headlines

      diff --git a/templates/publishers.html b/templates/publishers.html index 351c7c7879..08037a9e18 100644 --- a/templates/publishers.html +++ b/templates/publishers.html @@ -34,8 +34,8 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = get_publisher_stats(publisher) %} - - + + diff --git a/templates/registration_agencies.html b/templates/registration_agencies.html index ff2a68dcd3..8f3fe14eed 100644 --- a/templates/registration_agencies.html +++ b/templates/registration_agencies.html @@ -51,8 +51,8 @@

      {% for publisher, count in publishers.items() %}

      - - + + {% endfor %} diff --git a/templates/reporting_orgs.html b/templates/reporting_orgs.html index e7103921f3..f27e0eb7c1 100644 --- a/templates/reporting_orgs.html +++ b/templates/reporting_orgs.html @@ -26,7 +26,7 @@

      {% set reporting_orgs_key = publisher_stats.reporting_orgs.keys()|first %} {% if publisher_stats.reporting_orgs|count != 1 or reporting_orgs_key != ckan_publishers[publisher].result.publisher_iati_id %}

      - + diff --git a/templates/summary_stats.html b/templates/summary_stats.html index a4fe63c282..21791d153e 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -37,7 +37,7 @@

      Summary Statistics

      {% for row in summary_stats.table() %} - + {% for column_slug, column_header in summary_stats.columns %}
      Publisher Name - {{ timeliness.this_year-1 }} - {{ timeliness.this_year }} - - Time lag + Publisher Name + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} + + Time lag
      {{ timeliness.short_month(month) }}{{ timeliness.short_month(month) }}{{ timeliness.short_month(timeliness.this_month) }}{{ timeliness.short_month(timeliness.this_month) }}
      {{ publisher_title }}
      {{ publisher_title }}{{ per_month[month] or 0 }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}
      {{ publisher }}{{ publisher }}{% if 'elements' in publisher_inverted %}{{ publisher_inverted.elements[element]|count }}{% endif %}
      {{ publisher }}{{ publisher }} {{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }} {{ current_stats.inverted_publisher.activities[publisher] }} {{ current_stats.inverted_publisher.organisations[publisher] }}
      {{ publisher }} +
      {{ publisher }} {% for dataset in datasets.keys() %} {{ dataset[:-4] }} {% endfor %} diff --git a/templates/publisher.html b/templates/publisher.html index 061b54d60d..a965f79075 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -377,7 +377,7 @@

      Codelist Values (version {{ major_version }}.xx)

      {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %}
      {{ element }}{{ element }} {{ codelist_mapping[major_version].get(element) }}{% if codes|count %} @@ -425,9 +425,9 @@

      Elements and Attributes Published

      {% for element, count in publisher_stats['elements'].items() %}
      {{ element }}{{ element }} {{ count }}{{ publisher_inverted.elements[element]|count }}{{ publisher_inverted.elements[element]|count }}
      {{ publisher_name[publisher] }} +
      {{ publisher_name[publisher] }} {% for dataset in datasets.keys() %} {{ dataset[:-4] }} {% endfor %} diff --git a/text.py b/text.py index b82a2221d3..191cbdad40 100644 --- a/text.py +++ b/text.py @@ -94,7 +94,7 @@ 'identifiers': 'Duplicate Identifiers', }) -top_navigation = ['index', 'headlines', 'data_quality', 'publishing_stats', 'exploring_data', 'faq'] +top_navigation = ['headlines', 'data_quality', 'publishing_stats', 'exploring_data', 'faq'] navigation = { 'headlines': ['publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], From 777bae98c034490b633d8b931766f3f0e2814dd0 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Fri, 2 Dec 2022 21:56:50 +0000 Subject: [PATCH 275/375] Use url_for for static files --- make_html.py | 2 +- templates/comprehensiveness_base.html | 2 +- templates/download.html | 2 +- templates/forwardlooking.html | 2 +- templates/humanitarian.html | 2 +- templates/publishers.html | 2 +- templates/summary_stats.html | 2 +- templates/timeliness.html | 2 +- templates/timeliness_timelag.html | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/make_html.py b/make_html.py index 711cdbfb9a..f1696a228c 100644 --- a/make_html.py +++ b/make_html.py @@ -39,7 +39,7 @@ is_valid_element, slugs) -app = Flask(__name__) +app = Flask(__name__, static_folder='') def dictinvert(d): diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index ae46696ce9..2202991f08 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -31,7 +31,7 @@ {% block content %}
      - (This table as CSV) + (This table as CSV)

      {% block table_title %}Table of Comprehensiveness values{% endblock %}

      diff --git a/templates/download.html b/templates/download.html index fd3a20a1ba..1d4c72589f 100644 --- a/templates/download.html +++ b/templates/download.html @@ -10,7 +10,7 @@

      History of Download Errors

      -

      This table as JSON

      +

      This table as JSON

      diff --git a/templates/forwardlooking.html b/templates/forwardlooking.html index d1a03109d6..28764d2950 100644 --- a/templates/forwardlooking.html +++ b/templates/forwardlooking.html @@ -20,7 +20,7 @@
      - (This table as CSV) + (This table as CSV)

      Activities with Forward Looking Budget Allocations

      diff --git a/templates/humanitarian.html b/templates/humanitarian.html index 9f00da5cd1..026b17e8d8 100644 --- a/templates/humanitarian.html +++ b/templates/humanitarian.html @@ -10,7 +10,7 @@
      diff --git a/templates/publishers.html b/templates/publishers.html index bec18c0509..351c7c7879 100644 --- a/templates/publishers.html +++ b/templates/publishers.html @@ -13,7 +13,7 @@
      -

      (This table as CSV)

      +

      (This table as CSV)

      List of current active IATI publishers, Click on the publisher name for more details.

      {% include '_partials/tablesorter_instructions.html' %}
      diff --git a/templates/summary_stats.html b/templates/summary_stats.html index 3be45c9971..a4fe63c282 100644 --- a/templates/summary_stats.html +++ b/templates/summary_stats.html @@ -12,7 +12,7 @@
      - (This table as CSV) + (This table as CSV)

      Summary Statistics

      diff --git a/templates/timeliness.html b/templates/timeliness.html index 7e4ad0c91f..84253d02ac 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Frequency assessments

      diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index b3ceeccf18..7426e76b4f 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Time lag assessments

      From 2ca2668158ec2d82f64d2b91dfde2d32438ff0dc Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 3 Dec 2022 20:57:20 +0000 Subject: [PATCH 276/375] Fix static file issue --- make_html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make_html.py b/make_html.py index f1696a228c..7bed57a171 100644 --- a/make_html.py +++ b/make_html.py @@ -39,7 +39,7 @@ is_valid_element, slugs) -app = Flask(__name__, static_folder='') +app = Flask(__name__, static_url_path='') def dictinvert(d): From 48110e0c12f41e754ed4e0a7018575b84fe33f2f Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sat, 3 Dec 2022 22:09:38 +0000 Subject: [PATCH 277/375] Use url_for for loads more URLs --- make_html.py | 2 +- templates/base.html | 6 +++--- templates/codelist.html | 4 ++-- templates/codelists.html | 8 ++++---- templates/comprehensiveness_base.html | 10 +++++----- templates/comprehensiveness_core.html | 2 +- templates/coverage.html | 2 +- templates/dates.html | 2 +- templates/download.html | 2 +- templates/elements.html | 8 ++++---- templates/files.html | 2 +- templates/forwardlooking.html | 2 +- templates/humanitarian.html | 4 ++-- templates/identifiers.html | 2 +- templates/index.html | 26 +++++++++++++------------- templates/license.html | 2 +- templates/licenses.html | 2 +- templates/organisation.html | 2 +- templates/publisher.html | 2 +- templates/publishers.html | 4 ++-- templates/registration_agencies.html | 4 ++-- templates/reporting_orgs.html | 2 +- templates/summary_stats.html | 12 ++++++------ templates/timeliness.html | 2 +- templates/timeliness_base.html | 4 ++-- templates/timeliness_timelag.html | 2 +- templates/traceability.html | 2 +- templates/validation.html | 2 +- templates/versions.html | 4 ++-- templates/xml.html | 6 +++--- 30 files changed, 67 insertions(+), 67 deletions(-) diff --git a/make_html.py b/make_html.py index 7bed57a171..8035ec2197 100644 --- a/make_html.py +++ b/make_html.py @@ -115,11 +115,11 @@ def 
get_codelist_values(codelist_values_for_element): # Custom Jinja filters app.jinja_env.filters['xpath_to_url'] = xpath_to_url app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] -app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher app.jinja_env.filters['has_future_transactions'] = timeliness.has_future_transactions app.jinja_env.filters['round_nicely'] = round_nicely # Custom Jinja globals +app.jinja_env.globals['dataset_to_publisher'] = dataset_to_publisher app.jinja_env.globals['url'] = lambda x: '/' if x == 'index.html' else x app.jinja_env.globals['datetime_generated'] = lambda: datetime.utcnow().replace(tzinfo=pytz.utc).strftime('%-d %B %Y (at %H:%M %Z)') app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M %Z)') diff --git a/templates/base.html b/templates/base.html index 9e2bc589c1..3aa2863ccb 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,8 +5,8 @@ - - + + {% block title %}{{ page_titles[page] }} – Code for IATI Analytics{% endblock %} {% block extrahead %}{% endblock %} @@ -72,7 +72,7 @@

      {{ page_titles[page] }}

      (NB This is the time the download task started. Any changes made after this time may not be reflected).
      - For details on how often these updates are applied, see the Code for IATI Analytics FAQ. + For details on how often these updates are applied, see the Code for IATI Analytics FAQ.
      diff --git a/templates/codelist.html b/templates/codelist.html index 080e5bf132..4c2ef72f67 100644 --- a/templates/codelist.html +++ b/templates/codelist.html @@ -45,7 +45,7 @@

      On Codelist

      {{ value }} {{ codelist_lookup[major_version].get(codelist_mapping[major_version].get(element))[value]['name'] }} - {{ publishers|length }} + {{ publishers|length }}
      {{ value }} - {{ publishers|length }} + {{ publishers|length }}
      {{ element }}{{ element }} {{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ values|length }} {{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}{{ codes|length }}{{ codes|length }}
      {{ row.publisher_title }}{{ row.publisher_title }}{% if column_slug in row %} {{ row[column_slug+'_valid'] | round_nicely }} diff --git a/templates/comprehensiveness_core.html b/templates/comprehensiveness_core.html index f0417f9dae..e082052c81 100644 --- a/templates/comprehensiveness_core.html +++ b/templates/comprehensiveness_core.html @@ -5,7 +5,7 @@ {% block heading_detail %}

      Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting Organisation, IATI Identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

      -

      This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

      +

      This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

      Key:
      Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. diff --git a/templates/coverage.html b/templates/coverage.html index 3fd56f256a..63640c0b56 100644 --- a/templates/coverage.html +++ b/templates/coverage.html @@ -10,7 +10,7 @@

      The coverage stats page is being rebuilt.

      Previously, the IATI technical team followed a manual process of contacting IATI publishers and requesting total operational spend values via email. - Results were stored in a public Google sheet. Data was collected for the years 2014 and 2015 and the values were used to calculate a coverage-adjusted score in the Summary Statistics page. + Results were stored in a public Google sheet. Data was collected for the years 2014 and 2015 and the values were used to calculate a coverage-adjusted score in the Summary Statistics page. As this was a very time consuming exercise (compounded by the increase in the number of publishers) coverage data collection has not been done since 2016, resulting in the coverage-adjusted scores in the summary statistics being out of date for the majority of publishers. As a result, in September 2018 the technical team took the decision to remove the coverage-adjusted values.

      diff --git a/templates/dates.html b/templates/dates.html index ba81fd3adb..fd9ffdb2d6 100644 --- a/templates/dates.html +++ b/templates/dates.html @@ -19,7 +19,7 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = get_publisher_stats(publisher) %}
      {{ publisher_title }}{{ publisher_title }} {% if publisher_stats.date_extremes.min.overall %}{{ publisher_stats.date_extremes.min.overall }}{% endif %} {% if publisher_stats.date_extremes.max.overall %}{{ publisher_stats.date_extremes.max.overall }}{% endif %} {% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{ publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}
      {{ publisher }}{{ publisher }} {{ dataset }} {{ url|url_to_filename }} {{ code }}
      {{ element }}{{ publishers|length }}{{ element }}{{ publishers|length }} {{ current_stats.aggregated.elements[element] }} {{ current_stats.aggregated.elements_total[element] }}
      {{ publisher_name[package[:-4]|dataset_to_publisher] }}{{ dataset_to_publisher(publisher_name[package[:-4]]) }} {{ package[:-4] }} {{ activities }} {{ current_stats.inverted_file.organisations.get(package) }}
      {{ row.publisher_title }}{{ row.publisher_title }}
      {{ row.publisher_title }}{{ row.publisher_title }} {%- if column_slug == 'publisher_type' -%} diff --git a/templates/identifiers.html b/templates/identifiers.html index b69afaacd9..fc95c3d641 100644 --- a/templates/identifiers.html +++ b/templates/identifiers.html @@ -28,7 +28,7 @@

      {% set publisher_stats = get_publisher_stats(publisher) %} {% if publisher_stats.publisher_duplicate_identifiers|count != 0 %}

      {{ publisher_title }}{{ publisher_title }} {{ publisher_stats.publisher_duplicate_identifiers|length }} {{ publisher_stats.publisher_duplicate_identifiers.values()|sum }}
      - + {{ current_stats.aggregated.activities }}
      - + {{ current_stats.aggregated.iati_identifiers|count }}
      - + {{ current_stats.aggregated.publishers }}
      - + {{ current_stats.aggregated.activity_files }}
      - + {{ current_stats.aggregated.organisation_files }}
      - + {{ current_stats.aggregated.file_size|filesizeformat }}
      - + {{ current_stats.download_errors|length }}
      - + {{ current_stats.aggregated.invalidxml }}
      - + {{ current_stats.aggregated.nonstandardroots }}
      - + {{ current_stats.aggregated.validation.fail }}
      - + {{ current_stats.aggregated.publishers_validation.fail }}
      - + {{ current_stats.aggregated.publisher_has_org_file.no }} {{ publisher }} {{ files }}
      {% if license_urls[license]['url'] %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %} {{ license }} {{ files }}{{ publisher_license_count[license] }}{{ publisher_license_count[license] }}
      Licenses {% for license in publisher_licenses %} - {{ license }} + {{ license }} {% endfor %}
      {{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }} {{ current_stats.inverted_publisher.activities[publisher] }} {{ publisher_stats.organisations }} {{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}
      {{ orgid|replace(' ', ' ') }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }} {{ count }}
      {{ publisher_title }}{{ publisher_title }} {{ ckan_publishers[publisher].result.publisher_iati_id }} {{ publisher_stats.reporting_orgs|length }} {% for ro in publisher_stats.reporting_orgs %}{{ ro }} {% endfor %}
      {{ row.publisher_title }}{{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} {% endfor %} @@ -55,10 +55,10 @@

      Narrative

      Timeliness

      -

      This is calculated by scoring the assessments made on the - frequency and timelag pages on a scale of +

      This is calculated by scoring the assessments made on the + frequency and timelag pages on a scale of 0 to 4 (as below), dividing the sum of the two scores by 8, and expressing the result as - a percentage. The methodology used in making the assesments is detailed on the frequency and timelag pages. + a percentage. The methodology used in making the assesments is detailed on the frequency and timelag pages.

      @@ -118,12 +118,12 @@

      Timeliness

      Forward looking

      The average percentage of current activities with budgets for each of the years {{ current_year }} - {{ current_year + 2 }}. - The component values and a detailed methodology are displayed on the forward looking page. + The component values and a detailed methodology are displayed on the forward looking page.

      Comprehensiveness

      -

      The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

      +

      The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

      Score

      diff --git a/templates/timeliness.html b/templates/timeliness.html index 84253d02ac..55f8160a58 100644 --- a/templates/timeliness.html +++ b/templates/timeliness.html @@ -45,7 +45,7 @@

      Table of Frequency assessments

      {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} - + {% for month in timeliness.previous_months_reversed %} diff --git a/templates/timeliness_base.html b/templates/timeliness_base.html index ab38af7f21..9ef36533af 100644 --- a/templates/timeliness_base.html +++ b/templates/timeliness_base.html @@ -8,8 +8,8 @@ {% endblock %}
        diff --git a/templates/timeliness_timelag.html b/templates/timeliness_timelag.html index 7426e76b4f..8cf5f9a742 100644 --- a/templates/timeliness_timelag.html +++ b/templates/timeliness_timelag.html @@ -42,7 +42,7 @@

        Table of Time lag assessments

      {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} - + {% for month in timeliness.previous_months_reversed %} {% endfor %} diff --git a/templates/traceability.html b/templates/traceability.html index eaf8e343d7..be48254beb 100644 --- a/templates/traceability.html +++ b/templates/traceability.html @@ -26,7 +26,7 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = get_publisher_stats(publisher) %} - + - + {% endif %} @@ -55,7 +55,7 @@

      Files with non-standard roots

      {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - + {% endif %} From b80a6314a2fd77af3ddf2c4876716e719da6d9c7 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Sun, 4 Dec 2022 14:10:32 +0000 Subject: [PATCH 278/375] Loop over each license at most once --- make_html.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/make_html.py b/make_html.py index 8035ec2197..42c8a37cd2 100644 --- a/make_html.py +++ b/make_html.py @@ -365,9 +365,7 @@ def url_generator(): 'slug': slug, 'major_version': major_version } - for license in licenses.licenses: - if license is None: - license = 'None' + for license in set(licenses.licenses): yield 'licenses_individual_license', {'license': license} freezer.freeze() From 67bb84ee73e4d7e1837687e11d9282a5e702ed84 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Dec 2022 07:28:36 +0000 Subject: [PATCH 279/375] Only generate timeliness for existing publishers --- timeliness.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/timeliness.py b/timeliness.py index 5cc1b70848..7a44269719 100644 --- a/timeliness.py +++ b/timeliness.py @@ -1,6 +1,6 @@ # This file converts raw timeliness data into the associated Publishing Statistics assessments -from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches +from data import JSONDir, publishers_ordered_by_title, publisher_name, get_publisher_stats, get_registry_id_matches import datetime from dateutil.relativedelta import relativedelta from collections import defaultdict, Counter @@ -61,7 +61,9 @@ def publisher_frequency(): gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') # Loop over each publisher - i.e. 
a publisher folder within 'gitaggregate-publisher-dated' - for publisher, agg in gitaggregate_publisher.items(): + + for publisher_title, publisher in publishers_ordered_by_title: + agg = gitaggregate_publisher[publisher] # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher if 'most_recent_transaction_date' not in agg: @@ -135,9 +137,8 @@ def publisher_frequency(): # There has been an update in none of the last 12 months frequency = 'Less than Annual' - # If the publisher is in the list of current publishers, return a generator object - if publisher in publisher_name: - yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft, first_published_band + # return a generator object + yield publisher, publisher_title, updates_per_month, frequency, hft, first_published_band def frequency_index(frequency): From 00911cbf38829380899ddd046add3b7ec9212345 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Dec 2022 08:09:53 +0000 Subject: [PATCH 280/375] Upgrade workflow dependencies --- .github/workflows/build.yml | 49 ++++++++++++++++++++----------------- .github/workflows/ci.yml | 6 ++--- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1e4aa61e9a..b39daf0c9c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,12 +11,15 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - name: Checkout this repo + uses: actions/checkout@v3 + with: + persist-credentials: false - name: Set up Python 3.7 - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.7 - - uses: actions/cache@v2 + - uses: actions/cache@v3 name: Cache dependencies with: path: ~/.cache/pip @@ -45,30 +48,30 @@ jobs: run: cp -r static/* out - name: Deploy (production) 🚀 if: github.ref == 'refs/heads/main' - uses: JamesIves/github-pages-deploy-action@3.7.1 + uses: 
JamesIves/github-pages-deploy-action@v4 with: - GIT_CONFIG_NAME: Code for IATI bot - GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com - GITHUB_TOKEN: ${{ secrets.TOKEN }} - REPOSITORY_NAME: codeforIATI/analytics-public - BRANCH: gh-pages - FOLDER: out - CLEAN: true - SILENT: true - SINGLE_COMMIT: true + git-config-name: Code for IATI bot + git-config-email: 57559326+codeforIATIbot@users.noreply.github.com + token: ${{ secrets.TOKEN }} + repository-name: codeforIATI/analytics-public + branch: gh-pages + folder: out + clean: true + silent: true + single-commit: true - name: Set CNAME (dev) if: github.ref == 'refs/heads/dev' run: echo "analytics-dev.codeforiati.org" > out/CNAME - name: Deploy (dev) 🚀 if: github.ref == 'refs/heads/dev' - uses: JamesIves/github-pages-deploy-action@3.7.1 + uses: JamesIves/github-pages-deploy-action@v4 with: - GIT_CONFIG_NAME: Code for IATI bot - GIT_CONFIG_EMAIL: 57559326+codeforIATIbot@users.noreply.github.com - GITHUB_TOKEN: ${{ secrets.TOKEN }} - REPOSITORY_NAME: codeforIATI/analytics-dev - BRANCH: gh-pages - FOLDER: out - CLEAN: true - SILENT: true - SINGLE_COMMIT: true + git-config-name: Code for IATI bot + git-config-email: 57559326+codeforIATIbot@users.noreply.github.com + token: ${{ secrets.TOKEN }} + repository-name: codeforIATI/analytics-dev + branch: gh-pages + folder: out + clean: true + silent: true + single-commit: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0e4a11c54..5079fa9bf7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,12 +4,12 @@ jobs: ci: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 - name: Set up Python 3.7 - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.7 - - uses: actions/cache@v2 + - uses: actions/cache@v3 name: Cache dependencies with: path: ~/.cache/pip From 69d88c50ef04a49f54fd7d4702080f7b18efd7ef Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: 
Mon, 5 Dec 2022 08:34:22 +0000 Subject: [PATCH 281/375] Remove unused url lambdas --- make_html.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/make_html.py b/make_html.py index 42c8a37cd2..a2eca5413e 100644 --- a/make_html.py +++ b/make_html.py @@ -264,7 +264,6 @@ def publisher(publisher): ] failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) return render_template('publisher.html', - url=lambda x: '../' + x, publisher=publisher, publisher_stats=publisher_stats, failure_count=failure_count, @@ -282,7 +281,6 @@ def codelist(major_version, slug): element=element, values=values, reverse_codelist_mapping={major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items()}, - url=lambda x: '../../' + x, major_version=major_version, page='codelists') @@ -295,7 +293,6 @@ def element(slug): return render_template('element.html', element=element, publishers=publishers, - url=lambda x: '../' + x, element_or_attribute='attribute' if '@' in element else 'element', page='elements') From 70d743a47de7d98f7613f9751170e6cd825d8d7b Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Dec 2022 11:33:11 +0000 Subject: [PATCH 282/375] No need for this anymore --- .github/workflows/build.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b39daf0c9c..76d5e869da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -44,8 +44,6 @@ jobs: run: python make_html.py - name: Delete files run: rm -rf data stats-calculated - - name: Add static files to output - run: cp -r static/* out - name: Deploy (production) 🚀 if: github.ref == 'refs/heads/main' uses: JamesIves/github-pages-deploy-action@v4 From 2336bfda5c86a25a2737d1219cbae1e9a73c1308 Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Dec 2022 19:44:38 +0000 Subject: [PATCH 283/375] Remove unused url lambda --- licenses.py | 1 - 1 file changed, 1 deletion(-) diff 
--git a/licenses.py b/licenses.py index f9285ee5d4..4a15ee71d3 100644 --- a/licenses.py +++ b/licenses.py @@ -139,7 +139,6 @@ def individual_license(license): license == 'notspecified' and package['license_id'] is None)] publisher_counts = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] return render_template('license.html', - url=lambda x: '../' + x, license=license, license_names=license_names, license_urls=license_urls, From 452727ece1389bf6442cb94969771ff3e76f36be Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Mon, 5 Dec 2022 19:48:12 +0000 Subject: [PATCH 284/375] Just 404 on publisher error --- make_html.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/make_html.py b/make_html.py index a2eca5413e..1ccd9386eb 100644 --- a/make_html.py +++ b/make_html.py @@ -245,24 +245,27 @@ def homepage(): @app.route('/publisher/.html') def publisher(publisher): publisher_stats = get_publisher_stats(publisher) - budget_table = [{ - 'year': 'Total', - 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), - 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, - 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - }] + 
[{'year': year, - 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), - 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, - 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) - ] - failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) + try: + budget_table = [{ + 'year': 'Total', + 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), + 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, + 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': 
{k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + }] + [{'year': year, + 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), + 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, + 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) + ] + failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) + except KeyError: + abort(404) return render_template('publisher.html', publisher=publisher, publisher_stats=publisher_stats, @@ -344,6 +347,7 @@ def image_development_publisher(image): from flask_frozen import Freezer app.config['FREEZER_DESTINATION'] = 'out' app.config['FREEZER_REMOVE_EXTRA_FILES'] = False + app.config['FREEZER_IGNORE_404_NOT_FOUND'] = True app.debug = False # Comment to turn off debugging app.testing = True # Comment to turn off debugging freezer = Freezer(app) From 4d6e620772fe35ec2001d67dec2718e5d653386d Mon Sep 17 00:00:00 2001 From: Andy Lulham Date: Tue, 6 Dec 2022 08:46:01 +0000 Subject: [PATCH 285/375] Revert "Only generate 
timeliness for existing publishers" This reverts commit 67bb84ee73e4d7e1837687e11d9282a5e702ed84. --- timeliness.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/timeliness.py b/timeliness.py index 7a44269719..5cc1b70848 100644 --- a/timeliness.py +++ b/timeliness.py @@ -1,6 +1,6 @@ # This file converts raw timeliness data into the associated Publishing Statistics assessments -from data import JSONDir, publishers_ordered_by_title, publisher_name, get_publisher_stats, get_registry_id_matches +from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches import datetime from dateutil.relativedelta import relativedelta from collections import defaultdict, Counter @@ -61,9 +61,7 @@ def publisher_frequency(): gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' - - for publisher_title, publisher in publishers_ordered_by_title: - agg = gitaggregate_publisher[publisher] + for publisher, agg in gitaggregate_publisher.items(): # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher if 'most_recent_transaction_date' not in agg: @@ -137,8 +135,9 @@ def publisher_frequency(): # There has been an update in none of the last 12 months frequency = 'Less than Annual' - # return a generator object - yield publisher, publisher_title, updates_per_month, frequency, hft, first_published_band + # If the publisher is in the list of current publishers, return a generator object + if publisher in publisher_name: + yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft, first_published_band def frequency_index(frequency): From 4448f5444e9d2cab9358bb12b757ffba96181b04 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 8 Dec 2022 16:14:00 +0000 Subject: [PATCH 286/375] Add org ids stats tables, including to publisher page --- data.py | 3 ++- 
make_html.py | 11 +++++++++ templates/org_ids.html | 38 +++++++++++++++++++++++++++++ templates/org_type.html | 52 ++++++++++++++++++++++++++++++++++++++++ templates/publisher.html | 35 +++++++++++++++++++++++++++ text.py | 4 +++- 6 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 templates/org_ids.html create mode 100644 templates/org_type.html diff --git a/data.py b/data.py index f48c03ce14..500c472030 100644 --- a/data.py +++ b/data.py @@ -291,5 +291,6 @@ def make_slugs(keys): if major_version in current_stats['inverted_publisher']['codelist_values_by_major_version'] else make_slugs([]) ) for major_version in MAJOR_VERSIONS}, - 'element': make_slugs(current_stats['inverted_publisher']['elements'].keys()) + 'element': make_slugs(current_stats['inverted_publisher']['elements'].keys()), + 'org_type': make_slugs(['accountable_org', 'extending_org', 'funding_org', 'implementing_org', 'provider_org', 'receiver_org']), } diff --git a/make_html.py b/make_html.py index 1ccd9386eb..2766e133ff 100644 --- a/make_html.py +++ b/make_html.py @@ -195,6 +195,7 @@ def get_codelist_values(codelist_values_for_element): 'booleans', 'dates', 'traceability', + 'org_ids', 'faq', ] @@ -300,6 +301,14 @@ def element(slug): page='elements') +@app.route('/org_type/.html') +def org_type(slug): + assert slug in slugs['org_type']['by_slug'] + return render_template('org_type.html', + slug=slug, + page='org_ids') + + @app.route('/registration_agencies.html') def registration_agencies(): registration_agencies = defaultdict(int) @@ -366,6 +375,8 @@ def url_generator(): 'slug': slug, 'major_version': major_version } + for slug in slugs['org_type']['by_slug']: + yield 'org_type', {'slug': slug} for license in set(licenses.licenses): yield 'licenses_individual_license', {'license': license} diff --git a/templates/org_ids.html b/templates/org_ids.html new file mode 100644 index 0000000000..cca35d9452 --- /dev/null +++ b/templates/org_ids.html @@ -0,0 +1,38 @@ +{% extends 
'base.html' %} +{% block content %} +
      +
      +
      +
      + {% include '_partials/tablesorter_instructions.html' %} +
      +
      {{ publisher_title }}{{ publisher_title }} {{ first_published_band }}{{ per_month[month] or 0 }}
      {{ publisher_title }}{{ publisher_title }}{{ per_month[month] or 0 }}
      {{ publisher_title }}{{ publisher_title }} {%- if publisher_stats.traceable_activities_by_publisher_id -%} {{ '{:,}'.format(publisher_stats.traceable_activities_by_publisher_id) }} diff --git a/templates/validation.html b/templates/validation.html index e3d5d5d3b4..96880c7b0e 100644 --- a/templates/validation.html +++ b/templates/validation.html @@ -24,7 +24,7 @@

      List of files that fail validation, grouped by publisher

      {% if datasets %}
      -
      {{ publisher_name[publisher ] }} ({{ datasets|length }})
      +
      {{ publisher_name[publisher ] }} ({{ datasets|length }})
      diff --git a/templates/versions.html b/templates/versions.html index 79fb906c1d..a4c5ec73db 100644 --- a/templates/versions.html +++ b/templates/versions.html @@ -68,7 +68,7 @@

      Expected versions

      {% for publisher in publishers %} - + {% endfor %}
      {{ publisher_name[publisher] }}{{ publisher_name[publisher] }}
      @@ -95,7 +95,7 @@

      Other versions

      {% for publisher in publishers %} - + {% endfor %}
      {{ publisher_name[publisher] }}{{ publisher_name[publisher] }}
      diff --git a/templates/xml.html b/templates/xml.html index e8e98436be..2c6da8ce2f 100644 --- a/templates/xml.html +++ b/templates/xml.html @@ -4,7 +4,7 @@ {% block content %}
      {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', - description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.') }} + description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.'.format(url_for('basic_page', page_name='validation'))) }} {{ boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'nonstandardroots.png', 'nonstandardroots.json', description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

      ') }}
      @@ -28,7 +28,7 @@

      Files where XML is not well-formed

      {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %}
      {{ dataset[:-4]|dataset_to_publisher }}{{ dataset_to_publisher(dataset[:-4]) }} {{ dataset[:-4] }}
      {{ dataset[:-4]|dataset_to_publisher }}{{ dataset_to_publisher(dataset[:-4]) }} {{ dataset[:-4] }}
      + + + + + + + + + + + + {% for slug in slugs.org_type.by_slug %} + {% set transaction_stats = current_stats.aggregated[slug + '_transaction_stats'] %} + + + + {% endfor %} + +
      Org TypeTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid Refs
      {{ slug.replace('_org', '') | capitalize }}{{ '{:,}'.format(transaction_stats.total_orgs) if transaction_stats.total_orgs else '' }} + {{ '{:,}'.format(transaction_stats.total_refs) if transaction_stats.total_refs else '' }} + {{ '{:,}'.format(transaction_stats.total_full_refs) if transaction_stats.total_full_refs else '' }} + {{ '{:,}'.format(transaction_stats.total_notself_refs) if transaction_stats.total_notself_refs else '' }} + {{ '{:,}'.format(transaction_stats.total_valid_refs) if transaction_stats.total_valid_refs else '' }} +
      +
      + + + +{% endblock %} diff --git a/templates/org_type.html b/templates/org_type.html new file mode 100644 index 0000000000..c759091937 --- /dev/null +++ b/templates/org_type.html @@ -0,0 +1,52 @@ +{% extends 'base.html' %} + +{% block title %} +Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs +{% endblock %} + +{% block page_header %} +

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs

      +{% endblock %} + +{% block content %} +
      +
      +
      +
      +

      Blurb

      + {% include '_partials/tablesorter_instructions.html' %} +
      + + + + + + + + + + + + + + + {% for publisher_title, publisher in publishers_ordered_by_title %} + {% set publisher_stats = get_publisher_stats(publisher) %} + {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} + + + + + {% endfor %} + +
      Publisher NamePublisher Registry IdTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid RefsPercentage of Org Elements with Valid Refs
      {{ publisher_name[publisher] }}{{ publisher }}{{ '{:,}'.format(transaction_stats.total_orgs or 0) }} + {{ '{:,}'.format(transaction_stats.total_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} + {{ '{:,.2f}'.format((transaction_stats.total_valid_refs or 0) / transaction_stats.total_orgs * 100) if transaction_stats.total_orgs else 0 }} +
      +
      +
      +
      +{% endblock %} diff --git a/templates/publisher.html b/templates/publisher.html index e6b4636f00..038bbd8bb3 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -39,6 +39,7 @@

      Publisher: {{ publisher_name[publisher] }}

    • Files
    • Codelist values
    • Elements and Attributes published +
    • Organisation Identifiers
    • @@ -436,6 +437,40 @@

      Elements and Attributes Published

      +
      + +
      +

      Organisation Identifiers

      +
      +
      + + + + + + + + + + + + + {% for slug in slugs.org_type.by_slug %} + {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} + + + + + {% endfor %} + +
      Org TypeTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid Refs
      {{ slug.replace('_org', '') | capitalize }}{{ '{:,}'.format(transaction_stats.total_orgs or 0) }} + {{ '{:,}'.format(transaction_stats.total_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} + {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} + (J)
      +
      + {% endblock %} {% block extrafooter %} diff --git a/text.py b/text.py index 191cbdad40..56081bcd79 100644 --- a/text.py +++ b/text.py @@ -33,6 +33,7 @@ 'booleans': 'Booleans', 'dates': 'Dates', 'traceability': 'Traceability', + 'org_ids': 'Organisation Identifiers', 'publishing_stats': 'Publishing Statistics', 'coverage': 'Coverage', 'timeliness': 'Timeliness', @@ -64,6 +65,7 @@ 'booleans': 'How are booleans used in IATI data?', 'dates': 'What date ranges do publishers publish data for?', 'traceability': 'How much of a publisher’s spending is traceable to other publishers’ activities?', + 'org_ids': 'Are organisation identifiers being used correctly?', } page_sub_leads = { 'publishers': 'Publishers represent organisation accounts in the IATI Registry.', @@ -98,6 +100,6 @@ navigation = { 'headlines': ['publishers', 'files', 'activities'], 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], - 'exploring_data': ['elements', 'codelists', 'booleans', 'dates', 'traceability'], + 'exploring_data': ['elements', 'codelists', 'booleans', 'dates', 'traceability', 'org_ids'], 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'summary_stats', 'humanitarian'] } From e966b7d830baf480f5353b4f81de9d65a63251d4 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 8 Dec 2022 17:55:50 +0000 Subject: [PATCH 287/375] Group columns in the main org IDs stats table, and change % calculation --- templates/org_type.html | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/templates/org_type.html b/templates/org_type.html index c759091937..fddfa3ed24 100644 --- a/templates/org_type.html +++ b/templates/org_type.html @@ -19,13 +19,21 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs< - - - - - - - + + + + + + + + + + + + + + + @@ -34,14 +42,20 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs< {% set publisher_stats = get_publisher_stats(publisher) %} {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %}

      - - + + {% endfor %} From 8a7304ba0ab8e8f0e26f0eaaac596cc7f374cea3 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Fri, 9 Dec 2022 10:11:12 +0000 Subject: [PATCH 288/375] Tweak the layout of the org IDs stats table Following feedback https://github.com/codeforIATI/analytics/pull/80#issuecomment-1343437680 --- templates/org_type.html | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/templates/org_type.html b/templates/org_type.html index fddfa3ed24..0af742e302 100644 --- a/templates/org_type.html +++ b/templates/org_type.html @@ -22,14 +22,13 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<

      - + - @@ -55,7 +54,7 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<

      {% endfor %} From 1967c84e60674f5d710fc604abe2f9ca9d97c8ef Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Fri, 9 Dec 2022 10:41:47 +0000 Subject: [PATCH 289/375] Apply org id table updates to overview table and publisher page --- templates/org_id_table_cells.html | 12 ++++++++++++ templates/org_id_table_header.html | 16 ++++++++++++++++ templates/org_ids.html | 16 ++++------------ templates/org_type.html | 28 ++-------------------------- templates/publisher.html | 20 ++++++-------------- 5 files changed, 40 insertions(+), 52 deletions(-) create mode 100644 templates/org_id_table_cells.html create mode 100644 templates/org_id_table_header.html diff --git a/templates/org_id_table_cells.html b/templates/org_id_table_cells.html new file mode 100644 index 0000000000..e572263206 --- /dev/null +++ b/templates/org_id_table_cells.html @@ -0,0 +1,12 @@ + + + + {% if extra_column %} + + {% endif %} + + + + + + + + + + diff --git a/templates/org_ids.html b/templates/org_ids.html index cca35d9452..3833e46b55 100644 --- a/templates/org_ids.html +++ b/templates/org_ids.html @@ -9,24 +9,16 @@
      Publisher NamePublisher Registry IdTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid RefsPublisher NamePublisher Registry IdTotalSelf RefsExcluding Self Refs
      Org ElementsRefsNon-Empty RefsSelf RefsOrg ElementsRefsNon-Empty RefsValid Refs Percentage of Org Elements with Valid Refs
      {{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }} {{ '{:,}'.format(transaction_stats.total_orgs or 0) }} {{ '{:,}'.format(transaction_stats.total_refs or 0) }} {{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} + {% set self_refs = (transaction_stats.total_full_refs or 0) - (transaction_stats.total_notself_refs or 0) %} + {{ '{:,}'.format(self_refs) }} + {% set total_org_elements_excluding_self_refs = (transaction_stats.total_orgs or 0) - self_refs %} + {{ '{:,}'.format(total_org_elements_excluding_self_refs) }} + {% set total_refs_excluding_self_refs = (transaction_stats.total_refs or 0) - self_refs %} + {{ '{:,}'.format(total_refs_excluding_self_refs) }} {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} - {{ '{:,.2f}'.format((transaction_stats.total_valid_refs or 0) / transaction_stats.total_orgs * 100) if transaction_stats.total_orgs else 0 }} + {{ '{:,.2f}'.format((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0 }}
      Publisher Name Publisher Registry Id TotalSelf RefsSelf Refs Excluding Self Refs
      Org Elements Refs Non-Empty RefsSelf Refs Org Elements Refs Non-Empty Refs{{ '{:,}'.format(total_refs_excluding_self_refs) }} {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} - {{ '{:,.2f}'.format((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0 }} + {{ (((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0) | round_nicely }}
      {{ '{:,}'.format(transaction_stats.total_orgs or 0) }} +{{ '{:,}'.format(transaction_stats.total_refs or 0) }} +{{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} +{% set self_refs = (transaction_stats.total_full_refs or 0) - (transaction_stats.total_notself_refs or 0) %} +{{ '{:,}'.format(self_refs) }} +{% set total_org_elements_excluding_self_refs = (transaction_stats.total_orgs or 0) - self_refs %} +{{ '{:,}'.format(total_org_elements_excluding_self_refs) }} +{% set total_refs_excluding_self_refs = (transaction_stats.total_refs or 0) - self_refs %} +{{ '{:,}'.format(total_refs_excluding_self_refs) }} +{{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} +{{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} +{{ (((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0) | round_nicely }} diff --git a/templates/org_id_table_header.html b/templates/org_id_table_header.html new file mode 100644 index 0000000000..a65d3c4856 --- /dev/null +++ b/templates/org_id_table_header.html @@ -0,0 +1,16 @@ + TotalSelf RefsExcluding Self Refs
      Org ElementsRefsNon-Empty RefsOrg ElementsRefsNon-Empty RefsValid RefsPercentage of Org Elements with Valid Refs
      - - - - - - + + {% include 'org_id_table_header.html' %} {% for slug in slugs.org_type.by_slug %} {% set transaction_stats = current_stats.aggregated[slug + '_transaction_stats'] %} - - + {% include 'org_id_table_cells.html' %} {% endfor %} diff --git a/templates/org_type.html b/templates/org_type.html index 0af742e302..79b321b012 100644 --- a/templates/org_type.html +++ b/templates/org_type.html @@ -13,7 +13,6 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<
      -

      Blurb

      {% include '_partials/tablesorter_instructions.html' %}

      Org TypeTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid RefsOrg Type
      {{ slug.replace('_org', '') | capitalize }}{{ '{:,}'.format(transaction_stats.total_orgs) if transaction_stats.total_orgs else '' }} - {{ '{:,}'.format(transaction_stats.total_refs) if transaction_stats.total_refs else '' }} - {{ '{:,}'.format(transaction_stats.total_full_refs) if transaction_stats.total_full_refs else '' }} - {{ '{:,}'.format(transaction_stats.total_notself_refs) if transaction_stats.total_notself_refs else '' }} - {{ '{:,}'.format(transaction_stats.total_valid_refs) if transaction_stats.total_valid_refs else '' }} + {{ slug.replace('_org', '') | capitalize }}
      @@ -21,19 +20,7 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<

      - - - - - - - - - - - - - + {% include 'org_id_table_header.html' %} @@ -43,18 +30,7 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<

      - {% endfor %} diff --git a/templates/publisher.html b/templates/publisher.html index 038bbd8bb3..2b78a82c64 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -445,26 +445,18 @@

      Organisation Identifiers

      Publisher Name Publisher Registry IdTotalSelf RefsExcluding Self Refs
      Org ElementsRefsNon-Empty RefsOrg ElementsRefsNon-Empty RefsValid RefsPercentage of Org Elements with Valid Refs
      {{ publisher_name[publisher] }} {{ publisher }}{{ '{:,}'.format(transaction_stats.total_orgs or 0) }} - {{ '{:,}'.format(transaction_stats.total_refs or 0) }} - {{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} - {% set self_refs = (transaction_stats.total_full_refs or 0) - (transaction_stats.total_notself_refs or 0) %} - {{ '{:,}'.format(self_refs) }} - {% set total_org_elements_excluding_self_refs = (transaction_stats.total_orgs or 0) - self_refs %} - {{ '{:,}'.format(total_org_elements_excluding_self_refs) }} - {% set total_refs_excluding_self_refs = (transaction_stats.total_refs or 0) - self_refs %} - {{ '{:,}'.format(total_refs_excluding_self_refs) }} - {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} - {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} - {{ (((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0) | round_nicely }} + {% include 'org_id_table_cells.html' %}
      - - - - - - - + + {% set extra_column = true %} + {% include 'org_id_table_header.html' %} {% for slug in slugs.org_type.by_slug %} {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} - - + + {% include 'org_id_table_cells.html' %} + {% endfor %} From ba060b0d4329d1b508c996aec00214e7944e32af Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Wed, 26 Jun 2019 15:44:41 +0100 Subject: [PATCH 290/375] Link publishers to dashboard pages, update methodology for year calculations --- forwardlooking.py | 15 ++++++++------- static/templates/comprehensiveness_base.html | 2 +- static/templates/forwardlooking.html | 2 +- static/templates/humanitarian.html | 2 +- static/templates/summary_stats.html | 2 +- static/templates/timeliness.html | 2 +- static/templates/timeliness_timelag.html | 2 +- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/forwardlooking.py b/forwardlooking.py index 37c50a98b4..1544f903e5 100644 --- a/forwardlooking.py +++ b/forwardlooking.py @@ -46,21 +46,22 @@ def generate_row(publisher): # Loop over each of the three years (i.e. 
this year and the following two years) to generate the statistics for the table for year in years: - - forwardlooking_budget = 'forwardlooking_activities_with_budgets' if(len(hierarchies_with_budget_not_provided) > 0): - forwardlooking_budget = 'forwardlooking_activities_with_budget_not_provided' row['budget_not_provided'] = True - # If 'forwardlooking_activities_current' and 'forwardlooking_activities_with_budgets' are both in the bottom hierarchy - if 'forwardlooking_activities_current' in publisher_stats['bottom_hierarchy'] and forwardlooking_budget in publisher_stats['bottom_hierarchy'] : + # If 'forwardlooking_activities_current' and 'forwardlooking_activities_with_budgets' or 'forwardlooking_activities_with_budget_not_provided' are in the bottom hierarchy + if 'forwardlooking_activities_current' in publisher_stats['bottom_hierarchy'] and ('forwardlooking_activities_with_budgets' in publisher_stats['bottom_hierarchy'] or 'forwardlooking_activities_with_budget_not_provided' in publisher_stats['bottom_hierarchy']): if len(hierarchies_with_nonzero_budgets) != 1: # If budgets are at more than one hierarchy (or no hierarchies), just use activities at all hierarchies row['year_columns'][0][year] = publisher_stats['forwardlooking_activities_current'].get(year) or 0 - row['year_columns'][1][year] = publisher_stats[forwardlooking_budget].get(year) or 0 + row['year_columns'][1][year] = publisher_stats['forwardlooking_activities_with_budgets'].get(year) or 0 + if row['budget_not_provided']: + row['year_columns'][1][year] += publisher_stats['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 else: # Else, use the hierarchy which they are reported at row['year_columns'][0][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_current'].get(year) or 0 - row['year_columns'][1][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]][forwardlooking_budget].get(year) or 0 + row['year_columns'][1][year] = 
by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budgets'].get(year) or 0 + if row['budget_not_provided']: + by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 if not int(row['year_columns'][0][year]): row['year_columns'][2][year] = '-' diff --git a/static/templates/comprehensiveness_base.html b/static/templates/comprehensiveness_base.html index dcccc034cf..5442e074ff 100644 --- a/static/templates/comprehensiveness_base.html +++ b/static/templates/comprehensiveness_base.html @@ -56,7 +56,7 @@

      {% block table_title %}Table of Comprehensiveness values

      {% for row in comprehensiveness.table() %} - + {% for column_slug in comprehensiveness.column_slugs[tab] %} {% for row in forwardlooking.table() %} - + {% for column in row.year_columns %} {% for year in forwardlooking.years %} diff --git a/static/templates/humanitarian.html b/static/templates/humanitarian.html index cc8f8cbe4d..5b1d6ca1c4 100644 --- a/static/templates/humanitarian.html +++ b/static/templates/humanitarian.html @@ -35,7 +35,7 @@

      Humanitarian

      {% for row in humanitarian.table() %} - + {% for column_slug, _ in humanitarian.columns %} {% for row in summary_stats.table() %} - + {% for column_slug, column_header in summary_stats.columns %} {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_frequency_sorted() %} - + {% for month in timeliness.previous_months_reversed %} {% endfor %} diff --git a/static/templates/timeliness_timelag.html b/static/templates/timeliness_timelag.html index 191b8401bc..e0dad09432 100644 --- a/static/templates/timeliness_timelag.html +++ b/static/templates/timeliness_timelag.html @@ -44,7 +44,7 @@

      Table of Time lag assessments

      {% for publisher, publisher_title, per_month, assessment in timeliness.publisher_timelag_sorted() %} - + {% for month in timeliness.previous_months_reversed %} {% endfor %} From 9c3fa972a3f829969451e43e273588c199e04f73 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Thu, 27 Jun 2019 10:04:31 +0100 Subject: [PATCH 291/375] typo fix and adding methodology change to budget-years --- forwardlooking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/forwardlooking.py b/forwardlooking.py index 1544f903e5..0b16852789 100644 --- a/forwardlooking.py +++ b/forwardlooking.py @@ -61,7 +61,7 @@ def generate_row(publisher): row['year_columns'][0][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_current'].get(year) or 0 row['year_columns'][1][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budgets'].get(year) or 0 if row['budget_not_provided']: - by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 + row['year_columns'][1][year] += by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 if not int(row['year_columns'][0][year]): row['year_columns'][2][year] = '-' From 584b8bae66ea695eafac4431b9e38d152c776ef7 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Thu, 27 Jun 2019 12:15:01 +0100 Subject: [PATCH 292/375] Update methodology narrative to fit new budget year calculations --- static/templates/forwardlooking.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/static/templates/forwardlooking.html b/static/templates/forwardlooking.html index 271ceae039..e076fd4f31 100644 --- a/static/templates/forwardlooking.html +++ b/static/templates/forwardlooking.html @@ -181,7 +181,9 @@

      Pseudocode

      If start and end are both not null If (end - start <= 370 days) - If end month >= 7 + If end year == start year + budget year = end year + Elif end month >= 7 budget year = end year Else budget year = end year - 1 From 6668016e33b6796c2d4b932aeded052cb37adc71 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Fri, 16 Aug 2019 11:34:11 +0100 Subject: [PATCH 293/375] Include validation of bnp and add flag to comprehensiveness --- comprehensiveness.py | 3 +- make_csv.py | 25 +++++--- static/templates/comprehensiveness_base.html | 6 ++ .../comprehensiveness_financials.html | 4 +- static/templates/faq.html | 57 ------------------- static/templates/forwardlooking.html | 2 +- 6 files changed, 30 insertions(+), 67 deletions(-) delete mode 100644 static/templates/faq.html diff --git a/comprehensiveness.py b/comprehensiveness.py index 962e6d567e..0245962a86 100644 --- a/comprehensiveness.py +++ b/comprehensiveness.py @@ -31,7 +31,7 @@ ('budget', 'Budget', 1, 'hierarchy_with_most_budgets'), ('financials_average', 'Average', 0), # i.e. 
don't include the average within the calculation of the average ], - 'valueadded':[ + 'valueadded': [ ('contact-info', 'Contacts', 1), ('location', 'Location Details', 1), ('location_point_pos', 'Geographic Coordinates', 1), @@ -147,6 +147,7 @@ def generate_row(publisher): if slug == 'budget': budget_all = publisher_base.get('comprehensiveness', {}).get(slug, 0) budget_not_provided_all = publisher_base.get('comprehensiveness', {}).get('budget_not_provided', 0) + row['flag'] = budget_not_provided_all > 0 numerator_all = budget_all + budget_not_provided_all budget_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0) budget_not_provided_valid = publisher_base.get('comprehensiveness_with_validation', {}).get('budget_not_provided', 0) diff --git a/make_csv.py b/make_csv.py index d0b3c95894..3514aebb18 100644 --- a/make_csv.py +++ b/make_csv.py @@ -107,13 +107,24 @@ def publisher_dicts(): for tab in comprehensiveness.columns.keys(): with open(os.path.join('out', 'comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = unicodecsv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [ x+' (with valid data)' for x in comprehensiveness.column_headers[tab] ] + - [ x+' (with any data)' for x in comprehensiveness.column_headers[tab] ]) - for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] - + [ row[slug+'_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ] - + [ row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab] ]) + if tab == 'financials': + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + + [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + + [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]] + + ['Using budget-not-provided']) + for row in comprehensiveness.table(): + writer.writerow([row['publisher_title'], row['publisher']] + + [row[slug + '_valid'] if 
slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + ['Yes' if row['flag'] else '-']) + else: + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + + [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + + [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]]) + for row in comprehensiveness.table(): + writer.writerow([row['publisher_title'], row['publisher']] + + [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) diff --git a/static/templates/comprehensiveness_base.html b/static/templates/comprehensiveness_base.html index 5442e074ff..4a50370f69 100644 --- a/static/templates/comprehensiveness_base.html +++ b/static/templates/comprehensiveness_base.html @@ -51,6 +51,9 @@

      {% block table_title %}Table of Comprehensiveness values {% for column_header in comprehensiveness.column_headers[tab] %}

      {% endfor %} + {% if tab == 'financials' %} + + {% endif %} @@ -65,6 +68,9 @@

      {% block table_title %}Table of Comprehensiveness values {% endif %} {% else %}-{% endif %} {% endfor %} + {% if tab == 'financials' %} +

      + {% endif %} {% endfor %} diff --git a/static/templates/comprehensiveness_financials.html b/static/templates/comprehensiveness_financials.html index 43db1eb72f..9ac65b4585 100644 --- a/static/templates/comprehensiveness_financials.html +++ b/static/templates/comprehensiveness_financials.html @@ -15,7 +15,8 @@

      The statistics on this page are calculated based on either i) the data in each publishers' lowest hierarchy, or ii) data in the hierarchy which contains the greatest number of budgets for the given publisher - see the narrative section for full details.

      Key:
      - Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. + Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities.
      + Yellow flag: Publisher currently publishing the 'budget not provided' attribute for some or all activities.

      {% endblock %} @@ -40,6 +41,7 @@
      Budget
      {% block assessment_text %}

      Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

      +

      Activities with the budget-not-provided attribute will not be counted if any budget elements are found.

      {% endblock %} diff --git a/static/templates/faq.html b/static/templates/faq.html deleted file mode 100644 index 94150e8787..0000000000 --- a/static/templates/faq.html +++ /dev/null @@ -1,57 +0,0 @@ -{% extends 'base.html' %} - -{% block content %} - -

      Who runs the IATI Dashboard?

      -

      The Dashboard is maintained by the technical team of the IATI Secretariat. It has grown out of various projects to track IATI data around various facets and metrics.

      - -

      When is the Dashboard updated?

      -

      The Dashboard is generated through a process that starts shortly after midnight UK time on the following days each month:

      -
        -
      • 1st, 4th, 7th, 10th, 13th, 16th, 19th, 22nd, 25th (and the 28th for months with 31 days)
      • -
      -

      This is a two step process:

      -
        -
      1. The Dashboard scripts first download the data that is linked to from the IATI Registry - see “data downloaded” date time at the footer of the page.
      2. -
      3. From this dataset, the relevant statistics are calculated, and the Dashboard is then updated - see “generated” date/time at footer.
      4. -
      -

      The generation process usually takes between 24 and 48 hours to complete.

      - -

      Why is there a difference between the data download and Dashboard generation time?

      -

      The data is downloaded, and then there is a period of computing time to generate the statistics that inform the Dashboard.

      -

      Usually, there is a small period of time between the two. However, we do track this as sometimes things break - and the site fails to regenerate. If you spot something, please also let us know via via support@iatistandard.org.

      - -

      Does a graph going up or down mean something good?

      -

      No. There may be a number of reasons why a certain graph or number rises or falls.

      -

      In some cases, a fall in the graph may well be down to the fact that the Dashboard script failed to download the relevant data. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

      -

      Therefore, we stress to treat the graphs, numbers and statistics on the IATI Dashboard with caution, context and consideration.

      -

      Should you think something is really wrong, please contact us via support@iatistandard.org.

      - -

      Can I build my own version of this Dashboard?

      -

      Yes - the source code is all open source: https://github.com/IATI/IATI-Dashboard/.

      -

      We advise you to check through the technical specifications.

      - -

      How can I suggest a new function?

      -

      Ideally, we’d suggest to check through the list of issues we have logged in our Github repository.

      - -

      Alternatively, please email us via support@iatistandard.org

      - -

      We’ve published/updated our IATI data, but it doesn’t appear in the Dashboard.

      -

      We’d suggest two initial checks:

      - -
        -
      1. Is the data registered on the IATI Registry?
      2. -
      3. Has the dashboard updated since you published (check the times at the footer of this page)?
      4. -
      - -

      Should you still believe that data is missing from the Dashboard, we’d love to hear from you - please contact us on support@iatistandard.org

      - - -

      I want to get to the raw data of a publisher - how can I do that?

      -

      Two ways:

      -
        -
      1. Visit the IATI Registry and access the relevant links to the XML files - these links are often found on the relevant Dashboard page.
      2. -
      3. Try a query via the IATI Datastore
      4. -
      - -{% endblock %} diff --git a/static/templates/forwardlooking.html b/static/templates/forwardlooking.html index e076fd4f31..630914eb8b 100644 --- a/static/templates/forwardlooking.html +++ b/static/templates/forwardlooking.html @@ -106,7 +106,7 @@

      Assessment

      No separate assessment is provided as the percentage of current activities containing budgets for this and the next two years is the de facto assessment. No attempt is currently being made to turn these into a descriptive summary (as, for example, "Frequency = "Monthly"). The percentage for the middle year (i.e. 'next year') is of most relevance to developing countries.

      - +

      Activities with the budget-not-provided attribute will not be counted if any budget elements are found.

      From 5b2605fe69d0b2aa5874723c38e8bbfdfce07297 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Mon, 19 Aug 2019 10:22:01 +0100 Subject: [PATCH 294/375] Updating copy of assesments --- static/templates/comprehensiveness_financials.html | 2 +- static/templates/forwardlooking.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/static/templates/comprehensiveness_financials.html b/static/templates/comprehensiveness_financials.html index 9ac65b4585..281926d3a3 100644 --- a/static/templates/comprehensiveness_financials.html +++ b/static/templates/comprehensiveness_financials.html @@ -41,7 +41,7 @@
      Budget
      {% block assessment_text %}

      Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

      -

      Activities with the budget-not-provided attribute will not be counted if any budget elements are found.

      +

      Activities with the budget-not-provided attribute will not be counted as valid if any budget elements are found.

      {% endblock %} diff --git a/static/templates/forwardlooking.html b/static/templates/forwardlooking.html index 630914eb8b..498c65925d 100644 --- a/static/templates/forwardlooking.html +++ b/static/templates/forwardlooking.html @@ -106,7 +106,7 @@

      Assessment

      No separate assessment is provided as the percentage of current activities containing budgets for this and the next two years is the de facto assessment. No attempt is currently being made to turn these into a descriptive summary (as, for example, "Frequency = "Monthly"). The percentage for the middle year (i.e. 'next year') is of most relevance to developing countries.

      -

      Activities with the budget-not-provided attribute will not be counted if any budget elements are found.

      +

      Activities with any budget elements that are also found to contain the budget-not-provided attribute will not receive a forward looking score.

      From 6fa5cb0e9d58b485ebde48091e1ae7d856f67743 Mon Sep 17 00:00:00 2001 From: James B Date: Fri, 4 Aug 2023 08:50:55 +0100 Subject: [PATCH 295/375] Changes from live server that were not checked into git --- fetch_data.py | 2 +- static/templates/base.html | 13 ++++++++++++- static/templates/coverage.html | 4 ++-- static/templates/publishing_stats.html | 4 ++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fetch_data.py b/fetch_data.py index a531c308bc..331ec5ab67 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -18,7 +18,7 @@ except OSError: pass -page_size = 100 +page_size = 50 url = 'https://iatiregistry.org/api/3/action/organization_list' params = { 'all_fields': 'true', diff --git a/static/templates/base.html b/static/templates/base.html index a1f0c0e316..32041879aa 100644 --- a/static/templates/base.html +++ b/static/templates/base.html @@ -94,7 +94,18 @@ - + + + {% block extrahead %}{% endblock %} diff --git a/static/templates/coverage.html b/static/templates/coverage.html index b357228ace..8ab1e032bd 100644 --- a/static/templates/coverage.html +++ b/static/templates/coverage.html @@ -9,10 +9,10 @@

      The coverage stats page is being rebuilt.


      - Previously, the IATI technical team followed a manual process of contacting IATI publishers and requesting total operational spend values via email. + Previously, the IATI Secretariat followed a manual process of contacting IATI publishers and requesting total operational spend values via email. Results were stored in a public Google sheet. Data was collected for the years 2014 and 2015 and the values were used to calculate a coverage-adjusted score in the Summary Statistics page. As this was a very time consuming exercise (compounded by the increase in the number of publishers) coverage data collection has not been done since 2016, resulting in the coverage-adjusted scores in the summary statistics being out of date for the majority of publishers. - As a result, in September 2018 the technical team took the decision to remove the coverage-adjusted values from the Dashboard. + As a result, in September 2018 the IATI Secretariat took the decision to remove the coverage-adjusted values from the Dashboard.


      diff --git a/static/templates/publishing_stats.html b/static/templates/publishing_stats.html index 3da3877e5a..8b83becb42 100644 --- a/static/templates/publishing_stats.html +++ b/static/templates/publishing_stats.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      The statistics on IATI data that are calculated routinely and displayed on this dashboard are now a central part of the service that the Technical Team provides to publishers and users of IATI data alike: for publishers to better understand how to improve their data; for users to assess which data is likely to meet their particular needs; and for the technical team itself to prioritise its commitments to data quality.

      +

      The statistics on IATI data that are calculated routinely and displayed on this dashboard are now a central part of the service that the IATI Secretariat provides to publishers and users of IATI data alike: for publishers to better understand how to improve their data; for users to assess which data is likely to meet their particular needs; and for the IATI Secretariat itself to prioritise its commitments to data quality.

      It is also important that a consistent approach is developed in the presentation of statistics in IATI's annual report and the Dashboard's own calculations, as well as when IATI is asked to contribute to monitoring reports. This section of the Dashboard is focused to do just that.

      @@ -16,7 +16,7 @@

      The statistics on these pages are refreshed frequently. The methodology is open to inspection and will be changed in response to feedback and discussion.

      -

      The overriding concern of the Technical Team is that the methodologies being tested here gain buy-in from our members and publishers so that they can be used as a credible benchmark in improving the quality of IATI data.

      +

      The overriding concern of the IATI Secretariat is that the methodologies being tested here gain buy-in from our members and publishers so that they can be used as a credible benchmark in improving the quality of IATI data.

      {% endblock about %} From 62e0470ca497a5650010a4fd9e0917e8365dca4f Mon Sep 17 00:00:00 2001 From: James B Date: Fri, 4 Aug 2023 15:47:46 +0100 Subject: [PATCH 296/375] Fetch JS & CSS over HTTPS --- static/templates/base.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/static/templates/base.html b/static/templates/base.html index 32041879aa..055db67ef7 100644 --- a/static/templates/base.html +++ b/static/templates/base.html @@ -4,7 +4,7 @@ - + {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} @@ -215,7 +215,7 @@

      {{page_titles[page]}}

      {% block tablesorterscript %}{% endblock %} - + From 77ac303d532368df9251ca307bca31511080aa71 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 12 Aug 2024 17:31:48 +0100 Subject: [PATCH 312/375] feat: Updated base template Updated base template to change page title, remove codeforIATI navigation, and change URLs to FAQ and GitHub repo/issue pages. --- templates/base.html | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/templates/base.html b/templates/base.html index 980e77a81b..3bf3405ddf 100644 --- a/templates/base.html +++ b/templates/base.html @@ -7,7 +7,7 @@ - {% block title %}{{ page_titles[page] }} – Code for IATI Analytics{% endblock %} + IATI Dashboard – {% block title %}{{ page_titles[page] }}{% endblock %} @@ -28,15 +28,6 @@
      - + - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} {% endwith %} diff --git a/templates/comprehensiveness_base.html b/templates/comprehensiveness_base.html index cbbec9f94b..2a35ed2fa6 100644 --- a/templates/comprehensiveness_base.html +++ b/templates/comprehensiveness_base.html @@ -233,7 +233,7 @@

      Pseudocode

      @@ -269,7 +269,7 @@

      Pseudocode

      - + @@ -308,7 +308,7 @@

      Pseudocode

      - + @@ -320,7 +320,7 @@

      Pseudocode

      - + {% endblock %} @@ -347,7 +347,7 @@

      Pseudocode

      - + @@ -365,7 +365,7 @@

      Pseudocode

      - + diff --git a/templates/coverage.html b/templates/coverage.html index cc30268a07..bda3d541b4 100644 --- a/templates/coverage.html +++ b/templates/coverage.html @@ -23,8 +23,8 @@

      The coverage stats page is being rebuilt.


      - In future, we plan to use an automated system that will calculate the coverage statistic for an organisation by using its IATI organisation file. - We are exploring a methodology that uses the <total-expenditure> element in a publisher’s organisation file, and compares this to the total spend for a given year in their activity files. + In future, we plan to use an automated system that will calculate the coverage statistic for an organisation by using its IATI organisation file. + We are exploring a methodology that uses the <total-expenditure> element in a publisher’s organisation file, and compares this to the total spend for a given year in their activity files. Completing this work requires an update to the methodology. This is in our job queue but we don’t have an estimated time for completing the work. Please look out for updates via IATI Discuss and our website.

      diff --git a/templates/publisher.html b/templates/publisher.html index 2b78a82c64..5808e83847 100644 --- a/templates/publisher.html +++ b/templates/publisher.html @@ -171,7 +171,7 @@

      Data Quality

      {% if publisher in ckan and dataset_name in ckan[publisher] %} - validator + validator {% endif %}
      @@ -295,7 +295,7 @@

      Financial

      Budgets

      -

      The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

      +

      The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

      Org TypeTotal Org ElementsTotal RefsTotal Non-Empty RefsTotal Refs Excluding Self RefsTotal Valid RefsOrg Type
      {{ slug.replace('_org', '') | capitalize }}{{ '{:,}'.format(transaction_stats.total_orgs or 0) }} - {{ '{:,}'.format(transaction_stats.total_refs or 0) }} - {{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} - {{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} - {{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} - (J){{ slug.replace('_org', '') | capitalize }}(J)
      {{row.publisher_title}}{{row.publisher_title}}{% if column_slug in row %} {{row[column_slug+'_valid']}} diff --git a/static/templates/forwardlooking.html b/static/templates/forwardlooking.html index f33fe85b57..271ceae039 100644 --- a/static/templates/forwardlooking.html +++ b/static/templates/forwardlooking.html @@ -63,7 +63,7 @@

      Activities with Forward Looking Budget Allocations

      {{row.publisher_title}}{{row.publisher_title}}
      {{row.publisher_title}}{{row.publisher_title}} {%- if column_slug == 'publisher_type' -%} diff --git a/static/templates/summary_stats.html b/static/templates/summary_stats.html index 0f3615b63c..1b8a73f1ff 100644 --- a/static/templates/summary_stats.html +++ b/static/templates/summary_stats.html @@ -39,7 +39,7 @@

      Summary Statistics

      {{row.publisher_title}}{{row.publisher_title}}{{row[column_slug]}} {% endfor %} diff --git a/static/templates/timeliness.html b/static/templates/timeliness.html index 657ea59561..c4d500c199 100644 --- a/static/templates/timeliness.html +++ b/static/templates/timeliness.html @@ -45,7 +45,7 @@

      Table of Frequency assessments

      {{publisher_title}}{{publisher_title}}{{per_month[month] or 0}}
      {{publisher_title}}{{publisher_title}}{{per_month[month] or 0}}{{column_header}}
      {{ element }}{{ codelist_mapping[major_version].get(element) }}{{ codelist_mapping[major_version].get(element) }} {{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }} Core Version iati-activities/@version must exist for the file the activity is in - iati-activities/@version must be on the Version codelist + iati-activities/@version must be on the Version codelist
      Core Core Status Activity must contain an activity-status elementactivity-status/@code must be on the ActivityStats codelistactivity-status/@code must be on the ActivityStats codelist
      Core Financials Transaction - Currency All transactions must have value/@currency OR the activity must have a @default-currency attribute. All transactions must have value/@value-dateAll currency values must be on the Currency codelist. value/@value-date must be valid xs:date.All currency values must be on the Currency codelist. value/@value-date must be valid xs:date.
      Financials Financials Budget Activity must have at least 1 budget element OR the activity must have the budget-not-provided attribute AND no budget elementsEach budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elementsEach budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elements
      Value added DAC Sectors At least 1 sector where @vocabulary is DAC or DAC-3 (1.xx) or 1 or 2 (2.xx), must be reported at activity level, unless there is no @vocabulary attribute, in which case DAC/1 is assumed. If there is no DAC sector element at activity level it must be reported within all transactions contained within that activity.Must be valid code on the DAC or DAC-3 lists as appropriate.Must be valid code on the DAC or DAC-3 lists as appropriate.
      Value added Value added Aid Type Activity must contain either i) a value in default-aid-type/@code or ii) each transaction must contain a value in transaction/aid-type/@code.Must be valid code on the AidType codelist.Must be valid code on the AidType codelist.
      Value added
      @@ -379,7 +379,7 @@

      Codelist Values (version {{ major_version }}.xx)

      {% with element_i=element_list.index(element) %} - + {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% with element_i=element_list.index(element) %} - + - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} + {% with codes=func.sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} {% endwith %} - {% with codes=sorted(set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + {% with codes=func.sorted(func.set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} {% with element_i=element_list.index(element) %} - + - + {% endwith %} {% endfor %} @@ -447,15 +448,15 @@

      Organisation Identifiers

      {% set extra_column = true %} - {% include 'org_id_table_header.html' %} + {% include '_partials/org_id_table_header.html' with context %} {% for slug in slugs.org_type.by_slug %} {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} - - {% include 'org_id_table_cells.html' %} + + {% include '_partials/org_id_table_cells.html' with context %} {% endfor %} From 1991c7d10d32e283a8105befb1e8305129a6865b Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 9 Oct 2024 12:40:26 +0100 Subject: [PATCH 327/375] docs: Update repository documentation Updated both the readme (including converting to markdown) and contributors guide. The readme now includes information on how to run the Django version of the Dashboard. --- CONTRIBUTING.md | 32 ++++++------ README.md | 124 +++++++++++++++++++++++++++++++++++++++++++ README.rst | 136 ------------------------------------------------ 3 files changed, 140 insertions(+), 152 deletions(-) create mode 100644 README.md delete mode 100644 README.rst diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc84b1fff4..02f29ea5b3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,33 +1,37 @@ -If you would like to contribute to the Code for IATI Analytics project, you can.... +# Contributing -* Send us feedback about your user experience. Contact details at: https://github.com/codeforIATI +If you would like to contribute to the IATI Dashboard you can.... + +* Send us feedback about your user experience. You can find our [contact details on the IATI Organisation page on GitHub](https://github.com/IATI) * Report bugs * Request new features * Contribute code or documents to improve the application. See the list of specific tasks below. + ## How to report a bug or request a feature -If you are able to work with GitHub then please [create an issue](https://github.com/codeforIATI/analytics/issues/new/choose). 
+If you are able to work with GitHub then please [create an issue](https://github.com/IATI/IATI-Dashboard/issues/new/choose). + +Before creating a new issue [check to see if the issue already exists](https://github.com/IATI/IATI-Dashboard/issues/). If not then please do create it. -Before creating a new issue check to see if the issue already exists. If not then please do create it. +If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. You can reach us via the central [IATI Developer Documentation pages](https://iatistandard.org/en/guidance/developer/). -If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. Details at: https://codeforiati.org/get-involved/ ## How to contribute code and documents ### How we use branches in this repository -* `main` represents our main development branch, and is the branch we are currently using for our deployed instance of the code +* `live` represents the branch we are currently using for our deployed instance of the code. +* Eventually a `develop` branch will be for development work that is not yet live. * Other branches represent development work or bug fixes. ### Submitting changes * Fork this repository (if you haven't previously) -* Make sure you're working on top of an up to date copy of the `main` branch - - Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. `42-feature-name`) -* Do your work - - If your work addresses a specific issue, reference that issue in your commit message by starting the commit message with `[#issue number]` e.g. `[#64]` -* Create a pull request against `main` +* Make sure you're working on top of an up to date copy of the `live` branch +* Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. `42-feature-name`). 
+* Do your work, creating atomic commits as you go. If your work addresses a specific issue, reference that issue in your commit message using the full URL to the issue. Please name your commits starting with a one-word description of the commit, e.g., *fix*, *update*, *refactor*. +* Create a pull request against `develop`. ## Specific Tasks: @@ -52,10 +56,6 @@ Do you have other tests/statistics that we could be generating? Can you improve the unit testing to make deployment more robust? -### Fix a Bitesize issue - -We mark some of issues as 'Bitesize'. Generally these will help ease you into the code and help you find your way around. - ## Talk to us -We'd love to hear from you. Details at: https://codeforiati.org/get-involved/ +We'd love to hear from you. You can find our [contact details at the main IATI GitHub page](https://github.com/IATI). diff --git a/README.md b/README.md new file mode 100644 index 0000000000..4592fe5625 --- /dev/null +++ b/README.md @@ -0,0 +1,124 @@ +IATI Dashboard +============== + +[![Coverage Status](https://coveralls.io/repos/github/IATI/IATI-Dashboard/badge.svg?branch=merge-codeforiati-and-publishingstats)](https://coveralls.io/github/IATI/IATI-Dashboard?branch=merge-codeforiati-and-publishingstats) +[![GPLv3 License](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://github.com/IATI/IATI-Dashboard/blob/merge-codeforiati-and-publishingstats/LICENSE.md) + +## Summary + + Product | IATI Dashboard +--- | --- +Description | A Django web application that provides key numbers, statistics and graphs about the data on the [IATI registry](http://iatiregistry.org/). This repository is currently a development version where the IATI Dashboard/Publishing Statistics and Code for IATI Analytics are being merged. +Website | Development only; see [IATI Dashboard](https://dashboard.iatistandard.org), and [Code for IATI Analytics](https://analytics.codeforiati.org) for live versions. 
+Related | Repositories for the [live version of the IATI Dashboard](https://github.com/IATI/IATI-Dashboard), [live version of the IATI Publishing Stats](https://github.com/IATI/IATI-Publishing-Statistics), and [Code for IATI Analytics](https://github.com/codeforIATI/analytics). Data is generated from [Code for IATI Stats](https://github.com/codeforIATI/IATI-Stats). +Documentation | Rest of README.md +Technical Issues | See https://github.com/IATI/IATI-Dashboard/issues +Support | https://iatistandard.org/en/guidance/get-support/ + +## High-level requirements + +* Python 3.12 +* Unix-based setup (e.g., Linux, MacOS X) with `bash`, `wget` and `curl` installed. +* Development files for libfreetype, libpng, libxml and libxslt e.g. ``libfreetype6-dev libpng-dev libxml2-dev libxslt-dev``. + +## Running the app locally +### Overview +The IATI Dashboard is mostly written in Python but also has some helper Bash scripts to collect the data that the dashboard uses. Top-level steps required to run the Dashboard are: + +1. Setup Python environment and install dependencies. +2. Fetch the data. +3. Build the static graphs and other data that will be served via the Dashboard. +4. Run the web server. + +Paths to different directories are set in `./src/config.py`. + +### 1. Setup environment + +Assuming that this repository has been cloned and you are in the root directory of the repository. + +``` +# Setup and activate a virtual environment (recommended) - here we use virtualenv +virtualenv ve +source ve/bin/activate +``` + +Now install the dependencies. + +``` +pip install -r requirements.txt +``` + +### 2. Fetching the data + +Bash scripts are used to fetch the data that the Dashboard will present. They will store data in `./data` and `./stats-calculated`. + +``` +# Fetch the necessary calculated stats +./get_stats.sh + +# Fetch some extra data from github and github gists and other sources on the internet +./fetch_data.sh +``` + +### 3. 
Build static data and graphs and copy to static + +``` +mkdir out +cd src +python make_plots.py +python make_csv.py +python speakers_kit.py +cp ../out/data static/ +cp ../img/aggregate static/ +cp ../img/publishers static/ +``` + +### 4. Run the webserver. + +From `./src/`: + +``` +python manage.py runserver +``` + +The Dashboard will now be accessible from `localhost:8000/`. + + +## Development + +### Calculating your own statistics + +The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory). + +Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the `stats-calculated/current directory` with the `out` directory produced by running the [loop, aggregate and invert commands individually](https://github.com/codeforIATI/IATI-Stats#getting-started), then regenerate graphs and CSV files as per the above. + +### Adding new dependencies + +If a change requires new dependencies then please add to `requirements.in` or `requirements_dev.in` as appropriate and recompile: + +``` +pip-compile requirements_dev.in +pip-compile requirements.in +``` + +### Linting + +Code linting is carried out using [Flake8](https://flake8.pycqa.org/en/latest/) and `setup.cfg` has the configuration. 
+ +## License + Copyright (C) 2013-2015 Ben Webb + Copyright (C) 2013-2014 David Carpenter + Copyright (C) 2021 Andy Lulham + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . diff --git a/README.rst b/README.rst deleted file mode 100644 index 793c29febf..0000000000 --- a/README.rst +++ /dev/null @@ -1,136 +0,0 @@ -Code for IATI Analytics -======================= - -.. image:: https://github.com/codeforIATI/analytics/actions/workflows/ci.yml/badge.svg?branch=main - :target: https://github.com/codeforIATI/analytics/actions/workflows/ci.yml -.. image:: https://coveralls.io/repos/github/codeforIATI/analytics/badge.svg?branch=main - :target: https://coveralls.io/github/codeforIATI/analytics?branch=main -.. image:: https://img.shields.io/badge/license-GPLv3-blue.svg - :target: https://github.com/codeforIATI/analytics/blob/main/LICENSE.md - -Introduction ------------- - -Code for IATI Analytics displays key numbers and graphs about the data on the `IATI registry `__. - -See Analytics in action at https://analytics.codeforiati.org - -Analytics is in beta. All contents / URLs / machine-readable downloads are subject to change. - -This repository is the code for Analytics frontend. Statistics are generated from the Registry by code in a separate repository - https://github.com/codeforIATI/IATI-Stats - -Technology Overview -^^^^^^^^^^^^^^^^^^^ - -Analytics is mostly written in Python, with some helper Bash scripts. 
- -Python scripts: - -* ``make_html.py`` contains is a Flask application that makes use of Frozen Flask to generate some static HTML. -* ``make_csv.py`` generates CSV files. -* ``plots.py`` generates static images of graphs using matplotlib. - -Bash helper scripts: - -* The main source of data are the statistics generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. (or see calculating your own stats section below) -* Analytics also uses various other data from online sources (including GitHub). These can be fetched using ``fetch_data.sh``. -* ``git.sh`` runs all the above commands, see Usage below. - -Installation -^^^^^^^^^^^^ - -Requirements: - -* Unix based setup (e.g. Linux, Mac OS X) with bash etc. -* wget and curl installed -* Python 3 -* Python dev library ``python-dev`` -* python-virtualenv (optional) -* Development files for libfreetype, libpng, libxml and libxslt e.g. ``libfreetype6-dev libpng-dev libxml2-dev libxslt-dev``. - - (alternatively, you may be able to install some of the python dependencies in - requirements.txt using your package manager) - - -To install: - -.. code-block:: bash - - ## Get the code - git clone https://github.com/codeforIATI/analytics.git - cd analytics - - ## Set up a virtual environment (recommended) - # Create a virtual environment - virtualenv pyenv - # Activate the virtual environment - # (you need to this every time you open a new terminal session) - source pyenv/bin/activate - - ## Install python dependencies - ## Use pip as described below, or your distro's package manager to install - ## the dependcies in requirements.txt - # If you are running a less recent linux distro, you will need to install distribute - easy_install -U distribute - pip install -r requirements.txt - -Usage -^^^^^ - -The following steps are performed routinely: - -.. 
code-block:: bash - - # Fetch the necessary calculated stats - ./get_stats.sh - # Fetch some extra data from github and github gists - ./fetch_data.sh - - mkdir out - python plots.py - python make_csv.py - python make_html.py - -make_html.py will output a MissingURLGeneratorWarning. This is expected, as some of the URLs defined are for the live development server only (see below). - -The full list of steps for our deployment can be found in ``git.sh``. (The name of this is now a misnomer as the output is no longer a git repository - previously a commit was pushed to GitHub pages.) - -Development -^^^^^^^^^^^ - -For development, you can use the live Flask development server, instead of Frozen Flask. - -.. code-block:: bash - - python make_html.py --live - -Using the live development server is highly recommended, because it displays full bracktraces for 500 errors, whereas frozen flask does not. - -Calculating your own statistics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Analytics requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/codeforIATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. - -Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the stats-calculated/current directory with the out directory produced by running the `loop, aggregate and invert commands individually `__. - -License -^^^^^^^ - -:: - - Copyright (C) 2013-2015 Ben Webb - Copyright (C) 2013-2014 David Carpenter - Copyright (C) 2021 Andy Lulham - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . From c44aa31b677b823ab50fe202c47f84370d42a165 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 13:59:54 +0100 Subject: [PATCH 328/375] refactor: Renamed source code directory names for clarity For clarity in the repository this commit renames the directories that store the source code. All the source code in src/ was moved to dashboard/. Also, the actual Django app code was moved from src/dashboard/ to dashboard/ui/. This commit doesn't change the import statements to make the app run, this will be done in the next commit. --- {src => dashboard}/common.py | 0 {src => dashboard}/comprehensiveness.py | 0 {src => dashboard}/config.py | 0 {src => dashboard}/coverage.py | 0 {src => dashboard}/data.py | 0 {src => dashboard}/forwardlooking.py | 0 {src => dashboard}/humanitarian.py | 0 {src => dashboard}/make_csv.py | 0 {src => dashboard}/make_html.py | 0 {src => dashboard}/make_plots.py | 0 {src => dashboard}/manage.py | 0 {src => dashboard}/speakers_kit.py | 0 {src => dashboard}/static/.nojekyll | 0 {src => dashboard}/static/img/favicon-16x16.png | Bin {src => dashboard}/static/img/favicon-32x32.png | Bin {src => dashboard}/static/img/favicon.ico | Bin {src => dashboard}/static/img/tablesorter-icons.gif | Bin {src => dashboard}/static/style.css | 0 {src => dashboard}/summary_stats.py | 0 {src => dashboard}/templates/_partials/boxes.html | 0 .../templates/_partials/org_id_table_cells.html | 0 .../templates/_partials/org_id_table_header.html | 0 .../_partials/tablesorter_instructions.html | 0 {src => dashboard}/templates/activities.html | 0 {src => dashboard}/templates/base.html | 0 {src => 
dashboard}/templates/booleans.html | 0 {src => dashboard}/templates/codelist.html | 0 {src => dashboard}/templates/codelists.html | 0 {src => dashboard}/templates/comprehensiveness.html | 0 .../templates/comprehensiveness_base.html | 0 .../templates/comprehensiveness_core.html | 0 .../templates/comprehensiveness_financials.html | 0 .../templates/comprehensiveness_valueadded.html | 0 {src => dashboard}/templates/coverage.html | 0 {src => dashboard}/templates/data_quality.html | 0 {src => dashboard}/templates/dates.html | 0 {src => dashboard}/templates/download.html | 0 {src => dashboard}/templates/element.html | 0 {src => dashboard}/templates/elements.html | 0 {src => dashboard}/templates/exploring_data.html | 0 {src => dashboard}/templates/faq.html | 0 {src => dashboard}/templates/files.html | 0 {src => dashboard}/templates/forwardlooking.html | 0 {src => dashboard}/templates/headlines.html | 0 {src => dashboard}/templates/humanitarian.html | 0 {src => dashboard}/templates/identifiers.html | 0 {src => dashboard}/templates/index.html | 0 {src => dashboard}/templates/license.html | 0 {src => dashboard}/templates/licenses.html | 0 {src => dashboard}/templates/org_ids.html | 0 {src => dashboard}/templates/org_type.html | 0 {src => dashboard}/templates/organisation.html | 0 {src => dashboard}/templates/publisher.html | 0 {src => dashboard}/templates/publishers.html | 0 {src => dashboard}/templates/publishing_stats.html | 0 .../templates/registration_agencies.html | 0 {src => dashboard}/templates/reporting_orgs.html | 0 {src => dashboard}/templates/section_index.html | 0 {src => dashboard}/templates/summary_stats.html | 0 {src => dashboard}/templates/timeliness.html | 0 {src => dashboard}/templates/timeliness_base.html | 0 .../templates/timeliness_timelag.html | 0 {src => dashboard}/templates/traceability.html | 0 {src => dashboard}/templates/validation.html | 0 {src => dashboard}/templates/versions.html | 0 {src => dashboard}/templates/xml.html | 0 {src/dashboard => 
dashboard/tests}/__init__.py | 0 {src => dashboard}/tests/test_comprehensiveness.py | 0 {src => dashboard}/tests/test_timeliness.py | 0 {src => dashboard}/text.py | 0 {src => dashboard}/timeliness.py | 0 {src/tests => dashboard/ui}/__init__.py | 0 {src/dashboard => dashboard/ui}/asgi.py | 0 {src/dashboard => dashboard/ui}/jinja2.py | 0 {src/dashboard => dashboard/ui}/settings.py | 0 {src/dashboard => dashboard/ui}/template_funcs.py | 0 {src/dashboard => dashboard/ui}/urls.py | 0 {src/dashboard => dashboard/ui}/views.py | 0 {src/dashboard => dashboard/ui}/wsgi.py | 0 {src => dashboard}/vars.py | 0 80 files changed, 0 insertions(+), 0 deletions(-) rename {src => dashboard}/common.py (100%) rename {src => dashboard}/comprehensiveness.py (100%) rename {src => dashboard}/config.py (100%) rename {src => dashboard}/coverage.py (100%) rename {src => dashboard}/data.py (100%) rename {src => dashboard}/forwardlooking.py (100%) rename {src => dashboard}/humanitarian.py (100%) rename {src => dashboard}/make_csv.py (100%) rename {src => dashboard}/make_html.py (100%) rename {src => dashboard}/make_plots.py (100%) rename {src => dashboard}/manage.py (100%) rename {src => dashboard}/speakers_kit.py (100%) rename {src => dashboard}/static/.nojekyll (100%) rename {src => dashboard}/static/img/favicon-16x16.png (100%) rename {src => dashboard}/static/img/favicon-32x32.png (100%) rename {src => dashboard}/static/img/favicon.ico (100%) rename {src => dashboard}/static/img/tablesorter-icons.gif (100%) rename {src => dashboard}/static/style.css (100%) rename {src => dashboard}/summary_stats.py (100%) rename {src => dashboard}/templates/_partials/boxes.html (100%) rename {src => dashboard}/templates/_partials/org_id_table_cells.html (100%) rename {src => dashboard}/templates/_partials/org_id_table_header.html (100%) rename {src => dashboard}/templates/_partials/tablesorter_instructions.html (100%) rename {src => dashboard}/templates/activities.html (100%) rename {src => 
dashboard}/templates/base.html (100%) rename {src => dashboard}/templates/booleans.html (100%) rename {src => dashboard}/templates/codelist.html (100%) rename {src => dashboard}/templates/codelists.html (100%) rename {src => dashboard}/templates/comprehensiveness.html (100%) rename {src => dashboard}/templates/comprehensiveness_base.html (100%) rename {src => dashboard}/templates/comprehensiveness_core.html (100%) rename {src => dashboard}/templates/comprehensiveness_financials.html (100%) rename {src => dashboard}/templates/comprehensiveness_valueadded.html (100%) rename {src => dashboard}/templates/coverage.html (100%) rename {src => dashboard}/templates/data_quality.html (100%) rename {src => dashboard}/templates/dates.html (100%) rename {src => dashboard}/templates/download.html (100%) rename {src => dashboard}/templates/element.html (100%) rename {src => dashboard}/templates/elements.html (100%) rename {src => dashboard}/templates/exploring_data.html (100%) rename {src => dashboard}/templates/faq.html (100%) rename {src => dashboard}/templates/files.html (100%) rename {src => dashboard}/templates/forwardlooking.html (100%) rename {src => dashboard}/templates/headlines.html (100%) rename {src => dashboard}/templates/humanitarian.html (100%) rename {src => dashboard}/templates/identifiers.html (100%) rename {src => dashboard}/templates/index.html (100%) rename {src => dashboard}/templates/license.html (100%) rename {src => dashboard}/templates/licenses.html (100%) rename {src => dashboard}/templates/org_ids.html (100%) rename {src => dashboard}/templates/org_type.html (100%) rename {src => dashboard}/templates/organisation.html (100%) rename {src => dashboard}/templates/publisher.html (100%) rename {src => dashboard}/templates/publishers.html (100%) rename {src => dashboard}/templates/publishing_stats.html (100%) rename {src => dashboard}/templates/registration_agencies.html (100%) rename {src => dashboard}/templates/reporting_orgs.html (100%) rename {src => 
dashboard}/templates/section_index.html (100%) rename {src => dashboard}/templates/summary_stats.html (100%) rename {src => dashboard}/templates/timeliness.html (100%) rename {src => dashboard}/templates/timeliness_base.html (100%) rename {src => dashboard}/templates/timeliness_timelag.html (100%) rename {src => dashboard}/templates/traceability.html (100%) rename {src => dashboard}/templates/validation.html (100%) rename {src => dashboard}/templates/versions.html (100%) rename {src => dashboard}/templates/xml.html (100%) rename {src/dashboard => dashboard/tests}/__init__.py (100%) rename {src => dashboard}/tests/test_comprehensiveness.py (100%) rename {src => dashboard}/tests/test_timeliness.py (100%) rename {src => dashboard}/text.py (100%) rename {src => dashboard}/timeliness.py (100%) rename {src/tests => dashboard/ui}/__init__.py (100%) rename {src/dashboard => dashboard/ui}/asgi.py (100%) rename {src/dashboard => dashboard/ui}/jinja2.py (100%) rename {src/dashboard => dashboard/ui}/settings.py (100%) rename {src/dashboard => dashboard/ui}/template_funcs.py (100%) rename {src/dashboard => dashboard/ui}/urls.py (100%) rename {src/dashboard => dashboard/ui}/views.py (100%) rename {src/dashboard => dashboard/ui}/wsgi.py (100%) rename {src => dashboard}/vars.py (100%) diff --git a/src/common.py b/dashboard/common.py similarity index 100% rename from src/common.py rename to dashboard/common.py diff --git a/src/comprehensiveness.py b/dashboard/comprehensiveness.py similarity index 100% rename from src/comprehensiveness.py rename to dashboard/comprehensiveness.py diff --git a/src/config.py b/dashboard/config.py similarity index 100% rename from src/config.py rename to dashboard/config.py diff --git a/src/coverage.py b/dashboard/coverage.py similarity index 100% rename from src/coverage.py rename to dashboard/coverage.py diff --git a/src/data.py b/dashboard/data.py similarity index 100% rename from src/data.py rename to dashboard/data.py diff --git 
a/src/forwardlooking.py b/dashboard/forwardlooking.py similarity index 100% rename from src/forwardlooking.py rename to dashboard/forwardlooking.py diff --git a/src/humanitarian.py b/dashboard/humanitarian.py similarity index 100% rename from src/humanitarian.py rename to dashboard/humanitarian.py diff --git a/src/make_csv.py b/dashboard/make_csv.py similarity index 100% rename from src/make_csv.py rename to dashboard/make_csv.py diff --git a/src/make_html.py b/dashboard/make_html.py similarity index 100% rename from src/make_html.py rename to dashboard/make_html.py diff --git a/src/make_plots.py b/dashboard/make_plots.py similarity index 100% rename from src/make_plots.py rename to dashboard/make_plots.py diff --git a/src/manage.py b/dashboard/manage.py similarity index 100% rename from src/manage.py rename to dashboard/manage.py diff --git a/src/speakers_kit.py b/dashboard/speakers_kit.py similarity index 100% rename from src/speakers_kit.py rename to dashboard/speakers_kit.py diff --git a/src/static/.nojekyll b/dashboard/static/.nojekyll similarity index 100% rename from src/static/.nojekyll rename to dashboard/static/.nojekyll diff --git a/src/static/img/favicon-16x16.png b/dashboard/static/img/favicon-16x16.png similarity index 100% rename from src/static/img/favicon-16x16.png rename to dashboard/static/img/favicon-16x16.png diff --git a/src/static/img/favicon-32x32.png b/dashboard/static/img/favicon-32x32.png similarity index 100% rename from src/static/img/favicon-32x32.png rename to dashboard/static/img/favicon-32x32.png diff --git a/src/static/img/favicon.ico b/dashboard/static/img/favicon.ico similarity index 100% rename from src/static/img/favicon.ico rename to dashboard/static/img/favicon.ico diff --git a/src/static/img/tablesorter-icons.gif b/dashboard/static/img/tablesorter-icons.gif similarity index 100% rename from src/static/img/tablesorter-icons.gif rename to dashboard/static/img/tablesorter-icons.gif diff --git a/src/static/style.css 
b/dashboard/static/style.css similarity index 100% rename from src/static/style.css rename to dashboard/static/style.css diff --git a/src/summary_stats.py b/dashboard/summary_stats.py similarity index 100% rename from src/summary_stats.py rename to dashboard/summary_stats.py diff --git a/src/templates/_partials/boxes.html b/dashboard/templates/_partials/boxes.html similarity index 100% rename from src/templates/_partials/boxes.html rename to dashboard/templates/_partials/boxes.html diff --git a/src/templates/_partials/org_id_table_cells.html b/dashboard/templates/_partials/org_id_table_cells.html similarity index 100% rename from src/templates/_partials/org_id_table_cells.html rename to dashboard/templates/_partials/org_id_table_cells.html diff --git a/src/templates/_partials/org_id_table_header.html b/dashboard/templates/_partials/org_id_table_header.html similarity index 100% rename from src/templates/_partials/org_id_table_header.html rename to dashboard/templates/_partials/org_id_table_header.html diff --git a/src/templates/_partials/tablesorter_instructions.html b/dashboard/templates/_partials/tablesorter_instructions.html similarity index 100% rename from src/templates/_partials/tablesorter_instructions.html rename to dashboard/templates/_partials/tablesorter_instructions.html diff --git a/src/templates/activities.html b/dashboard/templates/activities.html similarity index 100% rename from src/templates/activities.html rename to dashboard/templates/activities.html diff --git a/src/templates/base.html b/dashboard/templates/base.html similarity index 100% rename from src/templates/base.html rename to dashboard/templates/base.html diff --git a/src/templates/booleans.html b/dashboard/templates/booleans.html similarity index 100% rename from src/templates/booleans.html rename to dashboard/templates/booleans.html diff --git a/src/templates/codelist.html b/dashboard/templates/codelist.html similarity index 100% rename from src/templates/codelist.html rename to 
dashboard/templates/codelist.html diff --git a/src/templates/codelists.html b/dashboard/templates/codelists.html similarity index 100% rename from src/templates/codelists.html rename to dashboard/templates/codelists.html diff --git a/src/templates/comprehensiveness.html b/dashboard/templates/comprehensiveness.html similarity index 100% rename from src/templates/comprehensiveness.html rename to dashboard/templates/comprehensiveness.html diff --git a/src/templates/comprehensiveness_base.html b/dashboard/templates/comprehensiveness_base.html similarity index 100% rename from src/templates/comprehensiveness_base.html rename to dashboard/templates/comprehensiveness_base.html diff --git a/src/templates/comprehensiveness_core.html b/dashboard/templates/comprehensiveness_core.html similarity index 100% rename from src/templates/comprehensiveness_core.html rename to dashboard/templates/comprehensiveness_core.html diff --git a/src/templates/comprehensiveness_financials.html b/dashboard/templates/comprehensiveness_financials.html similarity index 100% rename from src/templates/comprehensiveness_financials.html rename to dashboard/templates/comprehensiveness_financials.html diff --git a/src/templates/comprehensiveness_valueadded.html b/dashboard/templates/comprehensiveness_valueadded.html similarity index 100% rename from src/templates/comprehensiveness_valueadded.html rename to dashboard/templates/comprehensiveness_valueadded.html diff --git a/src/templates/coverage.html b/dashboard/templates/coverage.html similarity index 100% rename from src/templates/coverage.html rename to dashboard/templates/coverage.html diff --git a/src/templates/data_quality.html b/dashboard/templates/data_quality.html similarity index 100% rename from src/templates/data_quality.html rename to dashboard/templates/data_quality.html diff --git a/src/templates/dates.html b/dashboard/templates/dates.html similarity index 100% rename from src/templates/dates.html rename to dashboard/templates/dates.html 
diff --git a/src/templates/download.html b/dashboard/templates/download.html similarity index 100% rename from src/templates/download.html rename to dashboard/templates/download.html diff --git a/src/templates/element.html b/dashboard/templates/element.html similarity index 100% rename from src/templates/element.html rename to dashboard/templates/element.html diff --git a/src/templates/elements.html b/dashboard/templates/elements.html similarity index 100% rename from src/templates/elements.html rename to dashboard/templates/elements.html diff --git a/src/templates/exploring_data.html b/dashboard/templates/exploring_data.html similarity index 100% rename from src/templates/exploring_data.html rename to dashboard/templates/exploring_data.html diff --git a/src/templates/faq.html b/dashboard/templates/faq.html similarity index 100% rename from src/templates/faq.html rename to dashboard/templates/faq.html diff --git a/src/templates/files.html b/dashboard/templates/files.html similarity index 100% rename from src/templates/files.html rename to dashboard/templates/files.html diff --git a/src/templates/forwardlooking.html b/dashboard/templates/forwardlooking.html similarity index 100% rename from src/templates/forwardlooking.html rename to dashboard/templates/forwardlooking.html diff --git a/src/templates/headlines.html b/dashboard/templates/headlines.html similarity index 100% rename from src/templates/headlines.html rename to dashboard/templates/headlines.html diff --git a/src/templates/humanitarian.html b/dashboard/templates/humanitarian.html similarity index 100% rename from src/templates/humanitarian.html rename to dashboard/templates/humanitarian.html diff --git a/src/templates/identifiers.html b/dashboard/templates/identifiers.html similarity index 100% rename from src/templates/identifiers.html rename to dashboard/templates/identifiers.html diff --git a/src/templates/index.html b/dashboard/templates/index.html similarity index 100% rename from 
src/templates/index.html rename to dashboard/templates/index.html diff --git a/src/templates/license.html b/dashboard/templates/license.html similarity index 100% rename from src/templates/license.html rename to dashboard/templates/license.html diff --git a/src/templates/licenses.html b/dashboard/templates/licenses.html similarity index 100% rename from src/templates/licenses.html rename to dashboard/templates/licenses.html diff --git a/src/templates/org_ids.html b/dashboard/templates/org_ids.html similarity index 100% rename from src/templates/org_ids.html rename to dashboard/templates/org_ids.html diff --git a/src/templates/org_type.html b/dashboard/templates/org_type.html similarity index 100% rename from src/templates/org_type.html rename to dashboard/templates/org_type.html diff --git a/src/templates/organisation.html b/dashboard/templates/organisation.html similarity index 100% rename from src/templates/organisation.html rename to dashboard/templates/organisation.html diff --git a/src/templates/publisher.html b/dashboard/templates/publisher.html similarity index 100% rename from src/templates/publisher.html rename to dashboard/templates/publisher.html diff --git a/src/templates/publishers.html b/dashboard/templates/publishers.html similarity index 100% rename from src/templates/publishers.html rename to dashboard/templates/publishers.html diff --git a/src/templates/publishing_stats.html b/dashboard/templates/publishing_stats.html similarity index 100% rename from src/templates/publishing_stats.html rename to dashboard/templates/publishing_stats.html diff --git a/src/templates/registration_agencies.html b/dashboard/templates/registration_agencies.html similarity index 100% rename from src/templates/registration_agencies.html rename to dashboard/templates/registration_agencies.html diff --git a/src/templates/reporting_orgs.html b/dashboard/templates/reporting_orgs.html similarity index 100% rename from src/templates/reporting_orgs.html rename to 
dashboard/templates/reporting_orgs.html diff --git a/src/templates/section_index.html b/dashboard/templates/section_index.html similarity index 100% rename from src/templates/section_index.html rename to dashboard/templates/section_index.html diff --git a/src/templates/summary_stats.html b/dashboard/templates/summary_stats.html similarity index 100% rename from src/templates/summary_stats.html rename to dashboard/templates/summary_stats.html diff --git a/src/templates/timeliness.html b/dashboard/templates/timeliness.html similarity index 100% rename from src/templates/timeliness.html rename to dashboard/templates/timeliness.html diff --git a/src/templates/timeliness_base.html b/dashboard/templates/timeliness_base.html similarity index 100% rename from src/templates/timeliness_base.html rename to dashboard/templates/timeliness_base.html diff --git a/src/templates/timeliness_timelag.html b/dashboard/templates/timeliness_timelag.html similarity index 100% rename from src/templates/timeliness_timelag.html rename to dashboard/templates/timeliness_timelag.html diff --git a/src/templates/traceability.html b/dashboard/templates/traceability.html similarity index 100% rename from src/templates/traceability.html rename to dashboard/templates/traceability.html diff --git a/src/templates/validation.html b/dashboard/templates/validation.html similarity index 100% rename from src/templates/validation.html rename to dashboard/templates/validation.html diff --git a/src/templates/versions.html b/dashboard/templates/versions.html similarity index 100% rename from src/templates/versions.html rename to dashboard/templates/versions.html diff --git a/src/templates/xml.html b/dashboard/templates/xml.html similarity index 100% rename from src/templates/xml.html rename to dashboard/templates/xml.html diff --git a/src/dashboard/__init__.py b/dashboard/tests/__init__.py similarity index 100% rename from src/dashboard/__init__.py rename to dashboard/tests/__init__.py diff --git 
a/src/tests/test_comprehensiveness.py b/dashboard/tests/test_comprehensiveness.py similarity index 100% rename from src/tests/test_comprehensiveness.py rename to dashboard/tests/test_comprehensiveness.py diff --git a/src/tests/test_timeliness.py b/dashboard/tests/test_timeliness.py similarity index 100% rename from src/tests/test_timeliness.py rename to dashboard/tests/test_timeliness.py diff --git a/src/text.py b/dashboard/text.py similarity index 100% rename from src/text.py rename to dashboard/text.py diff --git a/src/timeliness.py b/dashboard/timeliness.py similarity index 100% rename from src/timeliness.py rename to dashboard/timeliness.py diff --git a/src/tests/__init__.py b/dashboard/ui/__init__.py similarity index 100% rename from src/tests/__init__.py rename to dashboard/ui/__init__.py diff --git a/src/dashboard/asgi.py b/dashboard/ui/asgi.py similarity index 100% rename from src/dashboard/asgi.py rename to dashboard/ui/asgi.py diff --git a/src/dashboard/jinja2.py b/dashboard/ui/jinja2.py similarity index 100% rename from src/dashboard/jinja2.py rename to dashboard/ui/jinja2.py diff --git a/src/dashboard/settings.py b/dashboard/ui/settings.py similarity index 100% rename from src/dashboard/settings.py rename to dashboard/ui/settings.py diff --git a/src/dashboard/template_funcs.py b/dashboard/ui/template_funcs.py similarity index 100% rename from src/dashboard/template_funcs.py rename to dashboard/ui/template_funcs.py diff --git a/src/dashboard/urls.py b/dashboard/ui/urls.py similarity index 100% rename from src/dashboard/urls.py rename to dashboard/ui/urls.py diff --git a/src/dashboard/views.py b/dashboard/ui/views.py similarity index 100% rename from src/dashboard/views.py rename to dashboard/ui/views.py diff --git a/src/dashboard/wsgi.py b/dashboard/ui/wsgi.py similarity index 100% rename from src/dashboard/wsgi.py rename to dashboard/ui/wsgi.py diff --git a/src/vars.py b/dashboard/vars.py similarity index 100% rename from src/vars.py rename to 
dashboard/vars.py From 00fdb778810cde3b186e42ad9d1644bb161ab691 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 14:21:41 +0100 Subject: [PATCH 329/375] refactor: Adjust project structure to use new directory structure Change import statements and Django project strings to account for moving the dashboard from dashboard/ to ui/. --- dashboard/manage.py | 2 +- dashboard/ui/asgi.py | 2 +- dashboard/ui/settings.py | 6 +++--- dashboard/ui/urls.py | 26 +++++++++++++------------- dashboard/ui/views.py | 4 ++-- dashboard/ui/wsgi.py | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/dashboard/manage.py b/dashboard/manage.py index 73d4f5b5fe..2ea28dd972 100755 --- a/dashboard/manage.py +++ b/dashboard/manage.py @@ -6,7 +6,7 @@ def main(): """Run administrative tasks.""" - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') try: from django.core.management import execute_from_command_line except ImportError as exc: diff --git a/dashboard/ui/asgi.py b/dashboard/ui/asgi.py index bcbc3073e9..fb118a8cab 100644 --- a/dashboard/ui/asgi.py +++ b/dashboard/ui/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') application = get_asgi_application() diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index e30e28c5b6..d8008da83b 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -49,7 +49,7 @@ 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] -ROOT_URLCONF = 'dashboard.urls' +ROOT_URLCONF = 'ui.urls' TEMPLATES = [ { @@ -76,12 +76,12 @@ 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], - 'environment': 'dashboard.jinja2.environment' + 'environment': 'ui.jinja2.environment' }, }, ] -WSGI_APPLICATION = 
'dashboard.wsgi.application' +WSGI_APPLICATION = 'ui.wsgi.application' # Database diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index d04dff4b32..fd8fd75e6c 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -18,25 +18,25 @@ from django.urls import path # from django.shortcuts import redirect -import dashboard.views +import ui.views urlpatterns = [ path('admin/', admin.site.urls), # Top level dashboard pages. - path('', dashboard.views.index, name="dash-index"), - path('headlines', dashboard.views.headlines, name="dash-headlines"), - path('data-quality', dashboard.views.data_quality, name="dash-dataquality"), - path('publishing-statistics', dashboard.views.publishing_stats, name="dash-publishingstats"), - path('exploring-data', dashboard.views.exploring_data, name="dash-exploringdata"), - path('faq', dashboard.views.faq, name="dash-faq"), + path('', ui.views.index, name="dash-index"), + path('headlines', ui.views.headlines, name="dash-headlines"), + path('data-quality', ui.views.data_quality, name="dash-dataquality"), + path('publishing-statistics', ui.views.publishing_stats, name="dash-publishingstats"), + path('exploring-data', ui.views.exploring_data, name="dash-exploringdata"), + path('faq', ui.views.faq, name="dash-faq"), # Headlines pages and detail pages - placeholders for now. 
- path('headlines/publishers', dashboard.views.headlines_publishers, name="dash-headlines-publishers"), - path('headlines/files', dashboard.views.headlines_files, name="dash-headlines-files"), - path('headlines/activities', dashboard.views.headlines_activities, name="dash-headlines-activities"), - path('headlines/publishers/', dashboard.views.headlines_publisher_detail, name='dash-headlines-publisher-detail'), + path('headlines/publishers', ui.views.headlines_publishers, name="dash-headlines-publishers"), + path('headlines/files', ui.views.headlines_files, name="dash-headlines-files"), + path('headlines/activities', ui.views.headlines_activities, name="dash-headlines-activities"), + path('headlines/publishers/', ui.views.headlines_publisher_detail, name='dash-headlines-publisher-detail'), # Data quality pages. path('data-quality/download-errors', lambda x: None, name="dash-dataquality-download"), @@ -44,8 +44,8 @@ path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), - path('data-quality/licenses', dashboard.views.dataquality_licenses, name="dash-dataquality-licenses"), - path('data-quality/licenses/', dashboard.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), + path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), + path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), path('data-quality/identifiers', lambda x: None, name="dash-dataquality-identifiers"), path('data-quality/reporting-orgs', lambda x: None, name="dash-dataquality-reportingorgs"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 2a2aab03e6..5a307e63b2 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -14,7 +14,7 @@ import config import text 
-import dashboard.template_funcs +import ui.template_funcs from data import ( ckan, @@ -145,7 +145,7 @@ def _make_context(page_name: str): commit_hash=COMMIT_HASH, stats_commit_hash=STATS_COMMIT_HASH, func={"sorted": sorted, - "firstint": dashboard.template_funcs.firstint, + "firstint": ui.template_funcs.firstint, "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), "get_publisher_stats": get_publisher_stats, "is_valid_element": is_valid_element, diff --git a/dashboard/ui/wsgi.py b/dashboard/ui/wsgi.py index 4d5897aeb4..c94858e9fb 100644 --- a/dashboard/ui/wsgi.py +++ b/dashboard/ui/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') application = get_wsgi_application() From cc8f7756dc45c3fdeaf2c1cf3a788086055a3ddf Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 14:47:56 +0100 Subject: [PATCH 330/375] refactor: Added data quality/download errors page Refactored template and added url routes and view functions to add the download errors page in the data quality section. --- dashboard/templates/download.html | 12 ++++++------ dashboard/ui/urls.py | 3 ++- dashboard/ui/views.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/dashboard/templates/download.html b/dashboard/templates/download.html index 4f9f66e695..7ec23d01d1 100644 --- a/dashboard/templates/download.html +++ b/dashboard/templates/download.html @@ -1,8 +1,8 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Files that fail to download', current_stats.download_errors|length, 'failed_downloads.png', + {{ boxes.box('Files that fail to download', current_stats.download_errors|length, 'img/aggregate/failed_downloads.png', description='Count of files that fail to download, over time.') }}
      @@ -10,7 +10,7 @@
      - {% for code, publisher, dataset, url in current_stats.download_errors %} + {% for code, publisher, dataset, err_url in current_stats.download_errors %} - + - + {% endfor %} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index fd8fd75e6c..f1be005766 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -39,7 +39,8 @@ path('headlines/publishers/', ui.views.headlines_publisher_detail, name='dash-headlines-publisher-detail'), # Data quality pages. - path('data-quality/download-errors', lambda x: None, name="dash-dataquality-download"), + path('data-quality/download-errors', ui.views.dataquality_download, name="dash-dataquality-download"), + path('data/download_errors.json', ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), path('data-quality/xml-errors', lambda x: None, name="dash-dataquality-xml"), path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 5a307e63b2..3752a7b35b 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -246,6 +246,16 @@ def headlines_publisher_detail(request, publisher=None): # # Views to generate data quality pages. 
# +def dataquality_download(request): + template = loader.get_template("download.html") + context = _make_context("download") + return HttpResponse(template.render(context, request)) + + +def dataquality_download_errorsjson(request): + return HttpResponse(json.dumps(current_stats['download_errors'], indent=2), content_type='application/json') + + def dataquality_licenses(request): template = loader.get_template("licenses.html") context = _make_context("licenses") From dba5e953baba6a944fa510f8746f925068654e3f Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 15:00:31 +0100 Subject: [PATCH 331/375] refactor: Added data quality/xml errors page Refactored template, finished url route and added view function for the XML errors page in the data quality section. --- dashboard/templates/xml.html | 12 ++++++------ dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 6 ++++++ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/dashboard/templates/xml.html b/dashboard/templates/xml.html index 2c6da8ce2f..46c9da4a74 100644 --- a/dashboard/templates/xml.html +++ b/dashboard/templates/xml.html @@ -1,11 +1,11 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', - description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.'.format(url_for('basic_page', page_name='validation'))) }} - {{ boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'nonstandardroots.png', 'nonstandardroots.json', + {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'img/aggregate/invalidxml.png', 'invalidxml.json', + description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.'.format( url('dash-dataquality-validation'))) }} + {{ boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'img/aggregate/nonstandardroots.png', 'nonstandardroots.json', description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

      ') }}
      @@ -28,7 +28,7 @@

      Files where XML is not well-formed

      {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %}
      - + {% endif %} @@ -55,7 +55,7 @@

      Files with non-standard roots

      {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - + {% endif %} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index f1be005766..c0050afc7f 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -41,7 +41,7 @@ # Data quality pages. path('data-quality/download-errors', ui.views.dataquality_download, name="dash-dataquality-download"), path('data/download_errors.json', ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), - path('data-quality/xml-errors', lambda x: None, name="dash-dataquality-xml"), + path('data-quality/xml-errors', ui.views.dataquality_xml, name="dash-dataquality-xml"), path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 3752a7b35b..0cd9cebe47 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -256,6 +256,12 @@ def dataquality_download_errorsjson(request): return HttpResponse(json.dumps(current_stats['download_errors'], indent=2), content_type='application/json') +def dataquality_xml(request): + template = loader.get_template("xml.html") + context = _make_context("xml") + return HttpResponse(template.render(context, request)) + + def dataquality_licenses(request): template = loader.get_template("licenses.html") context = _make_context("licenses") From d99ba6daccd72b0747b260505807cdd015f6d869 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 15:07:08 +0100 Subject: [PATCH 332/375] refactor: Added data quality/validation page Refactored template, updated url route and wrote view function to implement the validation page in the data quality section. 
--- dashboard/templates/validation.html | 8 ++++---- dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dashboard/templates/validation.html b/dashboard/templates/validation.html index c01936cc3d..ee09ecd54a 100644 --- a/dashboard/templates/validation.html +++ b/dashboard/templates/validation.html @@ -1,10 +1,10 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'validation.png', 'validation.json', + {{ boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'img/aggregate/validation.png', 'validation.json', description='Count of files that do not validate against the relevant schema, over time.') }} - {{ boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'publishers_validation.png', 'publishers_validation.json', + {{ boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'img/aggregate/publishers_validation.png', 'publishers_validation.json', description='Count of publishers that have at least one invalid file, over time') }}
      @@ -24,7 +24,7 @@

      List of files that fail validation, grouped by publisher

      {% if datasets %}
      -
      {{ publisher_name[publisher ] }} ({{ datasets|length }})
      +
      {{ publisher_name[publisher ] }} ({{ datasets|length }})
      diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index c0050afc7f..0045c40327 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -42,7 +42,7 @@ path('data-quality/download-errors', ui.views.dataquality_download, name="dash-dataquality-download"), path('data/download_errors.json', ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), path('data-quality/xml-errors', ui.views.dataquality_xml, name="dash-dataquality-xml"), - path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), + path('data-quality/validation', ui.views.dataquality_validation, name="dash-dataquality-validation"), path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 0cd9cebe47..dcc7205e35 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -262,6 +262,12 @@ def dataquality_xml(request): return HttpResponse(template.render(context, request)) +def dataquality_validation(request): + template = loader.get_template("validation.html") + context = _make_context("validation") + return HttpResponse(template.render(context, request)) + + def dataquality_licenses(request): template = loader.get_template("licenses.html") context = _make_context("licenses") From 65929147787d07ca7668be0e239da56c28fd6e19 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 15:28:41 +0100 Subject: [PATCH 333/375] refactor: Added data quality/versions page Refactored template, updated url route, and wrote view function for the versions page in the data quality section. 
--- dashboard/templates/versions.html | 16 ++++++++-------- dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 8 ++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/dashboard/templates/versions.html b/dashboard/templates/versions.html index a4c5ec73db..b6bb50c840 100644 --- a/dashboard/templates/versions.html +++ b/dashboard/templates/versions.html @@ -1,14 +1,14 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Files per version (expected)', '', 'versions_expected.png', 'versions.json', 'versions_expected_legend.png', description='Count of files per IATI version, over time. Expected: these are actual versions of the IATI Standard.') }} - {{ boxes.box('Files per version (other)', '', 'versions_other.png', 'versions.json', 'versions_other_legend.png', description='Count of files per other versions, over time. These values do not actually exist as IATI versions.') }} + {{ boxes.box('Files per version (expected)', '', 'img/aggregate/versions_expected.png', 'versions.json', 'img/aggregate/versions_expected_legend.png', description='Count of files per IATI version, over time. Expected: these are actual versions of the IATI Standard.') }} + {{ boxes.box('Files per version (other)', '', 'img/aggregate/versions_other.png', 'versions.json', 'img/aggregate/versions_other_legend.png', description='Count of files per other versions, over time. These values do not actually exist as IATI versions.') }}
      - {{ boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', 'versions_expected_legend.png', + {{ boxes.box('Publishers per version (expected)', '', 'img/aggregate/publishers_per_version_expected.png', 'publishers_per_version.json', 'img/aggregate/versions_expected_legend.png', description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.') }} - {{ boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', 'versions_other_legend.png', + {{ boxes.box('Publishers per version (other)', '', 'img/aggregate/publishers_per_version_other.png', 'publishers_per_version.json', 'img/aggregate/versions_other_legend.png', description='Count of publishers per other version, over time') }}
      @@ -29,7 +29,7 @@

      Inconsistent versions

      {% for publisher in current_stats.inverted_file_publisher %} {% with datasets = current_stats.inverted_file_publisher[publisher].version_mismatch.get('true', {}) %} {% if datasets %} -
      {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} {% if publisher_stats.publisher_duplicate_identifiers|count != 0 %} - + diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 58457e4c8a..3a75b83741 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -47,7 +47,7 @@ path('data-quality/organisation', ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), - path('data-quality/identifiers', lambda x: None, name="dash-dataquality-identifiers"), + path('data-quality/identifiers', ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), path('data-quality/reporting-orgs', lambda x: None, name="dash-dataquality-reportingorgs"), # Exploring data pages. diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index bd4da863ae..b6997e38bd 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -309,3 +309,9 @@ def dataquality_orgxml(request): template = loader.get_template("organisation.html") context = _make_context("organisation") return HttpResponse(template.render(context, request)) + + +def dataquality_identifiers(request): + template = loader.get_template("identifiers.html") + context = _make_context("identifiers") + return HttpResponse(template.render(context, request)) From 2acc92a689d6277aea5d6a271cc9df7e802c5d79 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 22 Oct 2024 11:32:08 +0100 Subject: [PATCH 336/375] refactor: Added data quality/reporting orgs page Refactored template, updated url route, and wrote view function for the reporting orgs page in the data quality section. 
--- dashboard/templates/reporting_orgs.html | 6 +++--- dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dashboard/templates/reporting_orgs.html b/dashboard/templates/reporting_orgs.html index f27e0eb7c1..3d2702f9aa 100644 --- a/dashboard/templates/reporting_orgs.html +++ b/dashboard/templates/reporting_orgs.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      @@ -22,11 +22,11 @@

      {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} {% set reporting_orgs_key = publisher_stats.reporting_orgs.keys()|first %} {% if publisher_stats.reporting_orgs|count != 1 or reporting_orgs_key != ckan_publishers[publisher].result.publisher_iati_id %} - + diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 3a75b83741..298985f6af 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -48,7 +48,7 @@ path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), path('data-quality/identifiers', ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), - path('data-quality/reporting-orgs', lambda x: None, name="dash-dataquality-reportingorgs"), + path('data-quality/reporting-orgs', ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), # Exploring data pages. 
path('exploring-data/elements', lambda x: None, name="dash-exploringdata-elements"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index b6997e38bd..e3e8ac1748 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -315,3 +315,9 @@ def dataquality_identifiers(request): template = loader.get_template("identifiers.html") context = _make_context("identifiers") return HttpResponse(template.render(context, request)) + + +def dataquality_reportingorgs(request): + template = loader.get_template("reporting_orgs.html") + context = _make_context("reporting_orgs") + return HttpResponse(template.render(context, request)) From 02528147c23f4da7619b7de0eaac1e2519a848d3 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 28 Oct 2024 16:28:16 +0000 Subject: [PATCH 337/375] refactor: Added exploring data/element(s) pages Refactored the templates for the elements and element detail pages in the exploring data section, completed the url routes and wrote the view functions. As part of this also found a bug due to changed behaviour in xmlschema that required a change to is_valid_element, fixed and renamed to is_valid_element_or_attribute to reflect its behaviour for checking if a given path points to an element or an attribute. 
--- dashboard/data.py | 35 ++++++++++++++++++------------- dashboard/templates/element.html | 14 ++++++------- dashboard/templates/elements.html | 14 ++++++------- dashboard/ui/urls.py | 4 ++-- dashboard/ui/views.py | 25 +++++++++++++++++++--- 5 files changed, 59 insertions(+), 33 deletions(-) diff --git a/dashboard/data.py b/dashboard/data.py index 17e1bd1e35..ba59673127 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -6,7 +6,7 @@ import csv from decimal import Decimal -from xmlschema import XMLSchema +import xmlschema import config @@ -211,21 +211,28 @@ def deep_merge(obj1, obj2): sources203 = [ config.join_data_path('schemas/2.03/iati-activities-schema.xsd'), config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] -schema105 = XMLSchema(sources105) -schema203 = XMLSchema(sources203) +schema105 = xmlschema.XMLSchema(sources105) +schema203 = xmlschema.XMLSchema(sources203) -def is_valid_element(path): - try: - if schema203.get_element(None, path=path): - return True - except AttributeError: - pass - try: - if schema105.get_element(None, path=path): - return True - except AttributeError: - pass +def is_valid_element_or_attribute(path: str) -> bool: + """Checks to see if a path is in either the 2.03 or 1.05 schema + + Parameters + ---------- + path : str + Path to the element or attribute to find. + + Returns + ------- + bool + True if the path is a known element or attribute. 
+ """ + if isinstance(schema203.find(path), (xmlschema.XsdElement, xmlschema.XsdAttribute)): + return True + if isinstance(schema105.find(path), (xmlschema.XsdElement, xmlschema.XsdAttribute)): + return True + return False diff --git a/dashboard/templates/element.html b/dashboard/templates/element.html index b8fe888aac..6ac861d01b 100644 --- a/dashboard/templates/element.html +++ b/dashboard/templates/element.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block page_header %}

      Usage of {{ element }}

      @@ -36,14 +36,14 @@

      Publishing this {{ element_or_attribute }}

      - {% for publisher in sorted(publishers) %} + {% for publisher in func.sorted(publishers) %} - - {% with publisher_inverted=get_publisher_stats(publisher, 'inverted-file') %} + + {% with publisher_inverted=func.get_publisher_stats(publisher, 'inverted-file') %} {% endwith %} - {% with publisher_stats=get_publisher_stats(publisher) %} + {% with publisher_stats=func.get_publisher_stats(publisher) %} {% endwith %} @@ -71,7 +71,7 @@

      Not publishing this {{ element_or_attribute }}

      {% for publisher in current_stats.inverted_publisher.publishers %} {% if publisher not in publishers %} - + @@ -98,7 +98,7 @@

      Files

      {% for publisher in current_stats.inverted_file_publisher %} {% with datasets = current_stats.inverted_file_publisher[publisher].elements.get(element) %} {% if datasets %} - - {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} - {% if is_valid_element(element) %} + {% for i, (element,publishers) in func.enumerate(current_stats.inverted_publisher.elements.items()) %} + {% if func.is_valid_element_or_attribute(element) %} - - + + diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 298985f6af..11f3483f9a 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -51,8 +51,8 @@ path('data-quality/reporting-orgs', ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), # Exploring data pages. - path('exploring-data/elements', lambda x: None, name="dash-exploringdata-elements"), - path('exploring-data/elements/', lambda x: None, name="dash-exploringdata-elements-detail"), + path('exploring-data/elements', ui.views.exploringdata_elements, name="dash-exploringdata-elements"), + path('exploring-data/elements/', ui.views.exploringdata_element_detail, name="dash-exploringdata-elements-detail"), path('exploring-data/codelists', lambda x: None, name="dash-exploringdata-codelists"), path('exploring-data/codelists//', lambda x: None, name="dash-exploringdata-codelists-detail"), path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index e3e8ac1748..4c71f49853 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -31,7 +31,7 @@ metadata, publisher_name, publishers_ordered_by_title, - is_valid_element, + is_valid_element_or_attribute, slugs) @@ -150,8 +150,9 @@ def _make_context(page_name: str): "firstint": ui.template_funcs.firstint, "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), "get_publisher_stats": get_publisher_stats, - "is_valid_element": is_valid_element, - 
"set": set + "is_valid_element_or_attribute": is_valid_element_or_attribute, + "set": set, + "enumerate": enumerate } ) context["navigation_reverse"].update({k: k for k in text.navigation}) @@ -321,3 +322,21 @@ def dataquality_reportingorgs(request): template = loader.get_template("reporting_orgs.html") context = _make_context("reporting_orgs") return HttpResponse(template.render(context, request)) + + +# +# Exploring data pages. +# +def exploringdata_elements(request): + template = loader.get_template("elements.html") + return HttpResponse(template.render(_make_context("elements"), request)) + + +def exploringdata_element_detail(request, element=None): + template = loader.get_template("element.html") + context = _make_context("elements") + i = slugs['element']['by_slug'][element] + context["element"] = list(current_stats['inverted_publisher']['elements'])[i] + context["publishers"] = list(current_stats['inverted_publisher']['elements'].values())[i] + context["element_or_attribute"] = 'attribute' if '@' in context["element"] else 'element' + return HttpResponse(template.render(context, request)) From 264783c66bc2a47901ae778ad187f5fba4f211e8 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 28 Oct 2024 19:32:36 +0000 Subject: [PATCH 338/375] refactor: Added exploring data / organisation identifier pages Refactored the templates, completed the url routes, and wrote the view functions for the organisation identifer pages in the exploring data section. 
--- dashboard/templates/org_ids.html | 6 +++--- dashboard/templates/org_type.html | 10 +++++----- dashboard/ui/urls.py | 4 ++-- dashboard/ui/views.py | 13 +++++++++++++ 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/dashboard/templates/org_ids.html b/dashboard/templates/org_ids.html index 3833e46b55..b36f51257c 100644 --- a/dashboard/templates/org_ids.html +++ b/dashboard/templates/org_ids.html @@ -10,15 +10,15 @@ - {% include 'org_id_table_header.html' %} + {% include '_partials/org_id_table_header.html' %} {% for slug in slugs.org_type.by_slug %} {% set transaction_stats = current_stats.aggregated[slug + '_transaction_stats'] %} - - {% include 'org_id_table_cells.html' %} + + {% include '_partials/org_id_table_cells.html' %} {% endfor %} diff --git a/dashboard/templates/org_type.html b/dashboard/templates/org_type.html index 79b321b012..0602447245 100644 --- a/dashboard/templates/org_type.html +++ b/dashboard/templates/org_type.html @@ -20,17 +20,17 @@

      Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs<

      - {% include 'org_id_table_header.html' %} + {% include '_partials/org_id_table_header.html' %} {% for publisher_title, publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} - - - {% include 'org_id_table_cells.html' %} + + + {% include '_partials/org_id_table_cells.html' %} {% endfor %} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 11f3483f9a..7bf8ede8b3 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -58,8 +58,8 @@ path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), path('exploring-data/dates', lambda x: None, name="dash-exploringdata-dates"), path('exploring-data/traceability', lambda x: None, name="dash-exploringdata-traceability"), - path('exploring-data/organisation-identifiers', lambda x: None, name="dash-exploringdata-orgids"), - path('exploring-data/organisation-types/', lambda x: None, name="dash-exploringdata-orgtypes-detail"), + path('exploring-data/organisation-identifiers', ui.views.exploringdata_orgids, name="dash-exploringdata-orgids"), + path('exploring-data/organisation-type/', ui.views.exploringdata_orgtypes_detail, name="dash-exploringdata-orgtypes-detail"), # Publishing statistics pages. 
path('publishing-statistics/timeliness', lambda x: None, name="dash-publishingstats-timeliness"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 4c71f49853..2d217fa38b 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -340,3 +340,16 @@ def exploringdata_element_detail(request, element=None): context["publishers"] = list(current_stats['inverted_publisher']['elements'].values())[i] context["element_or_attribute"] = 'attribute' if '@' in context["element"] else 'element' return HttpResponse(template.render(context, request)) + + +def exploringdata_orgids(request): + template = loader.get_template("org_ids.html") + return HttpResponse(template.render(_make_context("org_ids"), request)) + + +def exploringdata_orgtypes_detail(request, org_type=None): + assert org_type in slugs['org_type']['by_slug'] + template = loader.get_template("org_type.html") + context = _make_context("org_ids") + context["slug"] = org_type + return HttpResponse(template.render(context, request)) From 74876bd4331754b5de1343a3a91e278624a737fe Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 29 Oct 2024 11:03:17 +0000 Subject: [PATCH 339/375] refactor: Added exploring data/codelist pages This commit adds the codelist pages from the exploring data section. The templates are refactored, the url routes completed (with a fix changing major_version from int to str), and view functions written. To enable the functionality a number of functions were moved from make_html.py: get_codelist_values() was moved into template_funcs.py and dictinvert() and nested_dictinvert() were moved into views.py. 
--- dashboard/templates/codelist.html | 6 ++--- dashboard/templates/codelists.html | 16 ++++++------- dashboard/ui/template_funcs.py | 10 ++++++++ dashboard/ui/urls.py | 4 ++-- dashboard/ui/views.py | 37 ++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/dashboard/templates/codelist.html b/dashboard/templates/codelist.html index fa5749184e..a90b8b40c9 100644 --- a/dashboard/templates/codelist.html +++ b/dashboard/templates/codelist.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block page_header %}

      Codelist values used for {{ element }}

      @@ -45,7 +45,7 @@

      On Codelist

      {% endif %} {% endfor %} @@ -68,7 +68,7 @@

      Not On Codelist

      {% for value, publishers in values.items() %} {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} {% endif %} {% endfor %} diff --git a/dashboard/templates/codelists.html b/dashboard/templates/codelists.html index 19069175b1..f1d516fbe6 100644 --- a/dashboard/templates/codelists.html +++ b/dashboard/templates/codelists.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      @@ -25,17 +25,17 @@

      Codelists for version {{ major_version }}.xx

      - {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} + {% for i, (element, values) in func.enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - + - + - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - + {% with codes=func.sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(func.get_codelist_values(values))) %} + {% endwith %} - {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - + {% with codes=func.sorted(func.set(func.get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + {% endwith %} {% endfor %} diff --git a/dashboard/ui/template_funcs.py b/dashboard/ui/template_funcs.py index a01107e49f..8f5446febb 100644 --- a/dashboard/ui/template_funcs.py +++ b/dashboard/ui/template_funcs.py @@ -6,3 +6,13 @@ def firstint(s): return 0 m = re.search(r'\d+', s[0]) return int(m.group(0)) + + +def get_codelist_values(codelist_values_for_element): + """Return a list of unique values present within a one-level nested dictionary. 
+ Envisaged usage is to gather the codelist values used by each publisher, as in + stats/current/inverted-publisher/codelist_values_by_major_version.json + Input: Set of codelist values for a given element (listed by publisher), for example: + current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] + """ + return list(set([y for x in codelist_values_for_element.items() for y in list(x[1].keys())])) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 7bf8ede8b3..7dd586aab5 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -53,8 +53,8 @@ # Exploring data pages. path('exploring-data/elements', ui.views.exploringdata_elements, name="dash-exploringdata-elements"), path('exploring-data/elements/', ui.views.exploringdata_element_detail, name="dash-exploringdata-elements-detail"), - path('exploring-data/codelists', lambda x: None, name="dash-exploringdata-codelists"), - path('exploring-data/codelists//', lambda x: None, name="dash-exploringdata-codelists-detail"), + path('exploring-data/codelists', ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), + path('exploring-data/codelists//', ui.views.exploringdata_codelists_detail, name="dash-exploringdata-codelists-detail"), path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), path('exploring-data/dates', lambda x: None, name="dash-exploringdata-dates"), path('exploring-data/traceability', lambda x: None, name="dash-exploringdata-traceability"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 2d217fa38b..3f8aeaa644 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -8,6 +8,7 @@ import dateutil.parser import subprocess import json +import collections from django.http import HttpResponse, Http404 from django.template import loader @@ -80,6 +81,21 @@ def _get_licenses_for_publisher(publisher_name): for package in ckan[publisher_name].values()]) +def dictinvert(d): + inv = 
collections.defaultdict(list) + for k, v in d.items(): + inv[v].append(k) + return inv + + +def nested_dictinvert(d): + inv = collections.defaultdict(lambda: collections.defaultdict(int)) + for k, v in d.items(): + for k2, v2 in v.items(): + inv[k2][k] += v2 + return inv + + def _make_context(page_name: str): """Make a basic context dictionary for a given page """ @@ -148,6 +164,7 @@ def _make_context(page_name: str): stats_commit_hash=STATS_COMMIT_HASH, func={"sorted": sorted, "firstint": ui.template_funcs.firstint, + "get_codelist_values": ui.template_funcs.get_codelist_values, "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), "get_publisher_stats": get_publisher_stats, "is_valid_element_or_attribute": is_valid_element_or_attribute, @@ -353,3 +370,23 @@ def exploringdata_orgtypes_detail(request, org_type=None): context = _make_context("org_ids") context["slug"] = org_type return HttpResponse(template.render(context, request)) + + +def exploringdata_codelists(request): + template = loader.get_template("codelists.html") + return HttpResponse(template.render(_make_context("codelists"), request)) + + +def exploringdata_codelists_detail(request, major_version=None, attribute=None): + template = loader.get_template("codelist.html") + + context = _make_context("codelists") + i = slugs['codelist'][major_version]['by_slug'][attribute] + element = list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] + values = nested_dictinvert(list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values())[i]) + context["element"] = element + context["values"] = values + context["reverse_codelist_mapping"] = {major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items()} + context["major_version"] = major_version + + return HttpResponse(template.render(context, request)) From 9acd4f6b490d383ba934b97cdda1f2c7ffac9ecf Mon Sep 17 00:00:00 2001 From: Chris Arridge 
Date: Tue, 29 Oct 2024 11:14:11 +0000 Subject: [PATCH 340/375] refactor: Added booleans, dates and traceability pages to exploring data This commit adds three pages to the exploring data section: booleans, dates and traceability. The commit refactors the templates, completes the url routes, and adds the view functions. --- dashboard/templates/booleans.html | 2 +- dashboard/templates/dates.html | 4 ++-- dashboard/templates/traceability.html | 4 ++-- dashboard/ui/urls.py | 6 +++--- dashboard/ui/views.py | 15 +++++++++++++++ 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/dashboard/templates/booleans.html b/dashboard/templates/booleans.html index bbee0df0e9..8ad17c7e12 100644 --- a/dashboard/templates/booleans.html +++ b/dashboard/templates/booleans.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context%} {% block content %}
      diff --git a/dashboard/templates/dates.html b/dashboard/templates/dates.html index fd9ffdb2d6..7d9a613940 100644 --- a/dashboard/templates/dates.html +++ b/dashboard/templates/dates.html @@ -17,9 +17,9 @@
      {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} - + diff --git a/dashboard/templates/traceability.html b/dashboard/templates/traceability.html index be48254beb..af759d987e 100644 --- a/dashboard/templates/traceability.html +++ b/dashboard/templates/traceability.html @@ -24,9 +24,9 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} - + {% for row in comprehensiveness.table() %} - + {% for column_slug in comprehensiveness.column_slugs[tab] %} {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} - + {% for month in timeliness.previous_months_reversed %} diff --git a/dashboard/templates/timeliness_base.html b/dashboard/templates/timeliness_base.html index 9ef36533af..48369492df 100644 --- a/dashboard/templates/timeliness_base.html +++ b/dashboard/templates/timeliness_base.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block container %} @@ -8,8 +8,8 @@ {% endblock %}
        diff --git a/dashboard/templates/timeliness_timelag.html b/dashboard/templates/timeliness_timelag.html index 8cf5f9a742..4e40dc9465 100644 --- a/dashboard/templates/timeliness_timelag.html +++ b/dashboard/templates/timeliness_timelag.html @@ -1,5 +1,5 @@ {% extends 'timeliness_base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block timelag_li %} class="active"{% endblock %} @@ -7,7 +7,7 @@
        - (This table as CSV) + (This table as CSV)

        Table of Time lag assessments

        @@ -42,7 +42,7 @@

        Table of Time lag assessments

      {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} - + {% for month in timeliness.previous_months_reversed %} {% endfor %} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index d2f7c26320..e1489c58bb 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -62,7 +62,8 @@ path('exploring-data/organisation-type/', ui.views.exploringdata_orgtypes_detail, name="dash-exploringdata-orgtypes-detail"), # Publishing statistics pages. - path('publishing-statistics/timeliness', lambda x: None, name="dash-publishingstats-timeliness"), + path('publishing-statistics/timeliness', ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), + path('publishing-statistics/timeliness-timelag', ui.views.pubstats_timeliness_timelag, name="dash-publishingstats-timeliness-timelag"), path('publishing-statistics/forward-looking', lambda x: None, name="dash-publishingstats-forwardlooking"), path('publishing-statistics/comprehensiveness', ui.views.pubstats_comprehensiveness, name="dash-publishingstats-comprehensiveness"), path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index aeff71abc5..49259a9d6d 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -16,6 +16,7 @@ import comprehensiveness import config import text +import timeliness import ui.template_funcs import vars @@ -434,3 +435,15 @@ def pubstats_comprehensiveness_valueadded(request): context = _make_context("comprehensiveness") context["comprehensiveness"] = comprehensiveness return HttpResponse(template.render(context, request)) + +def pubstats_timeliness(request): + template = loader.get_template("timeliness.html") + context = _make_context("timeliness") + context["timeliness"] = timeliness + return HttpResponse(template.render(context, request)) + +def 
pubstats_timeliness_timelag(request): + template = loader.get_template("timeliness_timelag.html") + context = _make_context("timeliness") + context["timeliness"] = timeliness + return HttpResponse(template.render(context, request)) From 4b6e5e1cf6c6bf88ecca73bc6d29427c7d8efa2e Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 4 Nov 2024 17:25:30 +0000 Subject: [PATCH 343/375] refactor: Added summary stats, forward looking and humanitarian to stats Refactored the summary statistics, forward looking and humanitarian pages in the publishing statistics section. Refactored the templates, completed the url routes, and wrote view functions. --- dashboard/templates/forwardlooking.html | 6 ++--- dashboard/templates/humanitarian.html | 8 +++---- dashboard/templates/summary_stats.html | 16 ++++++------- dashboard/ui/urls.py | 6 ++--- dashboard/ui/views.py | 31 +++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/dashboard/templates/forwardlooking.html b/dashboard/templates/forwardlooking.html index 1a4cdbfbc1..a25ae94fbe 100644 --- a/dashboard/templates/forwardlooking.html +++ b/dashboard/templates/forwardlooking.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %} {% for row in forwardlooking.table() %} - + {% for column in row.year_columns %} {% for year in forwardlooking.years %} diff --git a/dashboard/templates/humanitarian.html b/dashboard/templates/humanitarian.html index 465d855cb2..07995d56c3 100644 --- a/dashboard/templates/humanitarian.html +++ b/dashboard/templates/humanitarian.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
        @@ -10,7 +10,7 @@
        @@ -18,7 +18,7 @@

        Humanitarian

        This table assesses the extent to which IATI publishers are reporting on humanitarian attributes.

        -

        The statistics on this page do not form part of the Summary Statstics.

        +

        The statistics on this page do not form part of the Summary Statstics.

        {% include '_partials/tablesorter_instructions.html' %}
        @@ -35,7 +35,7 @@

        Humanitarian

      {% for row in humanitarian.table() %} - + {% for column_slug, _ in humanitarian.columns %} {% for row in summary_stats.table() %} - + {% for column_slug, column_header in summary_stats.columns %} - + diff --git a/dashboard/templates/files.html b/dashboard/templates/files.html index a47063f6a7..cd359cc2a6 100644 --- a/dashboard/templates/files.html +++ b/dashboard/templates/files.html @@ -49,8 +49,9 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - - + {% set publisher=func.dataset_to_publisher(package) %} + + diff --git a/dashboard/templates/timeliness_timelag.html b/dashboard/templates/timeliness_timelag.html index 8c2741ef5c..44fef92aca 100644 --- a/dashboard/templates/timeliness_timelag.html +++ b/dashboard/templates/timeliness_timelag.html @@ -42,7 +42,7 @@

      Table of Time lag assessments

      {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} - + {% for month in timeliness.previous_months_reversed %} {% endfor %} diff --git a/dashboard/templates/traceability.html b/dashboard/templates/traceability.html index af759d987e..19eb3a6259 100644 --- a/dashboard/templates/traceability.html +++ b/dashboard/templates/traceability.html @@ -26,7 +26,7 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = func.get_publisher_stats(publisher) %} - + - - + {% set publisher=func.dataset_to_publisher(package) %} + + {% endif %} {% endfor %} @@ -55,8 +56,9 @@

      Files with non-standard roots

      {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - - + {% set publisher=func.dataset_to_publisher(package) %} + + {% endif %} {% endfor %} From 04d68c94f2c6c12d353c300ced36b0a210720f26 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Thu, 5 Dec 2024 13:29:11 +0000 Subject: [PATCH 370/375] dockerignore: Add more directories of generated or cached files --- .dockerignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.dockerignore b/.dockerignore index 7a451343fe..4ce053e2ab 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,5 @@ data out +web +stats-calculated +dashboard/cache From 27d603ff28f0ad0d0598f4b2e0e5f49cc16f6d30 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Thu, 5 Dec 2024 13:33:27 +0000 Subject: [PATCH 371/375] caching: Import statement in wsgi.py, so it's preloaded by gunicorn --- dashboard/ui/wsgi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dashboard/ui/wsgi.py b/dashboard/ui/wsgi.py index 4605f9a154..179c825598 100644 --- a/dashboard/ui/wsgi.py +++ b/dashboard/ui/wsgi.py @@ -11,6 +11,9 @@ from django.core.wsgi import get_wsgi_application +# Import data here so it's in gunicorn's preload +import data # noqa F401 + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") application = get_wsgi_application() From d3641a321cd393b817c5482b38d1027ff21d9bf4 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 11 Dec 2024 12:59:24 +0000 Subject: [PATCH 372/375] get_stats.sh: Pull from the deployed copy of the merged codebase These files are rather large, so it's necessary to not run get_stats.sh in the CI. Instead I've added a mock shell script to set up the files it needs. 
--- .github/workflows/ci.yml | 2 +- get_stats.sh | 16 ++++++++++++++-- mock_stats_for_unit_tests.sh | 9 +++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) create mode 100755 mock_stats_for_unit_tests.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e61a1cfd1c..db38879c06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: - name: Fetch stats run: | mkdir out - ./get_stats.sh + ./mock_stats_for_unit_tests.sh - name: Run tests run: pytest --cov . # - name: Coveralls diff --git a/get_stats.sh b/get_stats.sh index c51790dfe5..bfa3296eec 100755 --- a/get_stats.sh +++ b/get_stats.sh @@ -1,3 +1,15 @@ -#!/bin/bash +set -eux +# ^ https://explainshell.com/explain?cmd=set+-eux -git clone --depth=1 --quiet --branch gh-pages https://github.com/codeforIATI/IATI-Stats-public stats-calculated +mkdir stats-calculated +for f in ckan gitdate licenses; do + curl --compressed "https://dev.merged.dashboard.iatistandard.org/stats/${f}.json" > stats-calculated/${f}.json +done + +cd stats-calculated +wget "https://dev.merged.dashboard.iatistandard.org/stats/current.tar.gz" -O current.tar.gz +wget "https://dev.merged.dashboard.iatistandard.org/stats/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz +wget "https://dev.merged.dashboard.iatistandard.org/stats/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz +tar -xf current.tar.gz +tar -xf gitaggregate-dated.tar.gz +tar -xf gitaggregate-publisher-dated.tar.gz diff --git a/mock_stats_for_unit_tests.sh b/mock_stats_for_unit_tests.sh new file mode 100755 index 0000000000..26b6d0b10c --- /dev/null +++ b/mock_stats_for_unit_tests.sh @@ -0,0 +1,9 @@ +set -eux + +mkdir stats-calculated +curl --compressed "https://dev.merged.dashboard.iatistandard.org/stats/ckan.json" > stats-calculated/ckan.json +mkdir -p stats-calculated/current/aggregated-publisher +mkdir -p stats-calculated/current/inverted-publisher +for f in activities 
codelist_values_by_major_version elements; do + echo "{}" > stats-calculated/current/inverted-publisher/$f.json +done From bbf0df9bbcbd2cb61c66908308d8f36dc066b8f9 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 11 Dec 2024 13:01:18 +0000 Subject: [PATCH 373/375] footer: Don't require metadata.json This exists in the codeforIATI deploy, but not the new IATI one. Instead we pick the latest date from gitdate.json. --- dashboard/data.py | 1 - dashboard/ui/views.py | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dashboard/data.py b/dashboard/data.py index d168a300f3..2f1ba23eb9 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -203,7 +203,6 @@ def deep_merge(obj1, obj2): dataset_to_publisher_dict = { dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() } -metadata = json.load(open(filepaths.join_stats_path("metadata.json")), object_pairs_hook=OrderedDict) with open(filepaths.join_data_path("downloads/errors")) as fp: for line in fp: if line != ".\n": diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index be0504fa9b..f61fe75afd 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -30,7 +30,6 @@ get_publisher_stats, github_issues, is_valid_element_or_attribute, - metadata, publisher_name, publishers_ordered_by_title, slugs, @@ -113,6 +112,11 @@ def nested_dictinvert(d): def _make_context(page_name: str): """Make a basic context dictionary for a given page""" + + with open(filepaths.join_stats_path("gitdate.json")) as fp: + date_time_data_str = max(json.load(fp).values()) + date_time_data_obj = dateutil.parser.parse(date_time_data_str) + context = dict( page=page_name, top_titles=text.top_titles, @@ -166,9 +170,8 @@ def _make_context(page_name: str): github_issues=github_issues, MAJOR_VERSIONS=MAJOR_VERSIONS, expected_versions=vars.expected_versions, - metadata=metadata, slugs=slugs, - datetime_data=dateutil.parser.parse(metadata["created_at"]).strftime("%-d %B 
%Y (at %H:%M %Z)"), + datetime_data=date_time_data_obj.strftime("%-d %B %Y (at %H:%M %Z)"), current_year=datetime.datetime.now(datetime.UTC).year, stats_url="/stats", generated_url="/generated", From a5cc6e1998f7f828ec66cc452b008f1694d18a8e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 11 Dec 2024 17:03:35 +0000 Subject: [PATCH 374/375] chore(deps): Bump django from 5.1.3 to 5.1.4 Bumps [django](https://github.com/django/django) from 5.1.3 to 5.1.4. - [Commits](https://github.com/django/django/compare/5.1.3...5.1.4) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- requirements_dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index fc5f9b544c..5f9bb2cb13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ contourpy==1.3.0 # via matplotlib cycler==0.12.1 # via matplotlib -django==5.1.3 +django==5.1.4 # via -r requirements.in django-environ==0.11.2 # via -r requirements.in diff --git a/requirements_dev.txt b/requirements_dev.txt index ee37e0cac8..d238804c5f 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -41,7 +41,7 @@ cycler==0.12.1 # via # -r requirements.txt # matplotlib -django==5.1.3 +django==5.1.4 # via -r requirements.txt django-environ==0.11.2 # via -r requirements.txt From dcfecae1b222e5df0eb4ad4fab9ad58ffd20ac6f Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 11 Dec 2024 15:43:34 +0000 Subject: [PATCH 375/375] feat: Serve stats files from the development server (most of this change is black refromatting the list of urls so it can have additions at the end) --- dashboard/ui/urls.py | 346 +++++++++++++++++++++++-------------------- 1 file changed, 188 insertions(+), 158 deletions(-) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index c407748ad9..9ed48452b9 100644 --- 
a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -22,162 +22,192 @@ import ui.views -urlpatterns = [ - path("admin/", admin.site.urls), - # Top level dashboard pages. - path("", ui.views.index, name="dash-index"), - path("headlines", ui.views.headlines, name="dash-headlines"), - path("data-quality", ui.views.data_quality, name="dash-dataquality"), - path("publishing-statistics", ui.views.publishing_stats, name="dash-publishingstats"), - path("exploring-data", ui.views.exploring_data, name="dash-exploringdata"), - path("faq", ui.views.faq, name="dash-faq"), - # Headlines pages and detail pages - placeholders for now. - path("headlines/publishers", ui.views.headlines_publishers, name="dash-headlines-publishers"), - path("headlines/files", ui.views.headlines_files, name="dash-headlines-files"), - path("headlines/activities", ui.views.headlines_activities, name="dash-headlines-activities"), - path( - "headlines/publishers/", - ui.views.headlines_publisher_detail, - name="dash-headlines-publisher-detail", - ), - # Data quality pages. 
- path("data-quality/download-errors", ui.views.dataquality_download, name="dash-dataquality-download"), - path("data/download_errors.json", ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), - path("data-quality/xml-errors", ui.views.dataquality_xml, name="dash-dataquality-xml"), - path("data-quality/validation", ui.views.dataquality_validation, name="dash-dataquality-validation"), - path("data-quality/versions", ui.views.dataquality_versions, name="dash-dataquality-versions"), - path("data-quality/organisation", ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), - path("data-quality/licenses", ui.views.dataquality_licenses, name="dash-dataquality-licenses"), - path( - "data-quality/licenses/", - ui.views.dataquality_licenses_detail, - name="dash-dataquality-licenses-detail", - ), - path("data-quality/identifiers", ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), - path("data-quality/reporting-orgs", ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), - # Exploring data pages. 
- path("exploring-data/elements", ui.views.exploringdata_elements, name="dash-exploringdata-elements"), - path( - "exploring-data/elements/", - ui.views.exploringdata_element_detail, - name="dash-exploringdata-elements-detail", - ), - path("exploring-data/codelists", ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), - path( - "exploring-data/codelists//", - ui.views.exploringdata_codelists_detail, - name="dash-exploringdata-codelists-detail", - ), - path("exploring-data/booleans", ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), - path("exploring-data/dates", ui.views.exploringdata_dates, name="dash-exploringdata-dates"), - path("exploring-data/traceability", ui.views.exploringdata_traceability, name="dash-exploringdata-traceability"), - path("exploring-data/organisation-identifiers", ui.views.exploringdata_orgids, name="dash-exploringdata-orgids"), - path( - "exploring-data/organisation-type/", - ui.views.exploringdata_orgtypes_detail, - name="dash-exploringdata-orgtypes-detail", - ), - # Publishing statistics pages. 
- path("publishing-statistics/timeliness", ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), - path( - "publishing-statistics/timeliness-timelag", - ui.views.pubstats_timeliness_timelag, - name="dash-publishingstats-timeliness-timelag", - ), - path( - "publishing-statistics/forward-looking", - ui.views.pubstats_forwardlooking, - name="dash-publishingstats-forwardlooking", - ), - path( - "publishing-statistics/comprehensiveness", - ui.views.pubstats_comprehensiveness, - name="dash-publishingstats-comprehensiveness", - ), - path( - "publishing-statistics/comprehensiveness/core", - ui.views.pubstats_comprehensiveness_core, - name="dash-publishingstats-comprehensiveness-core", - ), - path( - "publishing-statistics/comprehensiveness/financials", - ui.views.pubstats_comprehensiveness_financials, - name="dash-publishingstats-comprehensiveness-financials", - ), - path( - "publishing-statistics/comprehensiveness/value-added", - ui.views.pubstats_comprehensiveness_valueadded, - name="dash-publishingstats-comprehensiveness-valueadded", - ), - path( - "publishing-statistics/summary-statistics", - ui.views.pubstats_summarystats, - name="dash-publishingstats-summarystats", - ), - path( - "publishing-statistics/humanitarian-reporting", - ui.views.pubstats_humanitarian, - name="dash-publishingstats-humanitarian", - ), - # Registration agencies. - path("registration-agencies", ui.views.registration_agencies, name="dash-registrationagencies"), - path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)), - # Redirects to support any users with bookmarks to pages on the old Dashboard. 
- path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), - path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), - path("data_quality.html", RedirectView.as_view(pattern_name="dash-dataquality", permanent=True)), - path("exploring_data.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), - path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), - path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), - path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), - path( - "timeliness_timelag.html", - RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True), - ), - path( - "forwardlooking.html", RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True) - ), - path( - "comprehensiveness.html", - RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True), - ), - path( - "comprehensiveness_core.html", - RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True), - ), - path( - "comprehensiveness_financials.html", - RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True), - ), - path( - "comprehensiveness_valueadded.html", - RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True), - ), - path("summary_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True)), - path("humanitarian.html", RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True)), - path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), - path("activities.html", 
RedirectView.as_view(pattern_name="dash-headlines-activities", permanent=True)), - path("download.html", RedirectView.as_view(pattern_name="dash-dataquality-download", permanent=True)), - path("xml.html", RedirectView.as_view(pattern_name="dash-dataquality-xml", permanent=True)), - path("validation.html", RedirectView.as_view(pattern_name="dash-dataquality-validation", permanent=True)), - path("versions.html", RedirectView.as_view(pattern_name="dash-dataquality-versions", permanent=True)), - path("organisation.html", RedirectView.as_view(pattern_name="dash-dataquality-organisation", permanent=True)), - path("identifiers.html", RedirectView.as_view(pattern_name="dash-dataquality-identifiers", permanent=True)), - path("reporting_orgs.html", RedirectView.as_view(pattern_name="dash-dataquality-reportingorgs", permanent=True)), - path("elements.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), - path("codelists.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), - path("booleans.html", RedirectView.as_view(pattern_name="dash-exploringdata-booleans", permanent=True)), - path("dates.html", RedirectView.as_view(pattern_name="dash-exploringdata-dates", permanent=True)), - path("traceability.html", RedirectView.as_view(pattern_name="dash-exploringdata-traceability", permanent=True)), - path("org_ids.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), - path("faq.html", RedirectView.as_view(pattern_name="dash-faq", permanent=True)), - path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), - re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), - re_path(r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), - re_path( - r"codelist\/\d\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", 
permanent=True) - ), - re_path(r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), - re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), -] + static("generated", document_root="../out") +urlpatterns = ( + [ + path("admin/", admin.site.urls), + # Top level dashboard pages. + path("", ui.views.index, name="dash-index"), + path("headlines", ui.views.headlines, name="dash-headlines"), + path("data-quality", ui.views.data_quality, name="dash-dataquality"), + path("publishing-statistics", ui.views.publishing_stats, name="dash-publishingstats"), + path("exploring-data", ui.views.exploring_data, name="dash-exploringdata"), + path("faq", ui.views.faq, name="dash-faq"), + # Headlines pages and detail pages - placeholders for now. + path("headlines/publishers", ui.views.headlines_publishers, name="dash-headlines-publishers"), + path("headlines/files", ui.views.headlines_files, name="dash-headlines-files"), + path("headlines/activities", ui.views.headlines_activities, name="dash-headlines-activities"), + path( + "headlines/publishers/", + ui.views.headlines_publisher_detail, + name="dash-headlines-publisher-detail", + ), + # Data quality pages. 
+ path("data-quality/download-errors", ui.views.dataquality_download, name="dash-dataquality-download"), + path( + "data/download_errors.json", + ui.views.dataquality_download_errorsjson, + name="dash-dataquality-download-json", + ), + path("data-quality/xml-errors", ui.views.dataquality_xml, name="dash-dataquality-xml"), + path("data-quality/validation", ui.views.dataquality_validation, name="dash-dataquality-validation"), + path("data-quality/versions", ui.views.dataquality_versions, name="dash-dataquality-versions"), + path("data-quality/organisation", ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), + path("data-quality/licenses", ui.views.dataquality_licenses, name="dash-dataquality-licenses"), + path( + "data-quality/licenses/", + ui.views.dataquality_licenses_detail, + name="dash-dataquality-licenses-detail", + ), + path("data-quality/identifiers", ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), + path("data-quality/reporting-orgs", ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), + # Exploring data pages. 
+ path("exploring-data/elements", ui.views.exploringdata_elements, name="dash-exploringdata-elements"), + path( + "exploring-data/elements/", + ui.views.exploringdata_element_detail, + name="dash-exploringdata-elements-detail", + ), + path("exploring-data/codelists", ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), + path( + "exploring-data/codelists//", + ui.views.exploringdata_codelists_detail, + name="dash-exploringdata-codelists-detail", + ), + path("exploring-data/booleans", ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), + path("exploring-data/dates", ui.views.exploringdata_dates, name="dash-exploringdata-dates"), + path( + "exploring-data/traceability", ui.views.exploringdata_traceability, name="dash-exploringdata-traceability" + ), + path( + "exploring-data/organisation-identifiers", ui.views.exploringdata_orgids, name="dash-exploringdata-orgids" + ), + path( + "exploring-data/organisation-type/", + ui.views.exploringdata_orgtypes_detail, + name="dash-exploringdata-orgtypes-detail", + ), + # Publishing statistics pages. 
+ path("publishing-statistics/timeliness", ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), + path( + "publishing-statistics/timeliness-timelag", + ui.views.pubstats_timeliness_timelag, + name="dash-publishingstats-timeliness-timelag", + ), + path( + "publishing-statistics/forward-looking", + ui.views.pubstats_forwardlooking, + name="dash-publishingstats-forwardlooking", + ), + path( + "publishing-statistics/comprehensiveness", + ui.views.pubstats_comprehensiveness, + name="dash-publishingstats-comprehensiveness", + ), + path( + "publishing-statistics/comprehensiveness/core", + ui.views.pubstats_comprehensiveness_core, + name="dash-publishingstats-comprehensiveness-core", + ), + path( + "publishing-statistics/comprehensiveness/financials", + ui.views.pubstats_comprehensiveness_financials, + name="dash-publishingstats-comprehensiveness-financials", + ), + path( + "publishing-statistics/comprehensiveness/value-added", + ui.views.pubstats_comprehensiveness_valueadded, + name="dash-publishingstats-comprehensiveness-valueadded", + ), + path( + "publishing-statistics/summary-statistics", + ui.views.pubstats_summarystats, + name="dash-publishingstats-summarystats", + ), + path( + "publishing-statistics/humanitarian-reporting", + ui.views.pubstats_humanitarian, + name="dash-publishingstats-humanitarian", + ), + # Registration agencies. + path("registration-agencies", ui.views.registration_agencies, name="dash-registrationagencies"), + path( + "registration_agencies.html", + RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True), + ), + # Redirects to support any users with bookmarks to pages on the old Dashboard. 
+ path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), + path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), + path("data_quality.html", RedirectView.as_view(pattern_name="dash-dataquality", permanent=True)), + path("exploring_data.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), + path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), + path( + "timeliness_timelag.html", + RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True), + ), + path( + "forwardlooking.html", + RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True), + ), + path( + "comprehensiveness.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True), + ), + path( + "comprehensiveness_core.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True), + ), + path( + "comprehensiveness_financials.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True), + ), + path( + "comprehensiveness_valueadded.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True), + ), + path( + "summary_stats.html", + RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True), + ), + path( + "humanitarian.html", RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True) + ), + path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), + path("activities.html", 
RedirectView.as_view(pattern_name="dash-headlines-activities", permanent=True)), + path("download.html", RedirectView.as_view(pattern_name="dash-dataquality-download", permanent=True)), + path("xml.html", RedirectView.as_view(pattern_name="dash-dataquality-xml", permanent=True)), + path("validation.html", RedirectView.as_view(pattern_name="dash-dataquality-validation", permanent=True)), + path("versions.html", RedirectView.as_view(pattern_name="dash-dataquality-versions", permanent=True)), + path("organisation.html", RedirectView.as_view(pattern_name="dash-dataquality-organisation", permanent=True)), + path("identifiers.html", RedirectView.as_view(pattern_name="dash-dataquality-identifiers", permanent=True)), + path( + "reporting_orgs.html", RedirectView.as_view(pattern_name="dash-dataquality-reportingorgs", permanent=True) + ), + path("elements.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("codelists.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + path("booleans.html", RedirectView.as_view(pattern_name="dash-exploringdata-booleans", permanent=True)), + path("dates.html", RedirectView.as_view(pattern_name="dash-exploringdata-dates", permanent=True)), + path( + "traceability.html", RedirectView.as_view(pattern_name="dash-exploringdata-traceability", permanent=True) + ), + path("org_ids.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + path("faq.html", RedirectView.as_view(pattern_name="dash-faq", permanent=True)), + path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path( + r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True) + ), + re_path( + r"codelist\/\d\/\S*.html", + 
RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True), + ), + re_path( + r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True) + ), + re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + ] + + static("generated", document_root="../out") + + static("stats", document_root="../stats-calculated") +) # ^ Serve generated files when using runserver for development
      {{ element }}{{ codelist_mapping[major_version].get(element) }}{{ codelist_mapping[major_version].get(element) }}{% if codes|count %} {{ codes|count }} diff --git a/templates/validation.html b/templates/validation.html index 7a91ff0960..c01936cc3d 100644 --- a/templates/validation.html +++ b/templates/validation.html @@ -45,7 +45,7 @@

      List of files that fail validation, grouped by publisher

      {% if publisher in ckan and dataset_name in ckan[publisher] %} - validator + validator {% endif %}
      From f0bf0ed76527681eff6298f7b74063dfd2b0455e Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 13 Aug 2024 11:32:06 +0100 Subject: [PATCH 315/375] feat: Minor text updates Minor updates to template text. --- templates/data_quality.html | 4 ++-- templates/exploring_data.html | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/data_quality.html b/templates/data_quality.html index 81be6c67a3..6250d02f09 100644 --- a/templates/data_quality.html +++ b/templates/data_quality.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

      This section tracks published IATI data around a series of quality measures.

      -

      These are specifically technical measures - schema validation, download issues, XML formatting, etc - that can be easily rectified by publishers. No attempt is made to evaluate the actual content of the data - it should be used for sense checking and technical fixes.

      +

      This section of the IATI Dashboard tracks published IATI data around a series of quality measures.

      +

      These are specifically technical measures - schema validation, download issues, XML formatting, etc - that can be easily rectified by publishers. No attempt is made to evaluate the actual content of the data - the dashboard should be used for sense checking and technical fixes.

      {% endblock about %} {% block publisher_page_hash %}#h_dataquality{% endblock %} diff --git a/templates/exploring_data.html b/templates/exploring_data.html index 7cd53a2a1b..5d4a99465c 100644 --- a/templates/exploring_data.html +++ b/templates/exploring_data.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -This section tracks the various elements, attributes and codelists within published IATI data. -For every instance of these properties, a report is made available. +

      This section tracks the various elements, attributes and codelists within published IATI data.

      +

      For every instance of these properties, a report is made available.

      {% endblock about %} {% block publisher_page_hash %}#h_exploringdata{% endblock %} From 3b965d13be8cc25a80575c725b7bb4be86416316 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 24 Sep 2024 18:18:02 +0100 Subject: [PATCH 316/375] refactor: Generate file/path strings centrally Code refactored so that any file/path that is needed is generated using a central set of functions in config.py. This has been done as part of the refactor to Django so that the code can be moved around and any changes in paths can be changed in just one place. --- common.py | 6 +++--- config.py | 37 +++++++++++++++++++++++++++++++++++++ coverage.py | 4 +++- data.py | 46 ++++++++++++++++++++++++---------------------- licenses.py | 7 ++++--- make_csv.py | 23 ++++++++++++----------- plots.py | 18 ++++++++++-------- speakers_kit.py | 14 ++++++++------ timeliness.py | 7 ++++--- 9 files changed, 105 insertions(+), 57 deletions(-) create mode 100644 config.py diff --git a/common.py b/common.py index 8328f84b0c..a7922798b4 100644 --- a/common.py +++ b/common.py @@ -1,11 +1,11 @@ -# Script to define useful functions - +"""Load IATI OrganisationType codelist into a global and provide function to get publisher type""" import data import json +import config # Import organisation_type_codelist as a global, then delete when used to save memory -with open('data/IATI-Codelists-2/out/clv2/json/en/OrganisationType.json') as fh: +with open(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: organisation_type_codelist = json.load(fh) organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']} del organisation_type_codelist diff --git a/config.py b/config.py new file mode 100644 index 0000000000..9b7e9e1906 --- /dev/null +++ b/config.py @@ -0,0 +1,37 @@ +"""These functions join path fragments to make directories to different input or output files + +Note: eventually these functions will probably become redundant or refactored into 
+a different module, but for now this helps in refactoring the code. +""" + +import os.path + + +STATS_DIRECTORY = "./stats-calculated" +DATA_DIRECTORY = "./data" +BASE_DIRECTORY = "./" +OUT_DIRECTORY = "./out" + + +def join_stats_path(p: str) -> str: + """Make a path to a file or directory within the downloaded stats directory + """ + return os.path.join(STATS_DIRECTORY, p) + + +def join_data_path(p: str) -> str: + """Make a path to a file or directory within the downloaded data directory + """ + return os.path.join(DATA_DIRECTORY, p) + + +def join_base_path(p: str) -> str: + """Make a path to a file or directory relative to the base of the dashboard directory + """ + return os.path.join(BASE_DIRECTORY, p) + + +def join_out_path(p: str) -> str: + """Make a path to a file or directory relative to the base of the out directory + """ + return os.path.join(OUT_DIRECTORY, p) diff --git a/coverage.py b/coverage.py index 69fe8de47e..35ae7549e9 100644 --- a/coverage.py +++ b/coverage.py @@ -1,10 +1,12 @@ # This file converts a range coverage data to variables which can be outputted on the coverage page import csv + from data import get_publisher_stats from data import get_registry_id_matches from data import publisher_name from data import publishers_ordered_by_title from data import secondary_publishers +import config def is_number(s): @@ -178,7 +180,7 @@ def table(): # Compile a list of Development finance institutions (DFIs) -with open('dfi_publishers.csv', 'r') as csv_file: +with open(config.join_base_path('dfi_publishers.csv'), 'r') as csv_file: reader = csv.reader(csv_file, delimiter=',') dfi_publishers = [] for line in reader: diff --git a/data.py b/data.py index ea5368ea43..17e1bd1e35 100644 --- a/data.py +++ b/data.py @@ -8,6 +8,8 @@ from xmlschema import XMLSchema +import config + # Modified from: # https://github.com/IATI/IATI-Stats/blob/1d20ed1e/stats/common/decorators.py#L5-L13 @@ -118,7 +120,7 @@ def get_publisher_name(self): # Loop over this list and return 
the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir('./stats-calculated/current/aggregated-publisher').keys(): + if x in JSONDir(config.join_stats_path('current/aggregated-publisher')).keys(): return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher @@ -131,7 +133,7 @@ def get_publisher_stats(publisher, stats_type='aggregated'): is not found. """ try: - return JSONDir('./stats-calculated/current/{0}-publisher/{1}'.format(stats_type, publisher)) + return JSONDir(config.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) except IOError: return {} @@ -143,7 +145,7 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - with open('registry_id_relationships.csv') as f: + with open(config.join_base_path('registry_id_relationships.csv')) as f: reader = csv.DictReader(f) # Load this data into a dictonary registry_matches = { @@ -182,33 +184,33 @@ def deep_merge(obj1, obj2): current_stats = { - 'aggregated': JSONDir('./stats-calculated/current/aggregated'), - 'aggregated_file': JSONDir('./stats-calculated/current/aggregated-file'), - 'inverted_publisher': JSONDir('./stats-calculated/current/inverted-publisher'), - 'inverted_file': JSONDir('./stats-calculated/current/inverted-file'), - 'inverted_file_publisher': JSONDir('./stats-calculated/current/inverted-file-publisher'), + 'aggregated': JSONDir(config.join_stats_path('current/aggregated')), + 'aggregated_file': JSONDir(config.join_stats_path('current/aggregated-file')), + 'inverted_publisher': JSONDir(config.join_stats_path('current/inverted-publisher')), + 'inverted_file': JSONDir(config.join_stats_path('current/inverted-file')), + 'inverted_file_publisher': JSONDir(config.join_stats_path('current/inverted-file-publisher')), 'download_errors': [] } -ckan_publishers = JSONDir('./data/ckan_publishers') -github_issues = 
JSONDir('./data/github/publishers') -ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) +ckan_publishers = JSONDir(config.join_data_path('ckan_publishers')) +github_issues = JSONDir(config.join_data_path('github/publishers')) +ckan = json.load(open(config.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) dataset_to_publisher_dict = { dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() } -metadata = json.load(open('./stats-calculated/metadata.json'), object_pairs_hook=OrderedDict) -with open('./data/downloads/errors') as fp: +metadata = json.load(open(config.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) +with open(config.join_data_path('downloads/errors')) as fp: for line in fp: if line != '.\n': current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) sources105 = [ - './data/schemas/1.05/iati-activities-schema.xsd', - './data/schemas/1.05/iati-organisations-schema.xsd'] + config.join_data_path('schemas/1.05/iati-activities-schema.xsd'), + config.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] sources203 = [ - './data/schemas/2.03/iati-activities-schema.xsd', - './data/schemas/2.03/iati-organisations-schema.xsd'] + config.join_data_path('schemas/2.03/iati-activities-schema.xsd'), + config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] schema105 = XMLSchema(sources105) schema203 = XMLSchema(sources203) @@ -237,7 +239,7 @@ def transform_codelist_mapping_keys(codelist_mapping): def create_codelist_mapping(major_version): codelist_mapping = {} - for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version))): + for x in json.load(open(config.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): if 'condition' in x: pref, attr = x['path'].rsplit('/', 1) path = '{0}[{1}]/{2}'.format( @@ -255,12 +257,12 @@ def create_codelist_mapping(major_version): # 
Create a big dictionary of all codelist values by version and codelist name codelist_sets = { major_version: { - cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() + cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} codelist_lookup = { major_version: { - cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() + cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} # Simple look up to map publisher id to a publishers given name (title) @@ -270,11 +272,11 @@ def create_codelist_mapping(major_version): publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher -secondary_publishers = [publisher for publisher, stats in JSONDir('./stats-calculated/current/aggregated-publisher').items() +secondary_publishers = [publisher for publisher, stats in JSONDir(config.join_stats_path('current/aggregated-publisher')).items() if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0] try: - dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open('data/dac2012.csv'))} + dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(config.join_data_path('dac2012.csv')))} except IOError: dac2012 = {} diff --git a/licenses.py b/licenses.py index 4a15ee71d3..5d090a6a43 100644 --- a/licenses.py +++ b/licenses.py @@ -1,7 +1,8 @@ import json from collections import OrderedDict -from flask import render_template +from flask 
import render_template +import config license_names = { 'notspecified': 'Other::License Not Specified', @@ -84,10 +85,10 @@ 'zpl': 'OSI Approved::Zope Public License', 'zlib-license': 'OSI Approved::zlib/libpng license'} -with open('./stats-calculated/licenses.json') as handler: +with open(config.join_stats_path('licenses.json')) as handler: license_urls = json.load(handler) -with open('./stats-calculated/ckan.json') as handler: +with open(config.join_stats_path('ckan.json')) as handler: ckan = json.load(handler, object_pairs_hook=OrderedDict) licenses = [ diff --git a/make_csv.py b/make_csv.py index 0c283e7f62..5d4410e09d 100644 --- a/make_csv.py +++ b/make_csv.py @@ -1,7 +1,6 @@ # Script to generate CSV files from data in the 'stats-calculated' folder, # and extra logic in other files in this repository import csv -import os import data # Timeliness CSV files (frequency and timelag) @@ -22,6 +21,8 @@ # Humanitarian CSV file import humanitarian +import config + publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} @@ -47,7 +48,7 @@ def publisher_dicts(): } -with open(os.path.join('out', 'publishers.csv'), 'w') as fp: +with open(config.join_out_path('publishers.csv'), 'w') as fp: writer = csv.DictWriter(fp, [ 'Publisher Name', 'Publisher Registry Id', @@ -69,21 +70,21 @@ def publisher_dicts(): publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) -with open(os.path.join('out', 'elements.csv'), 'w') as fp: +with open(config.join_out_path('elements.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): publisher_dict['Element'] = element writer.writerow(publisher_dict) -with open(os.path.join('out', 'elements_total.csv'), 'w') as fp: +with open(config.join_out_path('elements_total.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + 
publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): publisher_dict['Element'] = element writer.writerow(publisher_dict) -with open(os.path.join('out', 'registry.csv'), 'w') as fp: +with open(config.join_out_path('registry.csv'), 'w') as fp: keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] writer = csv.DictWriter(fp, keys) writer.writeheader() @@ -92,26 +93,26 @@ def publisher_dicts(): previous_months = timeliness.previous_months_reversed -with open(os.path.join('out', 'timeliness_frequency.csv'), 'w') as fp: +with open(config.join_out_path('timeliness_frequency.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) -with open(os.path.join('out', 'timeliness_timelag.csv'), 'w') as fp: +with open(config.join_out_path('timeliness_timelag.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in 
previous_months] + [assessment]) -with open(os.path.join('out', 'forwardlooking.csv'), 'w') as fp: +with open(config.join_out_path('forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) for row in forwardlooking.table(): writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) for tab in comprehensiveness.columns.keys(): - with open(os.path.join('out', 'comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + with open(config.join_out_path('comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = csv.writer(fp) if tab == 'financials': writer.writerow(['Publisher Name', 'Publisher Registry Id'] + @@ -164,7 +165,7 @@ def publisher_dicts(): # row['spend_data_error_reported_flag'] # ]) -with open(os.path.join('out', 'summary_stats.csv'), 'w') as fp: +with open(config.join_out_path('summary_stats.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) @@ -172,7 +173,7 @@ def publisher_dicts(): # Write each row writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) -with open(os.path.join('out', 'humanitarian.csv'), 'w') as fp: +with open(config.join_out_path('humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow([ diff --git a/plots.py b/plots.py index aaa69948f5..a2b8ad3a18 100644 --- a/plots.py +++ b/plots.py @@ -9,16 +9,18 @@ import csv import common import data +import config from vars import expected_versions # noqa: F401 import matplotlib as mpl mpl.use('Agg') import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 + # Import 
failed_downloads as a global -failed_downloads = csv.reader(open('data/downloads/history.csv')) +failed_downloads = csv.reader(open(config.join_data_path('downloads/history.csv'))) -gitaggregate_publisher = data.JSONDir('./stats-calculated/gitaggregate-publisher-dated') +gitaggregate_publisher = data.JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) class AugmentedJSONDir(data.JSONDir): @@ -88,7 +90,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): else: fig_legend.legend(plots.values(), plots.keys(), loc='center', ncol=4) fig_legend.set_size_inches(600.0 / dpi, 100.0 / dpi) - fig_legend.savefig('out/{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2])) + fig_legend.savefig(config.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) else: keys = None ax.plot(x_values, y_values) @@ -115,10 +117,10 @@ def make_plot(stat_path, git_stats, img_prefix=''): ax.ticklabel_format(axis='y', style='plain', useOffset=False) - fig.savefig('out/{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else ''), dpi=dpi) + fig.savefig(config.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) plt.close('all') - fn = 'out/{0}{1}.csv'.format(img_prefix, stat_name) + fn = config.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) with open(fn, 'w') as fp: writer = csv.writer(fp) if keys: @@ -136,7 +138,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): # Load aggregated stats for all data print("All data") -git_stats = AugmentedJSONDir('./stats-calculated/gitaggregate-dated') +git_stats = AugmentedJSONDir(config.join_stats_path('gitaggregate-dated')) for stat_path in [ 'activities', @@ -166,11 +168,11 @@ def make_plot(stat_path, git_stats, img_prefix=''): del git_stats try: - os.makedirs('out/publisher_imgs') + os.makedirs(config.join_out_path('publisher_imgs')) except OSError: pass -git_stats_publishers = 
AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') +git_stats_publishers = AugmentedJSONDir(config.join_stats_path('gitaggregate-publisher-dated/')) for publisher, git_stats_publisher in git_stats_publishers.items(): for stat_path in [ 'activities', diff --git a/speakers_kit.py b/speakers_kit.py index 8c33b305d1..b6e517d5f5 100644 --- a/speakers_kit.py +++ b/speakers_kit.py @@ -4,17 +4,19 @@ from collections import defaultdict from itertools import zip_longest +import config + def codelist_dict(codelist_path): codelist_json = json.load(open(codelist_path)) return {c['code']: c['name'] for c in codelist_json['data']} -organisation_type_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/OrganisationType.json') -country_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/Country.json') -region_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/Region.json') +organisation_type_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) +country_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) +region_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) -aggregated_publisher = data.JSONDir('./stats-calculated/current/aggregated-publisher/') +aggregated_publisher = data.JSONDir(config.join_stats_path('current/aggregated-publisher/')) activities_by = defaultdict(lambda: defaultdict(int)) publishers_by = defaultdict(lambda: defaultdict(int)) @@ -46,7 +48,7 @@ def codelist_dict(codelist_path): fieldnames = ['publisher_type', 'publishers_by_type', '', 'publisher_country', 'publishers_by_country', '', 'date', 'publishers_quarterly', '', 'activity_country', 'activities_by_country', '', 'activity_region', 'activities_by_region'] publishers_quarterly = [] -publishers_by_date = json.load(open('./stats-calculated/gitaggregate-dated/publishers.json')) +publishers_by_date = 
json.load(open(config.join_stats_path('gitaggregate-dated/publishers.json'))) for date, publishers in sorted(publishers_by_date.items()): if (date[8:10] == '30' and date[5:7] in ['06', '09']) or (date[8:10] == '31' and date[5:7] in ['03', '12']): publishers_quarterly.append((date, publishers)) @@ -56,7 +58,7 @@ def sort_second(x): return sorted(x, key=lambda y: y[1], reverse=True) -with open('out/speakers_kit.csv', 'w') as fp: +with open(config.join_out_path('speakers_kit.csv'), 'w') as fp: writer = csv.DictWriter(fp, fieldnames) writer.writeheader() for publishers_by_type, publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in zip_longest( diff --git a/timeliness.py b/timeliness.py index 5cc1b70848..96f8f00791 100644 --- a/timeliness.py +++ b/timeliness.py @@ -4,6 +4,7 @@ import datetime from dateutil.relativedelta import relativedelta from collections import defaultdict, Counter +import config def short_month(month_str): @@ -58,7 +59,7 @@ def publisher_frequency(): """ # Load all the data from 'gitaggregate-publisher-dated' into memory - gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') + gitaggregate_publisher = JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) # Loop over each publisher - i.e. 
a publisher folder within 'gitaggregate-publisher-dated' for publisher, agg in gitaggregate_publisher.items(): @@ -171,7 +172,7 @@ def first_published_band_index(first_published_band): def publisher_timelag(): - return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir('./stats-calculated/current/aggregated-publisher').items()] + return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(config.join_stats_path('current/aggregated-publisher')).items()] def publisher_timelag_sorted(): @@ -204,7 +205,7 @@ def has_future_transactions(publisher): if transaction_date and transaction_date > datetime.date.today(): return 2 - gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated').get(publisher, {}) + gitaggregate_publisher = JSONDir(config.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) for date, activity_blacklist in gitaggregate_publisher.get('activities_with_future_transactions', {}).items(): if parse_iso_date(date) >= mindate and activity_blacklist: From 1a83f7d0ba0289d7e4c43936cea739be9a3b19bf Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 25 Sep 2024 11:59:44 +0100 Subject: [PATCH 317/375] refactor: Move Dashboard source code into dedicated directory Moved source code (not including code to fetch data), templates and static content into a dedicated source code folder to accommodate the transition to Django's directory structure. To make the Dashboard work the path constants were updated in config.py and some small path-related changes to the tests were applied so that they run in the new location. 
--- pytest.ini | 2 +- common.py => src/common.py | 0 comprehensiveness.py => src/comprehensiveness.py | 0 config.py => src/config.py | 8 ++++---- coverage.py => src/coverage.py | 0 data.py => src/data.py | 0 forwardlooking.py => src/forwardlooking.py | 0 humanitarian.py => src/humanitarian.py | 0 licenses.py => src/licenses.py | 0 make_csv.py => src/make_csv.py | 0 make_html.py => src/make_html.py | 0 plots.py => src/plots.py | 0 speakers_kit.py => src/speakers_kit.py | 0 {static => src/static}/.nojekyll | 0 {static => src/static}/img/favicon-16x16.png | Bin {static => src/static}/img/favicon-32x32.png | Bin {static => src/static}/img/favicon.ico | Bin {static => src/static}/img/tablesorter-icons.gif | Bin {static => src/static}/style.css | 0 summary_stats.py => src/summary_stats.py | 0 {templates => src/templates}/_partials/boxes.html | 0 .../_partials/tablesorter_instructions.html | 0 {templates => src/templates}/activities.html | 0 {templates => src/templates}/base.html | 0 {templates => src/templates}/booleans.html | 0 {templates => src/templates}/codelist.html | 0 {templates => src/templates}/codelists.html | 0 {templates => src/templates}/comprehensiveness.html | 0 .../templates}/comprehensiveness_base.html | 0 .../templates}/comprehensiveness_core.html | 0 .../templates}/comprehensiveness_financials.html | 0 .../templates}/comprehensiveness_valueadded.html | 0 {templates => src/templates}/coverage.html | 0 {templates => src/templates}/data_quality.html | 0 {templates => src/templates}/dates.html | 0 {templates => src/templates}/download.html | 0 {templates => src/templates}/element.html | 0 {templates => src/templates}/elements.html | 0 {templates => src/templates}/exploring_data.html | 0 {templates => src/templates}/faq.html | 0 {templates => src/templates}/files.html | 0 {templates => src/templates}/forwardlooking.html | 0 {templates => src/templates}/headlines.html | 0 {templates => src/templates}/humanitarian.html | 0 {templates => 
src/templates}/identifiers.html | 0 {templates => src/templates}/index.html | 0 {templates => src/templates}/license.html | 0 {templates => src/templates}/licenses.html | 0 .../templates}/org_id_table_cells.html | 0 .../templates}/org_id_table_header.html | 0 {templates => src/templates}/org_ids.html | 0 {templates => src/templates}/org_type.html | 0 {templates => src/templates}/organisation.html | 0 {templates => src/templates}/publisher.html | 0 {templates => src/templates}/publishers.html | 0 {templates => src/templates}/publishing_stats.html | 0 .../templates}/registration_agencies.html | 0 {templates => src/templates}/reporting_orgs.html | 0 {templates => src/templates}/section_index.html | 0 {templates => src/templates}/summary_stats.html | 0 {templates => src/templates}/timeliness.html | 0 {templates => src/templates}/timeliness_base.html | 0 .../templates}/timeliness_timelag.html | 0 {templates => src/templates}/traceability.html | 0 {templates => src/templates}/validation.html | 0 {templates => src/templates}/versions.html | 0 {templates => src/templates}/xml.html | 0 src/tests/__init__.py | 0 {tests => src/tests}/test_comprehensiveness.py | 4 ++-- {tests => src/tests}/test_timeliness.py | 4 ++-- text.py => src/text.py | 0 timeliness.py => src/timeliness.py | 0 vars.py => src/vars.py | 0 73 files changed, 9 insertions(+), 9 deletions(-) rename common.py => src/common.py (100%) rename comprehensiveness.py => src/comprehensiveness.py (100%) rename config.py => src/config.py (89%) rename coverage.py => src/coverage.py (100%) rename data.py => src/data.py (100%) rename forwardlooking.py => src/forwardlooking.py (100%) rename humanitarian.py => src/humanitarian.py (100%) rename licenses.py => src/licenses.py (100%) rename make_csv.py => src/make_csv.py (100%) rename make_html.py => src/make_html.py (100%) rename plots.py => src/plots.py (100%) rename speakers_kit.py => src/speakers_kit.py (100%) rename {static => src/static}/.nojekyll (100%) rename {static => 
src/static}/img/favicon-16x16.png (100%) rename {static => src/static}/img/favicon-32x32.png (100%) rename {static => src/static}/img/favicon.ico (100%) rename {static => src/static}/img/tablesorter-icons.gif (100%) rename {static => src/static}/style.css (100%) rename summary_stats.py => src/summary_stats.py (100%) rename {templates => src/templates}/_partials/boxes.html (100%) rename {templates => src/templates}/_partials/tablesorter_instructions.html (100%) rename {templates => src/templates}/activities.html (100%) rename {templates => src/templates}/base.html (100%) rename {templates => src/templates}/booleans.html (100%) rename {templates => src/templates}/codelist.html (100%) rename {templates => src/templates}/codelists.html (100%) rename {templates => src/templates}/comprehensiveness.html (100%) rename {templates => src/templates}/comprehensiveness_base.html (100%) rename {templates => src/templates}/comprehensiveness_core.html (100%) rename {templates => src/templates}/comprehensiveness_financials.html (100%) rename {templates => src/templates}/comprehensiveness_valueadded.html (100%) rename {templates => src/templates}/coverage.html (100%) rename {templates => src/templates}/data_quality.html (100%) rename {templates => src/templates}/dates.html (100%) rename {templates => src/templates}/download.html (100%) rename {templates => src/templates}/element.html (100%) rename {templates => src/templates}/elements.html (100%) rename {templates => src/templates}/exploring_data.html (100%) rename {templates => src/templates}/faq.html (100%) rename {templates => src/templates}/files.html (100%) rename {templates => src/templates}/forwardlooking.html (100%) rename {templates => src/templates}/headlines.html (100%) rename {templates => src/templates}/humanitarian.html (100%) rename {templates => src/templates}/identifiers.html (100%) rename {templates => src/templates}/index.html (100%) rename {templates => src/templates}/license.html (100%) rename {templates => 
src/templates}/licenses.html (100%) rename {templates => src/templates}/org_id_table_cells.html (100%) rename {templates => src/templates}/org_id_table_header.html (100%) rename {templates => src/templates}/org_ids.html (100%) rename {templates => src/templates}/org_type.html (100%) rename {templates => src/templates}/organisation.html (100%) rename {templates => src/templates}/publisher.html (100%) rename {templates => src/templates}/publishers.html (100%) rename {templates => src/templates}/publishing_stats.html (100%) rename {templates => src/templates}/registration_agencies.html (100%) rename {templates => src/templates}/reporting_orgs.html (100%) rename {templates => src/templates}/section_index.html (100%) rename {templates => src/templates}/summary_stats.html (100%) rename {templates => src/templates}/timeliness.html (100%) rename {templates => src/templates}/timeliness_base.html (100%) rename {templates => src/templates}/timeliness_timelag.html (100%) rename {templates => src/templates}/traceability.html (100%) rename {templates => src/templates}/validation.html (100%) rename {templates => src/templates}/versions.html (100%) rename {templates => src/templates}/xml.html (100%) create mode 100644 src/tests/__init__.py rename {tests => src/tests}/test_comprehensiveness.py (92%) rename {tests => src/tests}/test_timeliness.py (92%) rename text.py => src/text.py (100%) rename timeliness.py => src/timeliness.py (100%) rename vars.py => src/vars.py (100%) diff --git a/pytest.ini b/pytest.ini index d85657b286..cebc5c7e27 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -testpaths = tests +testpaths = src/tests norecursedirs = *__pycache__* *.pytest_cache* console_output_style = count diff --git a/common.py b/src/common.py similarity index 100% rename from common.py rename to src/common.py diff --git a/comprehensiveness.py b/src/comprehensiveness.py similarity index 100% rename from comprehensiveness.py rename to src/comprehensiveness.py diff --git 
a/config.py b/src/config.py similarity index 89% rename from config.py rename to src/config.py index 9b7e9e1906..0fd3c199fd 100644 --- a/config.py +++ b/src/config.py @@ -7,10 +7,10 @@ import os.path -STATS_DIRECTORY = "./stats-calculated" -DATA_DIRECTORY = "./data" -BASE_DIRECTORY = "./" -OUT_DIRECTORY = "./out" +STATS_DIRECTORY = "../stats-calculated" +DATA_DIRECTORY = "../data" +BASE_DIRECTORY = "../" +OUT_DIRECTORY = "../out" def join_stats_path(p: str) -> str: diff --git a/coverage.py b/src/coverage.py similarity index 100% rename from coverage.py rename to src/coverage.py diff --git a/data.py b/src/data.py similarity index 100% rename from data.py rename to src/data.py diff --git a/forwardlooking.py b/src/forwardlooking.py similarity index 100% rename from forwardlooking.py rename to src/forwardlooking.py diff --git a/humanitarian.py b/src/humanitarian.py similarity index 100% rename from humanitarian.py rename to src/humanitarian.py diff --git a/licenses.py b/src/licenses.py similarity index 100% rename from licenses.py rename to src/licenses.py diff --git a/make_csv.py b/src/make_csv.py similarity index 100% rename from make_csv.py rename to src/make_csv.py diff --git a/make_html.py b/src/make_html.py similarity index 100% rename from make_html.py rename to src/make_html.py diff --git a/plots.py b/src/plots.py similarity index 100% rename from plots.py rename to src/plots.py diff --git a/speakers_kit.py b/src/speakers_kit.py similarity index 100% rename from speakers_kit.py rename to src/speakers_kit.py diff --git a/static/.nojekyll b/src/static/.nojekyll similarity index 100% rename from static/.nojekyll rename to src/static/.nojekyll diff --git a/static/img/favicon-16x16.png b/src/static/img/favicon-16x16.png similarity index 100% rename from static/img/favicon-16x16.png rename to src/static/img/favicon-16x16.png diff --git a/static/img/favicon-32x32.png b/src/static/img/favicon-32x32.png similarity index 100% rename from static/img/favicon-32x32.png 
rename to src/static/img/favicon-32x32.png diff --git a/static/img/favicon.ico b/src/static/img/favicon.ico similarity index 100% rename from static/img/favicon.ico rename to src/static/img/favicon.ico diff --git a/static/img/tablesorter-icons.gif b/src/static/img/tablesorter-icons.gif similarity index 100% rename from static/img/tablesorter-icons.gif rename to src/static/img/tablesorter-icons.gif diff --git a/static/style.css b/src/static/style.css similarity index 100% rename from static/style.css rename to src/static/style.css diff --git a/summary_stats.py b/src/summary_stats.py similarity index 100% rename from summary_stats.py rename to src/summary_stats.py diff --git a/templates/_partials/boxes.html b/src/templates/_partials/boxes.html similarity index 100% rename from templates/_partials/boxes.html rename to src/templates/_partials/boxes.html diff --git a/templates/_partials/tablesorter_instructions.html b/src/templates/_partials/tablesorter_instructions.html similarity index 100% rename from templates/_partials/tablesorter_instructions.html rename to src/templates/_partials/tablesorter_instructions.html diff --git a/templates/activities.html b/src/templates/activities.html similarity index 100% rename from templates/activities.html rename to src/templates/activities.html diff --git a/templates/base.html b/src/templates/base.html similarity index 100% rename from templates/base.html rename to src/templates/base.html diff --git a/templates/booleans.html b/src/templates/booleans.html similarity index 100% rename from templates/booleans.html rename to src/templates/booleans.html diff --git a/templates/codelist.html b/src/templates/codelist.html similarity index 100% rename from templates/codelist.html rename to src/templates/codelist.html diff --git a/templates/codelists.html b/src/templates/codelists.html similarity index 100% rename from templates/codelists.html rename to src/templates/codelists.html diff --git a/templates/comprehensiveness.html 
b/src/templates/comprehensiveness.html similarity index 100% rename from templates/comprehensiveness.html rename to src/templates/comprehensiveness.html diff --git a/templates/comprehensiveness_base.html b/src/templates/comprehensiveness_base.html similarity index 100% rename from templates/comprehensiveness_base.html rename to src/templates/comprehensiveness_base.html diff --git a/templates/comprehensiveness_core.html b/src/templates/comprehensiveness_core.html similarity index 100% rename from templates/comprehensiveness_core.html rename to src/templates/comprehensiveness_core.html diff --git a/templates/comprehensiveness_financials.html b/src/templates/comprehensiveness_financials.html similarity index 100% rename from templates/comprehensiveness_financials.html rename to src/templates/comprehensiveness_financials.html diff --git a/templates/comprehensiveness_valueadded.html b/src/templates/comprehensiveness_valueadded.html similarity index 100% rename from templates/comprehensiveness_valueadded.html rename to src/templates/comprehensiveness_valueadded.html diff --git a/templates/coverage.html b/src/templates/coverage.html similarity index 100% rename from templates/coverage.html rename to src/templates/coverage.html diff --git a/templates/data_quality.html b/src/templates/data_quality.html similarity index 100% rename from templates/data_quality.html rename to src/templates/data_quality.html diff --git a/templates/dates.html b/src/templates/dates.html similarity index 100% rename from templates/dates.html rename to src/templates/dates.html diff --git a/templates/download.html b/src/templates/download.html similarity index 100% rename from templates/download.html rename to src/templates/download.html diff --git a/templates/element.html b/src/templates/element.html similarity index 100% rename from templates/element.html rename to src/templates/element.html diff --git a/templates/elements.html b/src/templates/elements.html similarity index 100% rename from 
templates/elements.html rename to src/templates/elements.html diff --git a/templates/exploring_data.html b/src/templates/exploring_data.html similarity index 100% rename from templates/exploring_data.html rename to src/templates/exploring_data.html diff --git a/templates/faq.html b/src/templates/faq.html similarity index 100% rename from templates/faq.html rename to src/templates/faq.html diff --git a/templates/files.html b/src/templates/files.html similarity index 100% rename from templates/files.html rename to src/templates/files.html diff --git a/templates/forwardlooking.html b/src/templates/forwardlooking.html similarity index 100% rename from templates/forwardlooking.html rename to src/templates/forwardlooking.html diff --git a/templates/headlines.html b/src/templates/headlines.html similarity index 100% rename from templates/headlines.html rename to src/templates/headlines.html diff --git a/templates/humanitarian.html b/src/templates/humanitarian.html similarity index 100% rename from templates/humanitarian.html rename to src/templates/humanitarian.html diff --git a/templates/identifiers.html b/src/templates/identifiers.html similarity index 100% rename from templates/identifiers.html rename to src/templates/identifiers.html diff --git a/templates/index.html b/src/templates/index.html similarity index 100% rename from templates/index.html rename to src/templates/index.html diff --git a/templates/license.html b/src/templates/license.html similarity index 100% rename from templates/license.html rename to src/templates/license.html diff --git a/templates/licenses.html b/src/templates/licenses.html similarity index 100% rename from templates/licenses.html rename to src/templates/licenses.html diff --git a/templates/org_id_table_cells.html b/src/templates/org_id_table_cells.html similarity index 100% rename from templates/org_id_table_cells.html rename to src/templates/org_id_table_cells.html diff --git a/templates/org_id_table_header.html 
b/src/templates/org_id_table_header.html similarity index 100% rename from templates/org_id_table_header.html rename to src/templates/org_id_table_header.html diff --git a/templates/org_ids.html b/src/templates/org_ids.html similarity index 100% rename from templates/org_ids.html rename to src/templates/org_ids.html diff --git a/templates/org_type.html b/src/templates/org_type.html similarity index 100% rename from templates/org_type.html rename to src/templates/org_type.html diff --git a/templates/organisation.html b/src/templates/organisation.html similarity index 100% rename from templates/organisation.html rename to src/templates/organisation.html diff --git a/templates/publisher.html b/src/templates/publisher.html similarity index 100% rename from templates/publisher.html rename to src/templates/publisher.html diff --git a/templates/publishers.html b/src/templates/publishers.html similarity index 100% rename from templates/publishers.html rename to src/templates/publishers.html diff --git a/templates/publishing_stats.html b/src/templates/publishing_stats.html similarity index 100% rename from templates/publishing_stats.html rename to src/templates/publishing_stats.html diff --git a/templates/registration_agencies.html b/src/templates/registration_agencies.html similarity index 100% rename from templates/registration_agencies.html rename to src/templates/registration_agencies.html diff --git a/templates/reporting_orgs.html b/src/templates/reporting_orgs.html similarity index 100% rename from templates/reporting_orgs.html rename to src/templates/reporting_orgs.html diff --git a/templates/section_index.html b/src/templates/section_index.html similarity index 100% rename from templates/section_index.html rename to src/templates/section_index.html diff --git a/templates/summary_stats.html b/src/templates/summary_stats.html similarity index 100% rename from templates/summary_stats.html rename to src/templates/summary_stats.html diff --git a/templates/timeliness.html 
b/src/templates/timeliness.html similarity index 100% rename from templates/timeliness.html rename to src/templates/timeliness.html diff --git a/templates/timeliness_base.html b/src/templates/timeliness_base.html similarity index 100% rename from templates/timeliness_base.html rename to src/templates/timeliness_base.html diff --git a/templates/timeliness_timelag.html b/src/templates/timeliness_timelag.html similarity index 100% rename from templates/timeliness_timelag.html rename to src/templates/timeliness_timelag.html diff --git a/templates/traceability.html b/src/templates/traceability.html similarity index 100% rename from templates/traceability.html rename to src/templates/traceability.html diff --git a/templates/validation.html b/src/templates/validation.html similarity index 100% rename from templates/validation.html rename to src/templates/validation.html diff --git a/templates/versions.html b/src/templates/versions.html similarity index 100% rename from templates/versions.html rename to src/templates/versions.html diff --git a/templates/xml.html b/src/templates/xml.html similarity index 100% rename from templates/xml.html rename to src/templates/xml.html diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_comprehensiveness.py b/src/tests/test_comprehensiveness.py similarity index 92% rename from tests/test_comprehensiveness.py rename to src/tests/test_comprehensiveness.py index 5b4c4ae937..ab4eb1e163 100644 --- a/tests/test_comprehensiveness.py +++ b/src/tests/test_comprehensiveness.py @@ -1,7 +1,7 @@ -import sys +import os from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) +os.chdir(path.normpath(path.join(path.abspath(__file__), "../.."))) import comprehensiveness # noqa: E402 diff --git a/tests/test_timeliness.py b/src/tests/test_timeliness.py similarity index 92% rename from tests/test_timeliness.py rename to 
src/tests/test_timeliness.py index 727f807dac..c55861630a 100644 --- a/tests/test_timeliness.py +++ b/src/tests/test_timeliness.py @@ -1,10 +1,10 @@ """Testing of functions in timeliness.py """ -import sys +import os from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) +os.chdir(path.normpath(path.join(path.abspath(__file__), "../.."))) import timeliness # noqa: E402 diff --git a/text.py b/src/text.py similarity index 100% rename from text.py rename to src/text.py diff --git a/timeliness.py b/src/timeliness.py similarity index 100% rename from timeliness.py rename to src/timeliness.py diff --git a/vars.py b/src/vars.py similarity index 100% rename from vars.py rename to src/vars.py From 57582471602813c94070465e3b7e417af9f88454 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 25 Sep 2024 14:57:09 +0100 Subject: [PATCH 318/375] refactor: Added Django to requirements.in and recompiled Added Django to requirements.in and recompiled both requirements files. 
--- requirements.in | 1 + requirements.txt | 30 ++++++++++++++++++------------ requirements_dev.txt | 36 +++++++++++++++++++++++------------- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/requirements.in b/requirements.in index 3d4badb12a..83a6919f86 100644 --- a/requirements.in +++ b/requirements.in @@ -1,3 +1,4 @@ +django flask frozen-flask jinja2 diff --git a/requirements.txt b/requirements.txt index dd5f65eaa2..b1cb4aa5a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,29 +4,33 @@ # # pip-compile requirements.in # +asgiref==3.8.1 + # via django blinker==1.8.2 # via flask -certifi==2024.7.4 +certifi==2024.8.30 # via requests charset-normalizer==3.3.2 # via requests click==8.1.7 # via flask -contourpy==1.2.1 +contourpy==1.3.0 # via matplotlib cycler==0.12.1 # via matplotlib -elementpath==4.4.0 +django==5.1.1 + # via -r requirements.in +elementpath==4.5.0 # via xmlschema flask==3.0.3 # via # -r requirements.in # frozen-flask -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib frozen-flask==1.0.2 # via -r requirements.in -idna==3.7 +idna==3.10 # via requests itsdangerous==2.2.0 # via @@ -36,7 +40,7 @@ jinja2==3.1.4 # via # -r requirements.in # flask -kiwisolver==1.4.5 +kiwisolver==1.4.7 # via matplotlib lxml==5.3.0 # via -r requirements.in @@ -47,7 +51,7 @@ markupsafe==2.1.5 # werkzeug matplotlib==3.9.2 # via -r requirements.in -numpy==2.0.1 +numpy==2.1.1 # via # contourpy # matplotlib @@ -55,23 +59,25 @@ packaging==24.1 # via matplotlib pillow==10.4.0 # via matplotlib -pyparsing==3.1.2 +pyparsing==3.1.4 # via matplotlib python-dateutil==2.9.0.post0 # via # -r requirements.in # matplotlib -pytz==2024.1 +pytz==2024.2 # via -r requirements.in requests==2.32.3 # via -r requirements.in six==1.16.0 # via python-dateutil -urllib3==2.2.2 +sqlparse==0.5.1 + # via django +urllib3==2.2.3 # via requests -werkzeug==3.0.3 +werkzeug==3.0.4 # via # -r requirements.in # flask -xmlschema==3.3.2 +xmlschema==3.4.2 # via -r requirements.in diff --git 
a/requirements_dev.txt b/requirements_dev.txt index 1149eacc31..53760817b8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,11 +4,15 @@ # # pip-compile requirements_dev.in # +asgiref==3.8.1 + # via + # -r requirements.txt + # django blinker==1.8.2 # via # -r requirements.txt # flask -certifi==2024.7.4 +certifi==2024.8.30 # via # -r requirements.txt # requests @@ -20,7 +24,7 @@ click==8.1.7 # via # -r requirements.txt # flask -contourpy==1.2.1 +contourpy==1.3.0 # via # -r requirements.txt # matplotlib @@ -34,9 +38,11 @@ cycler==0.12.1 # via # -r requirements.txt # matplotlib +django==5.1.1 + # via -r requirements.txt docopt==0.6.2 # via coveralls -elementpath==4.4.0 +elementpath==4.5.0 # via # -r requirements.txt # xmlschema @@ -46,13 +52,13 @@ flask==3.0.3 # via # -r requirements.txt # frozen-flask -fonttools==4.53.1 +fonttools==4.54.1 # via # -r requirements.txt # matplotlib frozen-flask==1.0.2 # via -r requirements.txt -idna==3.7 +idna==3.10 # via # -r requirements.txt # requests @@ -66,7 +72,7 @@ jinja2==3.1.4 # via # -r requirements.txt # flask -kiwisolver==1.4.5 +kiwisolver==1.4.7 # via # -r requirements.txt # matplotlib @@ -81,7 +87,7 @@ matplotlib==3.9.2 # via -r requirements.txt mccabe==0.7.0 # via flake8 -numpy==2.0.1 +numpy==2.1.1 # via # -r requirements.txt # contourpy @@ -101,11 +107,11 @@ pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # via flake8 -pyparsing==3.1.2 +pyparsing==3.1.4 # via # -r requirements.txt # matplotlib -pytest==8.3.2 +pytest==8.3.3 # via # -r requirements_dev.in # pytest-cov @@ -115,7 +121,7 @@ python-dateutil==2.9.0.post0 # via # -r requirements.txt # matplotlib -pytz==2024.1 +pytz==2024.2 # via -r requirements.txt requests==2.32.3 # via @@ -125,13 +131,17 @@ six==1.16.0 # via # -r requirements.txt # python-dateutil -urllib3==2.2.2 +sqlparse==0.5.1 + # via + # -r requirements.txt + # django +urllib3==2.2.3 # via # -r requirements.txt # requests -werkzeug==3.0.3 +werkzeug==3.0.4 # via # -r requirements.txt # 
flask -xmlschema==3.3.2 +xmlschema==3.4.2 # via -r requirements.txt From a88f8a2b2be09d0ab3c8753da7de2e341ccf0051 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 25 Sep 2024 15:00:53 +0100 Subject: [PATCH 319/375] refactor: Added basic Django project structure Added basic Django project structure ready to be customised for the Dashboard. Doesn't really do anything right now but is ready to flesh out with Dashboard views. --- src/dashboard/__init__.py | 0 src/dashboard/asgi.py | 16 +++++ src/dashboard/settings.py | 123 ++++++++++++++++++++++++++++++++++++++ src/dashboard/urls.py | 22 +++++++ src/dashboard/wsgi.py | 16 +++++ src/manage.py | 22 +++++++ 6 files changed, 199 insertions(+) create mode 100644 src/dashboard/__init__.py create mode 100644 src/dashboard/asgi.py create mode 100644 src/dashboard/settings.py create mode 100644 src/dashboard/urls.py create mode 100644 src/dashboard/wsgi.py create mode 100755 src/manage.py diff --git a/src/dashboard/__init__.py b/src/dashboard/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/dashboard/asgi.py b/src/dashboard/asgi.py new file mode 100644 index 0000000000..bcbc3073e9 --- /dev/null +++ b/src/dashboard/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for IATI Dashboard project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') + +application = get_asgi_application() diff --git a/src/dashboard/settings.py b/src/dashboard/settings.py new file mode 100644 index 0000000000..b0a7eabd6e --- /dev/null +++ b/src/dashboard/settings.py @@ -0,0 +1,123 @@ +""" +Django settings for IATI Dashboard project. + +Generated by 'django-admin startproject' using Django 5.1.1. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/5.1/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.1/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = 'django-insecure-4i8e#n)gr2@wol5i4y@od0za_&y+idpz*-t5v)#-6s(&rhl=k&' + +# SECURITY WARNING: don't run with debug turned on in production! +DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'dashboard.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'dashboard.wsgi.application' + + +# Database +# https://docs.djangoproject.com/en/5.1/ref/settings/#databases + +DATABASES = { + 'default': { + 'ENGINE': 
'django.db.backends.sqlite3', + 'NAME': BASE_DIR / 'db.sqlite3', + } +} + + +# Password validation +# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.1/topics/i18n/ + +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.1/howto/static-files/ + +STATIC_URL = 'static/' + +# Default primary key field type +# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' diff --git a/src/dashboard/urls.py b/src/dashboard/urls.py new file mode 100644 index 0000000000..0e0fd5701c --- /dev/null +++ b/src/dashboard/urls.py @@ -0,0 +1,22 @@ +""" +URL configuration for IATI Dashboard project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.1/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib import admin +from django.urls import path + +urlpatterns = [ + path('admin/', admin.site.urls) +] diff --git a/src/dashboard/wsgi.py b/src/dashboard/wsgi.py new file mode 100644 index 0000000000..4d5897aeb4 --- /dev/null +++ b/src/dashboard/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for IATI Dashboard project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') + +application = get_wsgi_application() diff --git a/src/manage.py b/src/manage.py new file mode 100755 index 0000000000..73d4f5b5fe --- /dev/null +++ b/src/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() From e3b003df67f663fcf047aa66e3e87d020ac53e4e Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 1 Oct 2024 16:40:06 +0100 Subject: [PATCH 320/375] feat: Refactored and added logging to CSV and image generators Renamed plots.py to make_plots.py to be consistent with make_csv.py. Lightly refactored both scripts and added logging and progress bars to aid in development work. Required a minor change in requirements. 
Also changed destination folders to suit new structure for static files. --- requirements.in | 3 +- requirements.txt | 2 + requirements_dev.txt | 2 + src/make_csv.py | 315 +++++++++++++++----------------- src/{plots.py => make_plots.py} | 141 ++++++++------ 5 files changed, 243 insertions(+), 220 deletions(-) rename src/{plots.py => make_plots.py} (56%) diff --git a/requirements.in b/requirements.in index 83a6919f86..523b2f0a4e 100644 --- a/requirements.in +++ b/requirements.in @@ -10,4 +10,5 @@ xmlschema lxml requests markupsafe -itsdangerous \ No newline at end of file +itsdangerous +tqdm \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b1cb4aa5a2..afde658def 100644 --- a/requirements.txt +++ b/requirements.txt @@ -73,6 +73,8 @@ six==1.16.0 # via python-dateutil sqlparse==0.5.1 # via django +tqdm==4.66.5 + # via -r requirements.in urllib3==2.2.3 # via requests werkzeug==3.0.4 diff --git a/requirements_dev.txt b/requirements_dev.txt index 53760817b8..480e11b876 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -135,6 +135,8 @@ sqlparse==0.5.1 # via # -r requirements.txt # django +tqdm==4.66.5 + # via -r requirements.txt urllib3==2.2.3 # via # -r requirements.txt diff --git a/src/make_csv.py b/src/make_csv.py index 5d4410e09d..f34e2f9e3c 100644 --- a/src/make_csv.py +++ b/src/make_csv.py @@ -1,32 +1,25 @@ -# Script to generate CSV files from data in the 'stats-calculated' folder, -# and extra logic in other files in this repository +"""Generates CSV files from data in the 'stats-calculated' folder and using additional logic +""" import csv -import data +import os +import sys +import argparse +import logging -# Timeliness CSV files (frequency and timelag) +import data import timeliness - -# Forward-looking CSV file import forwardlooking - -# Comprehensiveness CSV files ('summary', 'core', 'financials' and 'valueadded') import comprehensiveness - -# # Coverage CSV file -# import coverage - -# Summary Stats CSV file 
import summary_stats - -# Humanitarian CSV file import humanitarian - import config -publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} + +logger = logging.getLogger(__name__) def publisher_dicts(): + publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} for publisher, activities in data.current_stats['inverted_publisher']['activities'].items(): if publisher not in data.ckan_publishers: continue @@ -48,154 +41,146 @@ def publisher_dicts(): } -with open(config.join_out_path('publishers.csv'), 'w') as fp: - writer = csv.DictWriter(fp, [ - 'Publisher Name', - 'Publisher Registry Id', - 'Activities', - 'Organisations', - 'Files', - 'Activity Files', - 'Organisation Files', - 'Total File Size', - 'Reporting Org on Registry', - 'Reporting Orgs in Data (count)', - 'Reporting Orgs in Data', - 'Hierarchies (count)', - 'Hierarchies', - ]) - writer.writeheader() - for d in publisher_dicts(): - writer.writerow(d) - -publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) - -with open(config.join_out_path('elements.csv'), 'w') as fp: - writer = csv.DictWriter(fp, ['Element'] + publishers) - writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): - publisher_dict['Element'] = element - writer.writerow(publisher_dict) - -with open(config.join_out_path('elements_total.csv'), 'w') as fp: - writer = csv.DictWriter(fp, ['Element'] + publishers) - writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): - publisher_dict['Element'] = element - writer.writerow(publisher_dict) - -with open(config.join_out_path('registry.csv'), 'w') as fp: - keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 
'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] - writer = csv.DictWriter(fp, keys) - writer.writeheader() - for publisher_json in data.ckan_publishers.values(): - writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) - -previous_months = timeliness.previous_months_reversed - -with open(config.join_out_path('timeliness_frequency.csv'), 'w') as fp: - writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) - for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): - writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) - -with open(config.join_out_path('timeliness_timelag.csv'), 'w') as fp: - writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) - for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): - writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) - -with open(config.join_out_path('forwardlooking.csv'), 'w') as fp: - writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) - for row in forwardlooking.table(): - writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in 
forwardlooking.years]) - -for tab in comprehensiveness.columns.keys(): - with open(config.join_out_path('comprehensiveness_{}.csv'.format(tab)), 'w') as fp: +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", action="store_true", help="Output progress to stdout") + args = parser.parse_args() + + if args.verbose: + logger.setLevel(logging.INFO) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + logger.info("Generating CSV files") + os.makedirs(config.join_out_path('data/csv'), exist_ok=True) + + logger.info("Generating publishers.csv") + with open(config.join_out_path('data/csv/publishers.csv'), 'w') as fp: + writer = csv.DictWriter(fp, [ + 'Publisher Name', + 'Publisher Registry Id', + 'Activities', + 'Organisations', + 'Files', + 'Activity Files', + 'Organisation Files', + 'Total File Size', + 'Reporting Org on Registry', + 'Reporting Orgs in Data (count)', + 'Reporting Orgs in Data', + 'Hierarchies (count)', + 'Hierarchies', + ]) + writer.writeheader() + for d in publisher_dicts(): + writer.writerow(d) + + logger.info("Generating elements.csv") + publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) + with open(config.join_out_path('data/csv/elements.csv'), 'w') as fp: + writer = csv.DictWriter(fp, ['Element'] + publishers) + writer.writeheader() + for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): + publisher_dict['Element'] = element + writer.writerow(publisher_dict) + + logger.info("Generating elements_total.csv") + with open(config.join_out_path('data/csv/elements_total.csv'), 'w') as fp: + writer = csv.DictWriter(fp, ['Element'] + publishers) + writer.writeheader() + for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): + publisher_dict['Element'] = element + writer.writerow(publisher_dict) + + logger.info("Generating registry.csv") + with open(config.join_out_path('data/csv/registry.csv'), 'w') as fp: + 
keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] + writer = csv.DictWriter(fp, keys) + writer.writeheader() + for publisher_json in data.ckan_publishers.values(): + writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys}) + + logger.info("Generating timeliness_frequency.csv") + previous_months = timeliness.previous_months_reversed + with open(config.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) + for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) + + logger.info("Generating timeliness_timelag.csv") + with open(config.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) + for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): + writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) + + logger.info("Generating forwardlooking.csv") + with open(config.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: + writer = csv.writer(fp) + writer.writerow(['Publisher Name', 
'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) + for row in forwardlooking.table(): + writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) + + for tab in comprehensiveness.columns.keys(): + logger.info("Generating comprehensiveness_{}.csv".format(tab)) + with open(config.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + writer = csv.writer(fp) + if tab == 'financials': + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + + [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + + [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]] + + ['Using budget-not-provided']) + for row in comprehensiveness.table(): + writer.writerow([row['publisher_title'], row['publisher']] + + [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + ['Yes' if row['flag'] else '-']) + else: + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + + [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + + [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]]) + for row in comprehensiveness.table(): + writer.writerow([row['publisher_title'], row['publisher']] + + [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + + [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) + + logger.info("Generating summary_stats.csv") + with open(config.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) + for row in 
summary_stats.table(): + # Write each row + writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) + + logger.info("Generating humanitarian.csv") + with open(config.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) - if tab == 'financials': - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + - [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]] + - ['Using budget-not-provided']) - for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] + - [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - ['Yes' if row['flag'] else '-']) - else: - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + - [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]]) - for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] + - [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) - -# with open(os.path.join('out', 'coverage.csv'), 'w') as fp: -# writer = csv.writer(fp) -# # Add column headers -# writer.writerow([ -# 'Publisher Name', -# 'Publisher Registry Id', -# '2014 IATI Spend (US $m)', -# '2015 IATI Spend (US $m)', -# '2014 Reference Spend (US $m)', -# '2015 Reference Spend (US $m)', -# '2015 Official Forecast (US $m)', -# 'Spend Ratio (%)', -# 'No reference data available (Historic publishers)', -# 'No reference data available (New publishers)', -# 'Data quality issue reported' -# ]) -# for row in coverage.table(): -# # Write each row -# 
writer.writerow([ -# row['publisher_title'], -# row['publisher'], -# row['iati_spend_2014'], -# row['iati_spend_2015'], -# row['reference_spend_2014'], -# row['reference_spend_2015'], -# row['official_forecast_2015'], -# row['spend_ratio'], -# row['no_data_flag_red'], -# row['no_data_flag_amber'], -# row['spend_data_error_reported_flag'] -# ]) - -with open(config.join_out_path('summary_stats.csv'), 'w') as fp: - writer = csv.writer(fp) - # Add column headers - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) - for row in summary_stats.table(): - # Write each row - writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) - -with open(config.join_out_path('humanitarian.csv'), 'w') as fp: - writer = csv.writer(fp) - # Add column headers - writer.writerow([ - 'Publisher Name', - 'Publisher Registry Id', - 'Publisher Type', - 'Number of Activities', - 'Publishing Humanitarian', - 'Using Humanitarian Attribute', - 'Appeal or Emergency Details', - 'Clusters', - 'Humanitarian Score' - ]) - for row in humanitarian.table(): + # Add column headers writer.writerow([ - row['publisher_title'], - row['publisher'], - row['publisher_type'], - row['num_activities'], - row['publishing_humanitarian'], - row['humanitarian_attrib'], - row['appeal_emergency'], - row['clusters'], - row['average'] + 'Publisher Name', + 'Publisher Registry Id', + 'Publisher Type', + 'Number of Activities', + 'Publishing Humanitarian', + 'Using Humanitarian Attribute', + 'Appeal or Emergency Details', + 'Clusters', + 'Humanitarian Score' ]) + for row in humanitarian.table(): + writer.writerow([ + row['publisher_title'], + row['publisher'], + row['publisher_type'], + row['num_activities'], + row['publishing_humanitarian'], + row['humanitarian_attrib'], + row['appeal_emergency'], + row['clusters'], + row['average'] + ]) + + +if __name__ == "__main__": + main() diff --git a/src/plots.py 
b/src/make_plots.py similarity index 56% rename from src/plots.py rename to src/make_plots.py index a2b8ad3a18..70d2665974 100644 --- a/src/plots.py +++ b/src/make_plots.py @@ -2,11 +2,15 @@ """ Generates static images of stats graphs using matplotlib. """ +import logging import datetime -import numpy as np # noqa: F401 -from collections import defaultdict +import argparse import os # noqa: F401 +from collections import defaultdict import csv + +import numpy as np # noqa: F401 +from tqdm import tqdm import common import data import config @@ -17,35 +21,37 @@ import matplotlib.dates as mdates # noqa: E402 -# Import failed_downloads as a global -failed_downloads = csv.reader(open(config.join_data_path('downloads/history.csv'))) - -gitaggregate_publisher = data.JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) +logger = logging.getLogger(__name__) class AugmentedJSONDir(data.JSONDir): + def __init__(self, folder, failed_downloads, gitaggregate_publisher): + super().__init__(folder) + self.failed_downloads = failed_downloads + self.gitaggregate_publisher = gitaggregate_publisher + def __getitem__(self, key): if key == 'failed_downloads': - return dict((row[0], row[1]) for row in failed_downloads) + return dict((row[0], row[1]) for row in self.failed_downloads) elif key == 'publisher_types': out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.items(): + for publisher, publisher_data in self.gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] for datestring, count in publisher_data['activities'].items(): out[datestring][organization_type] += 1 else: - print('Publisher not matched:', publisher) + logger.debug("Getting by publisher_type unmatched publisher <{}>".format(publisher)) return out elif key == 'activities_per_publisher_type': out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in 
gitaggregate_publisher.items(): + for publisher, publisher_data in self.gitaggregate_publisher.items(): if publisher in data.ckan_publishers: organization_type = common.get_publisher_type(publisher)['name'] for datestring, count in publisher_data['activities'].items(): out[datestring][organization_type] += count else: - print('Publisher not matched:', publisher) + logger.debug("Getting by activities_per_publisher_type unmatched publisher <{}>".format(publisher)) return out else: return super(AugmentedJSONDir, self).__getitem__(key) @@ -136,53 +142,80 @@ def make_plot(stat_path, git_stats, img_prefix=''): del writer -# Load aggregated stats for all data -print("All data") -git_stats = AugmentedJSONDir(config.join_stats_path('gitaggregate-dated')) - -for stat_path in [ - 'activities', - 'publishers', - 'activity_files', - 'organisation_files', - 'file_size', - 'failed_downloads', - 'invalidxml', - 'nonstandardroots', - 'unique_identifiers', - ('validation', lambda x: x == 'fail', ''), - ('publishers_validation', lambda x: x == 'fail', ''), - ('publisher_has_org_file', lambda x: x == 'no', ''), - ('versions', lambda x: x in expected_versions, '_expected'), - ('versions', lambda x: x not in expected_versions, '_other'), - ('publishers_per_version', lambda x: x in expected_versions, '_expected'), - ('publishers_per_version', lambda x: x not in expected_versions, '_other'), - ('file_size_bins', lambda x: True, ''), - ('publisher_types', lambda x: True, ''), - ('activities_per_publisher_type', lambda x: True, '') -]: - make_plot(stat_path, git_stats) - - -# Delete git_stats variable to save memory -del git_stats - -try: - os.makedirs(config.join_out_path('publisher_imgs')) -except OSError: - pass - -git_stats_publishers = AugmentedJSONDir(config.join_stats_path('gitaggregate-publisher-dated/')) -for publisher, git_stats_publisher in git_stats_publishers.items(): - for stat_path in [ +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", 
action="store_true", help="Generate images verbosely to stdout") + args = parser.parse_args() + + # Load data required for loading stats. + failed_downloads = csv.reader(open(config.join_data_path('downloads/history.csv'))) + gitaggregate_publisher = data.JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) + + # Generate plots for aggregated stats for all data. + logger.info("Generating plots for all aggregated data") + git_stats = AugmentedJSONDir(config.join_stats_path('gitaggregate-dated'), + failed_downloads, + gitaggregate_publisher) + os.makedirs(config.join_out_path('img/aggregate'), exist_ok=True) + + _paths = [ 'activities', + 'publishers', 'activity_files', 'organisation_files', 'file_size', + 'failed_downloads', 'invalidxml', 'nonstandardroots', - 'publisher_unique_identifiers', + 'unique_identifiers', ('validation', lambda x: x == 'fail', ''), - ('versions', lambda x: True, ''), - ]: - make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) + ('publishers_validation', lambda x: x == 'fail', ''), + ('publisher_has_org_file', lambda x: x == 'no', ''), + ('versions', lambda x: x in expected_versions, '_expected'), + ('versions', lambda x: x not in expected_versions, '_other'), + ('publishers_per_version', lambda x: x in expected_versions, '_expected'), + ('publishers_per_version', lambda x: x not in expected_versions, '_other'), + ('file_size_bins', lambda x: True, ''), + ('publisher_types', lambda x: True, ''), + ('activities_per_publisher_type', lambda x: True, '') + ] + with tqdm(total=len(_paths)) as pbar: + if args.verbose: + pbar.set_description("Generate aggregate plots") + for stat_path in _paths: + if args.verbose: + pbar.update() + make_plot(stat_path, git_stats, img_prefix='img/aggregate/') + + # Delete git_stats variable to save memory + del git_stats + + # Generate plots for each publisher. 
+ logger.info("Generating plots for all publishers") + git_stats_publishers = AugmentedJSONDir(config.join_stats_path('gitaggregate-publisher-dated/'), + failed_downloads, + gitaggregate_publisher) + os.makedirs(config.join_out_path('img/publishers'), exist_ok=True) + + with tqdm(total=len(git_stats_publishers)) as pbar: + if args.verbose: + pbar.set_description("Generate plots for all publishers") + for publisher, git_stats_publisher in git_stats_publishers.items(): + if args.verbose: + pbar.update() + for stat_path in [ + 'activities', + 'activity_files', + 'organisation_files', + 'file_size', + 'invalidxml', + 'nonstandardroots', + 'publisher_unique_identifiers', + ('validation', lambda x: x == 'fail', ''), + ('versions', lambda x: True, ''), + ]: + make_plot(stat_path, git_stats_publisher, img_prefix='img/publishers/{0}_'.format(publisher)) + + +if __name__ == "__main__": + main() From db2e923481119556915fc3726dbc10cf40a12d82 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 21 Oct 2024 13:45:13 +0100 Subject: [PATCH 321/375] refactor: Moved function used in templates into separate file. Created file for functions that are used in templates, and which aren't filters or tags, into a separate file. This commit moves firstint() from make_html.py. --- src/dashboard/template_funcs.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 src/dashboard/template_funcs.py diff --git a/src/dashboard/template_funcs.py b/src/dashboard/template_funcs.py new file mode 100644 index 0000000000..a01107e49f --- /dev/null +++ b/src/dashboard/template_funcs.py @@ -0,0 +1,8 @@ +import re + + +def firstint(s): + if s[0].startswith('<'): + return 0 + m = re.search(r'\d+', s[0]) + return int(m.group(0)) From 8c9fd6bec069cb2e17e2f2fcd1e98070cc2b110a Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 2 Oct 2024 18:29:59 +0100 Subject: [PATCH 322/375] refactor: Created views to render the top level pages. 
This commit implements a URL structure for the Dashboard and includes views for the top level navigation of the Dashboard as well as the main pages in the Headlines section. This required changes in the templates to remove Flask-specific template functions and minor syntax changes that prevented rendering. The views themselves will eventually benefit from some optimisation. Note, this commit is a breaking change for make_html.py and now the repo will not generate a Flask website. --- src/dashboard/jinja2.py | 17 ++ src/dashboard/settings.py | 15 ++ src/dashboard/urls.py | 58 +++++- src/dashboard/views.py | 174 ++++++++++++++++++ src/templates/_partials/boxes.html | 10 +- .../_partials/tablesorter_instructions.html | 2 +- src/templates/activities.html | 8 +- src/templates/base.html | 14 +- src/templates/files.html | 12 +- src/templates/index.html | 28 +-- src/templates/publishers.html | 14 +- src/templates/section_index.html | 6 +- 12 files changed, 313 insertions(+), 45 deletions(-) create mode 100644 src/dashboard/jinja2.py create mode 100644 src/dashboard/views.py diff --git a/src/dashboard/jinja2.py b/src/dashboard/jinja2.py new file mode 100644 index 0000000000..c4386c6c50 --- /dev/null +++ b/src/dashboard/jinja2.py @@ -0,0 +1,17 @@ +"""Jinja2 template configuration +""" +from django.templatetags.static import static +from django.urls import reverse + +from jinja2 import Environment + + +def environment(**options): + env = Environment(**options) + env.globals.update( + { + "static": static, + "url": reverse, + } + ) + return env diff --git a/src/dashboard/settings.py b/src/dashboard/settings.py index b0a7eabd6e..e30e28c5b6 100644 --- a/src/dashboard/settings.py +++ b/src/dashboard/settings.py @@ -65,6 +65,20 @@ ], }, }, + { + 'BACKEND': 'django.template.backends.jinja2.Jinja2', + 'DIRS': ["templates/"], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 
'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + 'environment': 'dashboard.jinja2.environment' + }, + }, ] WSGI_APPLICATION = 'dashboard.wsgi.application' @@ -116,6 +130,7 @@ # https://docs.djangoproject.com/en/5.1/howto/static-files/ STATIC_URL = 'static/' +STATICFILES_DIRS = [BASE_DIR / 'static',] # Default primary key field type # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field diff --git a/src/dashboard/urls.py b/src/dashboard/urls.py index 0e0fd5701c..a2ed4ab125 100644 --- a/src/dashboard/urls.py +++ b/src/dashboard/urls.py @@ -16,7 +16,63 @@ """ from django.contrib import admin from django.urls import path +# from django.shortcuts import redirect + +import dashboard.views + urlpatterns = [ - path('admin/', admin.site.urls) + path('admin/', admin.site.urls), + + # Top level dashboard pages. + path('', dashboard.views.index, name="dash-index"), + path('headlines', dashboard.views.headlines, name="dash-headlines"), + path('data-quality', dashboard.views.data_quality, name="dash-dataquality"), + path('publishing-statistics', dashboard.views.publishing_stats, name="dash-publishingstats"), + path('exploring-data', dashboard.views.exploring_data, name="dash-exploringdata"), + path('faq', dashboard.views.faq, name="dash-faq"), + + # Headlines pages and detail pages - placeholders for now. + path('headlines/publishers', dashboard.views.headlines_publishers, name="dash-headlines-publishers"), + path('headlines/files', dashboard.views.headlines_files, name="dash-headlines-files"), + path('headlines/activities', dashboard.views.headlines_activities, name="dash-headlines-activities"), + path('headlines/publishers/', dashboard.views.headlines_publisher_detail, name='dash-headlines-publisher-detail'), + + # Data quality pages. 
+ path('data-quality/download-errors', lambda x: None, name="dash-dataquality-download"), + path('data-quality/xml-errors', lambda x: None, name="dash-dataquality-xml"), + path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), + path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), + path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), + path('data-quality/licenses', lambda x: None, name="dash-dataquality-licenses"), + path('data-quality/identifiers', lambda x: None, name="dash-dataquality-identifiers"), + path('data-quality/reporting-orgs', lambda x: None, name="dash-dataquality-reportingorgs"), + + # Exploring data pages. + path('exploring-data/elements', lambda x: None, name="dash-exploringdata-elements"), + path('exploring-data/codelists', lambda x: None, name="dash-exploringdata-codelists"), + path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), + path('exploring-data/dates', lambda x: None, name="dash-exploringdata-dates"), + path('exploring-data/traceability', lambda x: None, name="dash-exploringdata-traceability"), + path('exploring-data/organisation-identifiers', lambda x: None, name="dash-exploringdata-orgids"), + + # Publishing statistics pages. 
+ path('publishing-statistics/timeliness', lambda x: None, name="dash-publishingstats-timeliness"), + path('publishing-statistics/forward-looking', lambda x: None, name="dash-publishingstats-forwardlooking"), + path('publishing-statistics/comprehensiveness', lambda x: None, name="dash-publishingstats-comprehensiveness"), + path('publishing-statistics/coverage', lambda x: None, name="dash-publishingstats-coverage"), + path('publishing-statistics/summary-statistics', lambda x: None, name="dash-publishingstats-summarystats"), + path('publishing-statistics/humanitarian-reporting', lambda x: None, name="dash-publishingstats-humanitarian"), + + # Licenses + path('licenses/', lambda x: None, name="dash-licence-detail"), + path('licenses', lambda x: None, name="dash-licences") + + # Redirects to support any users with bookmarks to pages on the old Dashboard. + # path('timeliness.html', redirect("dash-publishingstats-timeliness")), + # path('index.html', redirect("dash-index")), + # path('summary_stats.html', redirect("dash-publishingstats-summarystats")), + # path('exploring_data.html', redirect("dash-exploringdata")) + ] +# Unsure where "rulesets" and "registration_agencies" should belong - can't find the route to these in make_html.py diff --git a/src/dashboard/views.py b/src/dashboard/views.py new file mode 100644 index 0000000000..808005a916 --- /dev/null +++ b/src/dashboard/views.py @@ -0,0 +1,174 @@ +"""Views for the IATI Dashboard""" + +# Note: in the page views I am unsure where "rulesets" and "registration_agencies" should +# belong - they exist in text.page_tiles but I can't find the route to these in make_html.py +# so not sure where they should fit. I've not included them in the page_view_names so hopefully +# an exception will be raised if they are referenced somewhere. 
+ +import dateutil.parser +import subprocess + +from django.http import HttpResponse +from django.template import loader + +import config +import text +import dashboard.template_funcs + +from data import ( + ckan, + ckan_publishers, + codelist_mapping, + codelist_sets, + codelist_lookup, + current_stats, + dataset_to_publisher_dict, + github_issues, + get_publisher_stats, + MAJOR_VERSIONS, + metadata, + publisher_name, + publishers_ordered_by_title, + is_valid_element, + slugs) + + +COMMIT_HASH = subprocess.run('git show --format=%H --no-patch'.split(), + cwd=config.join_base_path(""), + capture_output=True).stdout.decode().strip() +STATS_COMMIT_HASH = subprocess.run('git -C stats-calculated show --format=%H --no-patch'.split(), + cwd=config.join_base_path(""), + capture_output=True).stdout.decode().strip() +STATS_GH_URL = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + STATS_COMMIT_HASH + + +def _make_context(page_name: str): + """Make a basic context dictionary for a given page + """ + context = dict( + page=page_name, + top_titles=text.top_titles, + page_titles=text.page_titles, + short_page_titles=text.short_page_titles, + page_leads=text.page_leads, + page_sub_leads=text.page_sub_leads, + top_navigation=text.top_navigation, + navigation=text.navigation, + navigation_reverse={page: k for k, pages in text.navigation.items() for page in pages}, + page_view_names={"index": "dash-index", + "headlines": "dash-headlines", + "data_quality": "dash-dataquality", + "publishing_stats": "dash-publishingstats", + "exploring_data": "dash-exploringdata", + "faq": "dash-faq", + + "publishers": "dash-headlines-publishers", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publisher": "dash-headlines-publisher-detail", + + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + 
"licenses": "dash-dataquality-licenses", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids", + + "timeliness": "dash-publishingstats-timeliness", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "coverage": "dash-publishingstats-coverage", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian" + }, + current_stats=current_stats, + publisher_name=publisher_name, + publishers_ordered_by_title=publishers_ordered_by_title, + ckan_publishers=ckan_publishers, + ckan=ckan, + codelist_lookup=codelist_lookup, + codelist_mapping=codelist_mapping, + codelist_sets=codelist_sets, + github_issues=github_issues, + MAJOR_VERSIONS=MAJOR_VERSIONS, + metadata=metadata, + slugs=slugs, + datetime_data=dateutil.parser.parse(metadata['created_at']).strftime('%-d %B %Y (at %H:%M %Z)'), + stats_url='https://stats.codeforiati.org', + stats_gh_url=STATS_GH_URL, + commit_hash=COMMIT_HASH, + stats_commit_hash=STATS_COMMIT_HASH, + func={"sorted": sorted, + "firstint": dashboard.template_funcs.firstint, + "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), + "get_publisher_stats": get_publisher_stats, + "is_valid_element": is_valid_element} + ) + context["navigation_reverse"].update({k: k for k in text.navigation}) + + return context + + +# +# Top level navigation pages. 
+# +def index(request): + template = loader.get_template("index.html") + return HttpResponse(template.render(_make_context("index"), request)) + + +def headlines(request): + template = loader.get_template("headlines.html") + return HttpResponse(template.render(_make_context("headlines"), request)) + + +def data_quality(request): + template = loader.get_template("data_quality.html") + return HttpResponse(template.render(_make_context("data_quality"), request)) + + +def publishing_stats(request): + template = loader.get_template("publishing_stats.html") + return HttpResponse(template.render(_make_context("publishing_stats"), request)) + + +def exploring_data(request): + template = loader.get_template("exploring_data.html") + return HttpResponse(template.render(_make_context("exploring_data"), request)) + + +def faq(request): + template = loader.get_template("faq.html") + return HttpResponse(template.render(_make_context("faq"), request)) + + +# +# Headline pages. +# +def headlines_publishers(request): + template = loader.get_template("publishers.html") + return HttpResponse(template.render(_make_context("publishers"), request)) + + +def headlines_activities(request): + template = loader.get_template("activities.html") + return HttpResponse(template.render(_make_context("activities"), request)) + + +def headlines_files(request): + template = loader.get_template("files.html") + return HttpResponse(template.render(_make_context("files"), request)) + + +def headlines_publisher_detail(request, publisher=None): + # Not implemented yet. + return None diff --git a/src/templates/_partials/boxes.html b/src/templates/_partials/boxes.html index 9d300568c9..6cdf713567 100644 --- a/src/templates/_partials/boxes.html +++ b/src/templates/_partials/boxes.html @@ -8,15 +8,19 @@

      {{ title }}

      {% if json %} - (J) + {% if folderextra %} + (J) + {% else %} + (J) + {% endif %} {% endif %}

      {{ description|safe }}

      - + {% if legend %} - + {% endif %}
      diff --git a/src/templates/_partials/tablesorter_instructions.html b/src/templates/_partials/tablesorter_instructions.html index 348d0f3f39..aacea903f5 100644 --- a/src/templates/_partials/tablesorter_instructions.html +++ b/src/templates/_partials/tablesorter_instructions.html @@ -1 +1 @@ -

      Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary etc) sorting by the desired column/s.

      \ No newline at end of file +

      Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary etc) sorting by the desired column/s.

      \ No newline at end of file diff --git a/src/templates/activities.html b/src/templates/activities.html index 55c19c10a0..b462c6104c 100644 --- a/src/templates/activities.html +++ b/src/templates/activities.html @@ -1,14 +1,14 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Total activities', current_stats.aggregated.activities, 'activities.png', 'activities.json', + {{ boxes.box('Total activities', current_stats.aggregated.activities, 'img/aggregate/activities.png', 'activities.json', description='Total count of activities across all publishers, over time. Note: this includes activities with duplicate iati-identifier') }} - {{ boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'unique_identifiers.png', 'unique_identifiers.json', + {{ boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'img/aggregate/unique_identifiers.png', 'unique_identifiers.json', description='Total count of unique activities across all publishers, over time Note: this excludes counts of duplicate iati-identifier') }} - {{ boxes.box('Activities by publisher type', '', 'activities_per_publisher_type.png', None, 'activities_per_publisher_type_legend.png', + {{ boxes.box('Activities by publisher type', '', 'img/aggregate/activities_per_publisher_type.png', None, 'img/aggregate/activities_per_publisher_type_legend.png', description='Count of all activities, aggregated by publisher type, over time.') }}
      {% endblock %} diff --git a/src/templates/base.html b/src/templates/base.html index 3bf3405ddf..6cee6884d8 100644 --- a/src/templates/base.html +++ b/src/templates/base.html @@ -5,8 +5,8 @@ - - + + IATI Dashboard – {% block title %}{{ page_titles[page] }}{% endblock %} @@ -30,16 +30,16 @@
      @@ -77,11 +77,11 @@

      {{ page_titles[page] }}

      Report bugs, and request features using Github issues.
      - Generated on {{ datetime_generated() }} from data downloaded on {{ datetime_data }}.
      + Generated from data downloaded on {{ datetime_data }}.
      (NB This is the time the download task started. Any changes made after this time may not be reflected).
      - For details on how often these updates are applied, see the IATI Dashboard FAQ. + For details on how often these updates are applied, see the IATI Dashboard FAQ. Privacy Policy

      diff --git a/src/templates/files.html b/src/templates/files.html index e3a1e05cfa..a47063f6a7 100644 --- a/src/templates/files.html +++ b/src/templates/files.html @@ -1,14 +1,14 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Total activity files', current_stats.aggregated.activity_files, 'activity_files.png', 'activity_files.json', + {{ boxes.box('Total activity files', current_stats.aggregated.activity_files, 'img/aggregate/activity_files.png', 'activity_files.json', description='Count of total number of activity files over time.') }} - {{ boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'organisation_files.png', 'organisation_files.json', + {{ boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'img/aggregate/organisation_files.png', 'organisation_files.json', description='Count of total number of organisation files, over time.') }}
      - {{ boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'file_size.png', 'file_size.json') }} + {{ boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'img/aggregate/file_size.png', 'file_size.json') }}
      @@ -17,7 +17,7 @@

      File Sizes

      - {% for bin,freq in sorted(current_stats.aggregated.file_size_bins.items(), key=firstint) %} + {% for bin,freq in func.sorted(current_stats.aggregated.file_size_bins.items(), key=func.firstint) %} @@ -49,7 +49,7 @@

      File Sizes

      {% for package, activities in current_stats.inverted_file.activities.items() %} - + diff --git a/src/templates/index.html b/src/templates/index.html index 230be34e80..f3cd50ff50 100644 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -1,7 +1,7 @@ {% extends 'section_index.html' %} -{% block title %}Code for IATI Analytics{% endblock %} +{% block title %}IATI Dashboard{% endblock %} {% block about %} -

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      +

      These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

      Many of the tables are sortable by clicking on the headers.

      Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

      {% endblock %} @@ -18,7 +18,7 @@

      @@ -26,7 +26,7 @@

      @@ -34,7 +34,7 @@

      @@ -42,7 +42,7 @@

      @@ -50,7 +50,7 @@

      @@ -58,7 +58,7 @@

      @@ -66,7 +66,7 @@

      @@ -74,7 +74,7 @@

      @@ -82,7 +82,7 @@

      @@ -96,7 +96,7 @@

      @@ -104,7 +104,7 @@

      @@ -112,7 +112,7 @@

      diff --git a/src/templates/publishers.html b/src/templates/publishers.html index 08037a9e18..6fa3f483aa 100644 --- a/src/templates/publishers.html +++ b/src/templates/publishers.html @@ -1,10 +1,10 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Publishers', current_stats.aggregated.publishers, 'publishers.png', 'publishers.json', + {{ boxes.box('Publishers', current_stats.aggregated.publishers, 'img/aggregate/publishers.png', 'publishers.json', description='This graph shows the number of organisations publishing IATI data over time.') }} - {{ boxes.box('Publishers by type', '', 'publisher_types.png', None, 'publisher_types_legend.png', + {{ boxes.box('Publishers by type', '', 'img/aggregate/publisher_types.png', None, 'img/aggregate/publisher_types_legend.png', description='This graph show the various types of organisations publishing IATI data.') }}
      @@ -13,7 +13,7 @@
      -

      (This table as CSV)

      +

      (This table as CSV)

      List of current active IATI publishers, Click on the publisher name for more details.

      {% include '_partials/tablesorter_instructions.html' %}
      @@ -32,10 +32,10 @@
      {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} - - + + diff --git a/src/templates/section_index.html b/src/templates/section_index.html index 16fea03866..30c11da5b1 100644 --- a/src/templates/section_index.html +++ b/src/templates/section_index.html @@ -28,8 +28,10 @@

        {% for item in (top_navigation if page=='index' else navigation[navigation_reverse[page]]) %}
      • -

        {{ page_titles[item] }}

        -

        {{ page_leads[item]|safe }}

        +

        {{ page_titles[item] }}

        + {% if item in page_leads %} +

        {{ page_leads[item]|safe }}

        + {% endif %}
      • {% endfor %}
      From d3e2181b77f937f86b7d0d26b739c3ebae2a4249 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 7 Oct 2024 14:32:58 +0100 Subject: [PATCH 323/375] refactor: Added views for licenses and license detail pages Moved licence page generation code into views.py and created view functions for the licenses page and the individual license detail page. Small adjustments were made to the templates for them to work with Django. Also moved license id code and title into text.py and added some license data generation code into views.py. --- src/dashboard/urls.py | 7 +- src/dashboard/views.py | 71 +++++++++++++++++ src/licenses.py | 148 ------------------------------------ src/templates/base.html | 2 +- src/templates/license.html | 2 +- src/templates/licenses.html | 4 +- src/text.py | 81 ++++++++++++++++++++ 7 files changed, 158 insertions(+), 157 deletions(-) delete mode 100644 src/licenses.py diff --git a/src/dashboard/urls.py b/src/dashboard/urls.py index a2ed4ab125..0e7cce3ac6 100644 --- a/src/dashboard/urls.py +++ b/src/dashboard/urls.py @@ -44,7 +44,8 @@ path('data-quality/validation', lambda x: None, name="dash-dataquality-validation"), path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), - path('data-quality/licenses', lambda x: None, name="dash-dataquality-licenses"), + path('data-quality/licenses', dashboard.views.dataquality_licenses, name="dash-dataquality-licenses"), + path('data-quality/licenses/', dashboard.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), path('data-quality/identifiers', lambda x: None, name="dash-dataquality-identifiers"), path('data-quality/reporting-orgs', lambda x: None, name="dash-dataquality-reportingorgs"), @@ -64,10 +65,6 @@ path('publishing-statistics/summary-statistics', lambda x: None, name="dash-publishingstats-summarystats"), path('publishing-statistics/humanitarian-reporting', 
lambda x: None, name="dash-publishingstats-humanitarian"), - # Licenses - path('licenses/', lambda x: None, name="dash-licence-detail"), - path('licenses', lambda x: None, name="dash-licences") - # Redirects to support any users with bookmarks to pages on the old Dashboard. # path('timeliness.html', redirect("dash-publishingstats-timeliness")), # path('index.html', redirect("dash-index")), diff --git a/src/dashboard/views.py b/src/dashboard/views.py index 808005a916..9d72e84b99 100644 --- a/src/dashboard/views.py +++ b/src/dashboard/views.py @@ -7,6 +7,7 @@ import dateutil.parser import subprocess +import json from django.http import HttpResponse from django.template import loader @@ -41,6 +42,28 @@ capture_output=True).stdout.decode().strip() STATS_GH_URL = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + STATS_COMMIT_HASH +# Load all the licenses and generate data for each licence and publisher. +with open(config.join_stats_path('licenses.json')) as handler: + LICENSE_URLS = json.load(handler) + +LICENSES = [ + package['license_id'] + if package['license_id'] is not None + else 'notspecified' + for _, publisher in ckan.items() + for _, package in publisher.items()] + +LICENCE_COUNT = dict((x, LICENSES.count(x)) for x in set(LICENSES)) + +LICENSES_AND_PUBLISHER = set([(package['license_id'] + if package['license_id'] is not None + else 'notspecified', publisher_name) + for publisher_name, publisher in ckan.items() + for package_name, package in publisher.items()]) + +LICENSES_PER_PUBLISHER = [license for license, publisher in LICENSES_AND_PUBLISHER] +PUBLISHER_LICENSE_COUNT = dict((x, LICENSES_PER_PUBLISHER.count(x)) for x in set(LICENSES_PER_PUBLISHER)) + def _make_context(page_name: str): """Make a basic context dictionary for a given page @@ -172,3 +195,51 @@ def headlines_files(request): def headlines_publisher_detail(request, publisher=None): # Not implemented yet. return None + + +# +# Views to generate data quality pages. 
+# +def dataquality_licenses(request): + template = loader.get_template("licenses.html") + context = _make_context("licenses") + context["license_urls"] = LICENSE_URLS + context["license_names"] = text.LICENSE_NAMES + context["licenses"] = True + context["license_count"] = LICENCE_COUNT + context["publisher_license_count"] = PUBLISHER_LICENSE_COUNT + return HttpResponse(template.render(context, request)) + + +def dataquality_licenses_detail(request, license_id=None): + template = loader.get_template("license.html") + + publishers = [ + publisher_name + for publisher_name, publisher in ckan.items() + for _, package in publisher.items() + if package['license_id'] == license_id or ( + license_id == 'notspecified' and package['license_id'] is None)] + context = _make_context("licenses") + context["license_urls"] = LICENSE_URLS + context["license_names"] = text.LICENSE_NAMES + context["licenses"] = True + context["license"] = license_id + context["publisher_counts"] = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] + return HttpResponse(template.render(context, request)) + + +def _unused_licenses_for_publisher(publisher_name): + # Unused code from the original Dashboard. 
+ # + # Check publisher is in the compiled list of CKAN data + # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 + if publisher_name not in ckan.keys(): + return set() + + # Return unique licenses used + return set([ + package['license_id'] + if package['license_id'] is not None + else 'notspecified' + for package in ckan[publisher_name].values()]) diff --git a/src/licenses.py b/src/licenses.py deleted file mode 100644 index 5d090a6a43..0000000000 --- a/src/licenses.py +++ /dev/null @@ -1,148 +0,0 @@ -import json -from collections import OrderedDict - -from flask import render_template -import config - -license_names = { - 'notspecified': 'Other::License Not Specified', - 'odc-pddl': 'OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)', - 'odc-odbl': 'OKD Compliant::Open Data Commons Open Database License (ODbL)', - 'odc-by': 'OKD Compliant::Open Data Commons Attribution Licence', - 'cc-zero': 'OKD Compliant::Creative Commons CCZero', - 'cc-by': 'OKD Compliant::Creative Commons Attribution', - 'cc-by-sa': 'OKD Compliant::Creative Commons Attribution Share-Alike', - 'gfdl': 'OKD Compliant::GNU Free Documentation License', - 'ukclickusepsi': 'OKD Compliant::UK Click Use PSI', - 'other-open': 'OKD Compliant::Other (Open)', - 'other-pd': 'OKD Compliant::Other (Public Domain)', - 'other-at': 'OKD Compliant::Other (Attribution)', - 'ukcrown-withrights': 'OKD Compliant::UK Crown Copyright with data.gov.uk rights', - 'hesa-withrights': 'OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights', - 'localauth-withrights': 'OKD Compliant::Local Authority Copyright with data.gov.uk rights', - 'uk-ogl': 'OKD Compliant::UK Open Government Licence (OGL)', - 'met-office-cp': 'Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement', - 'cc-nc': 'Non-OKD Compliant::Creative Commons Non-Commercial (Any)', - 'ukcrown': 'Non-OKD Compliant::UK Crown Copyright', - 'other-nc': 'Non-OKD Compliant::Other 
(Non-Commercial)', - 'other-closed': 'Non-OKD Compliant::Other (Not Open)', - 'bsd-license': 'OSI Approved::New and Simplified BSD licenses', - 'gpl-2.0': 'OSI Approved::GNU General Public License (GPL)', - 'gpl-3.0': 'OSI Approved::GNU General Public License version 3.0 (GPLv3)', - 'lgpl-2.1': 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', - 'mit-license': 'OSI Approved::MIT license', - 'afl-3.0': 'OSI Approved::Academic Free License 3.0 (AFL 3.0)', - 'apl1.0': 'OSI Approved::Adaptive Public License', - 'apache': 'OSI Approved::Apache Software License', - 'apache2.0': 'OSI Approved::Apache License, 2.0', - 'apsl-2.0': 'OSI Approved::Apple Public Source License', - 'artistic-license-2.0': 'OSI Approved::Artistic license 2.0', - 'attribution': 'OSI Approved::Attribution Assurance Licenses', - 'ca-tosl1.1': 'OSI Approved::Computer Associates Trusted Open Source License 1.1', - 'cddl1': 'OSI Approved::Common Development and Distribution License', - 'cpal_1.0': 'OSI Approved::Common Public Attribution License 1.0 (CPAL)', - 'cuaoffice': 'OSI Approved::CUA Office Public License Version 1.0', - 'eudatagrid': 'OSI Approved::EU DataGrid Software License', - 'eclipse-1.0': 'OSI Approved::Eclipse Public License', - 'ecl2': 'OSI Approved::Educational Community License, Version 2.0', - 'eiffel': 'OSI Approved::Eiffel Forum License', - 'ver2_eiffel': 'OSI Approved::Eiffel Forum License V2.0', - 'entessa': 'OSI Approved::Entessa Public License', - 'fair': 'OSI Approved::Fair License', - 'frameworx': 'OSI Approved::Frameworx License', - 'ibmpl': 'OSI Approved::IBM Public License', - 'intel-osl': 'OSI Approved::Intel Open Source License', - 'jabber-osl': 'OSI Approved::Jabber Open Source License', - 'lucent-plan9': 'OSI Approved::Lucent Public License (Plan9)', - 'lucent1.02': 'OSI Approved::Lucent Public License Version 1.02', - 'mitre': 'OSI Approved::MITRE Collaborative Virtual Workspace License (CVW License)', - 'motosoto': 'OSI Approved::Motosoto 
License', - 'mozilla': 'OSI Approved::Mozilla Public License 1.0 (MPL)', - 'mozilla1.1': 'OSI Approved::Mozilla Public License 1.1 (MPL)', - 'nasa1.3': 'OSI Approved::NASA Open Source Agreement 1.3', - 'naumen': 'OSI Approved::Naumen Public License', - 'nethack': 'OSI Approved::Nethack General Public License', - 'nokia': 'OSI Approved::Nokia Open Source License', - 'oclc2': 'OSI Approved::OCLC Research Public License 2.0', - 'opengroup': 'OSI Approved::Open Group Test Suite License', - 'osl-3.0': 'OSI Approved::Open Software License 3.0 (OSL 3.0)', - 'php': 'OSI Approved::PHP License', - 'pythonpl': 'OSI Approved::Python license', - 'PythonSoftFoundation': 'OSI Approved::Python Software Foundation License', - 'qtpl': 'OSI Approved::Qt Public License (QPL)', - 'real': 'OSI Approved::RealNetworks Public Source License V1.0', - 'rpl1.5': 'OSI Approved::Reciprocal Public License 1.5 (RPL1.5)', - 'ricohpl': 'OSI Approved::Ricoh Source Code Public License', - 'sleepycat': 'OSI Approved::Sleepycat License', - 'sun-issl': 'OSI Approved::Sun Industry Standards Source License (SISSL)', - 'sunpublic': 'OSI Approved::Sun Public License', - 'sybase': 'OSI Approved::Sybase Open Watcom Public License 1.0', - 'UoI-NCSA': 'OSI Approved::University of Illinois/NCSA Open Source License', - 'vovidapl': 'OSI Approved::Vovida Software License v. 
1.0', - 'W3C': 'OSI Approved::W3C License', - 'wxwindows': 'OSI Approved::wxWindows Library License', - 'xnet': 'OSI Approved::X.Net License', - 'zpl': 'OSI Approved::Zope Public License', - 'zlib-license': 'OSI Approved::zlib/libpng license'} - -with open(config.join_stats_path('licenses.json')) as handler: - license_urls = json.load(handler) - -with open(config.join_stats_path('ckan.json')) as handler: - ckan = json.load(handler, object_pairs_hook=OrderedDict) - -licenses = [ - package['license_id'] - if package['license_id'] is not None - else 'notspecified' - for _, publisher in ckan.items() - for _, package in publisher.items()] - - -def licenses_for_publisher(publisher_name): - # Check publisher is in the compiled list of CKAN data - # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 - if publisher_name not in ckan.keys(): - return set() - - # Return unique licenses used - return set([ - package['license_id'] - if package['license_id'] is not None - else 'notspecified' - for package in ckan[publisher_name].values()]) - - -def main(): - licenses_and_publisher = set([ - (package['license_id'] - if package['license_id'] is not None - else 'notspecified', publisher_name) - for publisher_name, publisher in ckan.items() - for package_name, package in publisher.items()]) - licenses_per_publisher = [license for license, publisher in licenses_and_publisher] - return render_template('licenses.html', - license_names=license_names, - license_urls=license_urls, - license_count=dict((x, licenses.count(x)) for x in set(licenses)), - publisher_license_count=dict((x, licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), - sorted=sorted, - page='licenses', - licenses=True) - - -def individual_license(license): - publishers = [ - publisher_name - for publisher_name, publisher in ckan.items() - for _, package in publisher.items() - if package['license_id'] == license or ( - license == 'notspecified' and package['license_id'] is None)] - 
publisher_counts = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] - return render_template('license.html', - license=license, - license_names=license_names, - license_urls=license_urls, - publisher_counts=publisher_counts, - page='licenses', - licenses=True) diff --git a/src/templates/base.html b/src/templates/base.html index 6cee6884d8..18613ee4fb 100644 --- a/src/templates/base.html +++ b/src/templates/base.html @@ -81,7 +81,7 @@

      {{ page_titles[page] }}

      (NB This is the time the download task started. Any changes made after this time may not be reflected).
      - For details on how often these updates are applied, see the IATI Dashboard FAQ. + For details on how often these updates are applied, see the IATI Dashboard FAQ. Privacy Policy

      diff --git a/src/templates/license.html b/src/templates/license.html index 7e170572e0..59c8ebfe2a 100644 --- a/src/templates/license.html +++ b/src/templates/license.html @@ -22,7 +22,7 @@

      {{ license_names[license] }}{% if license_urls[license]['url'] %} {{ publisher }} +

      {% endfor %} diff --git a/src/templates/licenses.html b/src/templates/licenses.html index 2d148c56c6..0908260759 100644 --- a/src/templates/licenses.html +++ b/src/templates/licenses.html @@ -18,12 +18,12 @@ - {% for license, files in sorted(license_count.items()) %} + {% for license, files in func.sorted(license_count.items()) %} - + {% endfor %} diff --git a/src/text.py b/src/text.py index 6955838bdc..39ee0ca37e 100644 --- a/src/text.py +++ b/src/text.py @@ -105,3 +105,84 @@ 'exploring_data': ['elements', 'codelists', 'booleans', 'dates', 'traceability', 'org_ids'], 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'summary_stats', 'humanitarian'] } + +LICENSE_NAMES = { + 'notspecified': 'Other::License Not Specified', + 'odc-pddl': 'OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)', + 'odc-odbl': 'OKD Compliant::Open Data Commons Open Database License (ODbL)', + 'odc-by': 'OKD Compliant::Open Data Commons Attribution Licence', + 'cc-zero': 'OKD Compliant::Creative Commons CCZero', + 'cc-by': 'OKD Compliant::Creative Commons Attribution', + 'cc-by-sa': 'OKD Compliant::Creative Commons Attribution Share-Alike', + 'gfdl': 'OKD Compliant::GNU Free Documentation License', + 'ukclickusepsi': 'OKD Compliant::UK Click Use PSI', + 'other-open': 'OKD Compliant::Other (Open)', + 'other-pd': 'OKD Compliant::Other (Public Domain)', + 'other-at': 'OKD Compliant::Other (Attribution)', + 'ukcrown-withrights': 'OKD Compliant::UK Crown Copyright with data.gov.uk rights', + 'hesa-withrights': 'OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights', + 'localauth-withrights': 'OKD Compliant::Local Authority Copyright with data.gov.uk rights', + 'uk-ogl': 'OKD Compliant::UK Open Government Licence (OGL)', + 'met-office-cp': 'Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement', + 'cc-nc': 'Non-OKD Compliant::Creative Commons Non-Commercial (Any)', + 'ukcrown': 'Non-OKD 
Compliant::UK Crown Copyright', + 'other-nc': 'Non-OKD Compliant::Other (Non-Commercial)', + 'other-closed': 'Non-OKD Compliant::Other (Not Open)', + 'bsd-license': 'OSI Approved::New and Simplified BSD licenses', + 'gpl-2.0': 'OSI Approved::GNU General Public License (GPL)', + 'gpl-3.0': 'OSI Approved::GNU General Public License version 3.0 (GPLv3)', + 'lgpl-2.1': 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', + 'mit-license': 'OSI Approved::MIT license', + 'afl-3.0': 'OSI Approved::Academic Free License 3.0 (AFL 3.0)', + 'apl1.0': 'OSI Approved::Adaptive Public License', + 'apache': 'OSI Approved::Apache Software License', + 'apache2.0': 'OSI Approved::Apache License, 2.0', + 'apsl-2.0': 'OSI Approved::Apple Public Source License', + 'artistic-license-2.0': 'OSI Approved::Artistic license 2.0', + 'attribution': 'OSI Approved::Attribution Assurance Licenses', + 'ca-tosl1.1': 'OSI Approved::Computer Associates Trusted Open Source License 1.1', + 'cddl1': 'OSI Approved::Common Development and Distribution License', + 'cpal_1.0': 'OSI Approved::Common Public Attribution License 1.0 (CPAL)', + 'cuaoffice': 'OSI Approved::CUA Office Public License Version 1.0', + 'eudatagrid': 'OSI Approved::EU DataGrid Software License', + 'eclipse-1.0': 'OSI Approved::Eclipse Public License', + 'ecl2': 'OSI Approved::Educational Community License, Version 2.0', + 'eiffel': 'OSI Approved::Eiffel Forum License', + 'ver2_eiffel': 'OSI Approved::Eiffel Forum License V2.0', + 'entessa': 'OSI Approved::Entessa Public License', + 'fair': 'OSI Approved::Fair License', + 'frameworx': 'OSI Approved::Frameworx License', + 'ibmpl': 'OSI Approved::IBM Public License', + 'intel-osl': 'OSI Approved::Intel Open Source License', + 'jabber-osl': 'OSI Approved::Jabber Open Source License', + 'lucent-plan9': 'OSI Approved::Lucent Public License (Plan9)', + 'lucent1.02': 'OSI Approved::Lucent Public License Version 1.02', + 'mitre': 'OSI Approved::MITRE Collaborative Virtual 
Workspace License (CVW License)', + 'motosoto': 'OSI Approved::Motosoto License', + 'mozilla': 'OSI Approved::Mozilla Public License 1.0 (MPL)', + 'mozilla1.1': 'OSI Approved::Mozilla Public License 1.1 (MPL)', + 'nasa1.3': 'OSI Approved::NASA Open Source Agreement 1.3', + 'naumen': 'OSI Approved::Naumen Public License', + 'nethack': 'OSI Approved::Nethack General Public License', + 'nokia': 'OSI Approved::Nokia Open Source License', + 'oclc2': 'OSI Approved::OCLC Research Public License 2.0', + 'opengroup': 'OSI Approved::Open Group Test Suite License', + 'osl-3.0': 'OSI Approved::Open Software License 3.0 (OSL 3.0)', + 'php': 'OSI Approved::PHP License', + 'pythonpl': 'OSI Approved::Python license', + 'PythonSoftFoundation': 'OSI Approved::Python Software Foundation License', + 'qtpl': 'OSI Approved::Qt Public License (QPL)', + 'real': 'OSI Approved::RealNetworks Public Source License V1.0', + 'rpl1.5': 'OSI Approved::Reciprocal Public License 1.5 (RPL1.5)', + 'ricohpl': 'OSI Approved::Ricoh Source Code Public License', + 'sleepycat': 'OSI Approved::Sleepycat License', + 'sun-issl': 'OSI Approved::Sun Industry Standards Source License (SISSL)', + 'sunpublic': 'OSI Approved::Sun Public License', + 'sybase': 'OSI Approved::Sybase Open Watcom Public License 1.0', + 'UoI-NCSA': 'OSI Approved::University of Illinois/NCSA Open Source License', + 'vovidapl': 'OSI Approved::Vovida Software License v. 1.0', + 'W3C': 'OSI Approved::W3C License', + 'wxwindows': 'OSI Approved::wxWindows Library License', + 'xnet': 'OSI Approved::X.Net License', + 'zpl': 'OSI Approved::Zope Public License', + 'zlib-license': 'OSI Approved::zlib/libpng license'} From 803e7ea048e14a50c1405d223e59a5a30e2e9d75 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 8 Oct 2024 09:41:08 +0100 Subject: [PATCH 324/375] refactor: Added custom filters to Jinja2 setup Moved custom filters from make_html.py into this file. 
--- src/dashboard/jinja2.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/dashboard/jinja2.py b/src/dashboard/jinja2.py index c4386c6c50..b1da234df6 100644 --- a/src/dashboard/jinja2.py +++ b/src/dashboard/jinja2.py @@ -1,17 +1,46 @@ """Jinja2 template configuration """ +import re + from django.templatetags.static import static from django.urls import reverse - from jinja2 import Environment +import timeliness + + +def round_nicely(val, ndigits=2): + """ Round a float, but remove the trailing .0 from integers that python insists on + """ + if int(val) == float(val): + return int(val) + return round(float(val), ndigits) + + +def xpath_to_url(path): + path = path.strip('./') + # remove conditions + path = re.sub(r'\[[^]]+\]', '', path) + if path.startswith('iati-activity'): + url = 'http://iatistandard.org/activity-standard/iati-activities/' + path.split('@')[0] + elif path.startswith('iati-organisation'): + url = 'http://iatistandard.org/organisation-standard/iati-organisations/' + path.split('@')[0] + else: + url = 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] + if '@' in path: + url += '#attributes' + return url def environment(**options): env = Environment(**options) env.globals.update( { - "static": static, - "url": reverse, + 'static': static, + 'url': reverse, } ) + env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] + env.filters['has_future_transactions'] = timeliness.has_future_transactions + env.filters['xpath_to_url'] = xpath_to_url + env.filters['round_nicely'] = round_nicely return env From f4a0a0a70c1cfd770a3f4026e4d1002bcb028d8f Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 8 Oct 2024 13:36:18 +0100 Subject: [PATCH 325/375] refactor: Moved org_id table fragments into template/_partials Since these table fragments were included in other templates I decided to move them into the _partials folder where other 
reusable components were stored. --- src/templates/{ => _partials}/org_id_table_cells.html | 0 src/templates/{ => _partials}/org_id_table_header.html | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/templates/{ => _partials}/org_id_table_cells.html (100%) rename src/templates/{ => _partials}/org_id_table_header.html (100%) diff --git a/src/templates/org_id_table_cells.html b/src/templates/_partials/org_id_table_cells.html similarity index 100% rename from src/templates/org_id_table_cells.html rename to src/templates/_partials/org_id_table_cells.html diff --git a/src/templates/org_id_table_header.html b/src/templates/_partials/org_id_table_header.html similarity index 100% rename from src/templates/org_id_table_header.html rename to src/templates/_partials/org_id_table_header.html From 95649385273a95a00cb0b8963fb222acec59bb50 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 9 Oct 2024 09:46:39 +0100 Subject: [PATCH 326/375] refactor: Completed refactoring of headlines by adding publisher detail This commit completes the refactoring of the publisher headlines by modifying the publisher template, adding some extra URL routes, and completing the view function for the publisher detail page. Also needed a minor change to the boxes template. --- src/dashboard/urls.py | 3 ++ src/dashboard/views.py | 70 +++++++++++++++++++++--------- src/templates/_partials/boxes.html | 4 +- src/templates/publisher.html | 35 +++++++-------- 4 files changed, 74 insertions(+), 38 deletions(-) diff --git a/src/dashboard/urls.py b/src/dashboard/urls.py index 0e7cce3ac6..d04dff4b32 100644 --- a/src/dashboard/urls.py +++ b/src/dashboard/urls.py @@ -51,11 +51,14 @@ # Exploring data pages. 
path('exploring-data/elements', lambda x: None, name="dash-exploringdata-elements"), + path('exploring-data/elements/', lambda x: None, name="dash-exploringdata-elements-detail"), path('exploring-data/codelists', lambda x: None, name="dash-exploringdata-codelists"), + path('exploring-data/codelists//', lambda x: None, name="dash-exploringdata-codelists-detail"), path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), path('exploring-data/dates', lambda x: None, name="dash-exploringdata-dates"), path('exploring-data/traceability', lambda x: None, name="dash-exploringdata-traceability"), path('exploring-data/organisation-identifiers', lambda x: None, name="dash-exploringdata-orgids"), + path('exploring-data/organisation-types/', lambda x: None, name="dash-exploringdata-orgtypes-detail"), # Publishing statistics pages. path('publishing-statistics/timeliness', lambda x: None, name="dash-publishingstats-timeliness"), diff --git a/src/dashboard/views.py b/src/dashboard/views.py index 9d72e84b99..2a2aab03e6 100644 --- a/src/dashboard/views.py +++ b/src/dashboard/views.py @@ -9,7 +9,7 @@ import subprocess import json -from django.http import HttpResponse +from django.http import HttpResponse, Http404 from django.template import loader import config @@ -65,6 +65,20 @@ PUBLISHER_LICENSE_COUNT = dict((x, LICENSES_PER_PUBLISHER.count(x)) for x in set(LICENSES_PER_PUBLISHER)) +def _get_licenses_for_publisher(publisher_name): + # Check publisher is in the compiled list of CKAN data + # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 + if publisher_name not in ckan.keys(): + return set() + + # Return unique licenses used + return set([ + package['license_id'] + if package['license_id'] is not None + else 'notspecified' + for package in ckan[publisher_name].values()]) + + def _make_context(page_name: str): """Make a basic context dictionary for a given page """ @@ -134,7 +148,9 @@ def _make_context(page_name: str): "firstint": 
dashboard.template_funcs.firstint, "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), "get_publisher_stats": get_publisher_stats, - "is_valid_element": is_valid_element} + "is_valid_element": is_valid_element, + "set": set + } ) context["navigation_reverse"].update({k: k for k in text.navigation}) @@ -193,8 +209,38 @@ def headlines_files(request): def headlines_publisher_detail(request, publisher=None): - # Not implemented yet. - return None + template = loader.get_template("publisher.html") + + context = _make_context("publishers") + context["publisher"] = publisher + context["publisher_inverted"] = get_publisher_stats(publisher, 'inverted-file') + context["publisher_licenses"] = _get_licenses_for_publisher(publisher) + publisher_stats = get_publisher_stats(publisher) + context["publisher_stats"] = publisher_stats + + try: + context["budget_table"] = [{ + 'year': 'Total', + 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), + 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, + 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + }] + [{'year': year, + 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), + 'sum_total': 
{currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, + 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, + 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, + 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None + } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) + ] + context["failure_count"] = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) + except KeyError: + raise Http404("Publisher does not exist") + + return HttpResponse(template.render(context, request)) # @@ -227,19 +273,3 @@ def dataquality_licenses_detail(request, license_id=None): context["license"] = license_id context["publisher_counts"] = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] return HttpResponse(template.render(context, request)) - - -def _unused_licenses_for_publisher(publisher_name): - # Unused code from the original Dashboard. 
- # - # Check publisher is in the compiled list of CKAN data - # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 - if publisher_name not in ckan.keys(): - return set() - - # Return unique licenses used - return set([ - package['license_id'] - if package['license_id'] is not None - else 'notspecified' - for package in ckan[publisher_name].values()]) diff --git a/src/templates/_partials/boxes.html b/src/templates/_partials/boxes.html index 6cdf713567..0bb96929cb 100644 --- a/src/templates/_partials/boxes.html +++ b/src/templates/_partials/boxes.html @@ -17,7 +17,9 @@

      -

      {{ description|safe }}

      + {% if description %} +

      {{ description|safe }}

      + {% endif %} {% if legend %} diff --git a/src/templates/publisher.html b/src/templates/publisher.html index 5808e83847..cf0ac66918 100644 --- a/src/templates/publisher.html +++ b/src/templates/publisher.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block title %} Publisher: {{ publisher_name[publisher] }} {{ super() }} {% endblock %} @@ -109,7 +109,7 @@

      Headlines

      @@ -122,22 +122,23 @@

      Headlines

      {{ bin }} {{ freq }}
      {{ dataset_to_publisher(publisher_name[package[:-4]]) }}{{ publisher_name[func.dataset_to_publisher(package[:-4])] }} {{ package[:-4] }} {{ activities }} {{ current_stats.inverted_file.organisations.get(package) }}
      - + {{ current_stats.aggregated.activities }}
      - + {{ current_stats.aggregated.iati_identifiers|count }}
      - + {{ current_stats.aggregated.publishers }}
      - + {{ current_stats.aggregated.activity_files }}
      - + {{ current_stats.aggregated.organisation_files }}
      - + {{ current_stats.aggregated.file_size|filesizeformat }}
      - + {{ current_stats.download_errors|length }}
      - + {{ current_stats.aggregated.invalidxml }}
      - + {{ current_stats.aggregated.nonstandardroots }}
      - + {{ current_stats.aggregated.validation.fail }}
      - + {{ current_stats.aggregated.publishers_validation.fail }}
      - + {{ current_stats.aggregated.publisher_has_org_file.no }}
      {{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }} {{ current_stats.inverted_publisher.activities[publisher] }} {{ publisher_stats.organisations }} {{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ publisher }} {{ files }}
      {% if license_urls[license]['url'] %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %} {{ license }} {{ files }}{{ publisher_license_count[license] }}{{ publisher_license_count[license] }}
      Licenses {% for license in publisher_licenses %} - {{ license }} + {{ license }} {% endfor %}
      - {{ boxes.box('Activities', publisher_stats.activities, '../publisher_imgs/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher') }} + {{ boxes.box( + 'Activities', publisher_stats.activities, 'img/publishers/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher') }}
      - {{ boxes.box('Activity Files', publisher_stats.activity_files, '../publisher_imgs/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher') }} - {{ boxes.box('Organisation Files', publisher_stats.organisation_files, '../publisher_imgs/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher') }} + {{ boxes.box('Activity Files', publisher_stats.activity_files, 'img/publishers/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher') }} + {{ boxes.box('Organisation Files', publisher_stats.organisation_files, 'img/publishers/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher') }}
      - {{ boxes.box('Files per version', '', '../publisher_imgs/'+publisher+'_versions.png', publisher+'/versions.json', '../publisher_imgs/'+publisher+'_versions_legend.png', '-publisher') }} - {{ boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, '../publisher_imgs/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher') }} + {{ boxes.box('Files per version', '', 'img/publishers/'+publisher+'_versions.png', publisher+'/versions.json', 'img/publishers/'+publisher+'_versions_legend.png', '-publisher') }} + {{ boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, 'img/publishers/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher') }}
      - {{ boxes.box('Files failing validation', publisher_stats.validation.fail, '../publisher_imgs/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher') }} - {{ boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, '../publisher_imgs/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher') }} + {{ boxes.box('Files failing validation', publisher_stats.validation.get('fail',0), 'img/publishers/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher') }} + {{ boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, 'img/publishers/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher') }}

      Data Quality

      @@ -378,15 +379,15 @@

      Codelist Values (version {{ major_version }}.xx)

      {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %}
      {{ element }}{{ element }} {{ codelist_mapping[major_version].get(element) }}{% if codes|count %} {{ codes|count }} {% else %}{{ codes|count }}{% endif %} {% if codes|count %} @@ -426,9 +427,9 @@

      Elements and Attributes Published

      {% for element, count in publisher_stats['elements'].items() %}
      {{ element }}{{ element }} {{ count }}{{ publisher_inverted.elements[element]|count }}{{ publisher_inverted.elements[element]|count }}
      Org Type
      {{ slug.replace('_org', '') | capitalize }}{{ slug.replace('_org', '') | capitalize }}(J)
      {{ publisher }}{{ publisher }} {{ dataset }}{{ url|url_to_filename }}{{ err_url|url_to_filename }} {{ code }}
      {{ dataset_to_publisher(dataset[:-4]) }}{{ func.dataset_to_publisher(dataset[:-4]) }} {{ dataset[:-4] }}
      {{ dataset_to_publisher(dataset[:-4]) }}{{ func.dataset_to_publisher(dataset[:-4]) }} {{ dataset[:-4] }}
      {{ publisher_name[publisher] }} +
      {{ publisher_name[publisher] }} {% for dataset in datasets.keys() %} {{ dataset[:-4] }} {% endfor %} @@ -68,7 +68,7 @@

      Expected versions

      {% for publisher in publishers %} - + {% endfor %}
      {{ publisher_name[publisher] }}{{ publisher_name[publisher] }}
      @@ -95,7 +95,7 @@

      Other versions

      {% for publisher in publishers %} - + {% endfor %}
      {{ publisher_name[publisher] }}{{ publisher_name[publisher] }}
      diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 0045c40327..3d9839a5c3 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -43,7 +43,7 @@ path('data/download_errors.json', ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), path('data-quality/xml-errors', ui.views.dataquality_xml, name="dash-dataquality-xml"), path('data-quality/validation', ui.views.dataquality_validation, name="dash-dataquality-validation"), - path('data-quality/versions', lambda x: None, name="dash-dataquality-versions"), + path('data-quality/versions', ui.views.dataquality_versions, name="dash-dataquality-versions"), path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index dcc7205e35..b5f413e6e8 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -15,6 +15,7 @@ import config import text import ui.template_funcs +import vars from data import ( ckan, @@ -137,6 +138,7 @@ def _make_context(page_name: str): codelist_sets=codelist_sets, github_issues=github_issues, MAJOR_VERSIONS=MAJOR_VERSIONS, + expected_versions = vars.expected_versions, metadata=metadata, slugs=slugs, datetime_data=dateutil.parser.parse(metadata['created_at']).strftime('%-d %B %Y (at %H:%M %Z)'), @@ -268,6 +270,12 @@ def dataquality_validation(request): return HttpResponse(template.render(context, request)) +def dataquality_versions(request): + template = loader.get_template("versions.html") + context = _make_context("versions") + return HttpResponse(template.render(context, request)) + + def dataquality_licenses(request): template = loader.get_template("licenses.html") context = _make_context("licenses") From f497ae287f377ac96b69a196a9b5bab46c411019 
Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 22 Oct 2024 11:15:22 +0100 Subject: [PATCH 334/375] refactor: Added data quality/organisation XML page Refactored the template, completed the url route and wrote the view function to generate the organisation XML page wtihin the data quality section. --- dashboard/templates/organisation.html | 8 ++++---- dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 8 +++++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/dashboard/templates/organisation.html b/dashboard/templates/organisation.html index 0692d5fd9e..16bd858d36 100644 --- a/dashboard/templates/organisation.html +++ b/dashboard/templates/organisation.html @@ -1,8 +1,8 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      - {{ boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'publisher_has_org_file.png', 'publisher_has_org_file.json', + {{ boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'img/aggregate/publisher_has_org_file.png', 'publisher_has_org_file.json', description='Count of publishers without an organisation file, over time.') }}
      @@ -16,8 +16,8 @@

      The following publishers do not have an organisation file listed on the IATI Registry.

      diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 3d9839a5c3..58457e4c8a 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -44,7 +44,7 @@ path('data-quality/xml-errors', ui.views.dataquality_xml, name="dash-dataquality-xml"), path('data-quality/validation', ui.views.dataquality_validation, name="dash-dataquality-validation"), path('data-quality/versions', ui.views.dataquality_versions, name="dash-dataquality-versions"), - path('data-quality/organisation-xml', lambda x: None, name="dash-dataquality-organisation"), + path('data-quality/organisation', ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), path('data-quality/identifiers', lambda x: None, name="dash-dataquality-identifiers"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index b5f413e6e8..bd4da863ae 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -138,7 +138,7 @@ def _make_context(page_name: str): codelist_sets=codelist_sets, github_issues=github_issues, MAJOR_VERSIONS=MAJOR_VERSIONS, - expected_versions = vars.expected_versions, + expected_versions=vars.expected_versions, metadata=metadata, slugs=slugs, datetime_data=dateutil.parser.parse(metadata['created_at']).strftime('%-d %B %Y (at %H:%M %Z)'), @@ -303,3 +303,9 @@ def dataquality_licenses_detail(request, license_id=None): context["license"] = license_id context["publisher_counts"] = [(publisher, publishers.count(publisher)) for publisher in set(publishers)] return HttpResponse(template.render(context, request)) + + +def dataquality_orgxml(request): + template = loader.get_template("organisation.html") + context = _make_context("organisation") + return HttpResponse(template.render(context, request)) From 5d5531368b65b2fff465ea1d269b0545e0602c16 Mon Sep 17 00:00:00 2001 
From: Chris Arridge Date: Tue, 22 Oct 2024 11:26:35 +0100 Subject: [PATCH 335/375] refactor: Added data quality/duplicate activity identifier page Refactored template, completed url route and wrote view function for the duplicate activity identifier page in the data quality section --- dashboard/templates/identifiers.html | 6 +++--- dashboard/ui/urls.py | 2 +- dashboard/ui/views.py | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/dashboard/templates/identifiers.html b/dashboard/templates/identifiers.html index fc95c3d641..90387a637e 100644 --- a/dashboard/templates/identifiers.html +++ b/dashboard/templates/identifiers.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      @@ -25,10 +25,10 @@

      {{ publisher_title }}{{ publisher_title }} {{ publisher_stats.publisher_duplicate_identifiers|length }} {{ publisher_stats.publisher_duplicate_identifiers.values()|sum }}
      {{ publisher_title }}{{ publisher_title }} {{ ckan_publishers[publisher].result.publisher_iati_id }} {{ publisher_stats.reporting_orgs|length }} {% for ro in publisher_stats.reporting_orgs %}{{ ro }} {% endfor %}Total activities
      {{ publisher }}{{ publisher }}{% if 'elements' in publisher_inverted %}{{ publisher_inverted.elements[element]|count }}{% endif %}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ publisher_stats.elements[element] }} {{ publisher_stats.elements_total[element] }}
      {{ publisher }}{{ publisher }} {{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }} {{ current_stats.inverted_publisher.activities[publisher] }} {{ current_stats.inverted_publisher.organisations[publisher] }}
      {{ publisher }} +
      {{ publisher }} {% for dataset in datasets.keys() %} {{ dataset[:-4] }} {% endfor %} diff --git a/dashboard/templates/elements.html b/dashboard/templates/elements.html index d027314142..e5eb0ccb84 100644 --- a/dashboard/templates/elements.html +++ b/dashboard/templates/elements.html @@ -1,13 +1,13 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %} Total Instances of Element (J)
      {{ element }}{{ publishers|length }}{{ element }}{{ publishers|length }} {{ current_stats.aggregated.elements[element] }} {{ current_stats.aggregated.elements_total[element] }}
      Org Type
      {{ slug.replace('_org', '') | capitalize }}{{ slug.replace('_org', '') | capitalize }}
      Publisher Name Publisher Registry Id
      {{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }}
      {{ value }} {{ codelist_lookup[major_version].get(codelist_mapping[major_version].get(element))[value]['name'] }} - {{ publishers|length }} + {{ publishers|length }}
      {{ value }} - {{ publishers|length }} + {{ publishers|length }}
      Values used, not on Codelist
      {{ element }}{{ element }} {{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ values|length }} {{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}{{ codes|length }}{{ codes|length }}
      {{ publisher_title }}{{ publisher_title }} {% if publisher_stats.date_extremes.min.overall %}{{ publisher_stats.date_extremes.min.overall }}{% endif %} {% if publisher_stats.date_extremes.max.overall %}{{ publisher_stats.date_extremes.max.overall }}{% endif %} {% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{ publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}
      {{ publisher_title }}{{ publisher_title }} {%- if publisher_stats.traceable_activities_by_publisher_id -%} {{ '{:,}'.format(publisher_stats.traceable_activities_by_publisher_id) }} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 7dd586aab5..ae6bb90e9e 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -55,9 +55,9 @@ path('exploring-data/elements/', ui.views.exploringdata_element_detail, name="dash-exploringdata-elements-detail"), path('exploring-data/codelists', ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), path('exploring-data/codelists//', ui.views.exploringdata_codelists_detail, name="dash-exploringdata-codelists-detail"), - path('exploring-data/booleans', lambda x: None, name="dash-exploringdata-booleans"), - path('exploring-data/dates', lambda x: None, name="dash-exploringdata-dates"), - path('exploring-data/traceability', lambda x: None, name="dash-exploringdata-traceability"), + path('exploring-data/booleans', ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), + path('exploring-data/dates', ui.views.exploringdata_dates, name="dash-exploringdata-dates"), + path('exploring-data/traceability', ui.views.exploringdata_traceability, name="dash-exploringdata-traceability"), path('exploring-data/organisation-identifiers', ui.views.exploringdata_orgids, name="dash-exploringdata-orgids"), path('exploring-data/organisation-type/', ui.views.exploringdata_orgtypes_detail, name="dash-exploringdata-orgtypes-detail"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 3f8aeaa644..c699b1ea86 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -390,3 +390,18 @@ def exploringdata_codelists_detail(request, major_version=None, attribute=None): context["major_version"] = major_version return HttpResponse(template.render(context, request)) + + +def exploringdata_booleans(request): + template = loader.get_template("booleans.html") + return 
HttpResponse(template.render(_make_context("booleans"), request)) + + +def exploringdata_dates(request): + template = loader.get_template("dates.html") + return HttpResponse(template.render(_make_context("dates"), request)) + + +def exploringdata_traceability(request): + template = loader.get_template("traceability.html") + return HttpResponse(template.render(_make_context("traceability"), request)) From 90dd4b0a8d9c4356e30ed09c77c26568c8db88f2 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 30 Oct 2024 15:16:14 +0000 Subject: [PATCH 341/375] refactor: Added publishing statistics / comprehensiveness pages Refactored the templates, extended and completed url routes, and wrote view functions to added the comprehensiveness pages to the publishing statistics section. --- .../templates/comprehensiveness_base.html | 14 ++++----- .../templates/comprehensiveness_core.html | 2 +- dashboard/ui/urls.py | 5 +++- dashboard/ui/views.py | 29 +++++++++++++++++++ 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/dashboard/templates/comprehensiveness_base.html b/dashboard/templates/comprehensiveness_base.html index 2a35ed2fa6..9ab0d9cf07 100644 --- a/dashboard/templates/comprehensiveness_base.html +++ b/dashboard/templates/comprehensiveness_base.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block container %} @@ -8,10 +8,10 @@ {% endblock %}
        @@ -31,7 +31,7 @@ {% block content %}
        - (This table as CSV) + (This table as CSV)

        {% block table_title %}Table of Comprehensiveness values{% endblock %}

        @@ -57,7 +57,7 @@

        {% block table_title %}Table of Comprehensiveness values

      {{ row.publisher_title }}{{ row.publisher_title }}{% if column_slug in row %} {{ row[column_slug+'_valid'] | round_nicely }} diff --git a/dashboard/templates/comprehensiveness_core.html b/dashboard/templates/comprehensiveness_core.html index e082052c81..0475e18d9c 100644 --- a/dashboard/templates/comprehensiveness_core.html +++ b/dashboard/templates/comprehensiveness_core.html @@ -5,7 +5,7 @@ {% block heading_detail %}

      Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting Organisation, IATI Identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

      -

      This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

      +

      This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

      Key:
      Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index ae6bb90e9e..d2f7c26320 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -64,7 +64,10 @@ # Publishing statistics pages. path('publishing-statistics/timeliness', lambda x: None, name="dash-publishingstats-timeliness"), path('publishing-statistics/forward-looking', lambda x: None, name="dash-publishingstats-forwardlooking"), - path('publishing-statistics/comprehensiveness', lambda x: None, name="dash-publishingstats-comprehensiveness"), + path('publishing-statistics/comprehensiveness', ui.views.pubstats_comprehensiveness, name="dash-publishingstats-comprehensiveness"), + path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), + path('publishing-statistics/comprehensiveness/financials', ui.views.pubstats_comprehensiveness_financials, name="dash-publishingstats-comprehensiveness-financials"), + path('publishing-statistics/comprehensiveness/value-added', ui.views.pubstats_comprehensiveness_valueadded, name="dash-publishingstats-comprehensiveness-valueadded"), path('publishing-statistics/coverage', lambda x: None, name="dash-publishingstats-coverage"), path('publishing-statistics/summary-statistics', lambda x: None, name="dash-publishingstats-summarystats"), path('publishing-statistics/humanitarian-reporting', lambda x: None, name="dash-publishingstats-humanitarian"), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index c699b1ea86..aeff71abc5 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -13,6 +13,7 @@ from django.http import HttpResponse, Http404 from django.template import loader +import comprehensiveness import config import text import ui.template_funcs @@ -405,3 +406,31 @@ def exploringdata_dates(request): def exploringdata_traceability(request): 
template = loader.get_template("traceability.html") return HttpResponse(template.render(_make_context("traceability"), request)) + + +# +# Publishing statistics pages. +# +def pubstats_comprehensiveness(request): + template = loader.get_template("comprehensiveness.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + +def pubstats_comprehensiveness_core(request): + template = loader.get_template("comprehensiveness_core.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + +def pubstats_comprehensiveness_financials(request): + template = loader.get_template("comprehensiveness_financials.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + +def pubstats_comprehensiveness_valueadded(request): + template = loader.get_template("comprehensiveness_valueadded.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) From 71fd284bcf3e32f78e54a1d30e469fdcc92693d0 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 30 Oct 2024 17:33:49 +0000 Subject: [PATCH 342/375] refactor: Added publishing statistics / timeliness pages Refactored the templates, augmented and completed url routes and wrote view functions for the timeliness pages in the publishing statistics pages. 
--- dashboard/templates/timeliness.html | 6 +++--- dashboard/templates/timeliness_base.html | 6 +++--- dashboard/templates/timeliness_timelag.html | 6 +++--- dashboard/ui/urls.py | 3 ++- dashboard/ui/views.py | 13 +++++++++++++ 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/dashboard/templates/timeliness.html b/dashboard/templates/timeliness.html index 24c7e00e2d..8d130238fa 100644 --- a/dashboard/templates/timeliness.html +++ b/dashboard/templates/timeliness.html @@ -1,5 +1,5 @@ {% extends 'timeliness_base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block frequency_li %} class="active"{% endblock %} @@ -7,7 +7,7 @@

      - (This table as CSV) + (This table as CSV)

      Table of Frequency assessments

      @@ -45,7 +45,7 @@

      Table of Frequency assessments

      {{ publisher_title }}{{ publisher_title }} {{ first_published_band }}{{ per_month[month] or 0 }}
      {{ publisher_title }}{{ publisher_title }}{{ per_month[month] or 0 }}
      {{ row.publisher_title }}{{ row.publisher_title }}
      {{ row.publisher_title }}{{ row.publisher_title }} {%- if column_slug == 'publisher_type' -%} diff --git a/dashboard/templates/summary_stats.html b/dashboard/templates/summary_stats.html index 21791d153e..e73b608b8b 100644 --- a/dashboard/templates/summary_stats.html +++ b/dashboard/templates/summary_stats.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      {{ row.publisher_title }}{{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} {% endfor %} @@ -55,10 +55,10 @@

      Narrative

      Timeliness

      -

      This is calculated by scoring the assessments made on the - frequency and timelag pages on a scale of +

      This is calculated by scoring the assessments made on the + frequency and timelag pages on a scale of 0 to 4 (as below), dividing the sum of the two scores by 8, and expressing the result as - a percentage. The methodology used in making the assesments is detailed on the frequency and timelag pages. + a percentage. The methodology used in making the assesments is detailed on the frequency and timelag pages.

      @@ -118,12 +118,12 @@

      Timeliness

      Forward looking

      The average percentage of current activities with budgets for each of the years {{ current_year }} - {{ current_year + 2 }}. - The component values and a detailed methodology are displayed on the forward looking page. + The component values and a detailed methodology are displayed on the forward looking page.

      Comprehensiveness

      -

      The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

      +

      The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

      Score

      diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index e1489c58bb..0f73ca9ca5 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -64,14 +64,14 @@ # Publishing statistics pages. path('publishing-statistics/timeliness', ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), path('publishing-statistics/timeliness-timelag', ui.views.pubstats_timeliness_timelag, name="dash-publishingstats-timeliness-timelag"), - path('publishing-statistics/forward-looking', lambda x: None, name="dash-publishingstats-forwardlooking"), + path('publishing-statistics/forward-looking', ui.views.pubstats_forwardlooking, name="dash-publishingstats-forwardlooking"), path('publishing-statistics/comprehensiveness', ui.views.pubstats_comprehensiveness, name="dash-publishingstats-comprehensiveness"), path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), path('publishing-statistics/comprehensiveness/financials', ui.views.pubstats_comprehensiveness_financials, name="dash-publishingstats-comprehensiveness-financials"), path('publishing-statistics/comprehensiveness/value-added', ui.views.pubstats_comprehensiveness_valueadded, name="dash-publishingstats-comprehensiveness-valueadded"), path('publishing-statistics/coverage', lambda x: None, name="dash-publishingstats-coverage"), - path('publishing-statistics/summary-statistics', lambda x: None, name="dash-publishingstats-summarystats"), - path('publishing-statistics/humanitarian-reporting', lambda x: None, name="dash-publishingstats-humanitarian"), + path('publishing-statistics/summary-statistics', ui.views.pubstats_summarystats, name="dash-publishingstats-summarystats"), + path('publishing-statistics/humanitarian-reporting', ui.views.pubstats_humanitarian, name="dash-publishingstats-humanitarian"), # Redirects to support any users with bookmarks to pages on the old Dashboard. 
# path('timeliness.html', redirect("dash-publishingstats-timeliness")), diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 49259a9d6d..0e948642d5 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -5,6 +5,7 @@ # so not sure where they should fit. I've not included them in the page_view_names so hopefully # an exception will be raised if they are referenced somewhere. +import datetime import dateutil.parser import subprocess import json @@ -15,8 +16,11 @@ import comprehensiveness import config +import forwardlooking +import humanitarian import text import timeliness +import summary_stats import ui.template_funcs import vars @@ -160,6 +164,7 @@ def _make_context(page_name: str): metadata=metadata, slugs=slugs, datetime_data=dateutil.parser.parse(metadata['created_at']).strftime('%-d %B %Y (at %H:%M %Z)'), + current_year=datetime.datetime.now(datetime.UTC).year, stats_url='https://stats.codeforiati.org', stats_gh_url=STATS_GH_URL, commit_hash=COMMIT_HASH, @@ -418,32 +423,58 @@ def pubstats_comprehensiveness(request): context["comprehensiveness"] = comprehensiveness return HttpResponse(template.render(context, request)) + def pubstats_comprehensiveness_core(request): template = loader.get_template("comprehensiveness_core.html") context = _make_context("comprehensiveness") context["comprehensiveness"] = comprehensiveness return HttpResponse(template.render(context, request)) + def pubstats_comprehensiveness_financials(request): template = loader.get_template("comprehensiveness_financials.html") context = _make_context("comprehensiveness") context["comprehensiveness"] = comprehensiveness return HttpResponse(template.render(context, request)) + def pubstats_comprehensiveness_valueadded(request): template = loader.get_template("comprehensiveness_valueadded.html") context = _make_context("comprehensiveness") context["comprehensiveness"] = comprehensiveness return HttpResponse(template.render(context, request)) + def pubstats_timeliness(request): 
template = loader.get_template("timeliness.html") context = _make_context("timeliness") context["timeliness"] = timeliness return HttpResponse(template.render(context, request)) + def pubstats_timeliness_timelag(request): template = loader.get_template("timeliness_timelag.html") context = _make_context("timeliness") context["timeliness"] = timeliness return HttpResponse(template.render(context, request)) + + +def pubstats_summarystats(request): + template = loader.get_template("summary_stats.html") + context = _make_context("summary_stats") + context["summary_stats"] = summary_stats + return HttpResponse(template.render(context, request)) + + +def pubstats_forwardlooking(request): + template = loader.get_template("forwardlooking.html") + context = _make_context("forwardlooking") + context["forwardlooking"] = forwardlooking + return HttpResponse(template.render(context, request)) + + +def pubstats_humanitarian(request): + template = loader.get_template("humanitarian.html") + context = _make_context("humanitarian") + context["humanitarian"] = humanitarian + return HttpResponse(template.render(context, request)) From be1b889e8b71eb8712bbd918284ffb7e7af47d99 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 4 Nov 2024 18:07:33 +0000 Subject: [PATCH 344/375] refactor: Removed coverage page --- dashboard/templates/coverage.html | 47 ------------------------------- dashboard/text.py | 1 - 2 files changed, 48 deletions(-) delete mode 100644 dashboard/templates/coverage.html diff --git a/dashboard/templates/coverage.html b/dashboard/templates/coverage.html deleted file mode 100644 index bda3d541b4..0000000000 --- a/dashboard/templates/coverage.html +++ /dev/null @@ -1,47 +0,0 @@ -{% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} -{% block content %} - - - -{% endblock %} - -{% block tablesorteroptions %} -{ - widgets: ['stickyHeaders'], - textExtraction: { 8: function(node,table,cellIndex) { return $(node).attr('data-severity'); } } -} -{% 
endblock %} diff --git a/dashboard/text.py b/dashboard/text.py index 39ee0ca37e..c1d2ecbd13 100644 --- a/dashboard/text.py +++ b/dashboard/text.py @@ -37,7 +37,6 @@ 'traceability': 'Traceability', 'org_ids': 'Organisation Identifiers', 'publishing_stats': 'Publishing Statistics', - 'coverage': 'Coverage', 'timeliness': 'Timeliness', 'forwardlooking': 'Forward Looking', 'comprehensiveness': 'Comprehensiveness', From 78e42dd70696a58df6ed0c905fb9fd9a9e65b970 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Mon, 4 Nov 2024 18:08:30 +0000 Subject: [PATCH 345/375] refactor: Added registration agencies page Added the registration agencies page. Code for the view was taken from make_html.py. --- .../templates/registration_agencies.html | 8 +++--- dashboard/ui/urls.py | 7 +++-- dashboard/ui/views.py | 27 +++++++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/dashboard/templates/registration_agencies.html b/dashboard/templates/registration_agencies.html index 8f3fe14eed..67d0ee214e 100644 --- a/dashboard/templates/registration_agencies.html +++ b/dashboard/templates/registration_agencies.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import '_partials/boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
      @@ -19,7 +19,7 @@

      -{% for registration_agency, count in sorted(registration_agencies.items()) %} +{% for registration_agency, count in func.sorted(registration_agencies.items()) %} @@ -51,8 +51,8 @@

      {% for publisher, count in publishers.items() %}

      - - + + {% endfor %} diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 0f73ca9ca5..f10c68bb8f 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -16,7 +16,7 @@ """ from django.contrib import admin from django.urls import path -# from django.shortcuts import redirect +from django.views.generic.base import RedirectView import ui.views @@ -73,6 +73,10 @@ path('publishing-statistics/summary-statistics', ui.views.pubstats_summarystats, name="dash-publishingstats-summarystats"), path('publishing-statistics/humanitarian-reporting', ui.views.pubstats_humanitarian, name="dash-publishingstats-humanitarian"), + # Registration agencies. + path('registration-agencies', ui.views.registration_agencies, name="dash-registrationagencies"), + path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)) + # Redirects to support any users with bookmarks to pages on the old Dashboard. # path('timeliness.html', redirect("dash-publishingstats-timeliness")), # path('index.html', redirect("dash-index")), @@ -80,4 +84,3 @@ # path('exploring_data.html', redirect("dash-exploringdata")) ] -# Unsure where "rulesets" and "registration_agencies" should belong - can't find the route to these in make_html.py diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 0e948642d5..dc2e81b26f 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -87,6 +87,12 @@ def _get_licenses_for_publisher(publisher_name): for package in ckan[publisher_name].values()]) +def _registration_agency(orgid): + for code in codelist_sets['2']['OrganisationRegistrationAgency']: + if orgid.startswith(code): + return code + + def dictinvert(d): inv = collections.defaultdict(list) for k, v in d.items(): @@ -478,3 +484,24 @@ def pubstats_humanitarian(request): context = _make_context("humanitarian") context["humanitarian"] = humanitarian return HttpResponse(template.render(context, request)) + + +# +# Registration agencies 
page. +# +def registration_agencies(request): + template = loader.get_template("registration_agencies.html") + + context = _make_context("registration_agencies") + context["registration_agencies"] = collections.defaultdict(int) + context["registration_agencies_publishers"] = collections.defaultdict(list) + context["nonmatching"] = [] + for orgid, publishers in current_stats['inverted_publisher']['reporting_orgs'].items(): + reg_ag = _registration_agency(orgid) + if reg_ag: + context["registration_agencies"][reg_ag] += 1 + context["registration_agencies_publishers"][reg_ag] += list(publishers) + else: + context["nonmatching"].append((orgid, publishers)) + + return HttpResponse(template.render(context, request)) From a64ff901e99216ea43433056a965f83317779fe3 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 14:43:07 +0000 Subject: [PATCH 346/375] feat: Added redirects from previous site structure This commit adds a set of url routes that redirect from the old Dashboard to the new views. It also removes the url route for the coverage page. --- dashboard/ui/urls.py | 47 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index f10c68bb8f..08426d668b 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -15,7 +15,7 @@ 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ from django.contrib import admin -from django.urls import path +from django.urls import path, re_path from django.views.generic.base import RedirectView import ui.views @@ -69,18 +69,49 @@ path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), path('publishing-statistics/comprehensiveness/financials', ui.views.pubstats_comprehensiveness_financials, name="dash-publishingstats-comprehensiveness-financials"), path('publishing-statistics/comprehensiveness/value-added', ui.views.pubstats_comprehensiveness_valueadded, name="dash-publishingstats-comprehensiveness-valueadded"), - path('publishing-statistics/coverage', lambda x: None, name="dash-publishingstats-coverage"), path('publishing-statistics/summary-statistics', ui.views.pubstats_summarystats, name="dash-publishingstats-summarystats"), path('publishing-statistics/humanitarian-reporting', ui.views.pubstats_humanitarian, name="dash-publishingstats-humanitarian"), # Registration agencies. path('registration-agencies', ui.views.registration_agencies, name="dash-registrationagencies"), - path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)) + path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)), # Redirects to support any users with bookmarks to pages on the old Dashboard. 
- # path('timeliness.html', redirect("dash-publishingstats-timeliness")), - # path('index.html', redirect("dash-index")), - # path('summary_stats.html', redirect("dash-publishingstats-summarystats")), - # path('exploring_data.html', redirect("dash-exploringdata")) - + path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), + path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), + path("data_quality.html", RedirectView.as_view(pattern_name="dash-dataquality", permanent=True)), + path("exploring_data.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), + path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), + path("timeliness_timelag.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True)), + path("forwardlooking.html", RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True)), + path("comprehensiveness.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True)), + path("comprehensiveness_core.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True)), + path("comprehensiveness_financials.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True)), + path("comprehensiveness_valueadded.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True)), + path("summary_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True)), + path("humanitarian.html", 
RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True)), + path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), + path("activities.html", RedirectView.as_view(pattern_name="dash-headlines-activities", permanent=True)), + path("download.html", RedirectView.as_view(pattern_name="dash-dataquality-download", permanent=True)), + path("xml.html", RedirectView.as_view(pattern_name="dash-dataquality-xml", permanent=True)), + path("validation.html", RedirectView.as_view(pattern_name="dash-dataquality-validation", permanent=True)), + path("versions.html", RedirectView.as_view(pattern_name="dash-dataquality-versions", permanent=True)), + path("organisation.html", RedirectView.as_view(pattern_name="dash-dataquality-organisation", permanent=True)), + path("identifiers.html", RedirectView.as_view(pattern_name="dash-dataquality-identifiers", permanent=True)), + path("reporting_orgs.html", RedirectView.as_view(pattern_name="dash-dataquality-reportingorgs", permanent=True)), + path("elements.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("codelists.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + path("booleans.html", RedirectView.as_view(pattern_name="dash-exploringdata-booleans", permanent=True)), + path("dates.html", RedirectView.as_view(pattern_name="dash-exploringdata-dates", permanent=True)), + path("traceability.html", RedirectView.as_view(pattern_name="dash-exploringdata-traceability", permanent=True)), + path("org_ids.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + path("faq.html", RedirectView.as_view(pattern_name="dash-faq", permanent=True)), + path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + 
re_path(r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + re_path(r"codelist\/\d\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + re_path(r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), ] From c82b2288253d2b69853e101a8b877adda1d7ce21 Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 16:14:50 +0000 Subject: [PATCH 347/375] testing: Added basic automated testing of URL routes Using the Django test framework this commit adds some basic automated testing of the Dashboard URL routes, contains some fixes to the view functions to properly return HTTP 404 errors, and updates the documentation (including a small fix for the file paths). A small fix was also added to pytest.ini. --- README.md | 21 ++++- dashboard/ui/tests.py | 194 ++++++++++++++++++++++++++++++++++++++++++ dashboard/ui/views.py | 16 +++- pytest.ini | 2 +- 4 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 dashboard/ui/tests.py diff --git a/README.md b/README.md index 4592fe5625..9dfc217df5 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ The IATI Dashboard is mostly written in Python but also has some helper Bash scr 3. Build the static graphs and other data that will be served via the Dashboard. 4. Run the web server. -Paths to different directories are set in `./src/config.py`. +Paths to different directories are set in `./dashboard/config.py`. ### 1. Setup environment @@ -64,7 +64,7 @@ Bash scripts are used to fetch the data that the Dashboard will present. They w ``` mkdir out -cd src +cd dashboard python make_plots.py python make_csv.py python speakers_kit.py @@ -75,7 +75,7 @@ cp ../img/publishers static/ ### 4. Run the webserver. 
-From `./src/`: +From `./dashboard/`: ``` python manage.py runserver @@ -86,6 +86,21 @@ The Dashboard will now be accessible from `localhost:8000/`. ## Development +### Automated tests +There are some unit tests written using `pytest` and site testing using Django's own testing framework. + +Once the development dependencies have been installed the unit tests can be run with: + +``` +pytest +``` + +The Django site tests can be run from the `dashboard/` directory with: + +``` +python manage.py test -v 2 +``` + ### Calculating your own statistics The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory). diff --git a/dashboard/ui/tests.py b/dashboard/ui/tests.py new file mode 100644 index 0000000000..11d0970848 --- /dev/null +++ b/dashboard/ui/tests.py @@ -0,0 +1,194 @@ +from django.test import TestCase +from django.urls import reverse + + +class BasicPageTests(TestCase): + """Perform basic HTTP 200/404 checks on the Dashboard pages + + These are split up into a number of functions because some can + take some time to run and so running with the "-v 2" flag will + list the tests as they run. 
+ """ + + def test_top_pages(self): + """Test the index and top hierarchy pages return a 200 status code """ + + self.assertEqual(self.client.get(reverse("dash-index")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-faq")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-registrationagencies")).status_code, 200) + + def test_headlines(self): + """Test the headlines pages """ + + self.assertEqual(self.client.get(reverse("dash-headlines-files")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-activities")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publishers")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("undp", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("not-a-valid-publisher", ))).status_code, 404) + + def test_dataquality(self): + """Test the data quality pages""" + + self.assertEqual(self.client.get(reverse("dash-dataquality-download")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-download-json")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-xml")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-validation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-versions")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-organisation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-identifiers")).status_code, 200) + 
self.assertEqual(self.client.get(reverse("dash-dataquality-reportingorgs")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("cc-by", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("not-a-valid-license", ))).status_code, 404) + + def test_publishingstats_timeliness(self): + """Test timeliness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness-timelag")).status_code, 200) + + def test_publishingstats_comprehensiveness(self): + """Test comprehensiveness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-core")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-financials")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-valueadded")).status_code, 200) + + def test_publishingstats_forwardlooking(self): + """Test the forward looking page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-forwardlooking")).status_code, 200) + + def test_publishingstats_summarystats(self): + """Test the summary statistics page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-summarystats")).status_code, 200) + + def test_publishingstats_humanitarian(self): + """Test the humanitarian page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-humanitarian")).status_code, 
200) + + def test_exploringdata(self): + """Test the exploring data pages""" + self.assertEqual(self.client.get(reverse("dash-exploringdata-booleans")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "budget_@type", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "not-a-valid-slug", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("3", "budget_@type", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-dates")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("iati-activity_activity-date_narrative", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("not-a-valid-element", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgids")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("funding_org", ))).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("not-a-valid-org-type", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-exploringdata-traceability")).status_code, 200) + + +class OriginalDashboardRedirectTests(TestCase): + """Perform basic HTTP 301 redirection checks on the Dashboard pages + + These are split up into a number of functions because some can + take some time to run and so running with the "-v 2" flag will + list the tests as they run. 
+ """ + + def _url_and_view_helper(self, urls_and_views_to_check): + """Checks that a set of URLs redirect to matching view functions""" + + for url, view_name in urls_and_views_to_check.items(): + self.assertRedirects(self.client.get(f"/{url}.html"), reverse(view_name), status_code=301) + + def test_headlines_and_misc(self): + """Test headlines and miscellaneous pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper({ + "index": "dash-index", + "headlines": "dash-headlines", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publishers": "dash-headlines-publishers", + "faq": "dash-faq", + "registration_agencies": "dash-registrationagencies" + }) + + def test_dataquality(self): + """Test data quality pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper({ + "data_quality": "dash-dataquality", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "licenses": "dash-dataquality-licenses" + }) + + def test_publishingstats(self): + """Test publishing stats pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. 
+ self._url_and_view_helper({ + "publishing_stats": "dash-publishingstats", + "timeliness": "dash-publishingstats-timeliness", + "timeliness_timelag": "dash-publishingstats-timeliness-timelag", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "comprehensiveness_core": "dash-publishingstats-comprehensiveness-core", + "comprehensiveness_financials": "dash-publishingstats-comprehensiveness-financials", + "comprehensiveness_valueadded": "dash-publishingstats-comprehensiveness-valueadded", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian" + }) + + def test_exploringdata(self): + """Test exploring data pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper({ + "exploring_data": "dash-exploringdata-elements", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids" + }) + + def test_slug_page_redirects(self): + """Test pages with slugs redirect to the section page""" + + self.assertRedirects(self.client.get(r"/publisher/undp.html"), + reverse("dash-headlines-publishers"), + status_code=301) + self.assertRedirects(self.client.get(r"/license/cc-by.html"), + reverse("dash-dataquality-licenses"), + status_code=301) + self.assertRedirects(self.client.get(r"/codelist/2/budget_@type.html"), + reverse("dash-exploringdata-codelists"), + status_code=301) + self.assertRedirects(self.client.get(r"/element/iati-activity_activity-date_narrative.html"), + reverse("dash-exploringdata-elements"), + status_code=301) + 
self.assertRedirects(self.client.get(r"/org_type/funding_org.html"), + reverse("dash-exploringdata-orgids"), + status_code=301) diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index dc2e81b26f..9a013a900b 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -321,6 +321,9 @@ def dataquality_licenses(request): def dataquality_licenses_detail(request, license_id=None): template = loader.get_template("license.html") + if license_id not in LICENSE_URLS: + raise Http404("Unknown license") + publishers = [ publisher_name for publisher_name, publisher in ckan.items() @@ -365,6 +368,10 @@ def exploringdata_elements(request): def exploringdata_element_detail(request, element=None): template = loader.get_template("element.html") context = _make_context("elements") + + if element not in slugs['element']['by_slug']: + raise Http404("Unknown element or attribute") + i = slugs['element']['by_slug'][element] context["element"] = list(current_stats['inverted_publisher']['elements'])[i] context["publishers"] = list(current_stats['inverted_publisher']['elements'].values())[i] @@ -378,7 +385,9 @@ def exploringdata_orgids(request): def exploringdata_orgtypes_detail(request, org_type=None): - assert org_type in slugs['org_type']['by_slug'] + if org_type not in slugs['org_type']['by_slug']: + raise Http404("Unknown organisation type") + template = loader.get_template("org_type.html") context = _make_context("org_ids") context["slug"] = org_type @@ -393,6 +402,11 @@ def exploringdata_codelists(request): def exploringdata_codelists_detail(request, major_version=None, attribute=None): template = loader.get_template("codelist.html") + if major_version not in slugs['codelist']: + raise Http404("Unknown major version of the IATI standard") + if attribute not in slugs['codelist'][major_version]['by_slug']: + raise Http404("Unknown attribute") + context = _make_context("codelists") i = slugs['codelist'][major_version]['by_slug'][attribute] element = 
list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] diff --git a/pytest.ini b/pytest.ini index cebc5c7e27..20843432d9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -testpaths = src/tests +testpaths = dashboard/tests norecursedirs = *__pycache__* *.pytest_cache* console_output_style = count From a43e003bfef9363ad487081ee667f9c6dbc6ac0a Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Tue, 5 Nov 2024 16:32:10 +0000 Subject: [PATCH 348/375] refactor: Moved file path constants into ui settings module This commit moves the file path constants from config.py into the ui settings module so that it is more consistent with Django's settings architecture. --- dashboard/config.py | 14 +++++--------- dashboard/ui/settings.py | 8 ++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/dashboard/config.py b/dashboard/config.py index 0fd3c199fd..49570b2190 100644 --- a/dashboard/config.py +++ b/dashboard/config.py @@ -6,32 +6,28 @@ import os.path - -STATS_DIRECTORY = "../stats-calculated" -DATA_DIRECTORY = "../data" -BASE_DIRECTORY = "../" -OUT_DIRECTORY = "../out" +import ui.settings def join_stats_path(p: str) -> str: """Make a path to a file or directory within the downloaded stats directory """ - return os.path.join(STATS_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p) def join_data_path(p: str) -> str: """Make a path to a file or directory within the downloaded data directory """ - return os.path.join(DATA_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p) def join_base_path(p: str) -> str: """Make a path to a file or directory relative to the base of the dashboard directory """ - return os.path.join(BASE_DIRECTORY, p) + return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p) def join_out_path(p: str) -> str: """Make a path to a file or directory relative to the base of the out directory """ - return os.path.join(OUT_DIRECTORY, p) + 
return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p) diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index d8008da83b..c0461c3d1b 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -136,3 +136,11 @@ # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# +# Relative (to dashboard/) paths to IATI data and output directories. +# +DASHBOARD_STATS_DIRECTORY = "../stats-calculated" +DASHBOARD_DATA_DIRECTORY = "../data" +DASHBOARD_BASE_DIRECTORY = "../" +DASHBOARD_OUT_DIRECTORY = "../out" From 7b7893ecd631149b5b1acb0497551cb5b6057b4b Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 6 Nov 2024 13:50:42 +0000 Subject: [PATCH 349/375] git.sh: Switch back to using git.sh for doing deploy From https://github.com/IATI/IATI-Dashboard/blob/923f65caac8b3032e516c93d36e4f9b6f0ff0653/git.sh --- .github/workflows/build.yml | 75 ------------------------------------- git.sh | 34 +++++++++++++++++ 2 files changed, 34 insertions(+), 75 deletions(-) delete mode 100644 .github/workflows/build.yml create mode 100755 git.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 76d5e869da..0000000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Build -on: - push: - branches: - - dev - workflow_dispatch: # for triggering builds manually - repository_dispatch: - types: - - build -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Checkout this repo - uses: actions/checkout@v3 - with: - persist-credentials: false - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - uses: actions/cache@v3 - name: Cache dependencies - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r 
requirements.txt - - name: Fetch data - run: ./fetch_data.sh - - name: Fetch stats - run: | - mkdir out - ./get_stats.sh - - name: Make plots - run: python plots.py - - name: Build CSV output - run: python make_csv.py - - name: Build HTML output - run: python make_html.py - - name: Delete files - run: rm -rf data stats-calculated - - name: Deploy (production) 🚀 - if: github.ref == 'refs/heads/main' - uses: JamesIves/github-pages-deploy-action@v4 - with: - git-config-name: Code for IATI bot - git-config-email: 57559326+codeforIATIbot@users.noreply.github.com - token: ${{ secrets.TOKEN }} - repository-name: codeforIATI/analytics-public - branch: gh-pages - folder: out - clean: true - silent: true - single-commit: true - - name: Set CNAME (dev) - if: github.ref == 'refs/heads/dev' - run: echo "analytics-dev.codeforiati.org" > out/CNAME - - name: Deploy (dev) 🚀 - if: github.ref == 'refs/heads/dev' - uses: JamesIves/github-pages-deploy-action@v4 - with: - git-config-name: Code for IATI bot - git-config-email: 57559326+codeforIATIbot@users.noreply.github.com - token: ${{ secrets.TOKEN }} - repository-name: codeforIATI/analytics-dev - branch: gh-pages - folder: out - clean: true - silent: true - single-commit: true diff --git a/git.sh b/git.sh new file mode 100755 index 0000000000..c7b6c780a5 --- /dev/null +++ b/git.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Starting Dashboard generation" + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Removing 'out' directory and creating a new one" +rm -rf out +mkdir out + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Fetching data" +./fetch_data.sh &> fetch_data.log || exit 1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running plots.py" +python plots.py || exit 1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_csv.py" +python make_csv.py || exit 1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" +python speakers_kit.py || exit 1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running 
make_html.py" +python make_html.py $1 $2|| exit 1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" +cp static/img/favicon.png out/ +cp static/img/tablesorter-icons.gif out/ + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" +rsync -a --delete web web.bk +mv web web.1 +mv out web +rm -rf web.1 + +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Dashboard generation complete" From 4511550737f075fe5d860fc16b59c08e9a441e20 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 6 Nov 2024 13:50:05 +0000 Subject: [PATCH 350/375] git.sh: Update for dynamic django deploy --- git.sh | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/git.sh b/git.sh index c7b6c780a5..c6c0e70178 100755 --- a/git.sh +++ b/git.sh @@ -9,8 +9,10 @@ mkdir out echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Fetching data" ./fetch_data.sh &> fetch_data.log || exit 1 +cd dashboard + echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running plots.py" -python plots.py || exit 1 +python make_plots.py || exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_csv.py" python make_csv.py || exit 1 @@ -18,12 +20,7 @@ python make_csv.py || exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" python speakers_kit.py || exit 1 -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_html.py" -python make_html.py $1 $2|| exit 1 - -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" -cp static/img/favicon.png out/ -cp static/img/tablesorter-icons.gif out/ +cd .. 
echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" rsync -a --delete web web.bk From 2fd2bcb96f4dca2fb60272b350755a88f434feee Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Tue, 29 Oct 2024 17:30:10 +0000 Subject: [PATCH 351/375] Add Dockerfile This is taken from https://github.com/IATI/deploy/blob/9e9d3009382517f329e62912bbabd0ab57bc3881/salt/salt/dashboard-merged/dashboard.Dockerfile --- Dockerfile | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..623da6f290 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.12-bookworm + +WORKDIR /work + +COPY requirements.txt /work/IATI-Dashboard/requirements.txt + +RUN git config --global --add safe.directory /work/IATI-Stats/data + +RUN pip install -r /work/IATI-Dashboard/requirements.txt + +# 2024-03-20: Emergency fix +# We were seeing cert errors inside the docker container after a new Lets Encrypt was issued. +# +# We know there are changes coming about root certificates and the error may be caused by that: +# https://blog.cloudflare.com/upcoming-lets-encrypt-certificate-chain-change-and-impact-for-cloudflare-customers +# +# I tried installing the LE root cert's manually but that didn't work. +# As live is broken for now we need this emergency fix, but we should remove it in the future. 
+RUN echo "check_certificate=off" > /root/.wgetrc From e23f20d287acb0ea11a0ac2656a8b6efaade2fb6 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 6 Nov 2024 15:45:47 +0000 Subject: [PATCH 352/375] Dockerfile: run dynamic django agpp using gunicorn --- .dockerignore | 2 ++ Dockerfile | 6 +++++- requirements.in | 3 ++- requirements.txt | 8 ++++++-- requirements_dev.txt | 3 +++ 5 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..7a451343fe --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +data +out diff --git a/Dockerfile b/Dockerfile index 623da6f290..fc8b288d98 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.12-bookworm -WORKDIR /work +WORKDIR /work/IATI-Dashboard/dashboard COPY requirements.txt /work/IATI-Dashboard/requirements.txt @@ -8,6 +8,8 @@ RUN git config --global --add safe.directory /work/IATI-Stats/data RUN pip install -r /work/IATI-Dashboard/requirements.txt +COPY . /work/IATI-Dashboard + # 2024-03-20: Emergency fix # We were seeing cert errors inside the docker container after a new Lets Encrypt was issued. # @@ -17,3 +19,5 @@ RUN pip install -r /work/IATI-Dashboard/requirements.txt # I tried installing the LE root cert's manually but that didn't work. # As live is broken for now we need this emergency fix, but we should remove it in the future. 
RUN echo "check_certificate=off" > /root/.wgetrc + +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "120", "ui.wsgi:application"] diff --git a/requirements.in b/requirements.in index 523b2f0a4e..488212fa86 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,5 @@ django +gunicorn flask frozen-flask jinja2 @@ -11,4 +12,4 @@ lxml requests markupsafe itsdangerous -tqdm \ No newline at end of file +tqdm diff --git a/requirements.txt b/requirements.txt index afde658def..cb1e9ed6fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements.in +# pip-compile # asgiref==3.8.1 # via django @@ -30,6 +30,8 @@ fonttools==4.54.1 # via matplotlib frozen-flask==1.0.2 # via -r requirements.in +gunicorn==23.0.0 + # via -r requirements.in idna==3.10 # via requests itsdangerous==2.2.0 @@ -56,7 +58,9 @@ numpy==2.1.1 # contourpy # matplotlib packaging==24.1 - # via matplotlib + # via + # gunicorn + # matplotlib pillow==10.4.0 # via matplotlib pyparsing==3.1.4 diff --git a/requirements_dev.txt b/requirements_dev.txt index 480e11b876..32fbe21f09 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -58,6 +58,8 @@ fonttools==4.54.1 # matplotlib frozen-flask==1.0.2 # via -r requirements.txt +gunicorn==23.0.0 + # via -r requirements.txt idna==3.10 # via # -r requirements.txt @@ -95,6 +97,7 @@ numpy==2.1.1 packaging==24.1 # via # -r requirements.txt + # gunicorn # matplotlib # pytest pillow==10.4.0 From bbdd8a51afac6d86b5001e00a2569d292721d46c Mon Sep 17 00:00:00 2001 From: Chris Arridge Date: Wed, 6 Nov 2024 23:17:55 +0000 Subject: [PATCH 353/375] refactor: Moved file path functions into a more sensibly named module After moving configuration constants into settings.py the config.py module was somewhat poorly named, this commit changes the name and refactors the other code to use the new name. 
--- dashboard/common.py | 4 +-- dashboard/coverage.py | 4 +-- dashboard/data.py | 46 +++++++++++++-------------- dashboard/{config.py => filepaths.py} | 0 dashboard/make_csv.py | 24 +++++++------- dashboard/make_plots.py | 20 ++++++------ dashboard/speakers_kit.py | 14 ++++---- dashboard/timeliness.py | 8 ++--- dashboard/ui/views.py | 13 +++----- 9 files changed, 64 insertions(+), 69 deletions(-) rename dashboard/{config.py => filepaths.py} (100%) diff --git a/dashboard/common.py b/dashboard/common.py index a7922798b4..41f773b8c8 100644 --- a/dashboard/common.py +++ b/dashboard/common.py @@ -2,10 +2,10 @@ import data import json -import config +import filepaths # Import organisation_type_codelist as a global, then delete when used to save memory -with open(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: +with open(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: organisation_type_codelist = json.load(fh) organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']} del organisation_type_codelist diff --git a/dashboard/coverage.py b/dashboard/coverage.py index 35ae7549e9..58885685ab 100644 --- a/dashboard/coverage.py +++ b/dashboard/coverage.py @@ -6,7 +6,7 @@ from data import publisher_name from data import publishers_ordered_by_title from data import secondary_publishers -import config +import filepaths def is_number(s): @@ -180,7 +180,7 @@ def table(): # Compile a list of Development finance institutions (DFIs) -with open(config.join_base_path('dfi_publishers.csv'), 'r') as csv_file: +with open(filepaths.join_base_path('dfi_publishers.csv'), 'r') as csv_file: reader = csv.reader(csv_file, delimiter=',') dfi_publishers = [] for line in reader: diff --git a/dashboard/data.py b/dashboard/data.py index ba59673127..01edc56ffa 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -8,7 +8,7 @@ import xmlschema -import config +import filepaths # Modified 
from: @@ -120,7 +120,7 @@ def get_publisher_name(self): # Loop over this list and return the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir(config.join_stats_path('current/aggregated-publisher')).keys(): + if x in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys(): return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher @@ -133,7 +133,7 @@ def get_publisher_stats(publisher, stats_type='aggregated'): is not found. """ try: - return JSONDir(config.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) + return JSONDir(filepaths.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) except IOError: return {} @@ -145,7 +145,7 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - with open(config.join_base_path('registry_id_relationships.csv')) as f: + with open(filepaths.join_base_path('registry_id_relationships.csv')) as f: reader = csv.DictReader(f) # Load this data into a dictonary registry_matches = { @@ -184,33 +184,33 @@ def deep_merge(obj1, obj2): current_stats = { - 'aggregated': JSONDir(config.join_stats_path('current/aggregated')), - 'aggregated_file': JSONDir(config.join_stats_path('current/aggregated-file')), - 'inverted_publisher': JSONDir(config.join_stats_path('current/inverted-publisher')), - 'inverted_file': JSONDir(config.join_stats_path('current/inverted-file')), - 'inverted_file_publisher': JSONDir(config.join_stats_path('current/inverted-file-publisher')), + 'aggregated': JSONDir(filepaths.join_stats_path('current/aggregated')), + 'aggregated_file': JSONDir(filepaths.join_stats_path('current/aggregated-file')), + 'inverted_publisher': JSONDir(filepaths.join_stats_path('current/inverted-publisher')), + 'inverted_file': JSONDir(filepaths.join_stats_path('current/inverted-file')), + 'inverted_file_publisher': 
JSONDir(filepaths.join_stats_path('current/inverted-file-publisher')), 'download_errors': [] } -ckan_publishers = JSONDir(config.join_data_path('ckan_publishers')) -github_issues = JSONDir(config.join_data_path('github/publishers')) -ckan = json.load(open(config.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) +ckan_publishers = JSONDir(filepaths.join_data_path('ckan_publishers')) +github_issues = JSONDir(filepaths.join_data_path('github/publishers')) +ckan = json.load(open(filepaths.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) dataset_to_publisher_dict = { dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() } -metadata = json.load(open(config.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) -with open(config.join_data_path('downloads/errors')) as fp: +metadata = json.load(open(filepaths.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) +with open(filepaths.join_data_path('downloads/errors')) as fp: for line in fp: if line != '.\n': current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) sources105 = [ - config.join_data_path('schemas/1.05/iati-activities-schema.xsd'), - config.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] + filepaths.join_data_path('schemas/1.05/iati-activities-schema.xsd'), + filepaths.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] sources203 = [ - config.join_data_path('schemas/2.03/iati-activities-schema.xsd'), - config.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] + filepaths.join_data_path('schemas/2.03/iati-activities-schema.xsd'), + filepaths.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] schema105 = xmlschema.XMLSchema(sources105) schema203 = xmlschema.XMLSchema(sources203) @@ -246,7 +246,7 @@ def transform_codelist_mapping_keys(codelist_mapping): def create_codelist_mapping(major_version): codelist_mapping = {} - for x in 
json.load(open(config.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): + for x in json.load(open(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): if 'condition' in x: pref, attr = x['path'].rsplit('/', 1) path = '{0}[{1}]/{2}'.format( @@ -264,12 +264,12 @@ def create_codelist_mapping(major_version): # Create a big dictionary of all codelist values by version and codelist name codelist_sets = { major_version: { - cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() + cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} codelist_lookup = { major_version: { - cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(config.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() + cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() } for major_version in MAJOR_VERSIONS} # Simple look up to map publisher id to a publishers given name (title) @@ -279,11 +279,11 @@ def create_codelist_mapping(major_version): publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher -secondary_publishers = [publisher for publisher, stats in JSONDir(config.join_stats_path('current/aggregated-publisher')).items() +secondary_publishers = [publisher for publisher, stats in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items() if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0] try: - dac2012 = {x[0]: 
Decimal(x[1].replace(',', '')) for x in csv.reader(open(config.join_data_path('dac2012.csv')))} + dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(filepaths.join_data_path('dac2012.csv')))} except IOError: dac2012 = {} diff --git a/dashboard/config.py b/dashboard/filepaths.py similarity index 100% rename from dashboard/config.py rename to dashboard/filepaths.py diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py index f34e2f9e3c..74e076efec 100644 --- a/dashboard/make_csv.py +++ b/dashboard/make_csv.py @@ -12,7 +12,7 @@ import comprehensiveness import summary_stats import humanitarian -import config +import filepaths logger = logging.getLogger(__name__) @@ -51,10 +51,10 @@ def main(): logger.addHandler(logging.StreamHandler(sys.stdout)) logger.info("Generating CSV files") - os.makedirs(config.join_out_path('data/csv'), exist_ok=True) + os.makedirs(filepaths.join_out_path('data/csv'), exist_ok=True) logger.info("Generating publishers.csv") - with open(config.join_out_path('data/csv/publishers.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/publishers.csv'), 'w') as fp: writer = csv.DictWriter(fp, [ 'Publisher Name', 'Publisher Registry Id', @@ -76,7 +76,7 @@ def main(): logger.info("Generating elements.csv") publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) - with open(config.join_out_path('data/csv/elements.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/elements.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + publishers) writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): @@ -84,7 +84,7 @@ def main(): writer.writerow(publisher_dict) logger.info("Generating elements_total.csv") - with open(config.join_out_path('data/csv/elements_total.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/elements_total.csv'), 'w') as fp: writer = csv.DictWriter(fp, ['Element'] + publishers) 
writer.writeheader() for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): @@ -92,7 +92,7 @@ def main(): writer.writerow(publisher_dict) logger.info("Generating registry.csv") - with open(config.join_out_path('data/csv/registry.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/registry.csv'), 'w') as fp: keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] writer = csv.DictWriter(fp, keys) writer.writeheader() @@ -101,21 +101,21 @@ def main(): logger.info("Generating timeliness_frequency.csv") previous_months = timeliness.previous_months_reversed - with open(config.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published']) for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band]) logger.info("Generating timeliness_timelag.csv") - with open(config.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time 
lag']) for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) logger.info("Generating forwardlooking.csv") - with open(config.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: writer = csv.writer(fp) writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) for row in forwardlooking.table(): @@ -123,7 +123,7 @@ def main(): for tab in comprehensiveness.columns.keys(): logger.info("Generating comprehensiveness_{}.csv".format(tab)) - with open(config.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + with open(filepaths.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: writer = csv.writer(fp) if tab == 'financials': writer.writerow(['Publisher Name', 'Publisher Registry Id'] + @@ -145,7 +145,7 @@ def main(): [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) logger.info("Generating summary_stats.csv") - with open(config.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) @@ -154,7 +154,7 @@ def main(): writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) logger.info("Generating humanitarian.csv") - with open(config.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: + with open(filepaths.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: writer = csv.writer(fp) # Add column headers writer.writerow([ diff --git 
a/dashboard/make_plots.py b/dashboard/make_plots.py index 70d2665974..cdbe27640d 100644 --- a/dashboard/make_plots.py +++ b/dashboard/make_plots.py @@ -13,7 +13,7 @@ from tqdm import tqdm import common import data -import config +import filepaths from vars import expected_versions # noqa: F401 import matplotlib as mpl mpl.use('Agg') @@ -96,7 +96,7 @@ def make_plot(stat_path, git_stats, img_prefix=''): else: fig_legend.legend(plots.values(), plots.keys(), loc='center', ncol=4) fig_legend.set_size_inches(600.0 / dpi, 100.0 / dpi) - fig_legend.savefig(config.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) + fig_legend.savefig(filepaths.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) else: keys = None ax.plot(x_values, y_values) @@ -123,10 +123,10 @@ def make_plot(stat_path, git_stats, img_prefix=''): ax.ticklabel_format(axis='y', style='plain', useOffset=False) - fig.savefig(config.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) + fig.savefig(filepaths.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) plt.close('all') - fn = config.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) + fn = filepaths.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) with open(fn, 'w') as fp: writer = csv.writer(fp) if keys: @@ -148,15 +148,15 @@ def main(): args = parser.parse_args() # Load data required for loading stats. - failed_downloads = csv.reader(open(config.join_data_path('downloads/history.csv'))) - gitaggregate_publisher = data.JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) + failed_downloads = csv.reader(open(filepaths.join_data_path('downloads/history.csv'))) + gitaggregate_publisher = data.JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) # Generate plots for aggregated stats for all data. 
logger.info("Generating plots for all aggregated data") - git_stats = AugmentedJSONDir(config.join_stats_path('gitaggregate-dated'), + git_stats = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-dated'), failed_downloads, gitaggregate_publisher) - os.makedirs(config.join_out_path('img/aggregate'), exist_ok=True) + os.makedirs(filepaths.join_out_path('img/aggregate'), exist_ok=True) _paths = [ 'activities', @@ -192,10 +192,10 @@ def main(): # Generate plots for each publisher. logger.info("Generating plots for all publishers") - git_stats_publishers = AugmentedJSONDir(config.join_stats_path('gitaggregate-publisher-dated/'), + git_stats_publishers = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated/'), failed_downloads, gitaggregate_publisher) - os.makedirs(config.join_out_path('img/publishers'), exist_ok=True) + os.makedirs(filepaths.join_out_path('img/publishers'), exist_ok=True) with tqdm(total=len(git_stats_publishers)) as pbar: if args.verbose: diff --git a/dashboard/speakers_kit.py b/dashboard/speakers_kit.py index b6e517d5f5..0fb9b4bde2 100644 --- a/dashboard/speakers_kit.py +++ b/dashboard/speakers_kit.py @@ -4,7 +4,7 @@ from collections import defaultdict from itertools import zip_longest -import config +import filepaths def codelist_dict(codelist_path): @@ -12,11 +12,11 @@ def codelist_dict(codelist_path): return {c['code']: c['name'] for c in codelist_json['data']} -organisation_type_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) -country_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) -region_dict = codelist_dict(config.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) +organisation_type_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) +country_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) +region_dict 
= codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) -aggregated_publisher = data.JSONDir(config.join_stats_path('current/aggregated-publisher/')) +aggregated_publisher = data.JSONDir(filepaths.join_stats_path('current/aggregated-publisher/')) activities_by = defaultdict(lambda: defaultdict(int)) publishers_by = defaultdict(lambda: defaultdict(int)) @@ -48,7 +48,7 @@ def codelist_dict(codelist_path): fieldnames = ['publisher_type', 'publishers_by_type', '', 'publisher_country', 'publishers_by_country', '', 'date', 'publishers_quarterly', '', 'activity_country', 'activities_by_country', '', 'activity_region', 'activities_by_region'] publishers_quarterly = [] -publishers_by_date = json.load(open(config.join_stats_path('gitaggregate-dated/publishers.json'))) +publishers_by_date = json.load(open(filepaths.join_stats_path('gitaggregate-dated/publishers.json'))) for date, publishers in sorted(publishers_by_date.items()): if (date[8:10] == '30' and date[5:7] in ['06', '09']) or (date[8:10] == '31' and date[5:7] in ['03', '12']): publishers_quarterly.append((date, publishers)) @@ -58,7 +58,7 @@ def sort_second(x): return sorted(x, key=lambda y: y[1], reverse=True) -with open(config.join_out_path('speakers_kit.csv'), 'w') as fp: +with open(filepaths.join_out_path('speakers_kit.csv'), 'w') as fp: writer = csv.DictWriter(fp, fieldnames) writer.writeheader() for publishers_by_type, publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in zip_longest( diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index 96f8f00791..d1074ceca7 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -4,7 +4,7 @@ import datetime from dateutil.relativedelta import relativedelta from collections import defaultdict, Counter -import config +import filepaths def short_month(month_str): @@ -59,7 +59,7 @@ def publisher_frequency(): """ # Load all the data from 'gitaggregate-publisher-dated' into memory 
- gitaggregate_publisher = JSONDir(config.join_stats_path('gitaggregate-publisher-dated')) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' for publisher, agg in gitaggregate_publisher.items(): @@ -172,7 +172,7 @@ def first_published_band_index(first_published_band): def publisher_timelag(): - return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(config.join_stats_path('current/aggregated-publisher')).items()] + return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items()] def publisher_timelag_sorted(): @@ -205,7 +205,7 @@ def has_future_transactions(publisher): if transaction_date and transaction_date > datetime.date.today(): return 2 - gitaggregate_publisher = JSONDir(config.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) for date, activity_blacklist in gitaggregate_publisher.get('activities_with_future_transactions', {}).items(): if parse_iso_date(date) >= mindate and activity_blacklist: diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 9a013a900b..7cc3dd7eec 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -1,10 +1,5 @@ """Views for the IATI Dashboard""" -# Note: in the page views I am unsure where "rulesets" and "registration_agencies" should -# belong - they exist in text.page_tiles but I can't find the route to these in make_html.py -# so not sure where they should fit. 
I've not included them in the page_view_names so hopefully -# an exception will be raised if they are referenced somewhere. - import datetime import dateutil.parser import subprocess @@ -15,7 +10,7 @@ from django.template import loader import comprehensiveness -import config +import filepaths import forwardlooking import humanitarian import text @@ -43,15 +38,15 @@ COMMIT_HASH = subprocess.run('git show --format=%H --no-patch'.split(), - cwd=config.join_base_path(""), + cwd=filepaths.join_base_path(""), capture_output=True).stdout.decode().strip() STATS_COMMIT_HASH = subprocess.run('git -C stats-calculated show --format=%H --no-patch'.split(), - cwd=config.join_base_path(""), + cwd=filepaths.join_base_path(""), capture_output=True).stdout.decode().strip() STATS_GH_URL = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + STATS_COMMIT_HASH # Load all the licenses and generate data for each licence and publisher. -with open(config.join_stats_path('licenses.json')) as handler: +with open(filepaths.join_stats_path('licenses.json')) as handler: LICENSE_URLS = json.load(handler) LICENSES = [ From 822c16699b8018908adebdb55a869eb39e582691 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Thu, 7 Nov 2024 15:51:38 +0000 Subject: [PATCH 354/375] requirements: Update all requirements --- requirements.txt | 20 ++++++++++---------- requirements_dev.txt | 24 ++++++++++++------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/requirements.txt b/requirements.txt index cb1e9ed6fc..d12165c3e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ blinker==1.8.2 # via flask certifi==2024.8.30 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests click==8.1.7 # via flask @@ -18,9 +18,9 @@ contourpy==1.3.0 # via matplotlib cycler==0.12.1 # via matplotlib -django==5.1.1 +django==5.1.3 # via -r requirements.in -elementpath==4.5.0 +elementpath==4.6.0 # via xmlschema flask==3.0.3 # via @@ -46,14 +46,14 @@ 
kiwisolver==1.4.7 # via matplotlib lxml==5.3.0 # via -r requirements.in -markupsafe==2.1.5 +markupsafe==3.0.2 # via # -r requirements.in # jinja2 # werkzeug matplotlib==3.9.2 # via -r requirements.in -numpy==2.1.1 +numpy==2.1.3 # via # contourpy # matplotlib @@ -61,9 +61,9 @@ packaging==24.1 # via # gunicorn # matplotlib -pillow==10.4.0 +pillow==11.0.0 # via matplotlib -pyparsing==3.1.4 +pyparsing==3.2.0 # via matplotlib python-dateutil==2.9.0.post0 # via @@ -77,13 +77,13 @@ six==1.16.0 # via python-dateutil sqlparse==0.5.1 # via django -tqdm==4.66.5 +tqdm==4.67.0 # via -r requirements.in urllib3==2.2.3 # via requests -werkzeug==3.0.4 +werkzeug==3.1.2 # via # -r requirements.in # flask -xmlschema==3.4.2 +xmlschema==3.4.3 # via -r requirements.in diff --git a/requirements_dev.txt b/requirements_dev.txt index 32fbe21f09..1382c52a0a 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -16,7 +16,7 @@ certifi==2024.8.30 # via # -r requirements.txt # requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via # -r requirements.txt # requests @@ -28,7 +28,7 @@ contourpy==1.3.0 # via # -r requirements.txt # matplotlib -coverage[toml]==7.6.1 +coverage[toml]==7.6.4 # via # coveralls # pytest-cov @@ -38,11 +38,11 @@ cycler==0.12.1 # via # -r requirements.txt # matplotlib -django==5.1.1 +django==5.1.3 # via -r requirements.txt docopt==0.6.2 # via coveralls -elementpath==4.5.0 +elementpath==4.6.0 # via # -r requirements.txt # xmlschema @@ -80,7 +80,7 @@ kiwisolver==1.4.7 # matplotlib lxml==5.3.0 # via -r requirements.txt -markupsafe==2.1.5 +markupsafe==3.0.2 # via # -r requirements.txt # jinja2 @@ -89,7 +89,7 @@ matplotlib==3.9.2 # via -r requirements.txt mccabe==0.7.0 # via flake8 -numpy==2.1.1 +numpy==2.1.3 # via # -r requirements.txt # contourpy @@ -100,7 +100,7 @@ packaging==24.1 # gunicorn # matplotlib # pytest -pillow==10.4.0 +pillow==11.0.0 # via # -r requirements.txt # matplotlib @@ -110,7 +110,7 @@ pycodestyle==2.12.1 # via flake8 pyflakes==3.2.0 # 
via flake8 -pyparsing==3.1.4 +pyparsing==3.2.0 # via # -r requirements.txt # matplotlib @@ -118,7 +118,7 @@ pytest==8.3.3 # via # -r requirements_dev.in # pytest-cov -pytest-cov==5.0.0 +pytest-cov==6.0.0 # via -r requirements_dev.in python-dateutil==2.9.0.post0 # via @@ -138,15 +138,15 @@ sqlparse==0.5.1 # via # -r requirements.txt # django -tqdm==4.66.5 +tqdm==4.67.0 # via -r requirements.txt urllib3==2.2.3 # via # -r requirements.txt # requests -werkzeug==3.0.4 +werkzeug==3.1.2 # via # -r requirements.txt # flask -xmlschema==3.4.2 +xmlschema==3.4.3 # via -r requirements.txt From aa55ab0fbf5857b01c18410447e9075abc10734f Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Thu, 7 Nov 2024 15:54:07 +0000 Subject: [PATCH 355/375] requirements: Install black, isort, flake8-pyproject --- requirements_dev.in | 3 +++ requirements_dev.txt | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/requirements_dev.in b/requirements_dev.in index ccc6a2542d..e81b4f6610 100644 --- a/requirements_dev.in +++ b/requirements_dev.in @@ -3,3 +3,6 @@ pytest pytest-cov coveralls flake8 +flake8-pyproject +isort +black diff --git a/requirements_dev.txt b/requirements_dev.txt index 1382c52a0a..013f9c844b 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,6 +8,8 @@ asgiref==3.8.1 # via # -r requirements.txt # django +black==24.10.0 + # via -r requirements_dev.in blinker==1.8.2 # via # -r requirements.txt @@ -23,6 +25,7 @@ charset-normalizer==3.4.0 click==8.1.7 # via # -r requirements.txt + # black # flask contourpy==1.3.0 # via @@ -47,6 +50,10 @@ elementpath==4.6.0 # -r requirements.txt # xmlschema flake8==7.1.1 + # via + # -r requirements_dev.in + # flake8-pyproject +flake8-pyproject==1.2.3 # via -r requirements_dev.in flask==3.0.3 # via @@ -66,6 +73,8 @@ idna==3.10 # requests iniconfig==2.0.0 # via pytest +isort==5.13.2 + # via -r requirements_dev.in itsdangerous==2.2.0 # via # -r requirements.txt @@ -89,6 +98,8 @@ matplotlib==3.9.2 # via -r requirements.txt 
mccabe==0.7.0 # via flake8 +mypy-extensions==1.0.0 + # via black numpy==2.1.3 # via # -r requirements.txt @@ -97,13 +108,18 @@ numpy==2.1.3 packaging==24.1 # via # -r requirements.txt + # black # gunicorn # matplotlib # pytest +pathspec==0.12.1 + # via black pillow==11.0.0 # via # -r requirements.txt # matplotlib +platformdirs==4.3.6 + # via black pluggy==1.5.0 # via pytest pycodestyle==2.12.1 From 5d658f29e066696b574f630747480d9db6b6c543 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Thu, 7 Nov 2024 15:56:45 +0000 Subject: [PATCH 356/375] chore: Configure linting with pyproject.toml --- pyproject.toml | 19 +++++++++++++++++++ setup.cfg | 2 -- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..1a352dc140 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,19 @@ +[tool.isort] +py_version=312 +extend_skip = ["__pycache__", ".mypy_cache", ".ve", ".venv", ".vagrant-ve"] +skip_gitignore = true +src_paths = ["dashboard"] +line_length = 119 +profile = "black" + + +[tool.flake8] +max-line-length = 119 +extend_ignore = ["E203", "W503", "E275", "E501", "E721"] +exclude = ["data", "gitout", "helpers/IATI-Rulesets", "helpers/schemas", "__pycache__", ".mypy_cache", ".pytest_cache", ".ve", ".venv", ".vagrant-ve"] + + +[tool.black] +line-length = 119 +target-version = ["py312"] +include = "^[^/]*\\.py|dashboard/.*\\.py$" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index a6578a30e8..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -extend-ignore = E501 From 3e2bf32b77e563428669616dc89c783d8e896af3 Mon Sep 17 00:00:00 2001 From: Automated Reformatting Date: Mon, 11 Nov 2024 13:53:11 +0000 Subject: [PATCH 357/375] lint: Run black for the first time --- dashboard/common.py | 13 +- dashboard/comprehensiveness.py | 179 ++++---- dashboard/coverage.py | 182 ++++---- dashboard/data.py | 179 ++++---- 
dashboard/filepaths.py | 12 +- dashboard/forwardlooking.py | 90 ++-- dashboard/humanitarian.py | 55 +-- dashboard/make_csv.py | 270 +++++++----- dashboard/make_html.py | 503 ++++++++++++---------- dashboard/make_plots.py | 134 +++--- dashboard/manage.py | 4 +- dashboard/speakers_kit.py | 101 +++-- dashboard/summary_stats.py | 53 ++- dashboard/tests/test_comprehensiveness.py | 30 +- dashboard/tests/test_timeliness.py | 5 +- dashboard/text.py | 345 ++++++++------- dashboard/timeliness.py | 127 +++--- dashboard/ui/asgi.py | 2 +- dashboard/ui/jinja2.py | 34 +- dashboard/ui/settings.py | 96 +++-- dashboard/ui/template_funcs.py | 12 +- dashboard/ui/tests.py | 210 ++++++--- dashboard/ui/urls.py | 173 +++++--- dashboard/ui/views.py | 281 +++++++----- dashboard/ui/wsgi.py | 2 +- dashboard/vars.py | 2 +- 26 files changed, 1802 insertions(+), 1292 deletions(-) diff --git a/dashboard/common.py b/dashboard/common.py index 41f773b8c8..98fd4dec70 100644 --- a/dashboard/common.py +++ b/dashboard/common.py @@ -1,20 +1,21 @@ """Load IATI OrganisationType codelist into a global and provide function to get publisher type""" + import data import json import filepaths # Import organisation_type_codelist as a global, then delete when used to save memory -with open(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) as fh: +with open(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/OrganisationType.json")) as fh: organisation_type_codelist = json.load(fh) -organisation_type_dict = {c['code']: c['name'] for c in organisation_type_codelist['data']} +organisation_type_dict = {c["code"]: c["name"] for c in organisation_type_codelist["data"]} del organisation_type_codelist def get_publisher_type(publisher): """Return a dictionary of publisher organisation information, based on what is stored - in CKAN for the given publisher registry ID. - Returns None if publisher is not found. + in CKAN for the given publisher registry ID. 
+ Returns None if publisher is not found. """ # Check that the publisher is in the list of ckan_publishers @@ -22,10 +23,10 @@ def get_publisher_type(publisher): return None # Get the code the organisation from CKAN data (this will be in line with the OrganisationType codelist) - organization_type_code = data.ckan_publishers[publisher]['result']['publisher_organization_type'] + organization_type_code = data.ckan_publishers[publisher]["result"]["publisher_organization_type"] # Get the english language name of this organisation type, according to the codelist organization_type_name = organisation_type_dict[organization_type_code] # Return a dictionary with code and name - return {'code': organization_type_code, 'name': organization_type_name} + return {"code": organization_type_code, "name": organization_type_name} diff --git a/dashboard/comprehensiveness.py b/dashboard/comprehensiveness.py index 6a41860976..bb89389398 100644 --- a/dashboard/comprehensiveness.py +++ b/dashboard/comprehensiveness.py @@ -3,46 +3,51 @@ from data import publishers_ordered_by_title, get_publisher_stats, publisher_name columns = { - 'summary': [ + "summary": [ # Format for elements within this list - and similar lists below ('core', 'financials', etc): # slug, header, weighting when calculating average - ('core_average', 'Core Average', 2), - ('financials_average', 'Financials Average', 1), - ('valueadded_average', 'Value Added Average', 1), - ('summary_average', 'Weighted Average', 0), # i.e. don't include the average within the calculation of the average + ("core_average", "Core Average", 2), + ("financials_average", "Financials Average", 1), + ("valueadded_average", "Value Added Average", 1), + ( + "summary_average", + "Weighted Average", + 0, + ), # i.e. 
don't include the average within the calculation of the average ], - 'core': [ - ('version', 'Version', 1), - ('reporting-org', 'Reporting Organisation', 1), - ('iati-identifier', 'IATI Identifier', 1), - ('participating-org', 'Participating Organisation', 1), - ('title', 'Title', 1), - ('description', 'Description', 1), - ('activity-status', 'Status', 1), - ('activity-date', 'Activity Date', 1), - ('sector', 'Sector', 1), - ('country_or_region', 'Country or Region', 1), - ('core_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + "core": [ + ("version", "Version", 1), + ("reporting-org", "Reporting Organisation", 1), + ("iati-identifier", "IATI Identifier", 1), + ("participating-org", "Participating Organisation", 1), + ("title", "Title", 1), + ("description", "Description", 1), + ("activity-status", "Status", 1), + ("activity-date", "Activity Date", 1), + ("sector", "Sector", 1), + ("country_or_region", "Country or Region", 1), + ("core_average", "Average", 0), # i.e. don't include the average within the calculation of the average ], - 'financials': [ - ('transaction_commitment', 'Transaction - Commitment', 1, 'first_hierarchy_with_commitments'), - ('transaction_spend', 'Transaction - Disbursement or Expenditure', 1, 'bottom_hierarchy'), - ('transaction_traceability', 'Transaction - Traceability', 1, 'bottom_hierarchy'), - ('budget', 'Budget', 1, 'hierarchy_with_most_budgets'), - ('financials_average', 'Average', 0), # i.e. don't include the average within the calculation of the average + "financials": [ + ("transaction_commitment", "Transaction - Commitment", 1, "first_hierarchy_with_commitments"), + ("transaction_spend", "Transaction - Disbursement or Expenditure", 1, "bottom_hierarchy"), + ("transaction_traceability", "Transaction - Traceability", 1, "bottom_hierarchy"), + ("budget", "Budget", 1, "hierarchy_with_most_budgets"), + ("financials_average", "Average", 0), # i.e. 
don't include the average within the calculation of the average ], - 'valueadded': [ - ('contact-info', 'Contacts', 1), - ('location', 'Location Details', 1), - ('location_point_pos', 'Geographic Coordinates', 1), - ('sector_dac', 'DAC Sectors', 1), - ('capital-spend', 'Capital Spend', 1), - ('document-link', 'Activity Documents', 1), - ('aid_type', 'Aid Type', 1), - ('recipient_language', 'Recipient Language', 1), - ('result_indicator', 'Result/ Indicator', 1), - ('valueadded_average', 'Average', 0), # i.e. don't include the average within the calculation of the average - ]} + "valueadded": [ + ("contact-info", "Contacts", 1), + ("location", "Location Details", 1), + ("location_point_pos", "Geographic Coordinates", 1), + ("sector_dac", "DAC Sectors", 1), + ("capital-spend", "Capital Spend", 1), + ("document-link", "Activity Documents", 1), + ("aid_type", "Aid Type", 1), + ("recipient_language", "Recipient Language", 1), + ("result_indicator", "Result/ Indicator", 1), + ("valueadded_average", "Average", 0), # i.e. don't include the average within the calculation of the average + ], +} # Build dictionaries for all the column_headers and column_slugs defined above column_headers = {tabname: [x[1] for x in values] for tabname, values in columns.items()} @@ -51,7 +56,7 @@ # Build directory to lookup the hierarchy which should be used in the numerator # e.g. {'activity-date': 'all', 'activity-status': 'all', [...] 
budget': 'hierarchy_with_most_budgets', [etc]} column_base_lookup = { - col[0]: col[3] if len(col) > 3 else 'all' + col[0]: col[3] if len(col) > 3 else "all" for col_group, col_components in columns.items() for col in col_components } @@ -67,29 +72,35 @@ def denominator(key, stats): return 0 # If there is a specific denominator for the given key, return this - if key in stats['comprehensiveness_denominators']: - return float(stats['comprehensiveness_denominators'][key]) + if key in stats["comprehensiveness_denominators"]: + return float(stats["comprehensiveness_denominators"][key]) # Otherwise, return the default denominator else: - return float(stats['comprehensiveness_denominator_default']) + return float(stats["comprehensiveness_denominator_default"]) def get_hierarchy_with_most_budgets(stats): """Find the hierarchy which contains the greatest number of budgets. - Will only count hierarchies where the default denominator is greater than zero. - Input: - stats -- a JSONDir object of publisher stats - Returns: - Key of the hierarchy with greatest number of budgets, or None + Will only count hierarchies where the default denominator is greater than zero. 
+ Input: + stats -- a JSONDir object of publisher stats + Returns: + Key of the hierarchy with greatest number of budgets, or None """ try: # Get the key with the largest number of budgets - budgets = max(stats['by_hierarchy'], key=( - lambda x: - stats['by_hierarchy'][x]['comprehensiveness'].get('budget', 0) + stats['by_hierarchy'][x]['comprehensiveness'].get('budget_not_provided', 0) - if stats['by_hierarchy'][x]['comprehensiveness_denominator_default'] > 0 else -1) + budgets = max( + stats["by_hierarchy"], + key=( + lambda x: ( + stats["by_hierarchy"][x]["comprehensiveness"].get("budget", 0) + + stats["by_hierarchy"][x]["comprehensiveness"].get("budget_not_provided", 0) + if stats["by_hierarchy"][x]["comprehensiveness_denominator_default"] > 0 + else -1 + ) + ), ) return budgets except KeyError: @@ -102,78 +113,86 @@ def get_hierarchy_with_most_budgets(stats): def get_first_hierarchy_with_commitments(stats): """Return the number of the first hierarchy that contains at least 1 commitment - (according to the comprehensiveness counts) - Returns: - Number of first hierarchy with commitments or None if no commitments in any hierarchy + (according to the comprehensiveness counts) + Returns: + Number of first hierarchy with commitments or None if no commitments in any hierarchy """ - hierarchies_with_commitments = {x: y['comprehensiveness']['transaction_commitment'] - for x, y in stats.get('by_hierarchy', {}).items() - if y['comprehensiveness'].get('transaction_commitment', 0) > 0} + hierarchies_with_commitments = { + x: y["comprehensiveness"]["transaction_commitment"] + for x, y in stats.get("by_hierarchy", {}).items() + if y["comprehensiveness"].get("transaction_commitment", 0) > 0 + } return min(hierarchies_with_commitments) if len(hierarchies_with_commitments) else None def generate_row(publisher): - """Generate comprehensiveness table data for a given publisher - """ + """Generate comprehensiveness table data for a given publisher""" publisher_stats = 
get_publisher_stats(publisher) # Set an inital dictionary, which will later be populated further row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_name[publisher] + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] # Calculate percentages for publisher data populated with any data - for slug in column_slugs['core'] + column_slugs['financials'] + column_slugs['valueadded']: + for slug in column_slugs["core"] + column_slugs["financials"] + column_slugs["valueadded"]: # Set the stats base for calculating the numerator. This is based on the hierarchy set in the lookup - if column_base_lookup[slug] == 'bottom_hierarchy': - publisher_base = publisher_stats.get('bottom_hierarchy', {}) + if column_base_lookup[slug] == "bottom_hierarchy": + publisher_base = publisher_stats.get("bottom_hierarchy", {}) - elif column_base_lookup[slug] == 'hierarchy_with_most_budgets': - publisher_base = publisher_stats['by_hierarchy'].get(get_hierarchy_with_most_budgets(publisher_stats), {}) + elif column_base_lookup[slug] == "hierarchy_with_most_budgets": + publisher_base = publisher_stats["by_hierarchy"].get(get_hierarchy_with_most_budgets(publisher_stats), {}) - elif column_base_lookup[slug] == 'first_hierarchy_with_commitments': + elif column_base_lookup[slug] == "first_hierarchy_with_commitments": if get_first_hierarchy_with_commitments(publisher_stats): - publisher_base = publisher_stats['by_hierarchy'].get(get_first_hierarchy_with_commitments(publisher_stats), {}) + publisher_base = publisher_stats["by_hierarchy"].get( + get_first_hierarchy_with_commitments(publisher_stats), {} + ) else: - publisher_base = publisher_stats.get('bottom_hierarchy', {}) + publisher_base = publisher_stats.get("bottom_hierarchy", {}) else: # Most common case will be column_base_lookup[slug] == 'all': publisher_base = publisher_stats - if slug == 'budget': - budget_all = publisher_base.get('comprehensiveness', {}).get(slug, 0) - budget_not_provided_all 
= publisher_base.get('comprehensiveness', {}).get('budget_not_provided', 0) - row['flag'] = budget_not_provided_all > 0 + if slug == "budget": + budget_all = publisher_base.get("comprehensiveness", {}).get(slug, 0) + budget_not_provided_all = publisher_base.get("comprehensiveness", {}).get("budget_not_provided", 0) + row["flag"] = budget_not_provided_all > 0 numerator_all = budget_all + budget_not_provided_all - budget_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0) - budget_not_provided_valid = publisher_base.get('comprehensiveness_with_validation', {}).get('budget_not_provided', 0) + budget_valid = publisher_base.get("comprehensiveness_with_validation", {}).get(slug, 0) + budget_not_provided_valid = publisher_base.get("comprehensiveness_with_validation", {}).get( + "budget_not_provided", 0 + ) numerator_valid = budget_valid + budget_not_provided_valid else: - numerator_all = publisher_base.get('comprehensiveness', {}).get(slug, 0) - numerator_valid = publisher_base.get('comprehensiveness_with_validation', {}).get(slug, 0) + numerator_all = publisher_base.get("comprehensiveness", {}).get(slug, 0) + numerator_valid = publisher_base.get("comprehensiveness_with_validation", {}).get(slug, 0) if denominator(slug, publisher_base) != 0: # Populate the row with the %age row[slug] = float(numerator_all) / denominator(slug, publisher_base) * 100 - row[slug + '_valid'] = float(numerator_valid) / denominator(slug, publisher_base) * 100 + row[slug + "_valid"] = float(numerator_valid) / denominator(slug, publisher_base) * 100 # Loop for averages # Calculate the average for each grouping, and the overall 'summary' average - for page in ['core', 'financials', 'valueadded', 'summary']: + for page in ["core", "financials", "valueadded", "summary"]: # Note that the summary must be last, so that it can use the average calculations from the other groupings - row[page + '_average'] = sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / 
float(sum(x[2] for x in columns[page])) + row[page + "_average"] = sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float( + sum(x[2] for x in columns[page]) + ) - row[page + '_average_valid'] = sum((row.get(x[0] + '_valid') or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + row[page + "_average_valid"] = sum((row.get(x[0] + "_valid") or 0) * x[2] for x in columns[page]) / float( + sum(x[2] for x in columns[page]) + ) return row def table(): - """Generate comprehensiveness table data for every publisher and return as a generator object - """ + """Generate comprehensiveness table data for every publisher and return as a generator object""" # Loop over the data for each publisher for publisher_title, publisher in publishers_ordered_by_title: diff --git a/dashboard/coverage.py b/dashboard/coverage.py index 58885685ab..7277d3973f 100644 --- a/dashboard/coverage.py +++ b/dashboard/coverage.py @@ -10,10 +10,10 @@ def is_number(s): - """ Tests if a variable is a number. - Input: s - a variable - Return: True if v is a number - False if v is not a number + """Tests if a variable is a number. + Input: s - a variable + Return: True if v is a number + False if v is not a number """ try: float(s) @@ -23,9 +23,9 @@ def is_number(s): def convert_to_int(x): - """ Converts a variable to an integer value, or 0 if it cannot be converted to an integer. - Input: x - a variable - Return: x as an integer, or zero if x is not a number + """Converts a variable to an integer value, or 0 if it cannot be converted to an integer. 
+ Input: x - a variable + Return: x as an integer, or zero if x is not a number """ if is_number(x): return int(x) @@ -34,21 +34,20 @@ def convert_to_int(x): def generate_row(publisher): - """Generate coverage table data for a given publisher - """ + """Generate coverage table data for a given publisher""" # Store the data for this publisher as new variables publisher_stats = get_publisher_stats(publisher) - transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd'] + transactions_usd = publisher_stats["sum_transactions_by_type_by_year_usd"] # Create a list for publisher data, and populate it with basic data row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_name[publisher] - row['no_data_flag_red'] = 0 - row['no_data_flag_amber'] = 0 - row['spend_data_error_reported_flag'] = 0 - row['sort_order'] = 0 + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] + row["no_data_flag_red"] = 0 + row["no_data_flag_amber"] = 0 + row["spend_data_error_reported_flag"] = 0 + row["sort_order"] = 0 # Compute 2014 IATI spend iati_2014_spend_total = 0 @@ -56,28 +55,28 @@ def generate_row(publisher): if publisher in dfi_publishers: # If this publisher is a DFI, then their 2014 spend total should be based on their # commitment transactions only. 
See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2014' in transactions_usd.get('2', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['2']['USD']['2014'] + if "2014" in transactions_usd.get("2", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["2"]["USD"]["2014"] - if '2014' in transactions_usd.get('C', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['C']['USD']['2014'] + if "2014" in transactions_usd.get("C", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["C"]["USD"]["2014"] else: # This is a non-DFI publisher - if '2014' in transactions_usd.get('3', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['3']['USD']['2014'] + if "2014" in transactions_usd.get("3", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["3"]["USD"]["2014"] - if '2014' in transactions_usd.get('D', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['D']['USD']['2014'] + if "2014" in transactions_usd.get("D", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["D"]["USD"]["2014"] - if '2014' in transactions_usd.get('4', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['4']['USD']['2014'] + if "2014" in transactions_usd.get("4", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["4"]["USD"]["2014"] - if '2014' in transactions_usd.get('E', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['E']['USD']['2014'] + if "2014" in transactions_usd.get("E", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["E"]["USD"]["2014"] # Convert to millions USD - row['iati_spend_2014'] = round(float(iati_2014_spend_total / 1000000), 2) + row["iati_spend_2014"] = round(float(iati_2014_spend_total / 1000000), 2) # Compute 2015 IATI spend iati_2015_spend_total = 0 @@ -85,28 +84,28 @@ def generate_row(publisher): if publisher in dfi_publishers: # If this publisher is a DFI, then their 2015 spend total should be based on their # commitment 
transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2015' in transactions_usd.get('2', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['2']['USD']['2015'] + if "2015" in transactions_usd.get("2", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["2"]["USD"]["2015"] - if '2015' in transactions_usd.get('C', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['C']['USD']['2015'] + if "2015" in transactions_usd.get("C", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["C"]["USD"]["2015"] else: # This is a non-DFI publisher - if '2015' in transactions_usd.get('3', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['3']['USD']['2015'] + if "2015" in transactions_usd.get("3", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["3"]["USD"]["2015"] - if '2015' in transactions_usd.get('D', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['D']['USD']['2015'] + if "2015" in transactions_usd.get("D", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["D"]["USD"]["2015"] - if '2015' in transactions_usd.get('4', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['4']['USD']['2015'] + if "2015" in transactions_usd.get("4", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["4"]["USD"]["2015"] - if '2015' in transactions_usd.get('E', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['E']['USD']['2015'] + if "2015" in transactions_usd.get("E", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["E"]["USD"]["2015"] # Convert to millions USD - row['iati_spend_2015'] = round(float(iati_2015_spend_total / 1000000), 2) + row["iati_spend_2015"] = round(float(iati_2015_spend_total / 1000000), 2) # Compute 2016 IATI spend iati_2016_spend_total = 0 @@ -114,60 +113,99 @@ def generate_row(publisher): if publisher in dfi_publishers: # If this publisher is a DFI, then their 2016 spend total should be based on 
their # commitment transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2016' in transactions_usd.get('2', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['2']['USD']['2016'] + if "2016" in transactions_usd.get("2", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["2"]["USD"]["2016"] - if '2016' in transactions_usd.get('C', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['C']['USD']['2016'] + if "2016" in transactions_usd.get("C", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["C"]["USD"]["2016"] else: # This is a non-DFI publisher - if '2016' in transactions_usd.get('3', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['3']['USD']['2016'] + if "2016" in transactions_usd.get("3", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["3"]["USD"]["2016"] - if '2016' in transactions_usd.get('D', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['D']['USD']['2016'] + if "2016" in transactions_usd.get("D", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["D"]["USD"]["2016"] - if '2016' in transactions_usd.get('4', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['4']['USD']['2016'] + if "2016" in transactions_usd.get("4", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["4"]["USD"]["2016"] - if '2016' in transactions_usd.get('E', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['E']['USD']['2016'] + if "2016" in transactions_usd.get("E", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["E"]["USD"]["2016"] # Convert to millions USD - row['iati_spend_2016'] = round(float(iati_2016_spend_total / 1000000), 2) + row["iati_spend_2016"] = round(float(iati_2016_spend_total / 1000000), 2) # Get reference data # Get data from stats files. 
Set as empty stings if the IATI-Stats code did not find them in the reference data sheet - data_2014 = publisher_stats['reference_spend_data_usd'].get('2014', {'ref_spend': '', 'not_in_sheet': True}) - data_2015 = publisher_stats['reference_spend_data_usd'].get('2015', {'ref_spend': '', 'official_forecast': '', 'not_in_sheet': True}) + data_2014 = publisher_stats["reference_spend_data_usd"].get("2014", {"ref_spend": "", "not_in_sheet": True}) + data_2015 = publisher_stats["reference_spend_data_usd"].get( + "2015", {"ref_spend": "", "official_forecast": "", "not_in_sheet": True} + ) # Compute reference data as $USDm - row['reference_spend_2014'] = round((float(data_2014['ref_spend']) / 1000000), 2) if is_number(data_2014['ref_spend']) else '-' - row['reference_spend_2015'] = round((float(data_2015['ref_spend']) / 1000000), 2) if is_number(data_2015['ref_spend']) else '-' - row['official_forecast_2015'] = round((float(data_2015['official_forecast']) / 1000000), 2) if is_number(data_2015['official_forecast']) else '-' + row["reference_spend_2014"] = ( + round((float(data_2014["ref_spend"]) / 1000000), 2) if is_number(data_2014["ref_spend"]) else "-" + ) + row["reference_spend_2015"] = ( + round((float(data_2015["ref_spend"]) / 1000000), 2) if is_number(data_2015["ref_spend"]) else "-" + ) + row["official_forecast_2015"] = ( + round((float(data_2015["official_forecast"]) / 1000000), 2) + if is_number(data_2015["official_forecast"]) + else "-" + ) # Compute spend ratio score # Compile a list of ratios for spend & reference data paired by year - spend_ratio_candidates = [(row['iati_spend_2014'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and is_number(row['reference_spend_2014']) else 0, - (row['iati_spend_2015'] / row['reference_spend_2015']) if (row['reference_spend_2015'] > 0) and is_number(row['reference_spend_2015']) else 0, - (row['iati_spend_2015'] / row['official_forecast_2015']) if (row['official_forecast_2015'] > 0) and 
is_number(row['official_forecast_2015']) else 0] + spend_ratio_candidates = [ + ( + (row["iati_spend_2014"] / row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ), + ( + (row["iati_spend_2015"] / row["reference_spend_2015"]) + if (row["reference_spend_2015"] > 0) and is_number(row["reference_spend_2015"]) + else 0 + ), + ( + (row["iati_spend_2015"] / row["official_forecast_2015"]) + if (row["official_forecast_2015"] > 0) and is_number(row["official_forecast_2015"]) + else 0 + ), + ] # If there are no annual pairs, add the value of non-matching-year spend / reference data - if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-') and (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-') and (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')): - spend_ratio_candidates.append((row['iati_spend_2015'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and is_number(row['reference_spend_2014']) else 0) - spend_ratio_candidates.append((row['iati_spend_2016'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and is_number(row['reference_spend_2014']) else 0) - spend_ratio_candidates.append((row['iati_spend_2016'] / row['reference_spend_2015']) if (row['reference_spend_2015'] > 0) and is_number(row['reference_spend_2015']) else 0) + if ( + (row["iati_spend_2014"] == 0 or row["reference_spend_2014"] == "-") + and (row["iati_spend_2015"] == 0 or row["reference_spend_2015"] == "-") + and (row["iati_spend_2015"] == 0 or row["official_forecast_2015"] == "-") + ): + spend_ratio_candidates.append( + (row["iati_spend_2015"] / row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ) + spend_ratio_candidates.append( + (row["iati_spend_2016"] / row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ) + 
spend_ratio_candidates.append( + (row["iati_spend_2016"] / row["reference_spend_2015"]) + if (row["reference_spend_2015"] > 0) and is_number(row["reference_spend_2015"]) + else 0 + ) # Get the maximum value and convert to a percentage - row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100)) + row["spend_ratio"] = int(round(max(spend_ratio_candidates) * 100)) return row def table(): - """Generate coverage table data for every publisher and return as a generator object - """ + """Generate coverage table data for every publisher and return as a generator object""" # Loop over each publisher for publisher_title, publisher in publishers_ordered_by_title: @@ -180,8 +218,8 @@ def table(): # Compile a list of Development finance institutions (DFIs) -with open(filepaths.join_base_path('dfi_publishers.csv'), 'r') as csv_file: - reader = csv.reader(csv_file, delimiter=',') +with open(filepaths.join_base_path("dfi_publishers.csv"), "r") as csv_file: + reader = csv.reader(csv_file, delimiter=",") dfi_publishers = [] for line in reader: diff --git a/dashboard/data.py b/dashboard/data.py index 01edc56ffa..e81389fd13 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -15,7 +15,7 @@ # https://github.com/IATI/IATI-Stats/blob/1d20ed1e/stats/common/decorators.py#L5-L13 def memoize(f): def wrapper(self, key): - if not hasattr(self, '__cache'): + if not hasattr(self, "__cache"): self.__cache = {} if key in self.__cache: return self.__cache[key] @@ -24,18 +24,19 @@ def wrapper(self, key): # don't cache JSONDirs self.__cache[key] = res return res + return wrapper class JSONDir(MutableMapping): """Produces an object, to be used to access JSON-formatted publisher data and return - this as an ordered dictionary (with nested dictionaries, if appropriate). - Use of this class removes the need to load large amounts of data into memory. + this as an ordered dictionary (with nested dictionaries, if appropriate). 
+ Use of this class removes the need to load large amounts of data into memory. """ def __init__(self, folder): """Set the path of the folder being accessed as an attribute to an instance of - the object. + the object. """ self.folder = folder @@ -49,7 +50,7 @@ def __delitem__(self, key): pass def __repr__(self): - return '{}, JSONDIR({})'.format(super(JSONDir, self).__repr__(), self.__dict__) + return "{}, JSONDIR({})".format(super(JSONDir, self).__repr__(), self.__dict__) def __setitem__(self, key, value): super(JSONDir, self).__setitem__(key, value) @@ -57,28 +58,28 @@ def __setitem__(self, key, value): @memoize def __getitem__(self, key): """Define how variables are gathered from the raw JSON files and then parsed into - the OrderedDict that will be returned. + the OrderedDict that will be returned. - Note: - try-except should be used around file operations rather than checking before-hand + Note: + try-except should be used around file operations rather than checking before-hand """ if os.path.exists(os.path.join(self.folder, key)): # The data being sought is a directory data = JSONDir(os.path.join(self.folder, key)) - elif os.path.exists(os.path.join(self.folder, key + '.json')): + elif os.path.exists(os.path.join(self.folder, key + ".json")): # The data being sought is a json file - with open(os.path.join(self.folder, key + '.json')) as fp: + with open(os.path.join(self.folder, key + ".json")) as fp: data = json.load(fp, object_pairs_hook=OrderedDict) # Deal with publishers who had an old registry ID # If this publisher had at least one old ID in the past - if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): + if (self.get_publisher_name() in get_registry_id_matches().values()) and ("gitaggregate" in self.folder): # Perform the merging # Look over the set of changed registry IDs for previous_id, current_id in get_registry_id_matches().items(): folder = self.folder - previous_path = 
os.path.join(folder.replace(current_id, previous_id), key + '.json') + previous_path = os.path.join(folder.replace(current_id, previous_id), key + ".json") # If this publisher has had an old ID and there is data for it if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher @@ -99,9 +100,9 @@ def __getitem__(self, key): def keys(self): """Method to return a list of keys that are contained within the data folder that - is being accessed within this instance. + is being accessed within this instance. """ - return [x[:-5] if x.endswith('.json') else x for x in os.listdir(self.folder)] + return [x[:-5] if x.endswith(".json") else x for x in os.listdir(self.folder)] def __iter__(self): """Custom iterable, to iterate over the keys that are contained within the data @@ -111,7 +112,7 @@ def __iter__(self): def get_publisher_name(self): """Find the name of the publisher that this data relates to. - Note, this is a super hacky way to do this, prize available if a better way is found to do this! + Note, this is a super hacky way to do this, prize available if a better way is found to do this! """ # Get a list of the parts that are contained within this filepath @@ -120,20 +121,20 @@ def get_publisher_name(self): # Loop over this list and return the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys(): + if x in JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).keys(): return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher return None -def get_publisher_stats(publisher, stats_type='aggregated'): +def get_publisher_stats(publisher, stats_type="aggregated"): """Function to obtain current data for a given publisher. 
Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher is not found. """ try: - return JSONDir(filepaths.join_stats_path('current/{0}-publisher/{1}'.format(stats_type, publisher))) + return JSONDir(filepaths.join_stats_path("current/{0}-publisher/{1}".format(stats_type, publisher))) except IOError: return {} @@ -145,13 +146,10 @@ def get_registry_id_matches(): """ # Load registry IDs for publishers who have changed their registry ID - with open(filepaths.join_base_path('registry_id_relationships.csv')) as f: + with open(filepaths.join_base_path("registry_id_relationships.csv")) as f: reader = csv.DictReader(f) # Load this data into a dictonary - registry_matches = { - row['previous_registry_id']: row['current_registry_id'] - for row in reader - } + registry_matches = {row["previous_registry_id"]: row["current_registry_id"] for row in reader} return registry_matches @@ -184,33 +182,33 @@ def deep_merge(obj1, obj2): current_stats = { - 'aggregated': JSONDir(filepaths.join_stats_path('current/aggregated')), - 'aggregated_file': JSONDir(filepaths.join_stats_path('current/aggregated-file')), - 'inverted_publisher': JSONDir(filepaths.join_stats_path('current/inverted-publisher')), - 'inverted_file': JSONDir(filepaths.join_stats_path('current/inverted-file')), - 'inverted_file_publisher': JSONDir(filepaths.join_stats_path('current/inverted-file-publisher')), - 'download_errors': [] + "aggregated": JSONDir(filepaths.join_stats_path("current/aggregated")), + "aggregated_file": JSONDir(filepaths.join_stats_path("current/aggregated-file")), + "inverted_publisher": JSONDir(filepaths.join_stats_path("current/inverted-publisher")), + "inverted_file": JSONDir(filepaths.join_stats_path("current/inverted-file")), + "inverted_file_publisher": JSONDir(filepaths.join_stats_path("current/inverted-file-publisher")), + "download_errors": [], } -ckan_publishers = JSONDir(filepaths.join_data_path('ckan_publishers')) -github_issues = 
JSONDir(filepaths.join_data_path('github/publishers')) -ckan = json.load(open(filepaths.join_stats_path('ckan.json')), object_pairs_hook=OrderedDict) +ckan_publishers = JSONDir(filepaths.join_data_path("ckan_publishers")) +github_issues = JSONDir(filepaths.join_data_path("github/publishers")) +ckan = json.load(open(filepaths.join_stats_path("ckan.json")), object_pairs_hook=OrderedDict) dataset_to_publisher_dict = { - dataset: publisher - for publisher, publisher_dict in ckan.items() - for dataset in publisher_dict.keys() + dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() } -metadata = json.load(open(filepaths.join_stats_path('metadata.json')), object_pairs_hook=OrderedDict) -with open(filepaths.join_data_path('downloads/errors')) as fp: +metadata = json.load(open(filepaths.join_stats_path("metadata.json")), object_pairs_hook=OrderedDict) +with open(filepaths.join_data_path("downloads/errors")) as fp: for line in fp: - if line != '.\n': - current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) + if line != ".\n": + current_stats["download_errors"].append(line.strip("\n").split(" ", 3)) sources105 = [ - filepaths.join_data_path('schemas/1.05/iati-activities-schema.xsd'), - filepaths.join_data_path('schemas/1.05/iati-organisations-schema.xsd')] + filepaths.join_data_path("schemas/1.05/iati-activities-schema.xsd"), + filepaths.join_data_path("schemas/1.05/iati-organisations-schema.xsd"), +] sources203 = [ - filepaths.join_data_path('schemas/2.03/iati-activities-schema.xsd'), - filepaths.join_data_path('schemas/2.03/iati-organisations-schema.xsd')] + filepaths.join_data_path("schemas/2.03/iati-activities-schema.xsd"), + filepaths.join_data_path("schemas/2.03/iati-organisations-schema.xsd"), +] schema105 = xmlschema.XMLSchema(sources105) schema203 = xmlschema.XMLSchema(sources203) @@ -238,73 +236,102 @@ def is_valid_element_or_attribute(path: str) -> bool: def 
transform_codelist_mapping_keys(codelist_mapping): # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 - codelist_mapping = {k: v for k, v in codelist_mapping.items() if not k.startswith('//iati-organisation')} - codelist_mapping = {re.sub(r'^\/\/iati-activity', './', k): v for k, v in codelist_mapping.items()} - codelist_mapping = {re.sub(r'^\/\/', './/', k): v for k, v in codelist_mapping.items()} + codelist_mapping = {k: v for k, v in codelist_mapping.items() if not k.startswith("//iati-organisation")} + codelist_mapping = {re.sub(r"^\/\/iati-activity", "./", k): v for k, v in codelist_mapping.items()} + codelist_mapping = {re.sub(r"^\/\/", ".//", k): v for k, v in codelist_mapping.items()} return codelist_mapping def create_codelist_mapping(major_version): codelist_mapping = {} - for x in json.load(open(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))): - if 'condition' in x: - pref, attr = x['path'].rsplit('/', 1) - path = '{0}[{1}]/{2}'.format( - pref, x['condition'], attr) + for x in json.load( + open(filepaths.join_data_path("IATI-Codelists-{}/out/clv2/mapping.json".format(major_version))) + ): + if "condition" in x: + pref, attr = x["path"].rsplit("/", 1) + path = "{0}[{1}]/{2}".format(pref, x["condition"], attr) else: - path = x['path'] - codelist_mapping[path] = x['codelist'] + path = x["path"] + codelist_mapping[path] = x["codelist"] return transform_codelist_mapping_keys(codelist_mapping) -MAJOR_VERSIONS = ['2', '1'] +MAJOR_VERSIONS = ["2", "1"] codelist_mapping = {v: create_codelist_mapping(v) for v in MAJOR_VERSIONS} # Create a big dictionary of all codelist values by version and codelist name codelist_sets = { major_version: { - cname: set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() - } for major_version in 
MAJOR_VERSIONS} + cname: set(c["code"] for c in codelist["data"]) + for cname, codelist in JSONDir( + filepaths.join_data_path("IATI-Codelists-{}/out/clv2/json/en/".format(major_version)) + ).items() + } + for major_version in MAJOR_VERSIONS +} codelist_lookup = { major_version: { - cname: {c['code']: c for c in codelist['data']} for cname, codelist in JSONDir(filepaths.join_data_path('IATI-Codelists-{}/out/clv2/json/en/'.format(major_version))).items() - } for major_version in MAJOR_VERSIONS} + cname: {c["code"]: c for c in codelist["data"]} + for cname, codelist in JSONDir( + filepaths.join_data_path("IATI-Codelists-{}/out/clv2/json/en/".format(major_version)) + ).items() + } + for major_version in MAJOR_VERSIONS +} # Simple look up to map publisher id to a publishers given name (title) -publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in ckan_publishers.items()} +publisher_name = { + publisher: publisher_json["result"]["title"] for publisher, publisher_json in ckan_publishers.items() +} # Create a list of tuples ordered by publisher given name titles - this allows us to display lists of publishers in alphabetical order -publishers_ordered_by_title = [(publisher_name[publisher], publisher) for publisher in current_stats['inverted_publisher']['activities'] if publisher in publisher_name] +publishers_ordered_by_title = [ + (publisher_name[publisher], publisher) + for publisher in current_stats["inverted_publisher"]["activities"] + if publisher in publisher_name +] publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) # List of publishers who report all their activities as a secondary publisher -secondary_publishers = [publisher for publisher, stats in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items() - if int(stats['activities']) == len(stats['activities_secondary_reported']) and int(stats['activities']) > 0] +secondary_publishers = [ + publisher + for publisher, stats in 
JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).items() + if int(stats["activities"]) == len(stats["activities_secondary_reported"]) and int(stats["activities"]) > 0 +] try: - dac2012 = {x[0]: Decimal(x[1].replace(',', '')) for x in csv.reader(open(filepaths.join_data_path('dac2012.csv')))} + dac2012 = {x[0]: Decimal(x[1].replace(",", "")) for x in csv.reader(open(filepaths.join_data_path("dac2012.csv")))} except IOError: dac2012 = {} def make_slugs(keys): - out = {'by_slug': {}, 'by_i': {}} + out = {"by_slug": {}, "by_i": {}} for i, key in enumerate(keys): - slug = re.sub(r'[^a-zA-Z0-9:@\-_]', '', re.sub(r'{[^}]*}', '', key.replace('{http://www.w3.org/XML/1998/namespace}', 'xml:').replace('/', '_'))).strip('_') - while slug in out['by_slug']: - slug += '_' - out['by_slug'][slug] = i - out['by_i'][i] = slug + slug = re.sub( + r"[^a-zA-Z0-9:@\-_]", + "", + re.sub(r"{[^}]*}", "", key.replace("{http://www.w3.org/XML/1998/namespace}", "xml:").replace("/", "_")), + ).strip("_") + while slug in out["by_slug"]: + slug += "_" + out["by_slug"][slug] = i + out["by_i"][i] = slug return out slugs = { - 'codelist': {major_version: ( - make_slugs(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()) - if major_version in current_stats['inverted_publisher']['codelist_values_by_major_version'] - else make_slugs([]) - ) for major_version in MAJOR_VERSIONS}, - 'element': make_slugs(current_stats['inverted_publisher']['elements'].keys()), - 'org_type': make_slugs(['accountable_org', 'extending_org', 'funding_org', 'implementing_org', 'provider_org', 'receiver_org']), + "codelist": { + major_version: ( + make_slugs(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].keys()) + if major_version in current_stats["inverted_publisher"]["codelist_values_by_major_version"] + else make_slugs([]) + ) + for major_version in MAJOR_VERSIONS + }, + "element": 
make_slugs(current_stats["inverted_publisher"]["elements"].keys()), + "org_type": make_slugs( + ["accountable_org", "extending_org", "funding_org", "implementing_org", "provider_org", "receiver_org"] + ), } diff --git a/dashboard/filepaths.py b/dashboard/filepaths.py index 49570b2190..b558a80845 100644 --- a/dashboard/filepaths.py +++ b/dashboard/filepaths.py @@ -10,24 +10,20 @@ def join_stats_path(p: str) -> str: - """Make a path to a file or directory within the downloaded stats directory - """ + """Make a path to a file or directory within the downloaded stats directory""" return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p) def join_data_path(p: str) -> str: - """Make a path to a file or directory within the downloaded data directory - """ + """Make a path to a file or directory within the downloaded data directory""" return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p) def join_base_path(p: str) -> str: - """Make a path to a file or directory relative to the base of the dashboard directory - """ + """Make a path to a file or directory relative to the base of the dashboard directory""" return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p) def join_out_path(p: str) -> str: - """Make a path to a file or directory relative to the base of the out directory - """ + """Make a path to a file or directory relative to the base of the out directory""" return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p) diff --git a/dashboard/forwardlooking.py b/dashboard/forwardlooking.py index 81301a5169..55a104dddb 100644 --- a/dashboard/forwardlooking.py +++ b/dashboard/forwardlooking.py @@ -11,76 +11,96 @@ # Set column groupings, to be displayed in the user output column_headers = [ - 'Current activities at the start of each year', - 'Current activities with budgets for each year', - 'Percentage of current activities with budgets' + "Current activities at the start of each year", + "Current activities with budgets for each year", + "Percentage of 
current activities with budgets", ] def generate_row(publisher): - """Generate forward-looking table data for a given publisher - """ + """Generate forward-looking table data for a given publisher""" # Store the data for this publisher as a new variable publisher_stats = get_publisher_stats(publisher) # Create a list for publisher data, and populate it with basic data row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_name[publisher] - row['year_columns'] = [{}, {}, {}] - row['budget_not_provided'] = False + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] + row["year_columns"] = [{}, {}, {}] + row["budget_not_provided"] = False # Work with hierarchies - by_hierarchy = publisher_stats['by_hierarchy'] + by_hierarchy = publisher_stats["by_hierarchy"] hierarchies_with_nonzero_budgets = [ - h for h, stats in by_hierarchy.items() - if not all(x == 0 for x in stats['forwardlooking_activities_with_budgets'].values()) + h + for h, stats in by_hierarchy.items() + if not all(x == 0 for x in stats["forwardlooking_activities_with_budgets"].values()) ] # Flag if budgets on current activities are reported at more than one hierarchy - row['flag'] = len(hierarchies_with_nonzero_budgets) > 1 + row["flag"] = len(hierarchies_with_nonzero_budgets) > 1 hierarchies_with_budget_not_provided = [ - h for h, stats in by_hierarchy.items() - if not all(x == 0 for x in stats['forwardlooking_activities_with_budget_not_provided'].values()) + h + for h, stats in by_hierarchy.items() + if not all(x == 0 for x in stats["forwardlooking_activities_with_budget_not_provided"].values()) ] # Loop over each of the three years (i.e. 
this year and the following two years) to generate the statistics for the table for year in years: - if (len(hierarchies_with_budget_not_provided) > 0): - row['budget_not_provided'] = True + if len(hierarchies_with_budget_not_provided) > 0: + row["budget_not_provided"] = True # If 'forwardlooking_activities_current' and 'forwardlooking_activities_with_budgets' or 'forwardlooking_activities_with_budget_not_provided' are in the bottom hierarchy - if 'forwardlooking_activities_current' in publisher_stats['bottom_hierarchy'] and ('forwardlooking_activities_with_budgets' in publisher_stats['bottom_hierarchy'] or 'forwardlooking_activities_with_budget_not_provided' in publisher_stats['bottom_hierarchy']): + if "forwardlooking_activities_current" in publisher_stats["bottom_hierarchy"] and ( + "forwardlooking_activities_with_budgets" in publisher_stats["bottom_hierarchy"] + or "forwardlooking_activities_with_budget_not_provided" in publisher_stats["bottom_hierarchy"] + ): if len(hierarchies_with_nonzero_budgets) != 1: # If budgets are at more than one hierarchy (or no hierarchies), just use activities at all hierarchies - row['year_columns'][0][year] = publisher_stats['forwardlooking_activities_current'].get(year) or 0 - row['year_columns'][1][year] = publisher_stats['forwardlooking_activities_with_budgets'].get(year) or 0 - if row['budget_not_provided']: - row['year_columns'][1][year] += publisher_stats['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 + row["year_columns"][0][year] = publisher_stats["forwardlooking_activities_current"].get(year) or 0 + row["year_columns"][1][year] = publisher_stats["forwardlooking_activities_with_budgets"].get(year) or 0 + if row["budget_not_provided"]: + row["year_columns"][1][year] += ( + publisher_stats["forwardlooking_activities_with_budget_not_provided"].get(year) or 0 + ) else: # Else, use the hierarchy which they are reported at - row['year_columns'][0][year] = 
by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_current'].get(year) or 0 - row['year_columns'][1][year] = by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budgets'].get(year) or 0 - if row['budget_not_provided']: - row['year_columns'][1][year] += by_hierarchy[hierarchies_with_nonzero_budgets[0]]['forwardlooking_activities_with_budget_not_provided'].get(year) or 0 - - if not int(row['year_columns'][0][year]): - row['year_columns'][2][year] = '-' + row["year_columns"][0][year] = ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]]["forwardlooking_activities_current"].get(year) + or 0 + ) + row["year_columns"][1][year] = ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]]["forwardlooking_activities_with_budgets"].get( + year + ) + or 0 + ) + if row["budget_not_provided"]: + row["year_columns"][1][year] += ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]][ + "forwardlooking_activities_with_budget_not_provided" + ].get(year) + or 0 + ) + + if not int(row["year_columns"][0][year]): + row["year_columns"][2][year] = "-" else: - row['year_columns'][2][year] = float(row['year_columns'][1][year]) / float(row['year_columns'][0][year]) * 100 + row["year_columns"][2][year] = ( + float(row["year_columns"][1][year]) / float(row["year_columns"][0][year]) * 100 + ) else: # Else if either 'forwardlooking_activities_current' or 'forwardlooking_activities_with_budgets' are not in the bottom hierarchy, set data zero # This should only occur if a publisher has 0 activities - row['year_columns'][0][year] = 0 - row['year_columns'][1][year] = 0 - row['year_columns'][2][year] = '-' + row["year_columns"][0][year] = 0 + row["year_columns"][1][year] = 0 + row["year_columns"][2][year] = "-" return row def table(): - """Generate forward-looking table data for every publisher and return as a generator object - """ + """Generate forward-looking table data for every publisher and return as a generator object""" # Loop over each 
publisher for publisher_title, publisher in publishers_ordered_by_title: diff --git a/dashboard/humanitarian.py b/dashboard/humanitarian.py index 2cbe96083e..09f50ed989 100644 --- a/dashboard/humanitarian.py +++ b/dashboard/humanitarian.py @@ -6,19 +6,18 @@ # Set column groupings, to be displayed in the user output columns = [ # slug, header - ('publisher_type', 'Publisher Type'), - ('num_activities', 'Number of Activities'), - ('publishing_humanitarian', 'Publishing Humanitarian?'), - ('humanitarian_attrib', 'Using Humanitarian Attribute?'), - ('appeal_emergency', 'Appeal or Emergency Details'), - ('clusters', 'Clusters'), - ('average', 'Average') + ("publisher_type", "Publisher Type"), + ("num_activities", "Number of Activities"), + ("publishing_humanitarian", "Publishing Humanitarian?"), + ("humanitarian_attrib", "Using Humanitarian Attribute?"), + ("appeal_emergency", "Appeal or Emergency Details"), + ("clusters", "Clusters"), + ("average", "Average"), ] def table(): - """Generate data for the humanitarian table - """ + """Generate data for the humanitarian table""" # Loop over each publisher for publisher_title, publisher in publishers_ordered_by_title: @@ -27,34 +26,42 @@ def table(): # Create a list for publisher data, and populate it with basic data row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_title - row['publisher_type'] = common.get_publisher_type(publisher)['name'] + row["publisher"] = publisher + row["publisher_title"] = publisher_title + row["publisher_type"] = common.get_publisher_type(publisher)["name"] # Get data from IATI-Stats output - row['num_activities'] = publisher_stats.get('humanitarian', {}).get('is_humanitarian', '0') - row['publishing_humanitarian'] = 100 if int(row['num_activities']) > 0 else 0 + row["num_activities"] = publisher_stats.get("humanitarian", {}).get("is_humanitarian", "0") + row["publishing_humanitarian"] = 100 if int(row["num_activities"]) > 0 else 0 # Calculate percentage of all 
humanitarian activities that are defined using the @humanitarian attribute - row['humanitarian_attrib'] = ( - publisher_stats.get('humanitarian', {}).get('is_humanitarian_by_attrib', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0. + row["humanitarian_attrib"] = ( + publisher_stats.get("humanitarian", {}).get("is_humanitarian_by_attrib", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 ) * 100 # Calculate percentage of all humanitarian activities that use the element to define an appeal or emergency - row['appeal_emergency'] = ( - publisher_stats.get('humanitarian', {}).get('contains_humanitarian_scope', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0. + row["appeal_emergency"] = ( + publisher_stats.get("humanitarian", {}).get("contains_humanitarian_scope", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 ) * 100 # Calculate percentage of all humanitarian activities that use clusters - row['clusters'] = ( - publisher_stats.get('humanitarian', {}).get('uses_humanitarian_clusters_vocab', '0') / float(row['num_activities']) - if int(row['num_activities']) > 0 else 0. 
+ row["clusters"] = ( + publisher_stats.get("humanitarian", {}).get("uses_humanitarian_clusters_vocab", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 ) * 100 # Calculate the mean average - row['average'] = (row['publishing_humanitarian'] + row['humanitarian_attrib'] + row['appeal_emergency'] + row['clusters']) / float(4) + row["average"] = ( + row["publishing_humanitarian"] + row["humanitarian_attrib"] + row["appeal_emergency"] + row["clusters"] + ) / float(4) # Return a generator object yield row diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py index 74e076efec..6e46430774 100644 --- a/dashboard/make_csv.py +++ b/dashboard/make_csv.py @@ -1,5 +1,6 @@ """Generates CSV files from data in the 'stats-calculated' folder and using additional logic """ + import csv import os import sys @@ -19,25 +20,27 @@ def publisher_dicts(): - publisher_name = {publisher: publisher_json['result']['title'] for publisher, publisher_json in data.ckan_publishers.items()} - for publisher, activities in data.current_stats['inverted_publisher']['activities'].items(): + publisher_name = { + publisher: publisher_json["result"]["title"] for publisher, publisher_json in data.ckan_publishers.items() + } + for publisher, activities in data.current_stats["inverted_publisher"]["activities"].items(): if publisher not in data.ckan_publishers: continue publisher_stats = data.get_publisher_stats(publisher) yield { - 'Publisher Name': publisher_name[publisher], - 'Publisher Registry Id': publisher, - 'Activities': activities, - 'Organisations': publisher_stats['organisations'], - 'Files': publisher_stats['activity_files'] + publisher_stats['organisation_files'], - 'Activity Files': publisher_stats['activity_files'], - 'Organisation Files': publisher_stats['organisation_files'], - 'Total File Size': publisher_stats['file_size'], - 'Reporting Org on Registry': data.ckan_publishers[publisher]['result']['publisher_iati_id'], - 'Reporting Orgs in Data (count)': 
len(publisher_stats['reporting_orgs']), - 'Reporting Orgs in Data': ';'.join(publisher_stats['reporting_orgs']), - 'Hierarchies (count)': len(publisher_stats['hierarchies']), - 'Hierarchies': ';'.join(publisher_stats['hierarchies']), + "Publisher Name": publisher_name[publisher], + "Publisher Registry Id": publisher, + "Activities": activities, + "Organisations": publisher_stats["organisations"], + "Files": publisher_stats["activity_files"] + publisher_stats["organisation_files"], + "Activity Files": publisher_stats["activity_files"], + "Organisation Files": publisher_stats["organisation_files"], + "Total File Size": publisher_stats["file_size"], + "Reporting Org on Registry": data.ckan_publishers[publisher]["result"]["publisher_iati_id"], + "Reporting Orgs in Data (count)": len(publisher_stats["reporting_orgs"]), + "Reporting Orgs in Data": ";".join(publisher_stats["reporting_orgs"]), + "Hierarchies (count)": len(publisher_stats["hierarchies"]), + "Hierarchies": ";".join(publisher_stats["hierarchies"]), } @@ -51,135 +54,210 @@ def main(): logger.addHandler(logging.StreamHandler(sys.stdout)) logger.info("Generating CSV files") - os.makedirs(filepaths.join_out_path('data/csv'), exist_ok=True) + os.makedirs(filepaths.join_out_path("data/csv"), exist_ok=True) logger.info("Generating publishers.csv") - with open(filepaths.join_out_path('data/csv/publishers.csv'), 'w') as fp: - writer = csv.DictWriter(fp, [ - 'Publisher Name', - 'Publisher Registry Id', - 'Activities', - 'Organisations', - 'Files', - 'Activity Files', - 'Organisation Files', - 'Total File Size', - 'Reporting Org on Registry', - 'Reporting Orgs in Data (count)', - 'Reporting Orgs in Data', - 'Hierarchies (count)', - 'Hierarchies', - ]) + with open(filepaths.join_out_path("data/csv/publishers.csv"), "w") as fp: + writer = csv.DictWriter( + fp, + [ + "Publisher Name", + "Publisher Registry Id", + "Activities", + "Organisations", + "Files", + "Activity Files", + "Organisation Files", + "Total File Size", + 
"Reporting Org on Registry", + "Reporting Orgs in Data (count)", + "Reporting Orgs in Data", + "Hierarchies (count)", + "Hierarchies", + ], + ) writer.writeheader() for d in publisher_dicts(): writer.writerow(d) logger.info("Generating elements.csv") - publishers = list(data.current_stats['inverted_publisher']['activities'].keys()) - with open(filepaths.join_out_path('data/csv/elements.csv'), 'w') as fp: - writer = csv.DictWriter(fp, ['Element'] + publishers) + publishers = list(data.current_stats["inverted_publisher"]["activities"].keys()) + with open(filepaths.join_out_path("data/csv/elements.csv"), "w") as fp: + writer = csv.DictWriter(fp, ["Element"] + publishers) writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): - publisher_dict['Element'] = element + for element, publisher_dict in data.current_stats["inverted_publisher"]["elements"].items(): + publisher_dict["Element"] = element writer.writerow(publisher_dict) logger.info("Generating elements_total.csv") - with open(filepaths.join_out_path('data/csv/elements_total.csv'), 'w') as fp: - writer = csv.DictWriter(fp, ['Element'] + publishers) + with open(filepaths.join_out_path("data/csv/elements_total.csv"), "w") as fp: + writer = csv.DictWriter(fp, ["Element"] + publishers) writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): - publisher_dict['Element'] = element + for element, publisher_dict in data.current_stats["inverted_publisher"]["elements_total"].items(): + publisher_dict["Element"] = element writer.writerow(publisher_dict) logger.info("Generating registry.csv") - with open(filepaths.join_out_path('data/csv/registry.csv'), 'w') as fp: - keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 
'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness']
+    with open(filepaths.join_out_path("data/csv/registry.csv"), "w") as fp:
+        keys = [
+            "name",
+            "title",
+            "publisher_frequency",
+            "publisher_frequency_select",
+            "publisher_implementation_schedule",
+            "publisher_ui",
+            "publisher_field_exclusions",
+            "publisher_contact",
+            "image_url",
+            "display_name",
+            "publisher_iati_id",
+            "publisher_units",
+            "publisher_record_exclusions",
+            "publisher_data_quality",
+            "publisher_country",
+            "publisher_description",
+            "publisher_refs",
+            "publisher_thresholds", "publisher_agencies",
+            "publisher_constraints",
+            "publisher_organization_type",
+            "publisher_segmentation",
+            "license_id",
+            "state",
+            "publisher_timeliness",
+        ]
         writer = csv.DictWriter(fp, keys)
         writer.writeheader()
         for publisher_json in data.ckan_publishers.values():
-            writer.writerow({x: publisher_json['result'].get(x) or 0 for x in keys})
+            writer.writerow({x: publisher_json["result"].get(x) or 0 for x in keys})
 
     logger.info("Generating timeliness_frequency.csv")
    previous_months = timeliness.previous_months_reversed
-    with open(filepaths.join_out_path('data/csv/timeliness_frequency.csv'), 'w') as fp:
+    with open(filepaths.join_out_path("data/csv/timeliness_frequency.csv"), "w") as fp:
         writer = csv.writer(fp)
-        writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Frequency', 'First published'])
-        for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted():
-            writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment, first_published_band])
+        writer.writerow(
+            ["Publisher Name", "Publisher Registry Id"] + 
previous_months + ["Frequency", "First published"] + ) + for ( + publisher, + publisher_title, + per_month, + assessment, + hft, + first_published_band, + ) in timeliness.publisher_frequency_sorted(): + writer.writerow( + [publisher_title, publisher] + + [per_month.get(x) or 0 for x in previous_months] + + [assessment, first_published_band] + ) logger.info("Generating timeliness_timelag.csv") - with open(filepaths.join_out_path('data/csv/timeliness_timelag.csv'), 'w') as fp: + with open(filepaths.join_out_path("data/csv/timeliness_timelag.csv"), "w") as fp: writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + previous_months + ['Time lag']) + writer.writerow(["Publisher Name", "Publisher Registry Id"] + previous_months + ["Time lag"]) for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): - writer.writerow([publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment]) + writer.writerow( + [publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment] + ) logger.info("Generating forwardlooking.csv") - with open(filepaths.join_out_path('data/csv/forwardlooking.csv'), 'w') as fp: + with open(filepaths.join_out_path("data/csv/forwardlooking.csv"), "w") as fp: writer = csv.writer(fp) - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + ['{} ({})'.format(header, year) for header in forwardlooking.column_headers for year in forwardlooking.years]) + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [ + "{} ({})".format(header, year) + for header in forwardlooking.column_headers + for year in forwardlooking.years + ] + ) for row in forwardlooking.table(): - writer.writerow([row['publisher_title'], row['publisher']] + [year_column[year] for year_column in row['year_columns'] for year in forwardlooking.years]) + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [year_column[year] for 
year_column in row["year_columns"] for year in forwardlooking.years] + ) for tab in comprehensiveness.columns.keys(): logger.info("Generating comprehensiveness_{}.csv".format(tab)) - with open(filepaths.join_out_path('data/csv/comprehensiveness_{}.csv'.format(tab)), 'w') as fp: + with open(filepaths.join_out_path("data/csv/comprehensiveness_{}.csv".format(tab)), "w") as fp: writer = csv.writer(fp) - if tab == 'financials': - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + - [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]] + - ['Using budget-not-provided']) + if tab == "financials": + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [x + " (with valid data)" for x in comprehensiveness.column_headers[tab]] + + [x + " (with any data)" for x in comprehensiveness.column_headers[tab]] + + ["Using budget-not-provided"] + ) for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] + - [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - ['Yes' if row['flag'] else '-']) + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [ + row[slug + "_valid"] if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [row[slug] if slug in row else "-" for slug in comprehensiveness.column_slugs[tab]] + + ["Yes" if row["flag"] else "-"] + ) else: - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + - [x + ' (with valid data)' for x in comprehensiveness.column_headers[tab]] + - [x + ' (with any data)' for x in comprehensiveness.column_headers[tab]]) + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [x + " (with valid data)" for x in comprehensiveness.column_headers[tab]] + + [x + " (with any data)" for x in 
comprehensiveness.column_headers[tab]] + ) for row in comprehensiveness.table(): - writer.writerow([row['publisher_title'], row['publisher']] + - [row[slug + '_valid'] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]] + - [row[slug] if slug in row else '-' for slug in comprehensiveness.column_slugs[tab]]) + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [ + row[slug + "_valid"] if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [row[slug] if slug in row else "-" for slug in comprehensiveness.column_slugs[tab]] + ) logger.info("Generating summary_stats.csv") - with open(filepaths.join_out_path('data/csv/summary_stats.csv'), 'w') as fp: + with open(filepaths.join_out_path("data/csv/summary_stats.csv"), "w") as fp: writer = csv.writer(fp) # Add column headers - writer.writerow(['Publisher Name', 'Publisher Registry Id'] + [header for slug, header in summary_stats.columns]) + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + [header for slug, header in summary_stats.columns] + ) for row in summary_stats.table(): # Write each row - writer.writerow([row['publisher_title'], row['publisher']] + [row[slug] for slug, header in summary_stats.columns]) + writer.writerow( + [row["publisher_title"], row["publisher"]] + [row[slug] for slug, header in summary_stats.columns] + ) logger.info("Generating humanitarian.csv") - with open(filepaths.join_out_path('data/csv/humanitarian.csv'), 'w') as fp: + with open(filepaths.join_out_path("data/csv/humanitarian.csv"), "w") as fp: writer = csv.writer(fp) # Add column headers - writer.writerow([ - 'Publisher Name', - 'Publisher Registry Id', - 'Publisher Type', - 'Number of Activities', - 'Publishing Humanitarian', - 'Using Humanitarian Attribute', - 'Appeal or Emergency Details', - 'Clusters', - 'Humanitarian Score' - ]) + writer.writerow( + [ + "Publisher Name", + "Publisher Registry Id", + "Publisher Type", + "Number of Activities", + "Publishing 
Humanitarian", + "Using Humanitarian Attribute", + "Appeal or Emergency Details", + "Clusters", + "Humanitarian Score", + ] + ) for row in humanitarian.table(): - writer.writerow([ - row['publisher_title'], - row['publisher'], - row['publisher_type'], - row['num_activities'], - row['publishing_humanitarian'], - row['humanitarian_attrib'], - row['appeal_emergency'], - row['clusters'], - row['average'] - ]) + writer.writerow( + [ + row["publisher_title"], + row["publisher"], + row["publisher_type"], + row["num_activities"], + row["publishing_humanitarian"], + row["humanitarian_attrib"], + row["appeal_emergency"], + row["clusters"], + row["average"], + ] + ) if __name__ == "__main__": diff --git a/dashboard/make_html.py b/dashboard/make_html.py index 04546b9a6a..3e85fa7d49 100644 --- a/dashboard/make_html.py +++ b/dashboard/make_html.py @@ -14,6 +14,7 @@ import timeliness import forwardlooking import comprehensiveness + # import coverage import summary_stats import humanitarian @@ -36,9 +37,10 @@ publisher_name, publishers_ordered_by_title, is_valid_element, - slugs) + slugs, +) -app = Flask(__name__, static_url_path='') +app = Flask(__name__, static_url_path="") def dictinvert(d): @@ -57,339 +59,400 @@ def nested_dictinvert(d): def dataset_to_publisher(dataset_slug): - """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher - slug e.g. dfid """ - return dataset_to_publisher_dict.get(dataset_slug, '') + """Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher + slug e.g. 
dfid""" + return dataset_to_publisher_dict.get(dataset_slug, "") def firstint(s): - if s[0].startswith('<'): + if s[0].startswith("<"): return 0 - m = re.search(r'\d+', s[0]) + m = re.search(r"\d+", s[0]) return int(m.group(0)) def round_nicely(val, ndigits=2): - """ Round a float, but remove the trailing .0 from integers that python insists on - """ + """Round a float, but remove the trailing .0 from integers that python insists on""" if int(val) == float(val): return int(val) return round(float(val), ndigits) def xpath_to_url(path): - path = path.strip('./') + path = path.strip("./") # remove conditions - path = re.sub(r'\[[^]]+\]', '', path) - if path.startswith('iati-activity'): - url = 'http://iatistandard.org/activity-standard/iati-activities/' + path.split('@')[0] - elif path.startswith('iati-organisation'): - url = 'http://iatistandard.org/organisation-standard/iati-organisations/' + path.split('@')[0] + path = re.sub(r"\[[^]]+\]", "", path) + if path.startswith("iati-activity"): + url = "http://iatistandard.org/activity-standard/iati-activities/" + path.split("@")[0] + elif path.startswith("iati-organisation"): + url = "http://iatistandard.org/organisation-standard/iati-organisations/" + path.split("@")[0] else: - url = 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] - if '@' in path: - url += '#attributes' + url = "http://iatistandard.org/activity-standard/iati-activities/iati-activity/" + path.split("@")[0] + if "@" in path: + url += "#attributes" return url def registration_agency(orgid): - for code in codelist_sets['2']['OrganisationRegistrationAgency']: + for code in codelist_sets["2"]["OrganisationRegistrationAgency"]: if orgid.startswith(code): return code def get_codelist_values(codelist_values_for_element): """Return a list of unique values present within a one-level nested dictionary. 
- Envisaged usage is to gather the codelist values used by each publisher, as in - stats/current/inverted-publisher/codelist_values_by_major_version.json - Input: Set of codelist values for a given element (listed by publisher), for example: - current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] + Envisaged usage is to gather the codelist values used by each publisher, as in + stats/current/inverted-publisher/codelist_values_by_major_version.json + Input: Set of codelist values for a given element (listed by publisher), for example: + current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] """ return list(set([y for x in codelist_values_for_element.items() for y in list(x[1].keys())])) # Store data processing times -date_time_data_obj = parser.parse(metadata['created_at']) +date_time_data_obj = parser.parse(metadata["created_at"]) # Custom Jinja filters -app.jinja_env.filters['xpath_to_url'] = xpath_to_url -app.jinja_env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] -app.jinja_env.filters['has_future_transactions'] = timeliness.has_future_transactions -app.jinja_env.filters['round_nicely'] = round_nicely +app.jinja_env.filters["xpath_to_url"] = xpath_to_url +app.jinja_env.filters["url_to_filename"] = lambda x: x.rstrip("/").split("/")[-1] +app.jinja_env.filters["has_future_transactions"] = timeliness.has_future_transactions +app.jinja_env.filters["round_nicely"] = round_nicely # Custom Jinja globals - NOTE: codeforIATI stats URLs have not been # changed. 
-app.jinja_env.globals['dataset_to_publisher'] = dataset_to_publisher -app.jinja_env.globals['url'] = lambda x: '/' if x == 'index.html' else x -app.jinja_env.globals['datetime_generated'] = lambda: datetime.now(UTC).strftime('%-d %B %Y (at %H:%M %Z)') -app.jinja_env.globals['datetime_data'] = date_time_data_obj.strftime('%-d %B %Y (at %H:%M %Z)') -app.jinja_env.globals['commit_hash'] = subprocess.run( - 'git show --format=%H --no-patch'.split(), - capture_output=True).stdout.decode().strip() -app.jinja_env.globals['stats_commit_hash'] = subprocess.run( - 'git -C stats-calculated show --format=%H --no-patch'.split(), - capture_output=True).stdout.decode().strip() -app.jinja_env.globals['stats_url'] = 'https://stats.codeforiati.org' -app.jinja_env.globals['stats_gh_url'] = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + app.jinja_env.globals['stats_commit_hash'] -app.jinja_env.globals['sorted'] = sorted -app.jinja_env.globals['enumerate'] = enumerate -app.jinja_env.globals['top_titles'] = text.top_titles -app.jinja_env.globals['page_titles'] = text.page_titles -app.jinja_env.globals['short_page_titles'] = text.short_page_titles -app.jinja_env.globals['page_leads'] = text.page_leads -app.jinja_env.globals['page_sub_leads'] = text.page_sub_leads -app.jinja_env.globals['top_navigation'] = text.top_navigation -app.jinja_env.globals['navigation'] = text.navigation -app.jinja_env.globals['navigation_reverse'] = {page: k for k, pages in text.navigation.items() for page in pages} -app.jinja_env.globals['navigation_reverse'].update({k: k for k in text.navigation}) -app.jinja_env.globals['current_stats'] = current_stats -app.jinja_env.globals['ckan'] = ckan -app.jinja_env.globals['ckan_publishers'] = ckan_publishers -app.jinja_env.globals['github_issues'] = github_issues -app.jinja_env.globals['publisher_name'] = publisher_name -app.jinja_env.globals['publishers_ordered_by_title'] = publishers_ordered_by_title -app.jinja_env.globals['get_publisher_stats'] = 
get_publisher_stats -app.jinja_env.globals['set'] = set -app.jinja_env.globals['firstint'] = firstint -app.jinja_env.globals['expected_versions'] = expected_versions -app.jinja_env.globals['current_year'] = datetime.now(UTC).year +app.jinja_env.globals["dataset_to_publisher"] = dataset_to_publisher +app.jinja_env.globals["url"] = lambda x: "/" if x == "index.html" else x +app.jinja_env.globals["datetime_generated"] = lambda: datetime.now(UTC).strftime("%-d %B %Y (at %H:%M %Z)") +app.jinja_env.globals["datetime_data"] = date_time_data_obj.strftime("%-d %B %Y (at %H:%M %Z)") +app.jinja_env.globals["commit_hash"] = ( + subprocess.run("git show --format=%H --no-patch".split(), capture_output=True).stdout.decode().strip() +) +app.jinja_env.globals["stats_commit_hash"] = ( + subprocess.run("git -C stats-calculated show --format=%H --no-patch".split(), capture_output=True) + .stdout.decode() + .strip() +) +app.jinja_env.globals["stats_url"] = "https://stats.codeforiati.org" +app.jinja_env.globals["stats_gh_url"] = ( + "https://github.com/codeforIATI/IATI-Stats-public/tree/" + app.jinja_env.globals["stats_commit_hash"] +) +app.jinja_env.globals["sorted"] = sorted +app.jinja_env.globals["enumerate"] = enumerate +app.jinja_env.globals["top_titles"] = text.top_titles +app.jinja_env.globals["page_titles"] = text.page_titles +app.jinja_env.globals["short_page_titles"] = text.short_page_titles +app.jinja_env.globals["page_leads"] = text.page_leads +app.jinja_env.globals["page_sub_leads"] = text.page_sub_leads +app.jinja_env.globals["top_navigation"] = text.top_navigation +app.jinja_env.globals["navigation"] = text.navigation +app.jinja_env.globals["navigation_reverse"] = {page: k for k, pages in text.navigation.items() for page in pages} +app.jinja_env.globals["navigation_reverse"].update({k: k for k in text.navigation}) +app.jinja_env.globals["current_stats"] = current_stats +app.jinja_env.globals["ckan"] = ckan +app.jinja_env.globals["ckan_publishers"] = ckan_publishers 
+app.jinja_env.globals["github_issues"] = github_issues +app.jinja_env.globals["publisher_name"] = publisher_name +app.jinja_env.globals["publishers_ordered_by_title"] = publishers_ordered_by_title +app.jinja_env.globals["get_publisher_stats"] = get_publisher_stats +app.jinja_env.globals["set"] = set +app.jinja_env.globals["firstint"] = firstint +app.jinja_env.globals["expected_versions"] = expected_versions +app.jinja_env.globals["current_year"] = datetime.now(UTC).year # Following variables set in coverage branch but not in master # app.jinja_env.globals['float'] = float # app.jinja_env.globals['dac2012'] = dac2012 -app.jinja_env.globals['MAJOR_VERSIONS'] = MAJOR_VERSIONS +app.jinja_env.globals["MAJOR_VERSIONS"] = MAJOR_VERSIONS -app.jinja_env.globals['slugs'] = slugs -app.jinja_env.globals['codelist_mapping'] = codelist_mapping -app.jinja_env.globals['codelist_sets'] = codelist_sets -app.jinja_env.globals['codelist_lookup'] = codelist_lookup -app.jinja_env.globals['get_codelist_values'] = get_codelist_values -app.jinja_env.globals['is_valid_element'] = is_valid_element +app.jinja_env.globals["slugs"] = slugs +app.jinja_env.globals["codelist_mapping"] = codelist_mapping +app.jinja_env.globals["codelist_sets"] = codelist_sets +app.jinja_env.globals["codelist_lookup"] = codelist_lookup +app.jinja_env.globals["get_codelist_values"] = get_codelist_values +app.jinja_env.globals["is_valid_element"] = is_valid_element basic_page_names = [ - 'headlines', - 'data_quality', - 'exploring_data', - 'publishers', - 'publishing_stats', - 'timeliness', - 'timeliness_timelag', - 'forwardlooking', - 'comprehensiveness', - 'comprehensiveness_core', - 'comprehensiveness_financials', - 'comprehensiveness_valueadded', + "headlines", + "data_quality", + "exploring_data", + "publishers", + "publishing_stats", + "timeliness", + "timeliness_timelag", + "forwardlooking", + "comprehensiveness", + "comprehensiveness_core", + "comprehensiveness_financials", + "comprehensiveness_valueadded", # 
'coverage', - 'summary_stats', - 'humanitarian', - 'files', - 'activities', - 'download', - 'xml', - 'validation', - 'versions', - 'organisation', - 'identifiers', - 'reporting_orgs', - 'elements', - 'codelists', - 'booleans', - 'dates', - 'traceability', - 'org_ids', - 'faq', + "summary_stats", + "humanitarian", + "files", + "activities", + "download", + "xml", + "validation", + "versions", + "organisation", + "identifiers", + "reporting_orgs", + "elements", + "codelists", + "booleans", + "dates", + "traceability", + "org_ids", + "faq", ] -@app.route('/.html') +@app.route("/.html") def basic_page(page_name): if page_name in basic_page_names: kwargs = {} - if page_name.startswith('timeliness'): - kwargs['timeliness'] = timeliness - parent_page_name = 'timeliness' - elif page_name.startswith('forwardlooking'): - kwargs['forwardlooking'] = forwardlooking - parent_page_name = 'forwardlooking' - elif page_name.startswith('comprehensiveness'): - kwargs['comprehensiveness'] = comprehensiveness - parent_page_name = 'comprehensiveness' - elif page_name.startswith('coverage'): + if page_name.startswith("timeliness"): + kwargs["timeliness"] = timeliness + parent_page_name = "timeliness" + elif page_name.startswith("forwardlooking"): + kwargs["forwardlooking"] = forwardlooking + parent_page_name = "forwardlooking" + elif page_name.startswith("comprehensiveness"): + kwargs["comprehensiveness"] = comprehensiveness + parent_page_name = "comprehensiveness" + elif page_name.startswith("coverage"): # kwargs['coverage'] = coverage - parent_page_name = 'coverage' - elif page_name.startswith('summary_stats'): - kwargs['summary_stats'] = summary_stats - parent_page_name = 'summary_stats' - elif page_name.startswith('humanitarian'): - kwargs['humanitarian'] = humanitarian - parent_page_name = 'humanitarian' + parent_page_name = "coverage" + elif page_name.startswith("summary_stats"): + kwargs["summary_stats"] = summary_stats + parent_page_name = "summary_stats" + elif 
page_name.startswith("humanitarian"): + kwargs["humanitarian"] = humanitarian + parent_page_name = "humanitarian" else: parent_page_name = page_name - return render_template(page_name + '.html', page=parent_page_name, **kwargs) + return render_template(page_name + ".html", page=parent_page_name, **kwargs) else: abort(404) -@app.route('/data/download_errors.json') +@app.route("/data/download_errors.json") def download_errors_json(): - return Response(json.dumps(current_stats['download_errors'], indent=2), mimetype='application/json') + return Response(json.dumps(current_stats["download_errors"], indent=2), mimetype="application/json") -@app.route('/') +@app.route("/") def homepage(): - return render_template('index.html', page='index') + return render_template("index.html", page="index") -app.add_url_rule('/licenses.html', 'licenses', licenses.main) -app.add_url_rule('/license/.html', 'licenses_individual_license', licenses.individual_license) +app.add_url_rule("/licenses.html", "licenses", licenses.main) +app.add_url_rule("/license/.html", "licenses_individual_license", licenses.individual_license) -@app.route('/publisher/.html') +@app.route("/publisher/.html") def publisher(publisher): publisher_stats = get_publisher_stats(publisher) try: - budget_table = [{ - 'year': 'Total', - 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), - 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, - 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' 
in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - }] + [{'year': year, - 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), - 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, - 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) - ] - failure_count = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) + budget_table = [ + { + "year": "Total", + "count_total": sum(sum(x.values()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), + "sum_total": { + currency: sum(sums.values()) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["1"].values()) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: sum(v.values()) for k, v in 
publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["2"].values()) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + ] + [ + { + "year": year, + "count_total": sum( + x[year] for x in publisher_stats["count_budgets_by_type_by_year"].values() if year in x + ), + "sum_total": { + currency: sums.get(year) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + publisher_stats["count_budgets_by_type_by_year"]["1"].get(year) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + publisher_stats["count_budgets_by_type_by_year"]["2"].get(year) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + for year in sorted( + set(sum((list(x.keys()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), [])) + ) + ] + failure_count = len(current_stats["inverted_file_publisher"][publisher]["validation"].get("fail", {})) except KeyError: abort(404) - return render_template('publisher.html', - publisher=publisher, - publisher_stats=publisher_stats, - failure_count=failure_count, - publisher_inverted=get_publisher_stats(publisher, 'inverted-file'), - 
publisher_licenses=licenses.licenses_for_publisher(publisher), - budget_table=budget_table,) - - -@app.route('/codelist//.html') + return render_template( + "publisher.html", + publisher=publisher, + publisher_stats=publisher_stats, + failure_count=failure_count, + publisher_inverted=get_publisher_stats(publisher, "inverted-file"), + publisher_licenses=licenses.licenses_for_publisher(publisher), + budget_table=budget_table, + ) + + +@app.route("/codelist//.html") def codelist(major_version, slug): - i = slugs['codelist'][major_version]['by_slug'][slug] - element = list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] - values = nested_dictinvert(list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values())[i]) - return render_template('codelist.html', - element=element, - values=values, - reverse_codelist_mapping={major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items()}, - major_version=major_version, - page='codelists') - - -@app.route('/element/.html') + i = slugs["codelist"][major_version]["by_slug"][slug] + element = list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version])[i] + values = nested_dictinvert( + list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].values())[i] + ) + return render_template( + "codelist.html", + element=element, + values=values, + reverse_codelist_mapping={ + major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() + }, + major_version=major_version, + page="codelists", + ) + + +@app.route("/element/.html") def element(slug): - i = slugs['element']['by_slug'][slug] - element = list(current_stats['inverted_publisher']['elements'])[i] - publishers = list(current_stats['inverted_publisher']['elements'].values())[i] - return render_template('element.html', - element=element, - publishers=publishers, - 
element_or_attribute='attribute' if '@' in element else 'element', - page='elements') - - -@app.route('/org_type/.html') + i = slugs["element"]["by_slug"][slug] + element = list(current_stats["inverted_publisher"]["elements"])[i] + publishers = list(current_stats["inverted_publisher"]["elements"].values())[i] + return render_template( + "element.html", + element=element, + publishers=publishers, + element_or_attribute="attribute" if "@" in element else "element", + page="elements", + ) + + +@app.route("/org_type/.html") def org_type(slug): - assert slug in slugs['org_type']['by_slug'] - return render_template('org_type.html', - slug=slug, - page='org_ids') + assert slug in slugs["org_type"]["by_slug"] + return render_template("org_type.html", slug=slug, page="org_ids") -@app.route('/registration_agencies.html') +@app.route("/registration_agencies.html") def registration_agencies(): registration_agencies = defaultdict(int) registration_agencies_publishers = defaultdict(list) nonmatching = [] - for orgid, publishers in current_stats['inverted_publisher']['reporting_orgs'].items(): + for orgid, publishers in current_stats["inverted_publisher"]["reporting_orgs"].items(): reg_ag = registration_agency(orgid) if reg_ag: registration_agencies[reg_ag] += 1 registration_agencies_publishers[reg_ag] += list(publishers) else: nonmatching.append((orgid, publishers)) - return render_template('registration_agencies.html', - page='registration_agencies', - registration_agencies=registration_agencies, - registration_agencies_publishers=registration_agencies_publishers, - nonmatching=nonmatching) - - -@app.route('/') + return render_template( + "registration_agencies.html", + page="registration_agencies", + registration_agencies=registration_agencies, + registration_agencies_publishers=registration_agencies_publishers, + nonmatching=nonmatching, + ) + + +@app.route( + '/' +) def serve_images_development(filename): """Serve static images through the development server (--live)""" - 
return send_from_directory('static/', filename) + return send_from_directory("static/", filename) @app.route('/') def serve_css_development(filename): """Serve static css through the development server (--live)""" - return send_from_directory('static/', filename) + return send_from_directory("static/", filename) -@app.route('/favicon.ico') +@app.route("/favicon.ico") def favicon_root(): """Serve favicon from img folder when requested from root""" - return send_from_directory('static/img', 'favicon.ico') + return send_from_directory("static/img", "favicon.ico") -@app.route('/.csv') +@app.route("/.csv") def csv_development(name): - return send_from_directory('out', name + '.csv') + return send_from_directory("out", name + ".csv") -@app.route('/publisher_imgs/.png') +@app.route("/publisher_imgs/.png") def image_development_publisher(image): - return send_from_directory('out/publisher_imgs', image + '.png') + return send_from_directory("out/publisher_imgs", image + ".png") -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--live", action="store_true", - help="Run a development server") + parser.add_argument("--live", action="store_true", help="Run a development server") args = parser.parse_args() if args.live: app.debug = True app.run() else: from flask_frozen import Freezer - app.config['FREEZER_DESTINATION'] = 'out' - app.config['FREEZER_REMOVE_EXTRA_FILES'] = False - app.config['FREEZER_IGNORE_404_NOT_FOUND'] = True - app.debug = False # Comment to turn off debugging - app.testing = True # Comment to turn off debugging + + app.config["FREEZER_DESTINATION"] = "out" + app.config["FREEZER_REMOVE_EXTRA_FILES"] = False + app.config["FREEZER_IGNORE_404_NOT_FOUND"] = True + app.debug = False # Comment to turn off debugging + app.testing = True # Comment to turn off debugging freezer = Freezer(app) @freezer.register_generator def url_generator(): for page_name in basic_page_names: - yield 'basic_page', 
{'page_name': page_name} - for publisher in current_stats['inverted_publisher']['activities'].keys(): - yield 'publisher', {'publisher': publisher} - for slug in slugs['element']['by_slug']: - yield 'element', {'slug': slug} - for major_version, codelist_slugs in slugs['codelist'].items(): - for slug in codelist_slugs['by_slug']: - yield 'codelist', { - 'slug': slug, - 'major_version': major_version - } - for slug in slugs['org_type']['by_slug']: - yield 'org_type', {'slug': slug} + yield "basic_page", {"page_name": page_name} + for publisher in current_stats["inverted_publisher"]["activities"].keys(): + yield "publisher", {"publisher": publisher} + for slug in slugs["element"]["by_slug"]: + yield "element", {"slug": slug} + for major_version, codelist_slugs in slugs["codelist"].items(): + for slug in codelist_slugs["by_slug"]: + yield "codelist", {"slug": slug, "major_version": major_version} + for slug in slugs["org_type"]["by_slug"]: + yield "org_type", {"slug": slug} for license in set(licenses.licenses): - yield 'licenses_individual_license', {'license': license} + yield "licenses_individual_license", {"license": license} freezer.freeze() diff --git a/dashboard/make_plots.py b/dashboard/make_plots.py index cdbe27640d..99409024f9 100644 --- a/dashboard/make_plots.py +++ b/dashboard/make_plots.py @@ -16,7 +16,8 @@ import filepaths from vars import expected_versions # noqa: F401 import matplotlib as mpl -mpl.use('Agg') + +mpl.use("Agg") import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 @@ -31,24 +32,24 @@ def __init__(self, folder, failed_downloads, gitaggregate_publisher): self.gitaggregate_publisher = gitaggregate_publisher def __getitem__(self, key): - if key == 'failed_downloads': + if key == "failed_downloads": return dict((row[0], row[1]) for row in self.failed_downloads) - elif key == 'publisher_types': + elif key == "publisher_types": out = defaultdict(lambda: defaultdict(int)) for publisher, publisher_data in 
self.gitaggregate_publisher.items(): if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring, count in publisher_data['activities'].items(): + organization_type = common.get_publisher_type(publisher)["name"] + for datestring, count in publisher_data["activities"].items(): out[datestring][organization_type] += 1 else: logger.debug("Getting by publisher_type unmatched publisher <{}>".format(publisher)) return out - elif key == 'activities_per_publisher_type': + elif key == "activities_per_publisher_type": out = defaultdict(lambda: defaultdict(int)) for publisher, publisher_data in self.gitaggregate_publisher.items(): if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring, count in publisher_data['activities'].items(): + organization_type = common.get_publisher_type(publisher)["name"] + for datestring, count in publisher_data["activities"].items(): out[datestring][organization_type] += count else: logger.debug("Getting by activities_per_publisher_type unmatched publisher <{}>".format(publisher)) @@ -57,7 +58,7 @@ def __getitem__(self, key): return super(AugmentedJSONDir, self).__getitem__(key) -def make_plot(stat_path, git_stats, img_prefix=''): +def make_plot(stat_path, git_stats, img_prefix=""): if type(stat_path) is tuple: stat_name = stat_path[0] else: @@ -75,10 +76,10 @@ def make_plot(stat_path, git_stats, img_prefix=''): # years = mdates.YearLocator() # every year # months = mdates.MonthLocator() # every month - datefmt = mdates.DateFormatter('%Y-%m-%d') + datefmt = mdates.DateFormatter("%Y-%m-%d") fig, ax = plt.subplots() - ax.set_prop_cycle('color', ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#00ff00', '#fc5ab8', '#af31f2']) + ax.set_prop_cycle("color", ["b", "g", "r", "c", "m", "y", "k", "#00ff00", "#fc5ab8", "#af31f2"]) fig_legend = plt.figure() dpi = 96 fig.set_size_inches(600.0 / dpi, 600.0 / dpi) @@ -87,16 +88,16 @@ def 
make_plot(stat_path, git_stats, img_prefix=''): keys = set([tm for y in y_values for tm in y.keys()]) plots = {} for key in keys: - plots[key], = ax.plot(x_values, [y.get(key) or 0 for y in y_values]) - if stat_name in ['publisher_types', 'activities_per_publisher_type']: + (plots[key],) = ax.plot(x_values, [y.get(key) or 0 for y in y_values]) + if stat_name in ["publisher_types", "activities_per_publisher_type"]: # Sort by the most recent value for the key sorted_items = sorted(plots.items(), key=lambda x: y_values[-1][x[0]], reverse=True) - fig_legend.legend([x[1] for x in sorted_items], [x[0] for x in sorted_items], loc='center', ncol=1) + fig_legend.legend([x[1] for x in sorted_items], [x[0] for x in sorted_items], loc="center", ncol=1) fig_legend.set_size_inches(600.0 / dpi, 300.0 / dpi) else: - fig_legend.legend(plots.values(), plots.keys(), loc='center', ncol=4) + fig_legend.legend(plots.values(), plots.keys(), loc="center", ncol=4) fig_legend.set_size_inches(600.0 / dpi, 100.0 / dpi) - fig_legend.savefig(filepaths.join_out_path('{0}{1}{2}_legend.png'.format(img_prefix, stat_name, stat_path[2]))) + fig_legend.savefig(filepaths.join_out_path("{0}{1}{2}_legend.png".format(img_prefix, stat_name, stat_path[2]))) else: keys = None ax.plot(x_values, y_values) @@ -114,26 +115,31 @@ def make_plot(stat_path, git_stats, img_prefix=''): # def price(x): return '$%1.2f'%x # ax.format_ydata = price ax.xaxis_date() - ax.format_xdata = mdates.DateFormatter('%Y-%m-%d') + ax.format_xdata = mdates.DateFormatter("%Y-%m-%d") ax.grid(True) # rotates and right aligns the x labels, and moves the bottom of the # axes up to make room for them fig.autofmt_xdate() - ax.ticklabel_format(axis='y', style='plain', useOffset=False) + ax.ticklabel_format(axis="y", style="plain", useOffset=False) - fig.savefig(filepaths.join_out_path('{0}{1}{2}.png'.format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else '')), dpi=dpi) - plt.close('all') + fig.savefig( + 
filepaths.join_out_path( + "{0}{1}{2}.png".format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else "") + ), + dpi=dpi, + ) + plt.close("all") - fn = filepaths.join_out_path('{0}{1}.csv'.format(img_prefix, stat_name)) - with open(fn, 'w') as fp: + fn = filepaths.join_out_path("{0}{1}.csv".format(img_prefix, stat_name)) + with open(fn, "w") as fp: writer = csv.writer(fp) if keys: sorted_keys = sorted(list(keys)) - writer.writerow(['date'] + sorted_keys) + writer.writerow(["date"] + sorted_keys) else: - writer.writerow(['date', 'value']) + writer.writerow(["date", "value"]) for k, v in items: if keys: writer.writerow([k] + [v.get(key) for key in sorted_keys]) @@ -148,36 +154,36 @@ def main(): args = parser.parse_args() # Load data required for loading stats. - failed_downloads = csv.reader(open(filepaths.join_data_path('downloads/history.csv'))) - gitaggregate_publisher = data.JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) + failed_downloads = csv.reader(open(filepaths.join_data_path("downloads/history.csv"))) + gitaggregate_publisher = data.JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")) # Generate plots for aggregated stats for all data. 
logger.info("Generating plots for all aggregated data") - git_stats = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-dated'), - failed_downloads, - gitaggregate_publisher) - os.makedirs(filepaths.join_out_path('img/aggregate'), exist_ok=True) + git_stats = AugmentedJSONDir( + filepaths.join_stats_path("gitaggregate-dated"), failed_downloads, gitaggregate_publisher + ) + os.makedirs(filepaths.join_out_path("img/aggregate"), exist_ok=True) _paths = [ - 'activities', - 'publishers', - 'activity_files', - 'organisation_files', - 'file_size', - 'failed_downloads', - 'invalidxml', - 'nonstandardroots', - 'unique_identifiers', - ('validation', lambda x: x == 'fail', ''), - ('publishers_validation', lambda x: x == 'fail', ''), - ('publisher_has_org_file', lambda x: x == 'no', ''), - ('versions', lambda x: x in expected_versions, '_expected'), - ('versions', lambda x: x not in expected_versions, '_other'), - ('publishers_per_version', lambda x: x in expected_versions, '_expected'), - ('publishers_per_version', lambda x: x not in expected_versions, '_other'), - ('file_size_bins', lambda x: True, ''), - ('publisher_types', lambda x: True, ''), - ('activities_per_publisher_type', lambda x: True, '') + "activities", + "publishers", + "activity_files", + "organisation_files", + "file_size", + "failed_downloads", + "invalidxml", + "nonstandardroots", + "unique_identifiers", + ("validation", lambda x: x == "fail", ""), + ("publishers_validation", lambda x: x == "fail", ""), + ("publisher_has_org_file", lambda x: x == "no", ""), + ("versions", lambda x: x in expected_versions, "_expected"), + ("versions", lambda x: x not in expected_versions, "_other"), + ("publishers_per_version", lambda x: x in expected_versions, "_expected"), + ("publishers_per_version", lambda x: x not in expected_versions, "_other"), + ("file_size_bins", lambda x: True, ""), + ("publisher_types", lambda x: True, ""), + ("activities_per_publisher_type", lambda x: True, ""), ] with 
tqdm(total=len(_paths)) as pbar: if args.verbose: @@ -185,17 +191,17 @@ def main(): for stat_path in _paths: if args.verbose: pbar.update() - make_plot(stat_path, git_stats, img_prefix='img/aggregate/') + make_plot(stat_path, git_stats, img_prefix="img/aggregate/") # Delete git_stats variable to save memory del git_stats # Generate plots for each publisher. logger.info("Generating plots for all publishers") - git_stats_publishers = AugmentedJSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated/'), - failed_downloads, - gitaggregate_publisher) - os.makedirs(filepaths.join_out_path('img/publishers'), exist_ok=True) + git_stats_publishers = AugmentedJSONDir( + filepaths.join_stats_path("gitaggregate-publisher-dated/"), failed_downloads, gitaggregate_publisher + ) + os.makedirs(filepaths.join_out_path("img/publishers"), exist_ok=True) with tqdm(total=len(git_stats_publishers)) as pbar: if args.verbose: @@ -204,17 +210,17 @@ def main(): if args.verbose: pbar.update() for stat_path in [ - 'activities', - 'activity_files', - 'organisation_files', - 'file_size', - 'invalidxml', - 'nonstandardroots', - 'publisher_unique_identifiers', - ('validation', lambda x: x == 'fail', ''), - ('versions', lambda x: True, ''), + "activities", + "activity_files", + "organisation_files", + "file_size", + "invalidxml", + "nonstandardroots", + "publisher_unique_identifiers", + ("validation", lambda x: x == "fail", ""), + ("versions", lambda x: True, ""), ]: - make_plot(stat_path, git_stats_publisher, img_prefix='img/publishers/{0}_'.format(publisher)) + make_plot(stat_path, git_stats_publisher, img_prefix="img/publishers/{0}_".format(publisher)) if __name__ == "__main__": diff --git a/dashboard/manage.py b/dashboard/manage.py index 2ea28dd972..f0f848ecc1 100755 --- a/dashboard/manage.py +++ b/dashboard/manage.py @@ -6,7 +6,7 @@ def main(): """Run administrative tasks.""" - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') + 
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") try: from django.core.management import execute_from_command_line except ImportError as exc: @@ -18,5 +18,5 @@ def main(): execute_from_command_line(sys.argv) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/dashboard/speakers_kit.py b/dashboard/speakers_kit.py index 0fb9b4bde2..d3b8a7381d 100644 --- a/dashboard/speakers_kit.py +++ b/dashboard/speakers_kit.py @@ -9,48 +9,67 @@ def codelist_dict(codelist_path): codelist_json = json.load(open(codelist_path)) - return {c['code']: c['name'] for c in codelist_json['data']} + return {c["code"]: c["name"] for c in codelist_json["data"]} -organisation_type_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/OrganisationType.json')) -country_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Country.json')) -region_dict = codelist_dict(filepaths.join_data_path('IATI-Codelists-2/out/clv2/json/en/Region.json')) +organisation_type_dict = codelist_dict( + filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/OrganisationType.json") +) +country_dict = codelist_dict(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/Country.json")) +region_dict = codelist_dict(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/Region.json")) -aggregated_publisher = data.JSONDir(filepaths.join_stats_path('current/aggregated-publisher/')) +aggregated_publisher = data.JSONDir(filepaths.join_stats_path("current/aggregated-publisher/")) activities_by = defaultdict(lambda: defaultdict(int)) publishers_by = defaultdict(lambda: defaultdict(int)) for publisher, publisher_data in aggregated_publisher.items(): if publisher in data.ckan_publishers: - organization_type = data.ckan_publishers[publisher]['result']['publisher_organization_type'] + organization_type = data.ckan_publishers[publisher]["result"]["publisher_organization_type"] # 
activities_by['type'][organisation_type_dict[organization_type]] += publisher_data['activities'] - publishers_by['type'][organisation_type_dict[organization_type]] += 1 + publishers_by["type"][organisation_type_dict[organization_type]] += 1 - publisher_country_code = data.ckan_publishers[publisher]['result']['publisher_country'] + publisher_country_code = data.ckan_publishers[publisher]["result"]["publisher_country"] if publisher_country_code in country_dict or publisher_country_code in region_dict: - publishers_by['country'][country_dict.get(publisher_country_code) or region_dict.get(publisher_country_code)] += 1 + publishers_by["country"][ + country_dict.get(publisher_country_code) or region_dict.get(publisher_country_code) + ] += 1 else: - print('Unrecognised registry publisher_country code: ', publisher_country_code) - activity_countries = publisher_data['codelist_values'].get('.//recipient-country/@code') + print("Unrecognised registry publisher_country code: ", publisher_country_code) + activity_countries = publisher_data["codelist_values"].get(".//recipient-country/@code") if activity_countries: for code, count in activity_countries.items(): if code and code in country_dict: - activities_by['country'][country_dict.get(code)] += count - activity_regions = publisher_data['codelist_values'].get('.//recipient-region/@code') + activities_by["country"][country_dict.get(code)] += count + activity_regions = publisher_data["codelist_values"].get(".//recipient-region/@code") if activity_regions: for code, count in activity_regions.items(): if code and code in region_dict: - activities_by['region'][region_dict.get(code)] += count + activities_by["region"][region_dict.get(code)] += count else: - print('Publisher not matched:', publisher) + print("Publisher not matched:", publisher) -fieldnames = ['publisher_type', 'publishers_by_type', '', 'publisher_country', 'publishers_by_country', '', 'date', 'publishers_quarterly', '', 'activity_country', 'activities_by_country', 
'', 'activity_region', 'activities_by_region'] +fieldnames = [ + "publisher_type", + "publishers_by_type", + "", + "publisher_country", + "publishers_by_country", + "", + "date", + "publishers_quarterly", + "", + "activity_country", + "activities_by_country", + "", + "activity_region", + "activities_by_region", +] publishers_quarterly = [] -publishers_by_date = json.load(open(filepaths.join_stats_path('gitaggregate-dated/publishers.json'))) +publishers_by_date = json.load(open(filepaths.join_stats_path("gitaggregate-dated/publishers.json"))) for date, publishers in sorted(publishers_by_date.items()): - if (date[8:10] == '30' and date[5:7] in ['06', '09']) or (date[8:10] == '31' and date[5:7] in ['03', '12']): + if (date[8:10] == "30" and date[5:7] in ["06", "09"]) or (date[8:10] == "31" and date[5:7] in ["03", "12"]): publishers_quarterly.append((date, publishers)) @@ -58,25 +77,33 @@ def sort_second(x): return sorted(x, key=lambda y: y[1], reverse=True) -with open(filepaths.join_out_path('speakers_kit.csv'), 'w') as fp: +with open(filepaths.join_out_path("speakers_kit.csv"), "w") as fp: writer = csv.DictWriter(fp, fieldnames) writer.writeheader() - for publishers_by_type, publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in zip_longest( - sort_second(publishers_by['type'].items()), - sort_second(publishers_by['country'].items()), - publishers_quarterly, - sort_second(activities_by['country'].items()), - sort_second(activities_by['region'].items()), + for ( + publishers_by_type, + publishers_by_country, + publishers_quarterly_, + activities_by_country, + activities_by_region, + ) in zip_longest( + sort_second(publishers_by["type"].items()), + sort_second(publishers_by["country"].items()), + publishers_quarterly, + sort_second(activities_by["country"].items()), + sort_second(activities_by["region"].items()), ): - writer.writerow({ - 'publisher_type': publishers_by_type[0] if publishers_by_type else '', - 'publishers_by_type': 
publishers_by_type[1] if publishers_by_type else '', - 'publisher_country': publishers_by_country[0] if publishers_by_country else '', - 'publishers_by_country': publishers_by_country[1] if publishers_by_country else '', - 'date': publishers_quarterly_[0] if publishers_quarterly_ else '', - 'publishers_quarterly': publishers_quarterly_[1] if publishers_quarterly_ else '', - 'activity_country': activities_by_country[0] if activities_by_country else '', - 'activities_by_country': activities_by_country[1] if activities_by_country else '', - 'activity_region': activities_by_region[0] if activities_by_region else '', - 'activities_by_region': activities_by_region[1] if activities_by_region else '', - }) + writer.writerow( + { + "publisher_type": publishers_by_type[0] if publishers_by_type else "", + "publishers_by_type": publishers_by_type[1] if publishers_by_type else "", + "publisher_country": publishers_by_country[0] if publishers_by_country else "", + "publishers_by_country": publishers_by_country[1] if publishers_by_country else "", + "date": publishers_quarterly_[0] if publishers_quarterly_ else "", + "publishers_quarterly": publishers_quarterly_[1] if publishers_quarterly_ else "", + "activity_country": activities_by_country[0] if activities_by_country else "", + "activities_by_country": activities_by_country[1] if activities_by_country else "", + "activity_region": activities_by_region[0] if activities_by_region else "", + "activities_by_region": activities_by_region[1] if activities_by_region else "", + } + ) diff --git a/dashboard/summary_stats.py b/dashboard/summary_stats.py index 9d65d1462e..da4efcf7f4 100644 --- a/dashboard/summary_stats.py +++ b/dashboard/summary_stats.py @@ -9,17 +9,16 @@ # Set column groupings, to be displayed in the user output columns = [ # slug, header - ('publisher_type', 'Publisher Type'), - ('timeliness', 'Timeliness'), - ('forwardlooking', 'Forward looking'), - ('comprehensiveness', 'Comprehensiveness'), - ('score', 'Score') + 
("publisher_type", "Publisher Type"), + ("timeliness", "Timeliness"), + ("forwardlooking", "Forward looking"), + ("comprehensiveness", "Comprehensiveness"), + ("score", "Score"), ] def is_number(s): - """ @todo Document this function - """ + """@todo Document this function""" try: float(s) return True @@ -28,8 +27,7 @@ def is_number(s): def convert_to_float(x): - """ @todo Document this function - """ + """@todo Document this function""" if is_number(x): return float(x) else: @@ -37,8 +35,7 @@ def convert_to_float(x): def table(): - """Generate data for the publisher forward-looking table - """ + """Generate data for the publisher forward-looking table""" # Store timeliness data in variable timeliness_frequency_data = timeliness.publisher_frequency_dict() @@ -56,22 +53,22 @@ def table(): # Create a list for publisher data, and populate it with basic data row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_title - row['publisher_type'] = common.get_publisher_type(publisher)['name'] + row["publisher"] = publisher + row["publisher_title"] = publisher_title + row["publisher_type"] = common.get_publisher_type(publisher)["name"] # Compute timeliness statistic # Assign frequency score # Get initial frequency assessment, or use empty set in the case where the publisher is not found frequency_assessment_data = timeliness_frequency_data.get(publisher, ()) frequency_assessment = None if len(frequency_assessment_data) < 4 else frequency_assessment_data[3] - if frequency_assessment == 'Monthly': + if frequency_assessment == "Monthly": frequency_score = 4 - elif frequency_assessment == 'Quarterly': + elif frequency_assessment == "Quarterly": frequency_score = 3 - elif frequency_assessment == 'Six-Monthly': + elif frequency_assessment == "Six-Monthly": frequency_score = 2 - elif frequency_assessment == 'Annual': + elif frequency_assessment == "Annual": frequency_score = 1 else: # frequency_assessment == 'Less than Annual' or something else! 
frequency_score = 0 @@ -80,39 +77,41 @@ def table(): # Get initial timelag assessment, or use empty set in the case where the publisher is not found timelag_assessment_data = timeliness_timelag_data.get(publisher, ()) timelag_assessment = None if len(timelag_assessment_data) < 4 else timelag_assessment_data[3] - if timelag_assessment == 'One month': + if timelag_assessment == "One month": timelag_score = 4 - elif timelag_assessment == 'A quarter': + elif timelag_assessment == "A quarter": timelag_score = 3 - elif timelag_assessment == 'Six months': + elif timelag_assessment == "Six months": timelag_score = 2 - elif timelag_assessment == 'One year': + elif timelag_assessment == "One year": timelag_score = 1 else: # timelag_assessment == 'More than one year' or something else! timelag_score = 0 # Compute the percentage - row['timeliness'] = (float(frequency_score + timelag_score) / 8) * 100 + row["timeliness"] = (float(frequency_score + timelag_score) / 8) * 100 # Compute forward-looking statistic # Get the forward-looking data for this publisher publisher_forwardlooking_data = forwardlooking.generate_row(publisher) # Convert the data for this publishers 'Percentage of current activities with budgets' fields into integers - numbers = [int(x) for x in publisher_forwardlooking_data['year_columns'][2].values() if is_number(x)] + numbers = [int(x) for x in publisher_forwardlooking_data["year_columns"][2].values() if is_number(x)] # Compute and store the mean average for these fields - row['forwardlooking'] = sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data['year_columns'][2]) + row["forwardlooking"] = sum(int(round(y)) for y in numbers) / len( + publisher_forwardlooking_data["year_columns"][2] + ) # Compute comprehensiveness statistic # Get the comprehensiveness data for this publisher publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) # Set the comprehensiveness value to be the summary average for valid data - 
row['comprehensiveness'] = convert_to_float(publisher_comprehensiveness_data['summary_average_valid']) + row["comprehensiveness"] = convert_to_float(publisher_comprehensiveness_data["summary_average_valid"]) # Compute score - row['score'] = float(row['timeliness'] + row['forwardlooking'] + row['comprehensiveness']) / 3 + row["score"] = float(row["timeliness"] + row["forwardlooking"] + row["comprehensiveness"]) / 3 # Return a generator object yield row diff --git a/dashboard/tests/test_comprehensiveness.py b/dashboard/tests/test_comprehensiveness.py index ab4eb1e163..c35b9ff066 100644 --- a/dashboard/tests/test_comprehensiveness.py +++ b/dashboard/tests/test_comprehensiveness.py @@ -6,23 +6,23 @@ import comprehensiveness # noqa: E402 mock_stats = { - 'comprehensiveness': { - 'activity-date': 2, - 'activity-status': 2, - 'recipient_language': 0, - 'transaction_spend': 1, + "comprehensiveness": { + "activity-date": 2, + "activity-status": 2, + "recipient_language": 0, + "transaction_spend": 1, }, - 'comprehensiveness_denominator_default': 2, - 'comprehensiveness_denominators': { - 'recipient_language': 0, - 'transaction_spend': 1, - 'transaction_traceability': 1 - } + "comprehensiveness_denominator_default": 2, + "comprehensiveness_denominators": {"recipient_language": 0, "transaction_spend": 1, "transaction_traceability": 1}, } def test_denominator(): - assert comprehensiveness.denominator('activity-date', mock_stats) == 2 - assert comprehensiveness.denominator('transaction_spend', mock_stats) == 1 - assert comprehensiveness.denominator('non_existant_key', mock_stats) == 2 # Passing a non existant key will return the default denominator - assert comprehensiveness.denominator('activity-date', None) == 0 # Passing a 'Falsey' value as the stats param will return 0 + assert comprehensiveness.denominator("activity-date", mock_stats) == 2 + assert comprehensiveness.denominator("transaction_spend", mock_stats) == 1 + assert ( + 
comprehensiveness.denominator("non_existant_key", mock_stats) == 2 + ) # Passing a non existant key will return the default denominator + assert ( + comprehensiveness.denominator("activity-date", None) == 0 + ) # Passing a 'Falsey' value as the stats param will return 0 diff --git a/dashboard/tests/test_timeliness.py b/dashboard/tests/test_timeliness.py index c55861630a..5a9d4207f0 100644 --- a/dashboard/tests/test_timeliness.py +++ b/dashboard/tests/test_timeliness.py @@ -10,10 +10,9 @@ def test_short_month(): - month_strings = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + month_strings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] for index, s in enumerate(month_strings): - assert timeliness.short_month('01-{:02d}-2024'.format(index + 1)) == s + assert timeliness.short_month("01-{:02d}-2024".format(index + 1)) == s def test_parse_iso_date(): diff --git a/dashboard/text.py b/dashboard/text.py index c1d2ecbd13..47e256f1c4 100644 --- a/dashboard/text.py +++ b/dashboard/text.py @@ -1,187 +1,200 @@ """Text fragments for page titles, navigation and page leaders/sub-leaders """ + import copy top_titles = { - 'index': 'Home', - 'headlines': 'Headlines', - 'data_quality': 'Data Quality', - 'publishing_stats': 'Publishing Statistics', - 'exploring_data': 'Exploring Data', - 'faq': 'FAQ' + "index": "Home", + "headlines": "Headlines", + "data_quality": "Data Quality", + "publishing_stats": "Publishing Statistics", + "exploring_data": "Exploring Data", + "faq": "FAQ", } page_titles = { - 'index': 'Dashboard Home', - 'headlines': 'Headlines', - 'data_quality': 'Data Quality', - 'exploring_data': 'Exploring Data', - 'faq': 'Frequently Asked Questions', - 'publishers': 'IATI Publishers', - 'files': 'IATI Files', - 'activities': 'IATI Activities', - 'download': 'Download Errors', - 'xml': 'XML Errors', - 'validation': 'Validation Against the Schema', - 'versions': 'Versions', - 'rulesets': 
'Rulesets', - 'licenses': 'Licenses listed on the Registry', - 'organisation': 'Organisation XML Files', - 'identifiers': 'Duplicate Activity Identifiers', - 'registration_agencies': 'Registration Agencies', - 'reporting_orgs': 'Reporting Orgs', - 'elements': 'Elements', - 'codelists': 'Codelists', - 'booleans': 'Booleans', - 'dates': 'Dates', - 'traceability': 'Traceability', - 'org_ids': 'Organisation Identifiers', - 'publishing_stats': 'Publishing Statistics', - 'timeliness': 'Timeliness', - 'forwardlooking': 'Forward Looking', - 'comprehensiveness': 'Comprehensiveness', - 'summary_stats': 'Summary Statistics', - 'humanitarian': 'Humanitarian Reporting' + "index": "Dashboard Home", + "headlines": "Headlines", + "data_quality": "Data Quality", + "exploring_data": "Exploring Data", + "faq": "Frequently Asked Questions", + "publishers": "IATI Publishers", + "files": "IATI Files", + "activities": "IATI Activities", + "download": "Download Errors", + "xml": "XML Errors", + "validation": "Validation Against the Schema", + "versions": "Versions", + "rulesets": "Rulesets", + "licenses": "Licenses listed on the Registry", + "organisation": "Organisation XML Files", + "identifiers": "Duplicate Activity Identifiers", + "registration_agencies": "Registration Agencies", + "reporting_orgs": "Reporting Orgs", + "elements": "Elements", + "codelists": "Codelists", + "booleans": "Booleans", + "dates": "Dates", + "traceability": "Traceability", + "org_ids": "Organisation Identifiers", + "publishing_stats": "Publishing Statistics", + "timeliness": "Timeliness", + "forwardlooking": "Forward Looking", + "comprehensiveness": "Comprehensiveness", + "summary_stats": "Summary Statistics", + "humanitarian": "Humanitarian Reporting", } page_leads = { - 'index': 'Statistics, charts and metrics on data accessed via the IATI Registry.', - 'data_quality': 'What needs fixing in IATI data?', - 'exploring_data': 'Which parts of the IATI Standard are being used?', - 'headlines': 'What is the size, 
scope and scale of published IATI data?', - 'publishers': 'How many organisations are publishing IATI data?', - 'files': 'How many IATI files are published?', - 'activities': 'How many IATI activities are published?', - 'download': 'How many files failed to download?', - 'xml': 'Which files have XML errors?', - 'validation': 'Which files fail schema validation?', - 'versions': 'Which versions of the IATI Standard are being used?', - 'rulesets': 'How does IATI data test against rulesets?', - 'licenses': 'Which licences are used by IATI publishers?', - 'organisation': 'Who is publishing IATI Organisation files?', - 'identifiers': 'Where are there duplicate IATI identifiers?', - 'reporting_orgs': 'Where are reporting organisation identifiers inconsistent with the IATI Registry?', - 'elements': 'How are the IATI Standard elements used by publishers?', - 'codelists': 'How are codelists used in IATI data?', - 'booleans': 'How are booleans used in IATI data?', - 'dates': 'What date ranges do publishers publish data for?', - 'traceability': 'How much of a publisher’s spending is traceable to other publishers’ activities?', - 'org_ids': 'Are organisation identifiers being used correctly?', + "index": "Statistics, charts and metrics on data accessed via the IATI Registry.", + "data_quality": "What needs fixing in IATI data?", + "exploring_data": "Which parts of the IATI Standard are being used?", + "headlines": "What is the size, scope and scale of published IATI data?", + "publishers": "How many organisations are publishing IATI data?", + "files": "How many IATI files are published?", + "activities": "How many IATI activities are published?", + "download": "How many files failed to download?", + "xml": "Which files have XML errors?", + "validation": "Which files fail schema validation?", + "versions": 'Which versions of the IATI Standard are being used?', + "rulesets": "How does IATI data test against rulesets?", + "licenses": "Which licences are used by IATI publishers?", 
+ "organisation": "Who is publishing IATI Organisation files?", + "identifiers": "Where are there duplicate IATI identifiers?", + "reporting_orgs": "Where are reporting organisation identifiers inconsistent with the IATI Registry?", + "elements": "How are the IATI Standard elements used by publishers?", + "codelists": "How are codelists used in IATI data?", + "booleans": "How are booleans used in IATI data?", + "dates": "What date ranges do publishers publish data for?", + "traceability": "How much of a publisher’s spending is traceable to other publishers’ activities?", + "org_ids": "Are organisation identifiers being used correctly?", } page_sub_leads = { - 'publishers': 'Publishers represent organisation accounts in the IATI Registry.', - 'files': 'Files are logged on the IATI Registry by publishers The files contain data on activities and the organisation. A publisher may have multiple files, which can contain multiple activities.', - 'activities': 'Activities are the individual projects found in files. A file can contain one or many activities, from a publisher.', - 'download': 'Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher\'s server returns an error message (e.g. because there is no file at that location). Some files that failed to download when last checked may since have become available.', - 'xml': 'This page shows files that are not well-formed XML, accessed via the IATI Registry.', - 'validation': 'IATI files are validated against the appropriate IATI Schema. Note: this is based on the version declared in the file and whether it\'s an activity/organisation file.', - 'versions': 'Files are reported against a specific version of the IATI Standard, using the version attribute in the iati-activities element.', - 'rulesets': 'The IATI Ruleset describe constraints, conditions and logics that are additional to the IATI schema. 
Note: Currently, on the IATI Standard Ruleset is tested.', - 'licenses': 'Licences are applied to files by publishers on the IATI Registry, and explain how data can be used.', - 'organisation': 'Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain.', - 'identifiers': 'Checking the iati-identifier element for duplicate values per publisher. A duplicate appears if a publisher creates two activities with the same identifier.', - 'reporting_orgs': 'Checking the reporting-org identifiers in IATI data.', - 'elements': 'Checking usage of all elements/attributes within the IATI Standard.', - 'codelists': 'Checking usage of codelists across IATI data files.', - 'booleans': 'Checking usage of booleans across IATI data files. Booleans are values that are either true or false. In XML true or 1 can be used for true and false or 0 can be used for false.', + "publishers": "Publishers represent organisation accounts in the IATI Registry.", + "files": "Files are logged on the IATI Registry by publishers The files contain data on activities and the organisation. A publisher may have multiple files, which can contain multiple activities.", + "activities": "Activities are the individual projects found in files. A file can contain one or many activities, from a publisher.", + "download": "Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher's server returns an error message (e.g. because there is no file at that location). Some files that failed to download when last checked may since have become available.", + "xml": "This page shows files that are not well-formed XML, accessed via the IATI Registry.", + "validation": 'IATI files are validated against the appropriate IATI Schema. 
Note: this is based on the version declared in the file and whether it\'s an activity/organisation file.', + "versions": "Files are reported against a specific version of the IATI Standard, using the version attribute in the iati-activities element.", + "rulesets": "The IATI Ruleset describe constraints, conditions and logics that are additional to the IATI schema. Note: Currently, on the IATI Standard Ruleset is tested.", + "licenses": "Licences are applied to files by publishers on the IATI Registry, and explain how data can be used.", + "organisation": "Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain.", + "identifiers": "Checking the iati-identifier element for duplicate values per publisher. A duplicate appears if a publisher creates two activities with the same identifier.", + "reporting_orgs": "Checking the reporting-org identifiers in IATI data.", + "elements": "Checking usage of all elements/attributes within the IATI Standard.", + "codelists": "Checking usage of codelists across IATI data files.", + "booleans": "Checking usage of booleans across IATI data files. Booleans are values that are either true or false. 
In XML true or 1 can be used for true and false or 0 can be used for false.", } short_page_titles = copy.copy(page_titles) -short_page_titles.update({ - 'publishers': 'Publishers', - 'files': 'Files', - 'activities': 'Activities', - 'validation': 'Validation', - 'licenses': 'Licenses', - 'organisation': 'Organisation XML', - 'identifiers': 'Duplicate Identifiers', -}) +short_page_titles.update( + { + "publishers": "Publishers", + "files": "Files", + "activities": "Activities", + "validation": "Validation", + "licenses": "Licenses", + "organisation": "Organisation XML", + "identifiers": "Duplicate Identifiers", + } +) -top_navigation = ['headlines', 'data_quality', 'publishing_stats', 'exploring_data', 'faq'] +top_navigation = ["headlines", "data_quality", "publishing_stats", "exploring_data", "faq"] navigation = { - 'headlines': ['publishers', 'files', 'activities'], - 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], - 'exploring_data': ['elements', 'codelists', 'booleans', 'dates', 'traceability', 'org_ids'], - 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'summary_stats', 'humanitarian'] + "headlines": ["publishers", "files", "activities"], + "data_quality": [ + "download", + "xml", + "validation", + "versions", + "licenses", + "organisation", + "identifiers", + "reporting_orgs", + ], + "exploring_data": ["elements", "codelists", "booleans", "dates", "traceability", "org_ids"], + "publishing_stats": ["timeliness", "forwardlooking", "comprehensiveness", "summary_stats", "humanitarian"], } LICENSE_NAMES = { - 'notspecified': 'Other::License Not Specified', - 'odc-pddl': 'OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)', - 'odc-odbl': 'OKD Compliant::Open Data Commons Open Database License (ODbL)', - 'odc-by': 'OKD Compliant::Open Data Commons Attribution Licence', - 'cc-zero': 'OKD Compliant::Creative Commons CCZero', - 'cc-by': 'OKD 
Compliant::Creative Commons Attribution', - 'cc-by-sa': 'OKD Compliant::Creative Commons Attribution Share-Alike', - 'gfdl': 'OKD Compliant::GNU Free Documentation License', - 'ukclickusepsi': 'OKD Compliant::UK Click Use PSI', - 'other-open': 'OKD Compliant::Other (Open)', - 'other-pd': 'OKD Compliant::Other (Public Domain)', - 'other-at': 'OKD Compliant::Other (Attribution)', - 'ukcrown-withrights': 'OKD Compliant::UK Crown Copyright with data.gov.uk rights', - 'hesa-withrights': 'OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights', - 'localauth-withrights': 'OKD Compliant::Local Authority Copyright with data.gov.uk rights', - 'uk-ogl': 'OKD Compliant::UK Open Government Licence (OGL)', - 'met-office-cp': 'Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement', - 'cc-nc': 'Non-OKD Compliant::Creative Commons Non-Commercial (Any)', - 'ukcrown': 'Non-OKD Compliant::UK Crown Copyright', - 'other-nc': 'Non-OKD Compliant::Other (Non-Commercial)', - 'other-closed': 'Non-OKD Compliant::Other (Not Open)', - 'bsd-license': 'OSI Approved::New and Simplified BSD licenses', - 'gpl-2.0': 'OSI Approved::GNU General Public License (GPL)', - 'gpl-3.0': 'OSI Approved::GNU General Public License version 3.0 (GPLv3)', - 'lgpl-2.1': 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', - 'mit-license': 'OSI Approved::MIT license', - 'afl-3.0': 'OSI Approved::Academic Free License 3.0 (AFL 3.0)', - 'apl1.0': 'OSI Approved::Adaptive Public License', - 'apache': 'OSI Approved::Apache Software License', - 'apache2.0': 'OSI Approved::Apache License, 2.0', - 'apsl-2.0': 'OSI Approved::Apple Public Source License', - 'artistic-license-2.0': 'OSI Approved::Artistic license 2.0', - 'attribution': 'OSI Approved::Attribution Assurance Licenses', - 'ca-tosl1.1': 'OSI Approved::Computer Associates Trusted Open Source License 1.1', - 'cddl1': 'OSI Approved::Common Development and Distribution License', - 'cpal_1.0': 'OSI 
Approved::Common Public Attribution License 1.0 (CPAL)', - 'cuaoffice': 'OSI Approved::CUA Office Public License Version 1.0', - 'eudatagrid': 'OSI Approved::EU DataGrid Software License', - 'eclipse-1.0': 'OSI Approved::Eclipse Public License', - 'ecl2': 'OSI Approved::Educational Community License, Version 2.0', - 'eiffel': 'OSI Approved::Eiffel Forum License', - 'ver2_eiffel': 'OSI Approved::Eiffel Forum License V2.0', - 'entessa': 'OSI Approved::Entessa Public License', - 'fair': 'OSI Approved::Fair License', - 'frameworx': 'OSI Approved::Frameworx License', - 'ibmpl': 'OSI Approved::IBM Public License', - 'intel-osl': 'OSI Approved::Intel Open Source License', - 'jabber-osl': 'OSI Approved::Jabber Open Source License', - 'lucent-plan9': 'OSI Approved::Lucent Public License (Plan9)', - 'lucent1.02': 'OSI Approved::Lucent Public License Version 1.02', - 'mitre': 'OSI Approved::MITRE Collaborative Virtual Workspace License (CVW License)', - 'motosoto': 'OSI Approved::Motosoto License', - 'mozilla': 'OSI Approved::Mozilla Public License 1.0 (MPL)', - 'mozilla1.1': 'OSI Approved::Mozilla Public License 1.1 (MPL)', - 'nasa1.3': 'OSI Approved::NASA Open Source Agreement 1.3', - 'naumen': 'OSI Approved::Naumen Public License', - 'nethack': 'OSI Approved::Nethack General Public License', - 'nokia': 'OSI Approved::Nokia Open Source License', - 'oclc2': 'OSI Approved::OCLC Research Public License 2.0', - 'opengroup': 'OSI Approved::Open Group Test Suite License', - 'osl-3.0': 'OSI Approved::Open Software License 3.0 (OSL 3.0)', - 'php': 'OSI Approved::PHP License', - 'pythonpl': 'OSI Approved::Python license', - 'PythonSoftFoundation': 'OSI Approved::Python Software Foundation License', - 'qtpl': 'OSI Approved::Qt Public License (QPL)', - 'real': 'OSI Approved::RealNetworks Public Source License V1.0', - 'rpl1.5': 'OSI Approved::Reciprocal Public License 1.5 (RPL1.5)', - 'ricohpl': 'OSI Approved::Ricoh Source Code Public License', - 'sleepycat': 'OSI Approved::Sleepycat 
License', - 'sun-issl': 'OSI Approved::Sun Industry Standards Source License (SISSL)', - 'sunpublic': 'OSI Approved::Sun Public License', - 'sybase': 'OSI Approved::Sybase Open Watcom Public License 1.0', - 'UoI-NCSA': 'OSI Approved::University of Illinois/NCSA Open Source License', - 'vovidapl': 'OSI Approved::Vovida Software License v. 1.0', - 'W3C': 'OSI Approved::W3C License', - 'wxwindows': 'OSI Approved::wxWindows Library License', - 'xnet': 'OSI Approved::X.Net License', - 'zpl': 'OSI Approved::Zope Public License', - 'zlib-license': 'OSI Approved::zlib/libpng license'} + "notspecified": "Other::License Not Specified", + "odc-pddl": "OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)", + "odc-odbl": "OKD Compliant::Open Data Commons Open Database License (ODbL)", + "odc-by": "OKD Compliant::Open Data Commons Attribution Licence", + "cc-zero": "OKD Compliant::Creative Commons CCZero", + "cc-by": "OKD Compliant::Creative Commons Attribution", + "cc-by-sa": "OKD Compliant::Creative Commons Attribution Share-Alike", + "gfdl": "OKD Compliant::GNU Free Documentation License", + "ukclickusepsi": "OKD Compliant::UK Click Use PSI", + "other-open": "OKD Compliant::Other (Open)", + "other-pd": "OKD Compliant::Other (Public Domain)", + "other-at": "OKD Compliant::Other (Attribution)", + "ukcrown-withrights": "OKD Compliant::UK Crown Copyright with data.gov.uk rights", + "hesa-withrights": "OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights", + "localauth-withrights": "OKD Compliant::Local Authority Copyright with data.gov.uk rights", + "uk-ogl": "OKD Compliant::UK Open Government Licence (OGL)", + "met-office-cp": "Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement", + "cc-nc": "Non-OKD Compliant::Creative Commons Non-Commercial (Any)", + "ukcrown": "Non-OKD Compliant::UK Crown Copyright", + "other-nc": "Non-OKD Compliant::Other (Non-Commercial)", + "other-closed": "Non-OKD Compliant::Other 
(Not Open)", + "bsd-license": "OSI Approved::New and Simplified BSD licenses", + "gpl-2.0": "OSI Approved::GNU General Public License (GPL)", + "gpl-3.0": "OSI Approved::GNU General Public License version 3.0 (GPLv3)", + "lgpl-2.1": 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', + "mit-license": "OSI Approved::MIT license", + "afl-3.0": "OSI Approved::Academic Free License 3.0 (AFL 3.0)", + "apl1.0": "OSI Approved::Adaptive Public License", + "apache": "OSI Approved::Apache Software License", + "apache2.0": "OSI Approved::Apache License, 2.0", + "apsl-2.0": "OSI Approved::Apple Public Source License", + "artistic-license-2.0": "OSI Approved::Artistic license 2.0", + "attribution": "OSI Approved::Attribution Assurance Licenses", + "ca-tosl1.1": "OSI Approved::Computer Associates Trusted Open Source License 1.1", + "cddl1": "OSI Approved::Common Development and Distribution License", + "cpal_1.0": "OSI Approved::Common Public Attribution License 1.0 (CPAL)", + "cuaoffice": "OSI Approved::CUA Office Public License Version 1.0", + "eudatagrid": "OSI Approved::EU DataGrid Software License", + "eclipse-1.0": "OSI Approved::Eclipse Public License", + "ecl2": "OSI Approved::Educational Community License, Version 2.0", + "eiffel": "OSI Approved::Eiffel Forum License", + "ver2_eiffel": "OSI Approved::Eiffel Forum License V2.0", + "entessa": "OSI Approved::Entessa Public License", + "fair": "OSI Approved::Fair License", + "frameworx": "OSI Approved::Frameworx License", + "ibmpl": "OSI Approved::IBM Public License", + "intel-osl": "OSI Approved::Intel Open Source License", + "jabber-osl": "OSI Approved::Jabber Open Source License", + "lucent-plan9": "OSI Approved::Lucent Public License (Plan9)", + "lucent1.02": "OSI Approved::Lucent Public License Version 1.02", + "mitre": "OSI Approved::MITRE Collaborative Virtual Workspace License (CVW License)", + "motosoto": "OSI Approved::Motosoto License", + "mozilla": "OSI Approved::Mozilla Public License 1.0 
(MPL)", + "mozilla1.1": "OSI Approved::Mozilla Public License 1.1 (MPL)", + "nasa1.3": "OSI Approved::NASA Open Source Agreement 1.3", + "naumen": "OSI Approved::Naumen Public License", + "nethack": "OSI Approved::Nethack General Public License", + "nokia": "OSI Approved::Nokia Open Source License", + "oclc2": "OSI Approved::OCLC Research Public License 2.0", + "opengroup": "OSI Approved::Open Group Test Suite License", + "osl-3.0": "OSI Approved::Open Software License 3.0 (OSL 3.0)", + "php": "OSI Approved::PHP License", + "pythonpl": "OSI Approved::Python license", + "PythonSoftFoundation": "OSI Approved::Python Software Foundation License", + "qtpl": "OSI Approved::Qt Public License (QPL)", + "real": "OSI Approved::RealNetworks Public Source License V1.0", + "rpl1.5": "OSI Approved::Reciprocal Public License 1.5 (RPL1.5)", + "ricohpl": "OSI Approved::Ricoh Source Code Public License", + "sleepycat": "OSI Approved::Sleepycat License", + "sun-issl": "OSI Approved::Sun Industry Standards Source License (SISSL)", + "sunpublic": "OSI Approved::Sun Public License", + "sybase": "OSI Approved::Sybase Open Watcom Public License 1.0", + "UoI-NCSA": "OSI Approved::University of Illinois/NCSA Open Source License", + "vovidapl": "OSI Approved::Vovida Software License v. 
1.0", + "W3C": "OSI Approved::W3C License", + "wxwindows": "OSI Approved::wxWindows Library License", + "xnet": "OSI Approved::X.Net License", + "zpl": "OSI Approved::Zope Public License", + "zlib-license": "OSI Approved::zlib/libpng license", +} diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index d1074ceca7..f72d5b7811 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -9,15 +9,14 @@ def short_month(month_str): """Return the 'short month' represeentation of a date which is inputted as a string, seperated with dashes - For example '01-03-2012' returns 'Mar' + For example '01-03-2012' returns 'Mar' """ - short_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - return short_months[int(month_str.split('-')[1]) - 1] + short_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + return short_months[int(month_str.split("-")[1]) - 1] def parse_iso_date(d): - """Parse a string representation of a date into a datetime object - """ + """Parse a string representation of a date into a datetime object""" try: return datetime.date(int(d[:4]), int(d[5:7]), int(d[8:10])) except (ValueError, TypeError): @@ -25,8 +24,7 @@ def parse_iso_date(d): def previous_months_generator(d): - """Returns a generator object with the previous month for a given datetime object - """ + """Returns a generator object with the previous month for a given datetime object""" year = d.year month = d.month for i in range(0, 12): @@ -38,12 +36,14 @@ def previous_months_generator(d): # Store lists of previous months -previous_months = ['{}-{}'.format(year, str(month).zfill(2)) for year, month in previous_months_generator(datetime.date.today())] +previous_months = [ + "{}-{}".format(year, str(month).zfill(2)) for year, month in previous_months_generator(datetime.date.today()) +] previous_months_reversed = list(reversed(previous_months)) # Store the current month as a string today = 
datetime.date.today() -this_month = '{}-{}'.format(today.year, str(today.month).zfill(2)) +this_month = "{}-{}".format(today.year, str(today.month).zfill(2)) # Store a list of the past 12 months from today previous_month_days = [today - relativedelta(months=x) for x in range(13)] @@ -55,17 +55,16 @@ def previous_months_generator(d): def publisher_frequency(): - """Generate the publisher frequency data - """ + """Generate the publisher frequency data""" # Load all the data from 'gitaggregate-publisher-dated' into memory - gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")) # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated' for publisher, agg in gitaggregate_publisher.items(): # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher - if 'most_recent_transaction_date' not in agg: + if "most_recent_transaction_date" not in agg: continue # Skip if this publisher appears in the list of publishers who have since changed their Registry ID @@ -76,7 +75,7 @@ def publisher_frequency(): previous_transaction_date = datetime.date(1, 1, 1) # Find the most recent transaction date and parse into a datetime object - for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()): + for gitdate, transaction_date_str in sorted(agg["most_recent_transaction_date"].items()): transaction_date = parse_iso_date(transaction_date_str) # If transaction date has increased @@ -85,7 +84,7 @@ def publisher_frequency(): updates_per_month[gitdate[:7]] += 1 # Find the first date that this publisher made data available, and parse into a datetime object - first_published_string = sorted(agg['most_recent_transaction_date'])[0] + first_published_string = sorted(agg["most_recent_transaction_date"])[0] first_published = parse_iso_date(first_published_string) hft = 
has_future_transactions(publisher) @@ -94,47 +93,55 @@ def publisher_frequency(): if first_published >= previous_month_days[3]: # This is a publisher of less than 3 months - first_published_band = 'Less than 3 months ago' - frequency = 'Annual' + first_published_band = "Less than 3 months ago" + frequency = "Annual" elif first_published >= previous_month_days[6]: # This is a publisher of less than 6 months - first_published_band = '3-6 months ago' + first_published_band = "3-6 months ago" if all([x in updates_per_month for x in previous_months[:3]]): - frequency = 'Monthly' + frequency = "Monthly" else: - frequency = 'Annual' + frequency = "Annual" elif first_published >= previous_month_days[12]: # This is a publisher of less than 12 months - first_published_band = '6-12 months ago' + first_published_band = "6-12 months ago" if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4: - frequency = 'Monthly' - elif any([x in updates_per_month for x in previous_months[:3]]) and any([x in updates_per_month for x in previous_months[3:6]]): - frequency = 'Quarterly' + frequency = "Monthly" + elif any([x in updates_per_month for x in previous_months[:3]]) and any( + [x in updates_per_month for x in previous_months[3:6]] + ): + frequency = "Quarterly" else: - frequency = 'Annual' + frequency = "Annual" else: if first_published >= previous_year_days[3]: - first_published_band = '1-3 years ago' + first_published_band = "1-3 years ago" elif first_published >= previous_year_days[5]: - first_published_band = '3-5 years ago' + first_published_band = "3-5 years ago" else: - first_published_band = 'More than 5 years ago' + first_published_band = "More than 5 years ago" # This is a publisher of 1 year or more - if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and ([x in updates_per_month for x in previous_months[:2]].count(True) >= 1): + if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and ( + [x in 
updates_per_month for x in previous_months[:2]].count(True) >= 1 + ): # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. - frequency = 'Monthly' - elif ([x in updates_per_month for x in previous_months[:12]].count(True) >= 3) and ([x in updates_per_month for x in previous_months[:4]].count(True) >= 1): + frequency = "Monthly" + elif ([x in updates_per_month for x in previous_months[:12]].count(True) >= 3) and ( + [x in updates_per_month for x in previous_months[:4]].count(True) >= 1 + ): # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months. - frequency = 'Quarterly' - elif any([x in updates_per_month for x in previous_months[:6]]) and any([x in updates_per_month for x in previous_months[6:12]]): + frequency = "Quarterly" + elif any([x in updates_per_month for x in previous_months[:6]]) and any( + [x in updates_per_month for x in previous_months[6:12]] + ): # There has been an update in 2 of the last 6 month periods - frequency = 'Six-Monthly' + frequency = "Six-Monthly" elif any([x in updates_per_month for x in previous_months[:12]]): # There has been an update in 1 of the last 12 months - frequency = 'Annual' + frequency = "Annual" else: # There has been an update in none of the last 12 months - frequency = 'Less than Annual' + frequency = "Less than Annual" # If the publisher is in the list of current publishers, return a generator object if publisher in publisher_name: @@ -142,13 +149,11 @@ def publisher_frequency(): def frequency_index(frequency): - return ['Monthly', 'Quarterly', 'Six-Monthly', 'Annual', 'Less than Annual'].index(frequency) + return ["Monthly", "Quarterly", "Six-Monthly", "Annual", "Less than Annual"].index(frequency) def publisher_frequency_sorted(): - return sorted( - publisher_frequency(), - key=lambda tup: (frequency_index(tup[3]), tup[1])) + return sorted(publisher_frequency(), key=lambda tup: (frequency_index(tup[3]), tup[1])) def 
publisher_frequency_dict(): @@ -164,21 +169,35 @@ def publisher_frequency_summary(): def timelag_index(timelag): - return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) + return ["One month", "A quarter", "Six months", "One year", "More than one year"].index(timelag) def first_published_band_index(first_published_band): - return ['More than 5 years ago', '3-5 years ago', '1-3 years ago', '6-12 months ago', '3-6 months ago', 'Less than 3 months ago'].index(first_published_band) + return [ + "More than 5 years ago", + "3-5 years ago", + "1-3 years ago", + "6-12 months ago", + "3-6 months ago", + "Less than 3 months ago", + ].index(first_published_band) def publisher_timelag(): - return [(publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag'], has_future_transactions(publisher)) for publisher, agg in JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).items()] + return [ + ( + publisher, + publisher_name.get(publisher), + agg["transaction_months_with_year"], + agg["timelag"], + has_future_transactions(publisher), + ) + for publisher, agg in JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).items() + ] def publisher_timelag_sorted(): - return sorted( - publisher_timelag(), - key=lambda tup: (timelag_index(tup[3]), tup[1])) + return sorted(publisher_timelag(), key=lambda tup: (timelag_index(tup[3]), tup[1])) def publisher_timelag_dict(): @@ -191,23 +210,23 @@ def publisher_timelag_summary(): def has_future_transactions(publisher): """ - returns 0, 1 or 2 - Returns 2 if the most recent data for a publisher has future transactions. - Returns 1 if the publisher has ever had future transactions. - Returns 0 otherwise. + returns 0, 1 or 2 + Returns 2 if the most recent data for a publisher has future transactions. + Returns 1 if the publisher has ever had future transactions. + Returns 0 otherwise. 
""" today = datetime.date.today() publisher_stats = get_publisher_stats(publisher) - if 'transaction_dates' in publisher_stats: - for transaction_type, transaction_counts in publisher_stats['transaction_dates'].items(): + if "transaction_dates" in publisher_stats: + for transaction_type, transaction_counts in publisher_stats["transaction_dates"].items(): for transaction_date_string, count in transaction_counts.items(): transaction_date = parse_iso_date(transaction_date_string) if transaction_date and transaction_date > datetime.date.today(): return 2 - gitaggregate_publisher = JSONDir(filepaths.join_stats_path('gitaggregate-publisher-dated')).get(publisher, {}) + gitaggregate_publisher = JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")).get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) - for date, activity_blacklist in gitaggregate_publisher.get('activities_with_future_transactions', {}).items(): + for date, activity_blacklist in gitaggregate_publisher.get("activities_with_future_transactions", {}).items(): if parse_iso_date(date) >= mindate and activity_blacklist: return 1 return 0 diff --git a/dashboard/ui/asgi.py b/dashboard/ui/asgi.py index fb118a8cab..b844ecbea3 100644 --- a/dashboard/ui/asgi.py +++ b/dashboard/ui/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") application = get_asgi_application() diff --git a/dashboard/ui/jinja2.py b/dashboard/ui/jinja2.py index b1da234df6..7f615620cf 100644 --- a/dashboard/ui/jinja2.py +++ b/dashboard/ui/jinja2.py @@ -1,5 +1,6 @@ """Jinja2 template configuration """ + import re from django.templatetags.static import static @@ -9,25 +10,24 @@ def round_nicely(val, ndigits=2): - """ Round a float, but remove the trailing .0 from integers that python insists on - """ + """Round a float, but remove the trailing .0 from integers that 
python insists on""" if int(val) == float(val): return int(val) return round(float(val), ndigits) def xpath_to_url(path): - path = path.strip('./') + path = path.strip("./") # remove conditions - path = re.sub(r'\[[^]]+\]', '', path) - if path.startswith('iati-activity'): - url = 'http://iatistandard.org/activity-standard/iati-activities/' + path.split('@')[0] - elif path.startswith('iati-organisation'): - url = 'http://iatistandard.org/organisation-standard/iati-organisations/' + path.split('@')[0] + path = re.sub(r"\[[^]]+\]", "", path) + if path.startswith("iati-activity"): + url = "http://iatistandard.org/activity-standard/iati-activities/" + path.split("@")[0] + elif path.startswith("iati-organisation"): + url = "http://iatistandard.org/organisation-standard/iati-organisations/" + path.split("@")[0] else: - url = 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/' + path.split('@')[0] - if '@' in path: - url += '#attributes' + url = "http://iatistandard.org/activity-standard/iati-activities/iati-activity/" + path.split("@")[0] + if "@" in path: + url += "#attributes" return url @@ -35,12 +35,12 @@ def environment(**options): env = Environment(**options) env.globals.update( { - 'static': static, - 'url': reverse, + "static": static, + "url": reverse, } ) - env.filters['url_to_filename'] = lambda x: x.rstrip('/').split('/')[-1] - env.filters['has_future_transactions'] = timeliness.has_future_transactions - env.filters['xpath_to_url'] = xpath_to_url - env.filters['round_nicely'] = round_nicely + env.filters["url_to_filename"] = lambda x: x.rstrip("/").split("/")[-1] + env.filters["has_future_transactions"] = timeliness.has_future_transactions + env.filters["xpath_to_url"] = xpath_to_url + env.filters["round_nicely"] = round_nicely return env diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index c0461c3d1b..919fe78b65 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -20,7 +20,7 @@ # See 
https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'django-insecure-4i8e#n)gr2@wol5i4y@od0za_&y+idpz*-t5v)#-6s(&rhl=k&' +SECRET_KEY = "django-insecure-4i8e#n)gr2@wol5i4y@od0za_&y+idpz*-t5v)#-6s(&rhl=k&" # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True @@ -31,66 +31,66 @@ # Application definition INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", ] MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", ] -ROOT_URLCONF = 'ui.urls' +ROOT_URLCONF = "ui.urls" TEMPLATES = [ { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + "BACKEND": 
"django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", ], }, }, { - 'BACKEND': 'django.template.backends.jinja2.Jinja2', - 'DIRS': ["templates/"], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + "BACKEND": "django.template.backends.jinja2.Jinja2", + "DIRS": ["templates/"], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", ], - 'environment': 'ui.jinja2.environment' + "environment": "ui.jinja2.environment", }, }, ] -WSGI_APPLICATION = 'ui.wsgi.application' +WSGI_APPLICATION = "ui.wsgi.application" # Database # https://docs.djangoproject.com/en/5.1/ref/settings/#databases DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': BASE_DIR / 'db.sqlite3', + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", } } @@ -100,16 +100,16 @@ AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + "NAME": 
"django.contrib.auth.password_validation.CommonPasswordValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] @@ -117,9 +117,9 @@ # Internationalization # https://docs.djangoproject.com/en/5.1/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = "en-us" -TIME_ZONE = 'UTC' +TIME_ZONE = "UTC" USE_I18N = True @@ -129,13 +129,15 @@ # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/5.1/howto/static-files/ -STATIC_URL = 'static/' -STATICFILES_DIRS = [BASE_DIR / 'static',] +STATIC_URL = "static/" +STATICFILES_DIRS = [ + BASE_DIR / "static", +] # Default primary key field type # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field -DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # # Relative (to dashboard/) paths to IATI data and output directories. diff --git a/dashboard/ui/template_funcs.py b/dashboard/ui/template_funcs.py index 8f5446febb..de80d95011 100644 --- a/dashboard/ui/template_funcs.py +++ b/dashboard/ui/template_funcs.py @@ -2,17 +2,17 @@ def firstint(s): - if s[0].startswith('<'): + if s[0].startswith("<"): return 0 - m = re.search(r'\d+', s[0]) + m = re.search(r"\d+", s[0]) return int(m.group(0)) def get_codelist_values(codelist_values_for_element): """Return a list of unique values present within a one-level nested dictionary. 
- Envisaged usage is to gather the codelist values used by each publisher, as in - stats/current/inverted-publisher/codelist_values_by_major_version.json - Input: Set of codelist values for a given element (listed by publisher), for example: - current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] + Envisaged usage is to gather the codelist values used by each publisher, as in + stats/current/inverted-publisher/codelist_values_by_major_version.json + Input: Set of codelist values for a given element (listed by publisher), for example: + current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] """ return list(set([y for x in codelist_values_for_element.items() for y in list(x[1].keys())])) diff --git a/dashboard/ui/tests.py b/dashboard/ui/tests.py index 11d0970848..cffb7b1ab8 100644 --- a/dashboard/ui/tests.py +++ b/dashboard/ui/tests.py @@ -11,7 +11,7 @@ class BasicPageTests(TestCase): """ def test_top_pages(self): - """Test the index and top hierarchy pages return a 200 status code """ + """Test the index and top hierarchy pages return a 200 status code""" self.assertEqual(self.client.get(reverse("dash-index")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-headlines")).status_code, 200) @@ -22,13 +22,16 @@ def test_top_pages(self): self.assertEqual(self.client.get(reverse("dash-registrationagencies")).status_code, 200) def test_headlines(self): - """Test the headlines pages """ + """Test the headlines pages""" self.assertEqual(self.client.get(reverse("dash-headlines-files")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-headlines-activities")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-headlines-publishers")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("undp", ))).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", 
args=("not-a-valid-publisher", ))).status_code, 404) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("undp",))).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-headlines-publisher-detail", args=("not-a-valid-publisher",))).status_code, + 404, + ) def test_dataquality(self): """Test the data quality pages""" @@ -42,8 +45,13 @@ def test_dataquality(self): self.assertEqual(self.client.get(reverse("dash-dataquality-identifiers")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-dataquality-reportingorgs")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-dataquality-licenses")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("cc-by", ))).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-dataquality-licenses-detail", args=("not-a-valid-license", ))).status_code, 404) + self.assertEqual( + self.client.get(reverse("dash-dataquality-licenses-detail", args=("cc-by",))).status_code, 200 + ) + self.assertEqual( + self.client.get(reverse("dash-dataquality-licenses-detail", args=("not-a-valid-license",))).status_code, + 404, + ) def test_publishingstats_timeliness(self): """Test timeliness pages in the publishing statistics section""" @@ -56,8 +64,12 @@ def test_publishingstats_comprehensiveness(self): self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-core")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-financials")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-valueadded")).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-publishingstats-comprehensiveness-financials")).status_code, 200 + ) + self.assertEqual( + 
self.client.get(reverse("dash-publishingstats-comprehensiveness-valueadded")).status_code, 200 + ) def test_publishingstats_forwardlooking(self): """Test the forward looking page in the publishing statistics section""" @@ -78,16 +90,62 @@ def test_exploringdata(self): """Test the exploring data pages""" self.assertEqual(self.client.get(reverse("dash-exploringdata-booleans")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "budget_@type", ))).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("2", "not-a-valid-slug", ))).status_code, 404) - self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists-detail", args=("3", "budget_@type", ))).status_code, 404) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "2", + "budget_@type", + ), + ) + ).status_code, + 200, + ) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "2", + "not-a-valid-slug", + ), + ) + ).status_code, + 404, + ) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "3", + "budget_@type", + ), + ) + ).status_code, + 404, + ) self.assertEqual(self.client.get(reverse("dash-exploringdata-dates")).status_code, 200) self.assertEqual(self.client.get(reverse("dash-exploringdata-elements")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("iati-activity_activity-date_narrative", ))).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-elements-detail", args=("not-a-valid-element", ))).status_code, 404) + self.assertEqual( + self.client.get( + reverse("dash-exploringdata-elements-detail", args=("iati-activity_activity-date_narrative",)) + ).status_code, + 200, + ) 
+ self.assertEqual( + self.client.get(reverse("dash-exploringdata-elements-detail", args=("not-a-valid-element",))).status_code, + 404, + ) self.assertEqual(self.client.get(reverse("dash-exploringdata-orgids")).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("funding_org", ))).status_code, 200) - self.assertEqual(self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("not-a-valid-org-type", ))).status_code, 404) + self.assertEqual( + self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("funding_org",))).status_code, 200 + ) + self.assertEqual( + self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("not-a-valid-org-type",))).status_code, + 404, + ) self.assertEqual(self.client.get(reverse("dash-exploringdata-traceability")).status_code, 200) @@ -111,15 +169,17 @@ def test_headlines_and_misc(self): # This is not particularly DRY as a similar dictionary is created in views.py # but I think this is minor as that may disappear from views.py in a future # refactor of what goes into the context. 
- self._url_and_view_helper({ - "index": "dash-index", - "headlines": "dash-headlines", - "files": "dash-headlines-files", - "activities": "dash-headlines-activities", - "publishers": "dash-headlines-publishers", - "faq": "dash-faq", - "registration_agencies": "dash-registrationagencies" - }) + self._url_and_view_helper( + { + "index": "dash-index", + "headlines": "dash-headlines", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publishers": "dash-headlines-publishers", + "faq": "dash-faq", + "registration_agencies": "dash-registrationagencies", + } + ) def test_dataquality(self): """Test data quality pages redirect to their new locations""" @@ -127,17 +187,19 @@ def test_dataquality(self): # This is not particularly DRY as a similar dictionary is created in views.py # but I think this is minor as that may disappear from views.py in a future # refactor of what goes into the context. - self._url_and_view_helper({ - "data_quality": "dash-dataquality", - "download": "dash-dataquality-download", - "xml": "dash-dataquality-xml", - "validation": "dash-dataquality-validation", - "versions": "dash-dataquality-versions", - "organisation": "dash-dataquality-organisation", - "identifiers": "dash-dataquality-identifiers", - "reporting_orgs": "dash-dataquality-reportingorgs", - "licenses": "dash-dataquality-licenses" - }) + self._url_and_view_helper( + { + "data_quality": "dash-dataquality", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "licenses": "dash-dataquality-licenses", + } + ) def test_publishingstats(self): """Test publishing stats pages redirect to their new locations""" @@ -145,18 +207,20 @@ def test_publishingstats(self): # This is not particularly DRY as a 
similar dictionary is created in views.py # but I think this is minor as that may disappear from views.py in a future # refactor of what goes into the context. - self._url_and_view_helper({ - "publishing_stats": "dash-publishingstats", - "timeliness": "dash-publishingstats-timeliness", - "timeliness_timelag": "dash-publishingstats-timeliness-timelag", - "forwardlooking": "dash-publishingstats-forwardlooking", - "comprehensiveness": "dash-publishingstats-comprehensiveness", - "comprehensiveness_core": "dash-publishingstats-comprehensiveness-core", - "comprehensiveness_financials": "dash-publishingstats-comprehensiveness-financials", - "comprehensiveness_valueadded": "dash-publishingstats-comprehensiveness-valueadded", - "summary_stats": "dash-publishingstats-summarystats", - "humanitarian": "dash-publishingstats-humanitarian" - }) + self._url_and_view_helper( + { + "publishing_stats": "dash-publishingstats", + "timeliness": "dash-publishingstats-timeliness", + "timeliness_timelag": "dash-publishingstats-timeliness-timelag", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "comprehensiveness_core": "dash-publishingstats-comprehensiveness-core", + "comprehensiveness_financials": "dash-publishingstats-comprehensiveness-financials", + "comprehensiveness_valueadded": "dash-publishingstats-comprehensiveness-valueadded", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian", + } + ) def test_exploringdata(self): """Test exploring data pages redirect to their new locations""" @@ -164,31 +228,35 @@ def test_exploringdata(self): # This is not particularly DRY as a similar dictionary is created in views.py # but I think this is minor as that may disappear from views.py in a future # refactor of what goes into the context. 
- self._url_and_view_helper({ - "exploring_data": "dash-exploringdata-elements", - "elements": "dash-exploringdata-elements", - "codelists": "dash-exploringdata-codelists", - "booleans": "dash-exploringdata-booleans", - "dates": "dash-exploringdata-dates", - "traceability": "dash-exploringdata-traceability", - "org_ids": "dash-exploringdata-orgids" - }) + self._url_and_view_helper( + { + "exploring_data": "dash-exploringdata-elements", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids", + } + ) def test_slug_page_redirects(self): """Test pages with slugs redirect to the section page""" - self.assertRedirects(self.client.get(r"/publisher/undp.html"), - reverse("dash-headlines-publishers"), - status_code=301) - self.assertRedirects(self.client.get(r"/license/cc-by.html"), - reverse("dash-dataquality-licenses"), - status_code=301) - self.assertRedirects(self.client.get(r"/codelist/2/budget_@type.html"), - reverse("dash-exploringdata-codelists"), - status_code=301) - self.assertRedirects(self.client.get(r"/element/iati-activity_activity-date_narrative.html"), - reverse("dash-exploringdata-elements"), - status_code=301) - self.assertRedirects(self.client.get(r"/org_type/funding_org.html"), - reverse("dash-exploringdata-orgids"), - status_code=301) + self.assertRedirects( + self.client.get(r"/publisher/undp.html"), reverse("dash-headlines-publishers"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/license/cc-by.html"), reverse("dash-dataquality-licenses"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/codelist/2/budget_@type.html"), reverse("dash-exploringdata-codelists"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/element/iati-activity_activity-date_narrative.html"), + 
reverse("dash-exploringdata-elements"), + status_code=301, + ) + self.assertRedirects( + self.client.get(r"/org_type/funding_org.html"), reverse("dash-exploringdata-orgids"), status_code=301 + ) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 08426d668b..4c45254e37 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -14,6 +14,7 @@ 1. Import the include() function: from django.urls import include, path 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ + from django.contrib import admin from django.urls import path, re_path from django.views.generic.base import RedirectView @@ -22,60 +23,105 @@ urlpatterns = [ - path('admin/', admin.site.urls), - + path("admin/", admin.site.urls), # Top level dashboard pages. - path('', ui.views.index, name="dash-index"), - path('headlines', ui.views.headlines, name="dash-headlines"), - path('data-quality', ui.views.data_quality, name="dash-dataquality"), - path('publishing-statistics', ui.views.publishing_stats, name="dash-publishingstats"), - path('exploring-data', ui.views.exploring_data, name="dash-exploringdata"), - path('faq', ui.views.faq, name="dash-faq"), - + path("", ui.views.index, name="dash-index"), + path("headlines", ui.views.headlines, name="dash-headlines"), + path("data-quality", ui.views.data_quality, name="dash-dataquality"), + path("publishing-statistics", ui.views.publishing_stats, name="dash-publishingstats"), + path("exploring-data", ui.views.exploring_data, name="dash-exploringdata"), + path("faq", ui.views.faq, name="dash-faq"), # Headlines pages and detail pages - placeholders for now. 
- path('headlines/publishers', ui.views.headlines_publishers, name="dash-headlines-publishers"), - path('headlines/files', ui.views.headlines_files, name="dash-headlines-files"), - path('headlines/activities', ui.views.headlines_activities, name="dash-headlines-activities"), - path('headlines/publishers/', ui.views.headlines_publisher_detail, name='dash-headlines-publisher-detail'), - + path("headlines/publishers", ui.views.headlines_publishers, name="dash-headlines-publishers"), + path("headlines/files", ui.views.headlines_files, name="dash-headlines-files"), + path("headlines/activities", ui.views.headlines_activities, name="dash-headlines-activities"), + path( + "headlines/publishers/", + ui.views.headlines_publisher_detail, + name="dash-headlines-publisher-detail", + ), # Data quality pages. - path('data-quality/download-errors', ui.views.dataquality_download, name="dash-dataquality-download"), - path('data/download_errors.json', ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), - path('data-quality/xml-errors', ui.views.dataquality_xml, name="dash-dataquality-xml"), - path('data-quality/validation', ui.views.dataquality_validation, name="dash-dataquality-validation"), - path('data-quality/versions', ui.views.dataquality_versions, name="dash-dataquality-versions"), - path('data-quality/organisation', ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), - path('data-quality/licenses', ui.views.dataquality_licenses, name="dash-dataquality-licenses"), - path('data-quality/licenses/', ui.views.dataquality_licenses_detail, name="dash-dataquality-licenses-detail"), - path('data-quality/identifiers', ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), - path('data-quality/reporting-orgs', ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), - + path("data-quality/download-errors", ui.views.dataquality_download, name="dash-dataquality-download"), + 
path("data/download_errors.json", ui.views.dataquality_download_errorsjson, name="dash-dataquality-download-json"), + path("data-quality/xml-errors", ui.views.dataquality_xml, name="dash-dataquality-xml"), + path("data-quality/validation", ui.views.dataquality_validation, name="dash-dataquality-validation"), + path("data-quality/versions", ui.views.dataquality_versions, name="dash-dataquality-versions"), + path("data-quality/organisation", ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), + path("data-quality/licenses", ui.views.dataquality_licenses, name="dash-dataquality-licenses"), + path( + "data-quality/licenses/", + ui.views.dataquality_licenses_detail, + name="dash-dataquality-licenses-detail", + ), + path("data-quality/identifiers", ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), + path("data-quality/reporting-orgs", ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), # Exploring data pages. - path('exploring-data/elements', ui.views.exploringdata_elements, name="dash-exploringdata-elements"), - path('exploring-data/elements/', ui.views.exploringdata_element_detail, name="dash-exploringdata-elements-detail"), - path('exploring-data/codelists', ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), - path('exploring-data/codelists//', ui.views.exploringdata_codelists_detail, name="dash-exploringdata-codelists-detail"), - path('exploring-data/booleans', ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), - path('exploring-data/dates', ui.views.exploringdata_dates, name="dash-exploringdata-dates"), - path('exploring-data/traceability', ui.views.exploringdata_traceability, name="dash-exploringdata-traceability"), - path('exploring-data/organisation-identifiers', ui.views.exploringdata_orgids, name="dash-exploringdata-orgids"), - path('exploring-data/organisation-type/', ui.views.exploringdata_orgtypes_detail, name="dash-exploringdata-orgtypes-detail"), - + 
path("exploring-data/elements", ui.views.exploringdata_elements, name="dash-exploringdata-elements"), + path( + "exploring-data/elements/", + ui.views.exploringdata_element_detail, + name="dash-exploringdata-elements-detail", + ), + path("exploring-data/codelists", ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), + path( + "exploring-data/codelists//", + ui.views.exploringdata_codelists_detail, + name="dash-exploringdata-codelists-detail", + ), + path("exploring-data/booleans", ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), + path("exploring-data/dates", ui.views.exploringdata_dates, name="dash-exploringdata-dates"), + path("exploring-data/traceability", ui.views.exploringdata_traceability, name="dash-exploringdata-traceability"), + path("exploring-data/organisation-identifiers", ui.views.exploringdata_orgids, name="dash-exploringdata-orgids"), + path( + "exploring-data/organisation-type/", + ui.views.exploringdata_orgtypes_detail, + name="dash-exploringdata-orgtypes-detail", + ), # Publishing statistics pages. 
- path('publishing-statistics/timeliness', ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), - path('publishing-statistics/timeliness-timelag', ui.views.pubstats_timeliness_timelag, name="dash-publishingstats-timeliness-timelag"), - path('publishing-statistics/forward-looking', ui.views.pubstats_forwardlooking, name="dash-publishingstats-forwardlooking"), - path('publishing-statistics/comprehensiveness', ui.views.pubstats_comprehensiveness, name="dash-publishingstats-comprehensiveness"), - path('publishing-statistics/comprehensiveness/core', ui.views.pubstats_comprehensiveness_core, name="dash-publishingstats-comprehensiveness-core"), - path('publishing-statistics/comprehensiveness/financials', ui.views.pubstats_comprehensiveness_financials, name="dash-publishingstats-comprehensiveness-financials"), - path('publishing-statistics/comprehensiveness/value-added', ui.views.pubstats_comprehensiveness_valueadded, name="dash-publishingstats-comprehensiveness-valueadded"), - path('publishing-statistics/summary-statistics', ui.views.pubstats_summarystats, name="dash-publishingstats-summarystats"), - path('publishing-statistics/humanitarian-reporting', ui.views.pubstats_humanitarian, name="dash-publishingstats-humanitarian"), - + path("publishing-statistics/timeliness", ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), + path( + "publishing-statistics/timeliness-timelag", + ui.views.pubstats_timeliness_timelag, + name="dash-publishingstats-timeliness-timelag", + ), + path( + "publishing-statistics/forward-looking", + ui.views.pubstats_forwardlooking, + name="dash-publishingstats-forwardlooking", + ), + path( + "publishing-statistics/comprehensiveness", + ui.views.pubstats_comprehensiveness, + name="dash-publishingstats-comprehensiveness", + ), + path( + "publishing-statistics/comprehensiveness/core", + ui.views.pubstats_comprehensiveness_core, + name="dash-publishingstats-comprehensiveness-core", + ), + path( + 
"publishing-statistics/comprehensiveness/financials", + ui.views.pubstats_comprehensiveness_financials, + name="dash-publishingstats-comprehensiveness-financials", + ), + path( + "publishing-statistics/comprehensiveness/value-added", + ui.views.pubstats_comprehensiveness_valueadded, + name="dash-publishingstats-comprehensiveness-valueadded", + ), + path( + "publishing-statistics/summary-statistics", + ui.views.pubstats_summarystats, + name="dash-publishingstats-summarystats", + ), + path( + "publishing-statistics/humanitarian-reporting", + ui.views.pubstats_humanitarian, + name="dash-publishingstats-humanitarian", + ), # Registration agencies. - path('registration-agencies', ui.views.registration_agencies, name="dash-registrationagencies"), + path("registration-agencies", ui.views.registration_agencies, name="dash-registrationagencies"), path("registration_agencies.html", RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True)), - # Redirects to support any users with bookmarks to pages on the old Dashboard. 
path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), @@ -84,12 +130,29 @@ path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), - path("timeliness_timelag.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True)), - path("forwardlooking.html", RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True)), - path("comprehensiveness.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True)), - path("comprehensiveness_core.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True)), - path("comprehensiveness_financials.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True)), - path("comprehensiveness_valueadded.html", RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True)), + path( + "timeliness_timelag.html", + RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True), + ), + path( + "forwardlooking.html", RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True) + ), + path( + "comprehensiveness.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True), + ), + path( + "comprehensiveness_core.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True), + ), + path( + "comprehensiveness_financials.html", + 
RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True), + ), + path( + "comprehensiveness_valueadded.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True), + ), path("summary_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True)), path("humanitarian.html", RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True)), path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), @@ -111,7 +174,9 @@ path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), re_path(r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), - re_path(r"codelist\/\d\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + re_path( + r"codelist\/\d\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True) + ), re_path(r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), ] diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 7cc3dd7eec..90cbc6fd43 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -34,35 +34,45 @@ publisher_name, publishers_ordered_by_title, is_valid_element_or_attribute, - slugs) - - -COMMIT_HASH = subprocess.run('git show --format=%H --no-patch'.split(), - cwd=filepaths.join_base_path(""), - capture_output=True).stdout.decode().strip() -STATS_COMMIT_HASH = subprocess.run('git -C stats-calculated show --format=%H --no-patch'.split(), - cwd=filepaths.join_base_path(""), - 
capture_output=True).stdout.decode().strip() -STATS_GH_URL = 'https://github.com/codeforIATI/IATI-Stats-public/tree/' + STATS_COMMIT_HASH + slugs, +) + + +COMMIT_HASH = ( + subprocess.run("git show --format=%H --no-patch".split(), cwd=filepaths.join_base_path(""), capture_output=True) + .stdout.decode() + .strip() +) +STATS_COMMIT_HASH = ( + subprocess.run( + "git -C stats-calculated show --format=%H --no-patch".split(), + cwd=filepaths.join_base_path(""), + capture_output=True, + ) + .stdout.decode() + .strip() +) +STATS_GH_URL = "https://github.com/codeforIATI/IATI-Stats-public/tree/" + STATS_COMMIT_HASH # Load all the licenses and generate data for each licence and publisher. -with open(filepaths.join_stats_path('licenses.json')) as handler: +with open(filepaths.join_stats_path("licenses.json")) as handler: LICENSE_URLS = json.load(handler) LICENSES = [ - package['license_id'] - if package['license_id'] is not None - else 'notspecified' + package["license_id"] if package["license_id"] is not None else "notspecified" for _, publisher in ckan.items() - for _, package in publisher.items()] + for _, package in publisher.items() +] LICENCE_COUNT = dict((x, LICENSES.count(x)) for x in set(LICENSES)) -LICENSES_AND_PUBLISHER = set([(package['license_id'] - if package['license_id'] is not None - else 'notspecified', publisher_name) - for publisher_name, publisher in ckan.items() - for package_name, package in publisher.items()]) +LICENSES_AND_PUBLISHER = set( + [ + (package["license_id"] if package["license_id"] is not None else "notspecified", publisher_name) + for publisher_name, publisher in ckan.items() + for package_name, package in publisher.items() + ] +) LICENSES_PER_PUBLISHER = [license for license, publisher in LICENSES_AND_PUBLISHER] PUBLISHER_LICENSE_COUNT = dict((x, LICENSES_PER_PUBLISHER.count(x)) for x in set(LICENSES_PER_PUBLISHER)) @@ -75,15 +85,16 @@ def _get_licenses_for_publisher(publisher_name): return set() # Return unique licenses used - return 
set([ - package['license_id'] - if package['license_id'] is not None - else 'notspecified' - for package in ckan[publisher_name].values()]) + return set( + [ + package["license_id"] if package["license_id"] is not None else "notspecified" + for package in ckan[publisher_name].values() + ] + ) def _registration_agency(orgid): - for code in codelist_sets['2']['OrganisationRegistrationAgency']: + for code in codelist_sets["2"]["OrganisationRegistrationAgency"]: if orgid.startswith(code): return code @@ -104,8 +115,7 @@ def nested_dictinvert(d): def _make_context(page_name: str): - """Make a basic context dictionary for a given page - """ + """Make a basic context dictionary for a given page""" context = dict( page=page_name, top_titles=text.top_titles, @@ -116,41 +126,38 @@ def _make_context(page_name: str): top_navigation=text.top_navigation, navigation=text.navigation, navigation_reverse={page: k for k, pages in text.navigation.items() for page in pages}, - page_view_names={"index": "dash-index", - "headlines": "dash-headlines", - "data_quality": "dash-dataquality", - "publishing_stats": "dash-publishingstats", - "exploring_data": "dash-exploringdata", - "faq": "dash-faq", - - "publishers": "dash-headlines-publishers", - "files": "dash-headlines-files", - "activities": "dash-headlines-activities", - "publisher": "dash-headlines-publisher-detail", - - "download": "dash-dataquality-download", - "xml": "dash-dataquality-xml", - "validation": "dash-dataquality-validation", - "versions": "dash-dataquality-versions", - "organisation": "dash-dataquality-organisation", - "licenses": "dash-dataquality-licenses", - "identifiers": "dash-dataquality-identifiers", - "reporting_orgs": "dash-dataquality-reportingorgs", - - "elements": "dash-exploringdata-elements", - "codelists": "dash-exploringdata-codelists", - "booleans": "dash-exploringdata-booleans", - "dates": "dash-exploringdata-dates", - "traceability": "dash-exploringdata-traceability", - "org_ids": 
"dash-exploringdata-orgids", - - "timeliness": "dash-publishingstats-timeliness", - "forwardlooking": "dash-publishingstats-forwardlooking", - "comprehensiveness": "dash-publishingstats-comprehensiveness", - "coverage": "dash-publishingstats-coverage", - "summary_stats": "dash-publishingstats-summarystats", - "humanitarian": "dash-publishingstats-humanitarian" - }, + page_view_names={ + "index": "dash-index", + "headlines": "dash-headlines", + "data_quality": "dash-dataquality", + "publishing_stats": "dash-publishingstats", + "exploring_data": "dash-exploringdata", + "faq": "dash-faq", + "publishers": "dash-headlines-publishers", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publisher": "dash-headlines-publisher-detail", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "licenses": "dash-dataquality-licenses", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids", + "timeliness": "dash-publishingstats-timeliness", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "coverage": "dash-publishingstats-coverage", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian", + }, current_stats=current_stats, publisher_name=publisher_name, publishers_ordered_by_title=publishers_ordered_by_title, @@ -164,21 +171,22 @@ def _make_context(page_name: str): expected_versions=vars.expected_versions, metadata=metadata, slugs=slugs, - 
datetime_data=dateutil.parser.parse(metadata['created_at']).strftime('%-d %B %Y (at %H:%M %Z)'), + datetime_data=dateutil.parser.parse(metadata["created_at"]).strftime("%-d %B %Y (at %H:%M %Z)"), current_year=datetime.datetime.now(datetime.UTC).year, - stats_url='https://stats.codeforiati.org', + stats_url="https://stats.codeforiati.org", stats_gh_url=STATS_GH_URL, commit_hash=COMMIT_HASH, stats_commit_hash=STATS_COMMIT_HASH, - func={"sorted": sorted, - "firstint": ui.template_funcs.firstint, - "get_codelist_values": ui.template_funcs.get_codelist_values, - "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), - "get_publisher_stats": get_publisher_stats, - "is_valid_element_or_attribute": is_valid_element_or_attribute, - "set": set, - "enumerate": enumerate - } + func={ + "sorted": sorted, + "firstint": ui.template_funcs.firstint, + "get_codelist_values": ui.template_funcs.get_codelist_values, + "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), + "get_publisher_stats": get_publisher_stats, + "is_valid_element_or_attribute": is_valid_element_or_attribute, + "set": set, + "enumerate": enumerate, + }, ) context["navigation_reverse"].update({k: k for k in text.navigation}) @@ -241,30 +249,81 @@ def headlines_publisher_detail(request, publisher=None): context = _make_context("publishers") context["publisher"] = publisher - context["publisher_inverted"] = get_publisher_stats(publisher, 'inverted-file') + context["publisher_inverted"] = get_publisher_stats(publisher, "inverted-file") context["publisher_licenses"] = _get_licenses_for_publisher(publisher) publisher_stats = get_publisher_stats(publisher) context["publisher_stats"] = publisher_stats try: - context["budget_table"] = [{ - 'year': 'Total', - 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), - 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for 
currency, sums in by_currency.items()}, - 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - }] + [{'year': year, - 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), - 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency, sums in by_currency.items()}, - 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((list(x.keys()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) - ] - context["failure_count"] = len(current_stats['inverted_file_publisher'][publisher]['validation'].get('fail', {})) + context["budget_table"] = [ + { + "year": "Total", + "count_total": sum(sum(x.values()) for x 
in publisher_stats["count_budgets_by_type_by_year"].values()), + "sum_total": { + currency: sum(sums.values()) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["1"].values()) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["2"].values()) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + ] + [ + { + "year": year, + "count_total": sum( + x[year] for x in publisher_stats["count_budgets_by_type_by_year"].values() if year in x + ), + "sum_total": { + currency: sums.get(year) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + publisher_stats["count_budgets_by_type_by_year"]["1"].get(year) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + publisher_stats["count_budgets_by_type_by_year"]["2"].get(year) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + for year in 
sorted( + set(sum((list(x.keys()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), [])) + ) + ] + context["failure_count"] = len( + current_stats["inverted_file_publisher"][publisher]["validation"].get("fail", {}) + ) except KeyError: raise Http404("Publisher does not exist") @@ -281,7 +340,7 @@ def dataquality_download(request): def dataquality_download_errorsjson(request): - return HttpResponse(json.dumps(current_stats['download_errors'], indent=2), content_type='application/json') + return HttpResponse(json.dumps(current_stats["download_errors"], indent=2), content_type="application/json") def dataquality_xml(request): @@ -323,8 +382,8 @@ def dataquality_licenses_detail(request, license_id=None): publisher_name for publisher_name, publisher in ckan.items() for _, package in publisher.items() - if package['license_id'] == license_id or ( - license_id == 'notspecified' and package['license_id'] is None)] + if package["license_id"] == license_id or (license_id == "notspecified" and package["license_id"] is None) + ] context = _make_context("licenses") context["license_urls"] = LICENSE_URLS context["license_names"] = text.LICENSE_NAMES @@ -364,13 +423,13 @@ def exploringdata_element_detail(request, element=None): template = loader.get_template("element.html") context = _make_context("elements") - if element not in slugs['element']['by_slug']: + if element not in slugs["element"]["by_slug"]: raise Http404("Unknown element or attribute") - i = slugs['element']['by_slug'][element] - context["element"] = list(current_stats['inverted_publisher']['elements'])[i] - context["publishers"] = list(current_stats['inverted_publisher']['elements'].values())[i] - context["element_or_attribute"] = 'attribute' if '@' in context["element"] else 'element' + i = slugs["element"]["by_slug"][element] + context["element"] = list(current_stats["inverted_publisher"]["elements"])[i] + context["publishers"] = 
list(current_stats["inverted_publisher"]["elements"].values())[i] + context["element_or_attribute"] = "attribute" if "@" in context["element"] else "element" return HttpResponse(template.render(context, request)) @@ -380,7 +439,7 @@ def exploringdata_orgids(request): def exploringdata_orgtypes_detail(request, org_type=None): - if org_type not in slugs['org_type']['by_slug']: + if org_type not in slugs["org_type"]["by_slug"]: raise Http404("Unknown organisation type") template = loader.get_template("org_type.html") @@ -397,18 +456,22 @@ def exploringdata_codelists(request): def exploringdata_codelists_detail(request, major_version=None, attribute=None): template = loader.get_template("codelist.html") - if major_version not in slugs['codelist']: + if major_version not in slugs["codelist"]: raise Http404("Unknown major version of the IATI standard") - if attribute not in slugs['codelist'][major_version]['by_slug']: + if attribute not in slugs["codelist"][major_version]["by_slug"]: raise Http404("Unknown attribute") context = _make_context("codelists") - i = slugs['codelist'][major_version]['by_slug'][attribute] - element = list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version])[i] - values = nested_dictinvert(list(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values())[i]) + i = slugs["codelist"][major_version]["by_slug"][attribute] + element = list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version])[i] + values = nested_dictinvert( + list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].values())[i] + ) context["element"] = element context["values"] = values - context["reverse_codelist_mapping"] = {major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items()} + context["reverse_codelist_mapping"] = { + major_version: dictinvert(mapping) for major_version, mapping in 
codelist_mapping.items() + } context["major_version"] = major_version return HttpResponse(template.render(context, request)) @@ -505,7 +568,7 @@ def registration_agencies(request): context["registration_agencies"] = collections.defaultdict(int) context["registration_agencies_publishers"] = collections.defaultdict(list) context["nonmatching"] = [] - for orgid, publishers in current_stats['inverted_publisher']['reporting_orgs'].items(): + for orgid, publishers in current_stats["inverted_publisher"]["reporting_orgs"].items(): reg_ag = _registration_agency(orgid) if reg_ag: context["registration_agencies"][reg_ag] += 1 diff --git a/dashboard/ui/wsgi.py b/dashboard/ui/wsgi.py index c94858e9fb..4605f9a154 100644 --- a/dashboard/ui/wsgi.py +++ b/dashboard/ui/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ui.settings') +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") application = get_wsgi_application() diff --git a/dashboard/vars.py b/dashboard/vars.py index c8f00227d9..5ffa420bc9 100644 --- a/dashboard/vars.py +++ b/dashboard/vars.py @@ -1 +1 @@ -expected_versions = ['null', '1.01', '1.02', '1.03', '1.04', '1.05', '2.01', '2.02', '2.03'] +expected_versions = ["null", "1.01", "1.02", "1.03", "1.04", "1.05", "2.01", "2.02", "2.03"] From fe5096dddb29d74240ddd57265f62bc0db47f128 Mon Sep 17 00:00:00 2001 From: Automated Reformatting Date: Mon, 11 Nov 2024 13:56:03 +0000 Subject: [PATCH 358/375] lint: Run isort for the first time --- dashboard/common.py | 2 +- dashboard/comprehensiveness.py | 2 +- dashboard/coverage.py | 12 +++++++----- dashboard/data.py | 6 +++--- dashboard/forwardlooking.py | 3 ++- dashboard/humanitarian.py | 2 +- dashboard/make_csv.py | 13 ++++++------- dashboard/make_html.py | 24 ++++++++++++------------ dashboard/make_plots.py | 12 ++++++------ dashboard/speakers_kit.py | 4 ++-- dashboard/summary_stats.py | 6 +++--- dashboard/timeliness.py | 6 ++++-- 
dashboard/ui/jinja2.py | 1 + dashboard/ui/urls.py | 1 - dashboard/ui/views.py | 20 +++++++++----------- fetch_data.py | 6 +++--- fetch_github_issues.py | 6 +++--- fetch_v2_codelists.py | 5 ++--- 18 files changed, 66 insertions(+), 65 deletions(-) diff --git a/dashboard/common.py b/dashboard/common.py index 98fd4dec70..162bb69ebe 100644 --- a/dashboard/common.py +++ b/dashboard/common.py @@ -1,8 +1,8 @@ """Load IATI OrganisationType codelist into a global and provide function to get publisher type""" -import data import json +import data import filepaths # Import organisation_type_codelist as a global, then delete when used to save memory diff --git a/dashboard/comprehensiveness.py b/dashboard/comprehensiveness.py index bb89389398..89dd409095 100644 --- a/dashboard/comprehensiveness.py +++ b/dashboard/comprehensiveness.py @@ -1,6 +1,6 @@ # This file converts raw comprehensiveness data to percentages, and calculates averages. -from data import publishers_ordered_by_title, get_publisher_stats, publisher_name +from data import get_publisher_stats, publisher_name, publishers_ordered_by_title columns = { "summary": [ diff --git a/dashboard/coverage.py b/dashboard/coverage.py index 7277d3973f..440067a60f 100644 --- a/dashboard/coverage.py +++ b/dashboard/coverage.py @@ -1,12 +1,14 @@ # This file converts a range coverage data to variables which can be outputted on the coverage page import csv -from data import get_publisher_stats -from data import get_registry_id_matches -from data import publisher_name -from data import publishers_ordered_by_title -from data import secondary_publishers import filepaths +from data import ( + get_publisher_stats, + get_registry_id_matches, + publisher_name, + publishers_ordered_by_title, + secondary_publishers, +) def is_number(s): diff --git a/dashboard/data.py b/dashboard/data.py index e81389fd13..3296ac143b 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -1,9 +1,9 @@ -from collections import OrderedDict -from collections.abc 
import MutableMapping +import csv import json import os import re -import csv +from collections import OrderedDict +from collections.abc import MutableMapping from decimal import Decimal import xmlschema diff --git a/dashboard/forwardlooking.py b/dashboard/forwardlooking.py index 55a104dddb..e91f4af23a 100644 --- a/dashboard/forwardlooking.py +++ b/dashboard/forwardlooking.py @@ -1,8 +1,9 @@ # This file converts raw forward-looking data to percentages -from data import publishers_ordered_by_title, get_publisher_stats, publisher_name import datetime +from data import get_publisher_stats, publisher_name, publishers_ordered_by_title + # Create a variable with the current year as an integer this_year = datetime.date.today().year diff --git a/dashboard/humanitarian.py b/dashboard/humanitarian.py index 09f50ed989..3d93509c5a 100644 --- a/dashboard/humanitarian.py +++ b/dashboard/humanitarian.py @@ -1,7 +1,7 @@ # This file builds a table to show humanitarian reporting for each publisher -from data import publishers_ordered_by_title, get_publisher_stats import common +from data import get_publisher_stats, publishers_ordered_by_title # Set column groupings, to be displayed in the user output columns = [ diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py index 6e46430774..0ba6a6d2d0 100644 --- a/dashboard/make_csv.py +++ b/dashboard/make_csv.py @@ -1,20 +1,19 @@ """Generates CSV files from data in the 'stats-calculated' folder and using additional logic """ +import argparse import csv +import logging import os import sys -import argparse -import logging +import comprehensiveness import data -import timeliness +import filepaths import forwardlooking -import comprehensiveness -import summary_stats import humanitarian -import filepaths - +import summary_stats +import timeliness logger = logging.getLogger(__name__) diff --git a/dashboard/make_html.py b/dashboard/make_html.py index 3e85fa7d49..ebe9d89c7e 100644 --- a/dashboard/make_html.py +++ b/dashboard/make_html.py @@ 
-7,38 +7,38 @@ import re import subprocess from collections import defaultdict - -from flask import Flask, render_template, abort, Response, send_from_directory +from datetime import UTC, datetime import licenses -import timeliness -import forwardlooking +from dateutil import parser +from flask import Flask, Response, abort, render_template, send_from_directory + import comprehensiveness +import forwardlooking +import humanitarian # import coverage import summary_stats -import humanitarian -from vars import expected_versions import text -from datetime import datetime, UTC -from dateutil import parser +import timeliness from data import ( + MAJOR_VERSIONS, ckan, ckan_publishers, + codelist_lookup, codelist_mapping, codelist_sets, - codelist_lookup, current_stats, dataset_to_publisher_dict, - github_issues, get_publisher_stats, - MAJOR_VERSIONS, + github_issues, + is_valid_element, metadata, publisher_name, publishers_ordered_by_title, - is_valid_element, slugs, ) +from vars import expected_versions app = Flask(__name__, static_url_path="") diff --git a/dashboard/make_plots.py b/dashboard/make_plots.py index 99409024f9..a8e3e98f83 100644 --- a/dashboard/make_plots.py +++ b/dashboard/make_plots.py @@ -2,25 +2,25 @@ """ Generates static images of stats graphs using matplotlib. 
""" -import logging -import datetime import argparse +import csv +import datetime +import logging import os # noqa: F401 from collections import defaultdict -import csv +import matplotlib as mpl import numpy as np # noqa: F401 from tqdm import tqdm + import common import data import filepaths from vars import expected_versions # noqa: F401 -import matplotlib as mpl mpl.use("Agg") -import matplotlib.pyplot as plt # noqa: E402 import matplotlib.dates as mdates # noqa: E402 - +import matplotlib.pyplot as plt # noqa: E402 logger = logging.getLogger(__name__) diff --git a/dashboard/speakers_kit.py b/dashboard/speakers_kit.py index d3b8a7381d..7529252ce5 100644 --- a/dashboard/speakers_kit.py +++ b/dashboard/speakers_kit.py @@ -1,9 +1,9 @@ -import json -import data import csv +import json from collections import defaultdict from itertools import zip_longest +import data import filepaths diff --git a/dashboard/summary_stats.py b/dashboard/summary_stats.py index da4efcf7f4..e8b3abad82 100644 --- a/dashboard/summary_stats.py +++ b/dashboard/summary_stats.py @@ -1,10 +1,10 @@ # This file converts a range of transparency data to percentages -from data import publishers_ordered_by_title, get_publisher_stats, secondary_publishers import common -import timeliness -import forwardlooking import comprehensiveness +import forwardlooking +import timeliness +from data import get_publisher_stats, publishers_ordered_by_title, secondary_publishers # Set column groupings, to be displayed in the user output columns = [ diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index f72d5b7811..f1ae1bcbc3 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -1,10 +1,12 @@ # This file converts raw timeliness data into the associated Publishing Statistics assessments -from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches import datetime +from collections import Counter, defaultdict + from dateutil.relativedelta import relativedelta -from 
collections import defaultdict, Counter + import filepaths +from data import JSONDir, get_publisher_stats, get_registry_id_matches, publisher_name def short_month(month_str): diff --git a/dashboard/ui/jinja2.py b/dashboard/ui/jinja2.py index 7f615620cf..089761320c 100644 --- a/dashboard/ui/jinja2.py +++ b/dashboard/ui/jinja2.py @@ -6,6 +6,7 @@ from django.templatetags.static import static from django.urls import reverse from jinja2 import Environment + import timeliness diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 4c45254e37..49ee979545 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -21,7 +21,6 @@ import ui.views - urlpatterns = [ path("admin/", admin.site.urls), # Top level dashboard pages. diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 90cbc6fd43..95e016d10e 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -1,43 +1,41 @@ """Views for the IATI Dashboard""" +import collections import datetime -import dateutil.parser -import subprocess import json -import collections +import subprocess -from django.http import HttpResponse, Http404 +import dateutil.parser +from django.http import Http404, HttpResponse from django.template import loader import comprehensiveness import filepaths import forwardlooking import humanitarian +import summary_stats import text import timeliness -import summary_stats import ui.template_funcs import vars - from data import ( + MAJOR_VERSIONS, ckan, ckan_publishers, + codelist_lookup, codelist_mapping, codelist_sets, - codelist_lookup, current_stats, dataset_to_publisher_dict, - github_issues, get_publisher_stats, - MAJOR_VERSIONS, + github_issues, + is_valid_element_or_attribute, metadata, publisher_name, publishers_ordered_by_title, - is_valid_element_or_attribute, slugs, ) - COMMIT_HASH = ( subprocess.run("git show --format=%H --no-patch".split(), cwd=filepaths.join_base_path(""), capture_output=True) .stdout.decode() diff --git a/fetch_data.py b/fetch_data.py index 
80d18c0e91..705ebb5228 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -7,10 +7,10 @@ We're particulary looking for information such as name, organisation type, and the link back to the registry """ -from pathlib import Path -from os.path import join -from os import makedirs import json +from os import makedirs +from os.path import join +from pathlib import Path import requests diff --git a/fetch_github_issues.py b/fetch_github_issues.py index 5266c51701..6fc068e65f 100644 --- a/fetch_github_issues.py +++ b/fetch_github_issues.py @@ -1,10 +1,10 @@ """ Fetch codeforIATI/iati-data-bugtracker github issues """ -from pathlib import Path -from collections import defaultdict -import os import json +import os +from collections import defaultdict +from pathlib import Path import requests diff --git a/fetch_v2_codelists.py b/fetch_v2_codelists.py index 4a445dc1a1..bcb35137d6 100644 --- a/fetch_v2_codelists.py +++ b/fetch_v2_codelists.py @@ -1,11 +1,10 @@ -from os.path import join -from os import makedirs import json +from os import makedirs +from os.path import join import requests from lxml import etree as ET - output_path = join( 'data', 'IATI-Codelists-2', 'out', 'clv2', 'json', 'en') makedirs(output_path) From fd397e3cf50521c73830f64f90b8c1ce160307c1 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Mon, 11 Nov 2024 13:57:26 +0000 Subject: [PATCH 359/375] ci: Add black and isort to the CI tests --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 88197d62eb..e61a1cfd1c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,8 +20,12 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements_dev.txt - - name: Lint + - name: Lint - flake8 run: flake8 + - name: Lint - black + run: black . --check + - name: Lint - isort + run: isort . 
--check-only - name: Fetch data run: ./fetch_data.sh - name: Fetch stats From 58de52be8d65797e47f7195045a2432b41fddae0 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Mon, 11 Nov 2024 20:26:49 +0000 Subject: [PATCH 360/375] publisher: Remove Code For IATI issues raised from publisher page --- dashboard/templates/publisher.html | 35 ------------------------------ 1 file changed, 35 deletions(-) diff --git a/dashboard/templates/publisher.html b/dashboard/templates/publisher.html index cf0ac66918..f9ae0f34c4 100644 --- a/dashboard/templates/publisher.html +++ b/dashboard/templates/publisher.html @@ -22,9 +22,6 @@

      Publisher: {{ publisher_name[publisher] }}

      {% if failure_count > 0 %}
    • Files Failing Validation {% endif %} - {% if github_issues and publisher in github_issues %} -
    • Issues Raised - {% endif %} {% if 1 in publisher_inverted.invalidxml.values() %}
    • Files where XML is not well-formed {% endif %} @@ -243,38 +240,6 @@

      Files with non-standard roots

      - {% if github_issues and publisher in github_issues %} - {% set data_quality_issue = true %} -
      -
      -

      Issues Raised

      -
      -
      -

      Data and metadata issues are raised publicly on IATI Data Bugtracker. If you spot a problem with IATI data or metadata, please raise an issue.

      -
      -
    • Publishers
      {{ registration_agency }} {{ count }}
      {{ orgid|replace(' ', ' ') }}{{ publisher }}{{ publisher_name[publisher] }}{{ publisher }}{{ publisher_name[publisher] }} {{ count }}
      - - - - - - - - - - {% for issue in github_issues[publisher] %} - - - - - - - {% endfor %} - -
      TitleLabelsCreatedLast updated
      {{ issue.title }}{% for label in issue.labels %}{{ label.name }} {% endfor %}{{ issue.created_at[:10] }}{{ issue.updated_at[:10] }}
      -
      - {% endif %} - {% if not data_quality_issue %}

      No issues were found.

      {% endif %} From dc5e9a46e8d9bca658fe856db8713275700efc94 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 6 Dec 2024 19:00:45 +0000 Subject: [PATCH 361/375] comprehensiveness: Revert to rounding to whole decimal points --- dashboard/comprehensiveness.py | 14 +++++++------ dashboard/forwardlooking.py | 3 ++- dashboard/make_csv.py | 37 ++++++++++++++++++++++++---------- dashboard/summary_stats.py | 9 +++++---- dashboard/ui/jinja2.py | 8 +++++--- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/dashboard/comprehensiveness.py b/dashboard/comprehensiveness.py index 89dd409095..f8add7fc0c 100644 --- a/dashboard/comprehensiveness.py +++ b/dashboard/comprehensiveness.py @@ -1,6 +1,7 @@ # This file converts raw comprehensiveness data to percentages, and calculates averages. from data import get_publisher_stats, publisher_name, publishers_ordered_by_title +from ui.jinja2 import round_nicely columns = { "summary": [ @@ -173,19 +174,20 @@ def generate_row(publisher): if denominator(slug, publisher_base) != 0: # Populate the row with the %age - row[slug] = float(numerator_all) / denominator(slug, publisher_base) * 100 - row[slug + "_valid"] = float(numerator_valid) / denominator(slug, publisher_base) * 100 + row[slug] = round_nicely(float(numerator_all) / denominator(slug, publisher_base) * 100) + row[slug + "_valid"] = round_nicely(float(numerator_valid) / denominator(slug, publisher_base) * 100) # Loop for averages # Calculate the average for each grouping, and the overall 'summary' average for page in ["core", "financials", "valueadded", "summary"]: # Note that the summary must be last, so that it can use the average calculations from the other groupings - row[page + "_average"] = sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float( - sum(x[2] for x in columns[page]) + row[page + "_average"] = round_nicely( + sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) ) - row[page + "_average_valid"] = 
sum((row.get(x[0] + "_valid") or 0) * x[2] for x in columns[page]) / float( - sum(x[2] for x in columns[page]) + row[page + "_average_valid"] = round_nicely( + sum((row.get(x[0] + "_valid") or 0) * x[2] for x in columns[page]) + / float(sum(x[2] for x in columns[page])) ) return row diff --git a/dashboard/forwardlooking.py b/dashboard/forwardlooking.py index e91f4af23a..d3103c56f7 100644 --- a/dashboard/forwardlooking.py +++ b/dashboard/forwardlooking.py @@ -3,6 +3,7 @@ import datetime from data import get_publisher_stats, publisher_name, publishers_ordered_by_title +from ui.jinja2 import round_nicely # Create a variable with the current year as an integer this_year = datetime.date.today().year @@ -87,7 +88,7 @@ def generate_row(publisher): if not int(row["year_columns"][0][year]): row["year_columns"][2][year] = "-" else: - row["year_columns"][2][year] = ( + row["year_columns"][2][year] = round_nicely( float(row["year_columns"][1][year]) / float(row["year_columns"][0][year]) * 100 ) else: diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py index 0ba6a6d2d0..e1abbba0aa 100644 --- a/dashboard/make_csv.py +++ b/dashboard/make_csv.py @@ -14,6 +14,7 @@ import humanitarian import summary_stats import timeliness +from ui.jinja2 import round_nicely logger = logging.getLogger(__name__) @@ -173,7 +174,11 @@ def main(): for row in forwardlooking.table(): writer.writerow( [row["publisher_title"], row["publisher"]] - + [year_column[year] for year_column in row["year_columns"] for year in forwardlooking.years] + + [ + round_nicely(year_column[year]) + for year_column in row["year_columns"] + for year in forwardlooking.years + ] ) for tab in comprehensiveness.columns.keys(): @@ -191,10 +196,13 @@ def main(): writer.writerow( [row["publisher_title"], row["publisher"]] + [ - row[slug + "_valid"] if slug in row else "-" + round_nicely(row[slug + "_valid"]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [ + round_nicely(row[slug]) if slug in 
row else "-" for slug in comprehensiveness.column_slugs[tab] ] - + [row[slug] if slug in row else "-" for slug in comprehensiveness.column_slugs[tab]] + ["Yes" if row["flag"] else "-"] ) else: @@ -207,10 +215,13 @@ def main(): writer.writerow( [row["publisher_title"], row["publisher"]] + [ - row[slug + "_valid"] if slug in row else "-" + round_nicely(row[slug + "_valid"]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [ + round_nicely(row[slug]) if slug in row else "-" for slug in comprehensiveness.column_slugs[tab] ] - + [row[slug] if slug in row else "-" for slug in comprehensiveness.column_slugs[tab]] ) logger.info("Generating summary_stats.csv") @@ -223,7 +234,11 @@ def main(): for row in summary_stats.table(): # Write each row writer.writerow( - [row["publisher_title"], row["publisher"]] + [row[slug] for slug, header in summary_stats.columns] + [row["publisher_title"], row["publisher"]] + + [ + row[slug] if header == "Publisher Type" else round_nicely(row[slug]) + for slug, header in summary_stats.columns + ] ) logger.info("Generating humanitarian.csv") @@ -250,11 +265,11 @@ def main(): row["publisher"], row["publisher_type"], row["num_activities"], - row["publishing_humanitarian"], - row["humanitarian_attrib"], - row["appeal_emergency"], - row["clusters"], - row["average"], + round_nicely(row["publishing_humanitarian"]), + round_nicely(row["humanitarian_attrib"]), + round_nicely(row["appeal_emergency"]), + round_nicely(row["clusters"]), + round_nicely(row["average"]), ] ) diff --git a/dashboard/summary_stats.py b/dashboard/summary_stats.py index e8b3abad82..f84c2d4a11 100644 --- a/dashboard/summary_stats.py +++ b/dashboard/summary_stats.py @@ -5,6 +5,7 @@ import forwardlooking import timeliness from data import get_publisher_stats, publishers_ordered_by_title, secondary_publishers +from ui.jinja2 import round_nicely # Set column groupings, to be displayed in the user output columns = [ @@ -89,7 +90,7 @@ def table(): 
timelag_score = 0 # Compute the percentage - row["timeliness"] = (float(frequency_score + timelag_score) / 8) * 100 + row["timeliness"] = round_nicely((float(frequency_score + timelag_score) / 8) * 100) # Compute forward-looking statistic # Get the forward-looking data for this publisher @@ -99,8 +100,8 @@ def table(): numbers = [int(x) for x in publisher_forwardlooking_data["year_columns"][2].values() if is_number(x)] # Compute and store the mean average for these fields - row["forwardlooking"] = sum(int(round(y)) for y in numbers) / len( - publisher_forwardlooking_data["year_columns"][2] + row["forwardlooking"] = round_nicely( + sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data["year_columns"][2]) ) # Compute comprehensiveness statistic @@ -111,7 +112,7 @@ def table(): row["comprehensiveness"] = convert_to_float(publisher_comprehensiveness_data["summary_average_valid"]) # Compute score - row["score"] = float(row["timeliness"] + row["forwardlooking"] + row["comprehensiveness"]) / 3 + row["score"] = round_nicely(float(row["timeliness"] + row["forwardlooking"] + row["comprehensiveness"]) / 3) # Return a generator object yield row diff --git a/dashboard/ui/jinja2.py b/dashboard/ui/jinja2.py index 089761320c..e719e5ac2d 100644 --- a/dashboard/ui/jinja2.py +++ b/dashboard/ui/jinja2.py @@ -10,11 +10,13 @@ import timeliness -def round_nicely(val, ndigits=2): +def round_nicely(val, ndigits=0): """Round a float, but remove the trailing .0 from integers that python insists on""" - if int(val) == float(val): + if val == "-": + return val + val = round(float(val), ndigits) + if val == int(val): return int(val) - return round(float(val), ndigits) def xpath_to_url(path): From 612295eab53cb47b984db52c693906833938db9c Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Wed, 4 Dec 2024 11:47:12 +0000 Subject: [PATCH 362/375] templates: Server generated plots and csvs from their own directory We do this instead of using the static directory, as some of django's 
static files functionality assumes they won't change at run time. --- dashboard/templates/_partials/boxes.html | 4 ++-- dashboard/templates/comprehensiveness_base.html | 2 +- dashboard/templates/elements.html | 4 ++-- dashboard/templates/forwardlooking.html | 2 +- dashboard/templates/humanitarian.html | 2 +- dashboard/templates/publishers.html | 2 +- dashboard/templates/summary_stats.html | 2 +- dashboard/templates/timeliness.html | 2 +- dashboard/templates/timeliness_timelag.html | 2 +- dashboard/ui/settings.py | 1 + dashboard/ui/urls.py | 4 +++- dashboard/ui/views.py | 3 ++- 12 files changed, 17 insertions(+), 13 deletions(-) diff --git a/dashboard/templates/_partials/boxes.html b/dashboard/templates/_partials/boxes.html index 0bb96929cb..7132de6ed9 100644 --- a/dashboard/templates/_partials/boxes.html +++ b/dashboard/templates/_partials/boxes.html @@ -20,9 +20,9 @@

      {% if description %}

      {{ description|safe }}

      {% endif %} - + {% if legend %} - + {% endif %} diff --git a/dashboard/templates/comprehensiveness_base.html b/dashboard/templates/comprehensiveness_base.html index 9ab0d9cf07..da550bc501 100644 --- a/dashboard/templates/comprehensiveness_base.html +++ b/dashboard/templates/comprehensiveness_base.html @@ -31,7 +31,7 @@ {% block content %}
      - (This table as CSV) + (This table as CSV)

      {% block table_title %}Table of Comprehensiveness values{% endblock %}

      diff --git a/dashboard/templates/elements.html b/dashboard/templates/elements.html index e5eb0ccb84..78f867fc73 100644 --- a/dashboard/templates/elements.html +++ b/dashboard/templates/elements.html @@ -6,8 +6,8 @@ diff --git a/dashboard/templates/forwardlooking.html b/dashboard/templates/forwardlooking.html index a25ae94fbe..f1bcd51c58 100644 --- a/dashboard/templates/forwardlooking.html +++ b/dashboard/templates/forwardlooking.html @@ -20,7 +20,7 @@
      - (This table as CSV) + (This table as CSV)

      Activities with Forward Looking Budget Allocations

      diff --git a/dashboard/templates/humanitarian.html b/dashboard/templates/humanitarian.html index 07995d56c3..d9f69c8d44 100644 --- a/dashboard/templates/humanitarian.html +++ b/dashboard/templates/humanitarian.html @@ -10,7 +10,7 @@
      diff --git a/dashboard/templates/publishers.html b/dashboard/templates/publishers.html index 6fa3f483aa..99ce9fd797 100644 --- a/dashboard/templates/publishers.html +++ b/dashboard/templates/publishers.html @@ -13,7 +13,7 @@
      -

      (This table as CSV)

      +

      (This table as CSV)

      List of current active IATI publishers, Click on the publisher name for more details.

      {% include '_partials/tablesorter_instructions.html' %}
      diff --git a/dashboard/templates/summary_stats.html b/dashboard/templates/summary_stats.html index e73b608b8b..ac9703e84d 100644 --- a/dashboard/templates/summary_stats.html +++ b/dashboard/templates/summary_stats.html @@ -12,7 +12,7 @@
      - (This table as CSV) + (This table as CSV)

      Summary Statistics

      diff --git a/dashboard/templates/timeliness.html b/dashboard/templates/timeliness.html index 8d130238fa..d4cc3f77cf 100644 --- a/dashboard/templates/timeliness.html +++ b/dashboard/templates/timeliness.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Frequency assessments

      diff --git a/dashboard/templates/timeliness_timelag.html b/dashboard/templates/timeliness_timelag.html index 4e40dc9465..8c2741ef5c 100644 --- a/dashboard/templates/timeliness_timelag.html +++ b/dashboard/templates/timeliness_timelag.html @@ -7,7 +7,7 @@
      - (This table as CSV) + (This table as CSV)

      Table of Time lag assessments

      diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index 919fe78b65..78605ff749 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -133,6 +133,7 @@ STATICFILES_DIRS = [ BASE_DIR / "static", ] +STATIC_ROOT = "../static" # Default primary key field type # https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py index 49ee979545..c407748ad9 100644 --- a/dashboard/ui/urls.py +++ b/dashboard/ui/urls.py @@ -15,6 +15,7 @@ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ +from django.conf.urls.static import static from django.contrib import admin from django.urls import path, re_path from django.views.generic.base import RedirectView @@ -178,4 +179,5 @@ ), re_path(r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), -] +] + static("generated", document_root="../out") +# ^ Serve generated files when using runserver for development diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 95e016d10e..339cc7d69f 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -171,7 +171,8 @@ def _make_context(page_name: str): slugs=slugs, datetime_data=dateutil.parser.parse(metadata["created_at"]).strftime("%-d %B %Y (at %H:%M %Z)"), current_year=datetime.datetime.now(datetime.UTC).year, - stats_url="https://stats.codeforiati.org", + stats_url="/stats", + generated_url="/generated", stats_gh_url=STATS_GH_URL, commit_hash=COMMIT_HASH, stats_commit_hash=STATS_COMMIT_HASH, From 656d707628d9652b86841e5abf863d89d211f4e9 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 6 Dec 2024 12:02:29 +0000 Subject: [PATCH 363/375] codelists: Use official IATI codelists, instead of Code for IATI --- fetch_data.sh | 2 +- fetch_v2_codelists.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 
deletions(-) diff --git a/fetch_data.sh b/fetch_data.sh index 10f1605b59..9d72cb8794 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -34,7 +34,7 @@ cd ../../../ # Get codelists for versions v1.x and v2.x of the IATI Standard rm -rf data/IATI-Codelists-1 echo "cloning Codelists-1" -git clone --branch version-1.05 https://github.com/codeforIATI/IATI-Codelists.git data/IATI-Codelists-1 +git clone --branch version-1.05 https://github.com/IATI/IATI-Codelists.git data/IATI-Codelists-1 cd data/IATI-Codelists-1 echo "running gen.sh for Codelist-1" ./gen.sh diff --git a/fetch_v2_codelists.py b/fetch_v2_codelists.py index bcb35137d6..d6060a4fb8 100644 --- a/fetch_v2_codelists.py +++ b/fetch_v2_codelists.py @@ -11,7 +11,10 @@ resp = requests.get('https://codelists.codeforiati.org/api/') codelists = resp.json()['formats']['json']['languages']['en'] for codelist_name, codelist_url in codelists.items(): - codelist_json = requests.get(codelist_url).json() + r = requests.get("http://dev.iatistandard.org/reference_downloads/203/codelists/downloads/clv3/json/en/" + codelist_url.split("/")[-1]) + if r.status_code == 404: + continue + codelist_json = r.json() with open(join(output_path, codelist_name + '.json'), 'w') as f: json.dump(codelist_json, f) @@ -28,8 +31,9 @@ def mapping_to_json(mappings): mapping_urls = [ - 'https://raw.githubusercontent.com/codeforIATI/IATI-Codelists/version-2.03/mapping.xml', - 'https://raw.githubusercontent.com/codeforIATI/Unofficial-Codelists/master/mapping.xml'] + 'https://raw.githubusercontent.com/IATI/IATI-Codelists/version-2.03/mapping.xml', + ] +# 'https://raw.githubusercontent.com/codeforIATI/Unofficial-Codelists/master/mapping.xml'] mappings = [] for mapping_url in mapping_urls: resp = requests.get(mapping_url) From d62fd08322f4fe5b1bb2fcd1998c6ec3cc6b5ddf Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 6 Dec 2024 14:26:33 +0000 Subject: [PATCH 364/375] settings: Set default SECRET_KEY and DEBUG to safe values --- README.md | 2 +- 
dashboard/ui/settings.py | 18 ++++++++++++------ requirements.in | 1 + requirements.txt | 2 ++ requirements_dev.txt | 2 ++ 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9dfc217df5..635cf18139 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ cp ../img/publishers static/ From `./dashboard/`: ``` -python manage.py runserver +DEBUG=True python manage.py runserver ``` The Dashboard will now be accessible from `localhost:8000/`. diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index 78605ff749..bb048bc294 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -12,18 +12,24 @@ from pathlib import Path +import environ +from django.utils.crypto import get_random_string + # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent +chars = "abcdefghijklmnopqrstuvwxyz0123456789!@#%^&*(-_=+)" +secret_key = get_random_string(50, chars) + +env = environ.Env( # set default values and casting + DEBUG=(bool, False), + SECRET_KEY=(str, secret_key), +) -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = "django-insecure-4i8e#n)gr2@wol5i4y@od0za_&y+idpz*-t5v)#-6s(&rhl=k&" +SECRET_KEY = env("SECRET_KEY") -# SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = True +DEBUG = env("DEBUG") ALLOWED_HOSTS = [] diff --git a/requirements.in b/requirements.in index 488212fa86..98d9f59faf 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,5 @@ django +django-environ gunicorn flask frozen-flask diff --git a/requirements.txt b/requirements.txt index d12165c3e6..fc5f9b544c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,8 @@ cycler==0.12.1 # via matplotlib django==5.1.3 # via -r requirements.in +django-environ==0.11.2 + # via -r requirements.in elementpath==4.6.0 # via xmlschema flask==3.0.3 diff --git a/requirements_dev.txt b/requirements_dev.txt index 013f9c844b..ee37e0cac8 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -43,6 +43,8 @@ cycler==0.12.1 # matplotlib django==5.1.3 # via -r requirements.txt +django-environ==0.11.2 + # via -r requirements.txt docopt==0.6.2 # via coveralls elementpath==4.6.0 From f79e1fbc99423684b614e6e9050931ae52d1d2f5 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 6 Dec 2024 16:31:23 +0000 Subject: [PATCH 365/375] timeliness: Fix yellow flags Port this fix I made in the IATI-Publishing-Statistics repo: https://github.com/IATI/IATI-Publishing-Statistics/commit/35fd39ca80a78d15099e5d394ec4dbdd5b49a7c7 --- dashboard/timeliness.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index f1ae1bcbc3..293e5b2d0c 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -228,8 +228,12 @@ def has_future_transactions(publisher): gitaggregate_publisher = JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")).get(publisher, {}) mindate = datetime.date(today.year - 1, today.month, 1) - for date, activity_blacklist in gitaggregate_publisher.get("activities_with_future_transactions", {}).items(): - if parse_iso_date(date) >= mindate and activity_blacklist: + for date_string, latest_transaction_date_string in gitaggregate_publisher.get( + "latest_transaction_date", 
{} + ).items(): + date = parse_iso_date(date_string) + latest_transaction_date = parse_iso_date(latest_transaction_date_string) + if date >= mindate and latest_transaction_date and latest_transaction_date > date: return 1 return 0 From 95df46e5178c5c499db568a37a4f4a31706bf36a Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 6 Dec 2024 16:47:44 +0000 Subject: [PATCH 366/375] publisher: Fix a url to the json stats --- dashboard/templates/publisher.html | 2 +- dashboard/ui/views.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dashboard/templates/publisher.html b/dashboard/templates/publisher.html index f9ae0f34c4..3daa664da1 100644 --- a/dashboard/templates/publisher.html +++ b/dashboard/templates/publisher.html @@ -4,7 +4,7 @@ Publisher: {{ publisher_name[publisher] }} {{ super() }} {% endblock %} {% block page_header %} -(Publisher Stats JSON) +(Publisher Stats JSON)

      Publisher: {{ publisher_name[publisher] }}

      {% endblock %} diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py index 339cc7d69f..be0504fa9b 100644 --- a/dashboard/ui/views.py +++ b/dashboard/ui/views.py @@ -50,7 +50,6 @@ .stdout.decode() .strip() ) -STATS_GH_URL = "https://github.com/codeforIATI/IATI-Stats-public/tree/" + STATS_COMMIT_HASH # Load all the licenses and generate data for each licence and publisher. with open(filepaths.join_stats_path("licenses.json")) as handler: @@ -173,7 +172,6 @@ def _make_context(page_name: str): current_year=datetime.datetime.now(datetime.UTC).year, stats_url="/stats", generated_url="/generated", - stats_gh_url=STATS_GH_URL, commit_hash=COMMIT_HASH, stats_commit_hash=STATS_COMMIT_HASH, func={ From 758c8032072b8e9317acab8186444eee3faee664 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Fri, 8 Nov 2024 14:18:22 +0000 Subject: [PATCH 367/375] caching: Only load the publisher list into memory once --- dashboard/data.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dashboard/data.py b/dashboard/data.py index 3296ac143b..2db818fc41 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -28,6 +28,9 @@ def wrapper(self, key): return wrapper +PUBLISHER_LIST = None + + class JSONDir(MutableMapping): """Produces an object, to be used to access JSON-formatted publisher data and return this as an ordered dictionary (with nested dictionaries, if appropriate). @@ -115,13 +118,18 @@ def get_publisher_name(self): Note, this is a super hacky way to do this, prize available if a better way is found to do this! 
""" + global PUBLISHER_LIST + + if PUBLISHER_LIST is None: + PUBLISHER_LIST = JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys() + # Get a list of the parts that are contained within this filepath path = os.path.normpath(self.folder) path_components = path.split(os.sep) # Loop over this list and return the publisher name if it is found within the historic list of publishers for x in path_components: - if x in JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).keys(): + if x in PUBLISHER_LIST: return x # If got to the end of the loop and nothing found, this folder does not relate to a single publisher From 05b084872ad739d2f368db2e846732420d9ea27f Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Mon, 9 Dec 2024 16:47:56 +0000 Subject: [PATCH 368/375] caching: Cache some data as json files --- dashboard/cache.py | 39 ++++++++++++++++++++++++++++++++++ dashboard/comprehensiveness.py | 2 ++ dashboard/create_caches.py | 16 ++++++++++++++ dashboard/data.py | 2 +- dashboard/forwardlooking.py | 2 ++ dashboard/humanitarian.py | 2 ++ dashboard/summary_stats.py | 2 ++ dashboard/timeliness.py | 5 ++++- dashboard/ui/settings.py | 4 +++- git.sh | 6 +++++- 10 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 dashboard/cache.py create mode 100644 dashboard/create_caches.py diff --git a/dashboard/cache.py b/dashboard/cache.py new file mode 100644 index 0000000000..397e717cd6 --- /dev/null +++ b/dashboard/cache.py @@ -0,0 +1,39 @@ +import json +import os + +import django.core.exceptions +from django.conf import settings + +os.makedirs("cache", exist_ok=True) + + +# JSON cache for a function with no arguments +def json_cache(fname): + fname = os.path.join("cache", fname) + + def decorator(f): + def wrapper(): + # Check in memory cache first + if hasattr(f, "__cache"): + return f.__cache + is_json_file = os.path.isfile(fname) + if is_json_file: + with open(fname) as fp: + try: + res = json.load(fp) + except 
json.decoder.JSONDecodeError: + is_json_file = False + if not is_json_file: + res = list(f()) + try: + if settings.DASHBOARD_CREATE_CACHE_FILES: + with open(fname, "w") as fp: + json.dump(res, fp) + except django.core.exceptions.ImproperlyConfigured: + pass + f.__cache = res + return res + + return wrapper + + return decorator diff --git a/dashboard/comprehensiveness.py b/dashboard/comprehensiveness.py index f8add7fc0c..d740a45b9e 100644 --- a/dashboard/comprehensiveness.py +++ b/dashboard/comprehensiveness.py @@ -1,5 +1,6 @@ # This file converts raw comprehensiveness data to percentages, and calculates averages. +from cache import json_cache from data import get_publisher_stats, publisher_name, publishers_ordered_by_title from ui.jinja2 import round_nicely @@ -193,6 +194,7 @@ def generate_row(publisher): return row +@json_cache("comprehensiveness.json") def table(): """Generate comprehensiveness table data for every publisher and return as a generator object""" diff --git a/dashboard/create_caches.py b/dashboard/create_caches.py new file mode 100644 index 0000000000..fc402799fd --- /dev/null +++ b/dashboard/create_caches.py @@ -0,0 +1,16 @@ +from django.conf import settings + +import comprehensiveness +import forwardlooking +import humanitarian +import summary_stats +import timeliness + +settings.configure(DASHBOARD_CREATE_CACHE_FILES=True) + +timeliness.publisher_frequency() +timeliness.publisher_timelag() +forwardlooking.table() +comprehensiveness.table() +summary_stats.table() +humanitarian.table() diff --git a/dashboard/data.py b/dashboard/data.py index 2db818fc41..d168a300f3 100644 --- a/dashboard/data.py +++ b/dashboard/data.py @@ -121,7 +121,7 @@ def get_publisher_name(self): global PUBLISHER_LIST if PUBLISHER_LIST is None: - PUBLISHER_LIST = JSONDir(filepaths.join_stats_path('current/aggregated-publisher')).keys() + PUBLISHER_LIST = JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).keys() # Get a list of the parts that are contained within 
this filepath path = os.path.normpath(self.folder) diff --git a/dashboard/forwardlooking.py b/dashboard/forwardlooking.py index d3103c56f7..20a076cb25 100644 --- a/dashboard/forwardlooking.py +++ b/dashboard/forwardlooking.py @@ -2,6 +2,7 @@ import datetime +from cache import json_cache from data import get_publisher_stats, publisher_name, publishers_ordered_by_title from ui.jinja2 import round_nicely @@ -101,6 +102,7 @@ def generate_row(publisher): return row +@json_cache("forwardlooking.json") def table(): """Generate forward-looking table data for every publisher and return as a generator object""" diff --git a/dashboard/humanitarian.py b/dashboard/humanitarian.py index 3d93509c5a..ac10c7b736 100644 --- a/dashboard/humanitarian.py +++ b/dashboard/humanitarian.py @@ -1,6 +1,7 @@ # This file builds a table to show humanitarian reporting for each publisher import common +from cache import json_cache from data import get_publisher_stats, publishers_ordered_by_title # Set column groupings, to be displayed in the user output @@ -16,6 +17,7 @@ ] +@json_cache("humanitarian.json") def table(): """Generate data for the humanitarian table""" diff --git a/dashboard/summary_stats.py b/dashboard/summary_stats.py index f84c2d4a11..9c099c064c 100644 --- a/dashboard/summary_stats.py +++ b/dashboard/summary_stats.py @@ -4,6 +4,7 @@ import comprehensiveness import forwardlooking import timeliness +from cache import json_cache from data import get_publisher_stats, publishers_ordered_by_title, secondary_publishers from ui.jinja2 import round_nicely @@ -35,6 +36,7 @@ def convert_to_float(x): return 0 +@json_cache("summary_stats.json") def table(): """Generate data for the publisher forward-looking table""" diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py index 293e5b2d0c..059a1850d1 100644 --- a/dashboard/timeliness.py +++ b/dashboard/timeliness.py @@ -6,6 +6,7 @@ from dateutil.relativedelta import relativedelta import filepaths +from cache import json_cache from data 
import JSONDir, get_publisher_stats, get_registry_id_matches, publisher_name @@ -56,6 +57,7 @@ def previous_months_generator(d): this_year = datetime.datetime.today().year +@json_cache("timeliness_frequency.json") def publisher_frequency(): """Generate the publisher frequency data""" @@ -185,11 +187,12 @@ def first_published_band_index(first_published_band): ].index(first_published_band) +@json_cache("timeliness_timelag.json") def publisher_timelag(): return [ ( publisher, - publisher_name.get(publisher), + publisher_name.get(publisher, ""), agg["transaction_months_with_year"], agg["timelag"], has_future_transactions(publisher), diff --git a/dashboard/ui/settings.py b/dashboard/ui/settings.py index bb048bc294..194d758631 100644 --- a/dashboard/ui/settings.py +++ b/dashboard/ui/settings.py @@ -31,7 +31,7 @@ DEBUG = env("DEBUG") -ALLOWED_HOSTS = [] +ALLOWED_HOSTS = [".dashboard.iatistandard.org", "testserver", "localhost"] # Application definition @@ -153,3 +153,5 @@ DASHBOARD_DATA_DIRECTORY = "../data" DASHBOARD_BASE_DIRECTORY = "../" DASHBOARD_OUT_DIRECTORY = "../out" + +DASHBOARD_CREATE_CACHE_FILES = False diff --git a/git.sh b/git.sh index c6c0e70178..34dd8c8f5b 100755 --- a/git.sh +++ b/git.sh @@ -17,9 +17,13 @@ python make_plots.py || exit 1 echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_csv.py" python make_csv.py || exit 1 -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers_kit.py" python speakers_kit.py || exit 1 +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running create_caches.py" +rm -r cache +python create_caches.py || exit 1 + cd .. 
echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" From 9cb0486377ff6c729a594f1af0f7c10ac1cd3ab8 Mon Sep 17 00:00:00 2001 From: Bee Webb Date: Mon, 9 Dec 2024 16:48:25 +0000 Subject: [PATCH 369/375] templates: Check that a publisher exists before we try and look up a url Without this the templates are a lot slower. --- dashboard/templates/dates.html | 2 +- dashboard/templates/files.html | 5 +++-- dashboard/templates/timeliness_timelag.html | 2 +- dashboard/templates/traceability.html | 2 +- dashboard/templates/xml.html | 10 ++++++---- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dashboard/templates/dates.html b/dashboard/templates/dates.html index 7d9a613940..b08997eb48 100644 --- a/dashboard/templates/dates.html +++ b/dashboard/templates/dates.html @@ -19,7 +19,7 @@ {% for publisher_title,publisher in publishers_ordered_by_title %} {% set publisher_stats = func.get_publisher_stats(publisher) %}

      {{ publisher_title }}{{ publisher_title }} {% if publisher_stats.date_extremes.min.overall %}{{ publisher_stats.date_extremes.min.overall }}{% endif %} {% if publisher_stats.date_extremes.max.overall %}{{ publisher_stats.date_extremes.max.overall }}{% endif %} {% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{ publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}
      {{ publisher_name[func.dataset_to_publisher(package[:-4])] }}{{ package[:-4] }}{{ publisher_name[func.dataset_to_publisher(package)] }}{{ package }} {{ activities }} {{ current_stats.inverted_file.organisations.get(package) }} {{ current_stats.inverted_file.file_size.get(package)|filesizeformat }}
      {{ publisher_title }}{{ publisher_title }}{{ per_month[month] or 0 }}
      {{ publisher_title }}{{ publisher_title }} {%- if publisher_stats.traceable_activities_by_publisher_id -%} {{ '{:,}'.format(publisher_stats.traceable_activities_by_publisher_id) }} diff --git a/dashboard/templates/xml.html b/dashboard/templates/xml.html index 46c9da4a74..a89979002d 100644 --- a/dashboard/templates/xml.html +++ b/dashboard/templates/xml.html @@ -28,8 +28,9 @@

      Files where XML is not well-formed

      {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %}
      {{ func.dataset_to_publisher(dataset[:-4]) }}{{ dataset[:-4] }}{{ publisher }}{{ dataset }}
      {{ func.dataset_to_publisher(dataset[:-4]) }}{{ dataset[:-4] }}{{ publisher }}{{ dataset }}