diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..a7cba45f3d --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = + tests/* diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..4ce053e2ab --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +data +out +web +stats-calculated +dashboard/cache diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index bbcbbe7d61..0214585569 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -8,7 +8,7 @@ assignees: '' --- **Is your feature request related to a problem? Please describe.** -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] +A clear and concise description of what the problem is. **Describe the solution you'd like** A clear and concise description of what you want to happen. diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md deleted file mode 100644 index 9d633eb4d2..0000000000 --- a/.github/ISSUE_TEMPLATE/other.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Other -about: A blank template -title: '' -labels: '' -assignees: '' - ---- - - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..db38879c06 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,41 @@ +name: CI +on: [push, pull_request] +jobs: + ci: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: 3.12 + - uses: actions/cache@v3 + name: Cache dependencies + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements_dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements_dev.txt + - name: Lint - flake8 + run: flake8 + - name: Lint - black + run: black 
. --check + - name: Lint - isort + run: isort . --check-only + - name: Fetch data + run: ./fetch_data.sh + - name: Fetch stats + run: | + mkdir out + ./mock_stats_for_unit_tests.sh + - name: Run tests + run: pytest --cov . + # - name: Coveralls + # run: coveralls --service=github-actions + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} diff --git a/.gitignore b/.gitignore index 3554e82638..52a1db6179 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,142 @@ -*.pyc -*.swp - -pyenv -stats-calculated* -data +/stats-calculated/ +/data/ out + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/CHECKLIST.rst b/CHECKLIST.rst deleted file mode 100644 index babc3f386b..0000000000 --- a/CHECKLIST.rst +++ /dev/null @@ -1,139 +0,0 @@ -IATI Dashboard Checklist -======================== - -https://github.com/IATI/IATI-Developer-Documentation/blob/master/code-checklist.rst - -We should know which code is 'ours' ------------------------------------ - -This is ours - -All code should have a lead person identified ---------------------------------------------- - -Dale Potter - `https://github.com/dalepotter `__  - -Our projects/code should be appropriately branded. --------------------------------------------------- - -In absence of branding guidance this uses a plain twitter bootstrap -theme. - -Our code/projects should be in version control and present links to issue trackers  and source code. 
----------------------------------------------------------------------------------------------------- - -`https://github.com/IATI/IATI-Dashboard/ `__ - -Each piece of code should have a document(!), a roadmap, and estimate of resources, and a licence -------------------------------------------------------------------------------------------------- - -Released under the GPL license - -We should be confident that updates to our code will not break existing functionality -------------------------------------------------------------------------------------- - -It’s a static site generator, so if we check the output is okay, it’s -fine. - -It would be good to have an automated process of doing this, but there -currently isn’t. - -It should make sense in the way people access our tools/code ------------------------------------------------------------- - -At http://dashboard.iatistandard.org - -Our code should be on our servers - we should be able to monitor the performance of those servers -------------------------------------------------------------------------------------------------- - -All the code for this is now on IATI servers. - -We don't have any particular performance monitoring set up. - -We should know how our code is being used - logs! -------------------------------------------------- - -We have google analytics, and the server has web logs for page accesses, and logs of the complete process. - -Our code will need to adapt with schema changes and changes to external systems upon which it relies ----------------------------------------------------------------------------------------------------- - -This relies on the stats generated by a different piece of software -(`IATI-Stats `__), which itself -relies on the Standard and the CKAN API. If those statistics are -generated differently (or output differently) then this code would be -affected. - -It also relies on the Github API (v3), and Github gist. 
- -The code relies on python modules, which may change over time. This may -have implications for the server requirements to host the application. - -Developers should be able to find useful resources and help easily ------------------------------------------------------------------- - -- Link to the source code and issue tracker are in the footer. -- Javascript support tab is in place that links to Zendesk. - -Each project should clearly describe how other developers can get involved -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Has a CONTRIBUTING.rst file - -We should be able to communicate with the users of our code. ------------------------------------------------------------- - -There is room on the homepage for notices. - -There is a forum on support.iatistandard.org for consultation about the Dashboard Publishing Statistics tabs - http://support.iatistandard.org/forums/21204695-Dashboard-Publishing-Statistics - -Nothing more general is in place. - -Users should be able to communicate with us about our code ----------------------------------------------------------- - -The Support tab is in place. - -If people visit the source code pages they can contact the team there. - -We should protect our users privacy ------------------------------------ - -- This is just a website, same concerns as our other websites. -- No logins, or collection of users data -- The Dashboard sets cookies. Cookie code should be considered. -- Terms and conditions have not been written and may need to be. 
- -We should be clear about how we work with contractors ------------------------------------------------------ - -N/A - -If our code works with IATI data, have we considered how it will work as the IATI datasets grow, both in terms of individual file size and as a corpus ------------------------------------------------------------------------------------------------------------------------------------------------------- - -This could become a problem for the Dashboard, as it relies on running -code across the entire dataset. Currently the complete process -(including download and stats code) for generating the dashboard takes several hours. - -Our code should be secure -------------------------- - -The dashboard doesn’t pose any new security concerns because it’s all a -static site. - -We should know that our deployed code is working properly ---------------------------------------------------------- - -As this is a static site it is either there or it is not. - -If any of scripts return a non-zero exit status the site will not be updated. -So at worst an out of date site will be displayed, but not a broken site. - -We use travis to check whether is has updated by noon in a given day - -https://github.com/IATI/IATI-Website-Tests - -There is a concern for knowing that the dashboard is accurately -displaying what we think it should. As it relies on number of other -services it could be displaying inaccurate data. - diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..02f29ea5b3 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,61 @@ +# Contributing + +If you would like to contribute to the IATI Dashboard you can.... + +* Send us feedback about your user experience. You can find our [contact details on the IATI Organisation page on GitHub](https://github.com/IATI) +* Report bugs +* Request new features +* Contribute code or documents to improve the application. See the list of specific tasks below. 
+ + +## How to report a bug or request a feature + +If you are able to work with GitHub then please [create an issue](https://github.com/IATI/IATI-Dashboard/issues/new/choose). + +Before creating a new issue [check to see if the issue already exists](https://github.com/IATI/IATI-Dashboard/issues/). If not then please do create it. + +If you are not comfortable working with GitHub, but would still like to contribute, then talk to us. You can reach us via the central [IATI Developer Documentation pages](https://iatistandard.org/en/guidance/developer/). + + +## How to contribute code and documents + +### How we use branches in this repository + +* `live` represents the branch we are currently using for our deployed instance of the code. +* Eventually a `develop` branch will be for development work that is not yet live. +* Other branches represent development work or bug fixes. + +### Submitting changes + +* Fork this repository (if you haven't previously) +* Make sure you're working on top of an up to date copy of the `live` branch +* Create a branch named after the work you're doing (if you're targeting a specific issue, start the branch name with the issue number e.g. `42-feature-name`). +* Do your work, creating atomic commits as you go. If your work addresses a specific issue, reference that issue in your commit message using the full URL to the issue. Please name your commits starting with a one-word description of the commit, e.g., *fix*, *update*, *refactor*. +* Create a pull request against `develop`. + +## Specific Tasks: + +### Deployment + +If you find any issues in deploying your own version of the code we'd love to hear about it and try to improve our deployment documentation. + +### User Interface + +Can you improve the user interface? Can you make it look 'nicer'? + +### Documentation + +We would welcome any improvements to how the code or the application is documented. 
+ +### Statistics + +Can you check that the generated statistics are telling us what we think they are telling us? +Do you have other tests/statistics that we could be generating? + +### Unit Tests + +Can you improve the unit testing to make deployment more robust? + +## Talk to us + +We'd love to hear from you. You can find our [contact details at the main IATI GitHub page](https://github.com/IATI). diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst deleted file mode 100644 index 4e04cfd772..0000000000 --- a/CONTRIBUTING.rst +++ /dev/null @@ -1,70 +0,0 @@ -If you would like to contribute to the IATI Dashboard project, you can.... - -* Send us feedback about your user experience. Contact details at: https://github.com/IATI -* Report bugs -* Request new features -* Contribute code or documents to improve the application. See the list of specific tasks below. 
``42-feature-name``) -* Do your work - - If your work addresses a specific issue, reference that issue in your commit message by starting the commit message with ``[#issue number]`` e.g. ``[#64]`` -* Create a pull request - -Specific Tasks: -=============== - -Deployment ----------- -If you find any issues in deploying your own version of the code we'd love to hear about it and try to improve our deployment documentation. - -User Interface --------------- -Can you improve the user interface? Can you make it look 'nicer'? - -Documentation -------------- -We would welcome any improvements to how the code or the application is documented. - -Statistics ----------- -Can you check that the generated statstics are telling us what we think they are telling us? -Do you have other tests/statistics that we could be generating? - -Unit Tests ----------- -Can you improve the unit testing to make deployment more robust? - -Fix a Bitesize issue --------------------- -We mark some of issues as 'Bitesize'. Generally these will help ease you into the code and help you find your way around. - -Talk to us -========== -We'd love to hear from you. Details at: https://github.com/IATI - - -For general guidance on contributing to IATI Code please see http://iatistandard.org/developer/contribute/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000..fc8b288d98 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.12-bookworm + +WORKDIR /work/IATI-Dashboard/dashboard + +COPY requirements.txt /work/IATI-Dashboard/requirements.txt + +RUN git config --global --add safe.directory /work/IATI-Stats/data + +RUN pip install -r /work/IATI-Dashboard/requirements.txt + +COPY . /work/IATI-Dashboard + +# 2024-03-20: Emergency fix +# We were seeing cert errors inside the docker container after a new Lets Encrypt was issued. 
+# +# We know there are changes coming about root certificates and the error may be caused by that: +# https://blog.cloudflare.com/upcoming-lets-encrypt-certificate-chain-change-and-impact-for-cloudflare-customers +# +# I tried installing the LE root cert's manually but that didn't work. +# As live is broken for now we need this emergency fix, but we should remove it in the future. +RUN echo "check_certificate=off" > /root/.wgetrc + +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--timeout", "120", "ui.wsgi:application"] diff --git a/GPL.md b/LICENSE.md similarity index 100% rename from GPL.md rename to LICENSE.md diff --git a/README.md b/README.md new file mode 100644 index 0000000000..635cf18139 --- /dev/null +++ b/README.md @@ -0,0 +1,139 @@ +IATI Dashboard +============== + +[![Coverage Status](https://coveralls.io/repos/github/IATI/IATI-Dashboard/badge.svg?branch=merge-codeforiati-and-publishingstats)](https://coveralls.io/github/IATI/IATI-Dashboard?branch=merge-codeforiati-and-publishingstats) +[![GPLv3 License](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://github.com/IATI/IATI-Dashboard/blob/merge-codeforiati-and-publishingstats/LICENSE.md) + +## Summary + + Product | IATI Dashboard +--- | --- +Description | A Django web application that provides key numbers, statistics and graphs about the data on the [IATI registry](http://iatiregistry.org/). This repository is currently a development version where the IATI Dashboard/Publishing Statistics and Code for IATI Analytics are being merged. +Website | Development only; see [IATI Dashboard](https://dashboard.iatistandard.org), and [Code for IATI Analytics](https://analytics.codeforiati.org) for live versions. +Related | Repositories for the [live version of the IATI Dashboard](https://github.com/IATI/IATI-Dashboard), [live version of the IATI Publishing Stats](https://github.com/IATI/IATI-Publishing-Statistics), and [Code for IATI Analytics](https://github.com/codeforIATI/analytics). 
Data is generated from [Code for IATI Stats](https://github.com/codeforIATI/IATI-Stats). +Documentation | Rest of README.md +Technical Issues | See https://github.com/IATI/IATI-Dashboard/issues +Support | https://iatistandard.org/en/guidance/get-support/ + +## High-level requirements + +* Python 3.12 +* Unix-based setup (e.g., Linux, MacOS X) with `bash`, `wget` and `curl` installed. +* Development files for libfreetype, libpng, libxml and libxslt e.g. ``libfreetype6-dev libpng-dev libxml2-dev libxslt-dev``. + +## Running the app locally +### Overview +The IATI Dashboard is mostly written in Python but also has some helper Bash scripts to collect the data that the dashboard uses. Top-level steps required to run the Dashboard are: + +1. Setup Python environment and install dependencies. +2. Fetch the data. +3. Build the static graphs and other data that will be served via the Dashboard. +4. Run the web server. + +Paths to different directories are set in `./dashboard/config.py`. + +### 1. Setup environment + +Assuming that this repository has been cloned and you are in the root directory of the repository. + +``` +# Setup and activate a virtual environment (recommended) - here we use virtualenv +virtualenv ve +source ve/bin/activate +``` + +Now install the dependencies. + +``` +pip install -r requirements.txt +``` + +### 2. Fetching the data + +Bash scripts are used to fetch the data that the Dashboard will present. They will store data in `./data` and `./stats-calculated`. + +``` +# Fetch the necessary calculated stats +./get_stats.sh + +# Fetch some extra data from github and github gists and other sources on the internet +./fetch_data.sh +``` + +### 3. Build static data and graphs and copy to static + +``` +mkdir out +cd dashboard +python make_plots.py +python make_csv.py +python speakers_kit.py +cp ../out/data static/ +cp ../img/aggregate static/ +cp ../img/publishers static/ +``` + +### 4. Run the webserver. 
+ +From `./dashboard/`: + +``` +DEBUG=True python manage.py runserver +``` + +The Dashboard will now be accessible from `localhost:8000/`. + + +## Development + +### Automated tests +There are some unit tests written using `pytest` and site testing using Django's own testing framework. + +Once the development dependencies have been installed the unit tests can be run with: + +``` +pytest +``` + +The Django site tests can be run from the `dashboard/` directory with: + +``` +python manage.py test -v 2 +``` + +### Calculating your own statistics + +The IATI Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above. This can also be calculated using [Code for IATI Stats](http://github.com/codeforIATI/IATI-Stats) where `stats-calculated` corresponds to the `gitout` directory generated by [`git.sh` in IATI-Stats](https://github.com/codeforIATI/IATI-Stats#running-for-every-commit-in-the-data-directory). + +Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the `stats-calculated/current directory` with the `out` directory produced by running the [loop, aggregate and invert commands individually](https://github.com/codeforIATI/IATI-Stats#getting-started), then regenerate graphs and CSV files as per the above. + +### Adding new dependencies + +If a change requires new dependencies then please add to `requirements.in` or `requirements_dev.in` as appropriate and recompile: + +``` +pip-compile requirements_dev.in +pip-compile requirements.in +``` + +### Linting + +Code linting is carried out using [Flake8](https://flake8.pycqa.org/en/latest/) and `setup.cfg` has the configuration. 
+ +## License + Copyright (C) 2013-2015 Ben Webb + Copyright (C) 2013-2014 David Carpenter + Copyright (C) 2021 Andy Lulham + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . diff --git a/README.rst b/README.rst deleted file mode 100644 index 71f21a1537..0000000000 --- a/README.rst +++ /dev/null @@ -1,134 +0,0 @@ -IATI Dashboard -============== - -.. image:: https://requires.io/github/IATI/IATI-Dashboard/requirements.svg?branch=master - :target: https://requires.io/github/IATI/IATI-Dashboard/requirements/?branch=master - :alt: Requirements Status -.. image:: https://img.shields.io/badge/license-GPLv3-blue.svg - :target: https://github.com/IATI/IATI-Dashboard/blob/master/GPL.md - -Introduction ------------- - -The IATI dashboard displays key numbers and graphs about the data on the `IATI registry `__. - -See the Dashboard in action at http://dashboard.iatistandard.org - -The Dashboard is in beta, all contents/urls/machine readable downloads are subject to change. - -This repository is the code for the Dashboard frontend. Stats are generated from the Registry by stats code in a separate repository - https://github.com/IATI/IATI-Stats - -Technology Overview -^^^^^^^^^^^^^^^^^^^ - -The dashboard mostly in Python, with some helper Bash scripts. - -Python scripts: - -* ``make_html.py`` contains is a Flask application that makes use of Frozen Flask to generate some static HTML. -* ``make_csv.py`` generates CSV files. 
-* ``plots.py`` generates static images of graphs using matplotlib. - -Bash helper scripts: - -* The main source of data for the Dashboard is stats generated by `IATI-Stats `_ (about the data on the IATI Registry). ``get_stats.sh`` can be used to fetch a recently calculated copy of these stats. (or see calculating your own stats section below) -* The Dashboard also uses various other data from online sources (including GitHub). These can be fetched using ``fetch_data.sh``. -* ``git.sh`` runs all the above commands, see Usage below. - -Installation -^^^^^^^^^^^^ - -Requirements: - -* Unix based setup (e.g. Linux, Mac OS X) with bash etc. -* wget and curl installed -* Python 2.7 -* Python dev library ``python-dev`` -* python-virtualenv (optional) -* Development files for libfreetype, libpng, libxml and libxslt e.g. ``libfreetype6-dev libpng-dev libxml2-dev libxslt-dev``. - - (alternatively, you may be able to install some of the python dependencies in - requirements.txt using your package manager) - - -To install: - -.. code-block:: bash - - ## Get the code - git clone https://github.com/IATI/IATI-Dashboard.git - cd IATI-Dashboard - - ## Set up a virtual environment (recommended) - # Create a virtual environment - virtualenv pyenv - # Activate the virtual environment - # (you need to this every time you open a new terminal session) - source pyenv/bin/activate - - ## Install python dependencies - ## Use pip as described below, or your distro's package manager to install - ## the dependcies in requirements.txt - # If you are running a less recent linux distro, you will need to install distribute - easy_install -U distribute - pip install -r requirements.txt - -Usage -^^^^^ - -The following steps are performed routinely on our `deployed Dashboard `__. (On our servers, the Dashboard is actually deployed using `this salt file `__. - -.. 
code-block:: bash - - # Fetch the necessary calculated stats - ./get_stats.sh - # Fetch some extra data from github and github gists - ./fetch_data.sh - - mkdir out - python plots.py - python make_csv.py - python make_html.py - -make_html.py will output a MissingURLGeneratorWarning. This is expected, as some of the URLs defined are for the live development server only (see below). - -The full list of steps for our deployment can be found in ``git.sh``. (The name of this is now a misnomer as the output is no longer a git repository - previously a commit was pushed to GitHub pages.) - -Development -^^^^^^^^^^^ - -For development, you can use the live Flask development server, instead of Frozen Flask. - -.. code-block:: bash - - python make_html.py --live - -Using the live development server is highly recommended, because it displays full bracktraces for 500 errors, whereas frozen flask does not. - -Calculating your own stats for the dashboard -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The Dashboard requires a `stats-calculated` directory, which can be downloaded using the `get_stats.sh` shell script as described above, or calculated yourself using http://github.com/IATI/IATI-Stats . `stats-calculated` corresponds to the `gitout` directory generated by `IATI-Stat's git.sh `__. - -Often you only want to regenerate the current stats, use `get_stats.sh` to download the pre-calculated historical stats and just replace the stats-calculated/current directory with the out directory produced by running the `loop, aggregate and invert commands individually `__. - -License -^^^^^^^ - -:: - - Copyright (C) 2013-2015 Ben Webb - Copyright (C) 2013-2014 David Carpenter - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . diff --git a/TODO b/TODO deleted file mode 100644 index 61aeaeb723..0000000000 --- a/TODO +++ /dev/null @@ -1,2 +0,0 @@ -http://localhost:5000/codelist/1/@xml:lang.html and http://localhost:5000/codelist/2/@xml:lang.html are showing the same thing -Values used columns on http://localhost:5000/codelists.html is much too skewed diff --git a/coverage.py b/coverage.py deleted file mode 100644 index fccaee5dfa..0000000000 --- a/coverage.py +++ /dev/null @@ -1,202 +0,0 @@ -# This file converts a range coverage data to variables which can be outputted on the coverage page -import csv -from data import get_publisher_stats -from data import get_registry_id_matches -from data import publisher_name -from data import publishers_ordered_by_title -from data import secondary_publishers - -def is_number(s): - """ Tests if a variable is a number. - Input: s - a variable - Return: True if v is a number - False if v is not a number - """ - try: - float(s) - return True - except ValueError: - return False - -def convert_to_int(x): - """ Converts a variable to an integer value, or 0 if it cannot be converted to an integer. 
- Input: x - a variable - Return: x as an integer, or zero if x is not a number - """ - if is_number(x): - return int(x) - else: - return 0 - - -def generate_row(publisher): - """Generate coverage table data for a given publisher - """ - - # Store the data for this publisher as new variables - publisher_stats = get_publisher_stats(publisher) - transactions_usd = publisher_stats['sum_transactions_by_type_by_year_usd'] - - # Create a list for publisher data, and populate it with basic data - row = {} - row['publisher'] = publisher - row['publisher_title'] = publisher_name[publisher] - row['no_data_flag_red'] = 0 - row['no_data_flag_amber'] = 0 - row['spend_data_error_reported_flag'] = 0 - row['sort_order'] = 0 - - - # Compute 2014 IATI spend - iati_2014_spend_total = 0 - - - if publisher in dfi_publishers: - # If this publisher is a DFI, then their 2014 spend total should be based on their - # commitment transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2014' in transactions_usd.get('2', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['2']['USD']['2014'] - - if '2014' in transactions_usd.get('C', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['C']['USD']['2014'] - - else: - # This is a non-DFI publisher - if '2014' in transactions_usd.get('3', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['3']['USD']['2014'] - - if '2014' in transactions_usd.get('D', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['D']['USD']['2014'] - - if '2014' in transactions_usd.get('4', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['4']['USD']['2014'] - - if '2014' in transactions_usd.get('E', {}).get('USD', {}): - iati_2014_spend_total += transactions_usd['E']['USD']['2014'] - - # Convert to millions USD - row['iati_spend_2014'] = round(float( iati_2014_spend_total / 1000000), 2) - - - # Compute 2015 IATI spend - iati_2015_spend_total = 0 - - if publisher in dfi_publishers: - # If 
this publisher is a DFI, then their 2015 spend total should be based on their - # commitment transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2015' in transactions_usd.get('2', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['2']['USD']['2015'] - - if '2015' in transactions_usd.get('C', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['C']['USD']['2015'] - - else: - # This is a non-DFI publisher - if '2015' in transactions_usd.get('3', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['3']['USD']['2015'] - - if '2015' in transactions_usd.get('D', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['D']['USD']['2015'] - - if '2015' in transactions_usd.get('4', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['4']['USD']['2015'] - - if '2015' in transactions_usd.get('E', {}).get('USD', {}): - iati_2015_spend_total += transactions_usd['E']['USD']['2015'] - - # Convert to millions USD - row['iati_spend_2015'] = round(float( iati_2015_spend_total / 1000000), 2) - - # Compute 2016 IATI spend - iati_2016_spend_total = 0 - - if publisher in dfi_publishers: - # If this publisher is a DFI, then their 2016 spend total should be based on their - # commitment transactions only. 
See https://github.com/IATI/IATI-Dashboard/issues/387 - if '2016' in transactions_usd.get('2', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['2']['USD']['2016'] - - if '2016' in transactions_usd.get('C', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['C']['USD']['2016'] - - else: - # This is a non-DFI publisher - if '2016' in transactions_usd.get('3', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['3']['USD']['2016'] - - if '2016' in transactions_usd.get('D', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['D']['USD']['2016'] - - if '2016' in transactions_usd.get('4', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['4']['USD']['2016'] - - if '2016' in transactions_usd.get('E', {}).get('USD', {}): - iati_2016_spend_total += transactions_usd['E']['USD']['2016'] - - # Convert to millions USD - row['iati_spend_2016'] = round(float( iati_2016_spend_total / 1000000), 2) - - - # Get reference data - # Get data from stats files. 
Set as empty stings if the IATI-Stats code did not find them in the reference data sheet - data_2014 = publisher_stats['reference_spend_data_usd'].get('2014', {'ref_spend': '', 'not_in_sheet': True}) - data_2015 = publisher_stats['reference_spend_data_usd'].get('2015', {'ref_spend': '', 'official_forecast': '', 'not_in_sheet': True}) - - # Compute reference data as $USDm - row['reference_spend_2014'] = round((float(data_2014['ref_spend']) / 1000000), 2) if is_number(data_2014['ref_spend']) else '-' - row['reference_spend_2015'] = round((float(data_2015['ref_spend']) / 1000000), 2) if is_number(data_2015['ref_spend']) else '-' - row['official_forecast_2015'] = round((float(data_2015['official_forecast']) / 1000000), 2) if is_number(data_2015['official_forecast']) else '-' - - - # Compute spend ratio score - # Compile a list of ratios for spend & reference data paired by year - spend_ratio_candidates = [(row['iati_spend_2014'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and is_number(row['reference_spend_2014']) else 0, - (row['iati_spend_2015'] / row['reference_spend_2015']) if (row['reference_spend_2015'] > 0) and is_number(row['reference_spend_2015']) else 0, - (row['iati_spend_2015'] / row['official_forecast_2015']) if (row['official_forecast_2015'] > 0) and is_number(row['official_forecast_2015']) else 0] - - # If there are no annual pairs, add the value of non-matching-year spend / reference data - if ((row['iati_spend_2014'] == 0 or row['reference_spend_2014'] == '-') and - (row['iati_spend_2015'] == 0 or row['reference_spend_2015'] == '-') and - (row['iati_spend_2015'] == 0 or row['official_forecast_2015'] == '-')): - spend_ratio_candidates.append((row['iati_spend_2015'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and is_number(row['reference_spend_2014']) else 0) - spend_ratio_candidates.append((row['iati_spend_2016'] / row['reference_spend_2014']) if (row['reference_spend_2014'] > 0) and 
is_number(row['reference_spend_2014']) else 0) - spend_ratio_candidates.append((row['iati_spend_2016'] / row['reference_spend_2015']) if (row['reference_spend_2015'] > 0) and is_number(row['reference_spend_2015']) else 0) - - - # Get the maximum value and convert to a percentage - row['spend_ratio'] = int(round(max(spend_ratio_candidates) * 100)) - - return row - - -def table(): - """Generate coverage table data for every publisher and return as a generator object - """ - - # Loop over each publisher - for publisher_title, publisher in publishers_ordered_by_title: - - # Store the data for this publisher as new variables - publisher_stats = get_publisher_stats(publisher) - - # Skip if all activities from this publisher are secondary reported - if publisher in secondary_publishers: - continue - - # Return a generator object - yield generate_row(publisher) - - -# Compile a list of Development finance institutions (DFIs) -with open('dfi_publishers.csv', 'r') as csv_file: - reader = csv.reader(csv_file, delimiter=',') - dfi_publishers = [] - for line in reader: - - # Update the publisher registry ID, if this publisher has since updated their registry ID - if line[1] in get_registry_id_matches().keys(): - line[1] = get_registry_id_matches()[line[1]] - - # Append publisher ID to the list of dfi publishers, if they are found in the list of publisher IDs - if line[1] in [x[1] for x in publishers_ordered_by_title]: - dfi_publishers.append(line[1]) diff --git a/dashboard/cache.py b/dashboard/cache.py new file mode 100644 index 0000000000..397e717cd6 --- /dev/null +++ b/dashboard/cache.py @@ -0,0 +1,39 @@ +import json +import os + +import django.core.exceptions +from django.conf import settings + +os.makedirs("cache", exist_ok=True) + + +# JSON cache for a function with no arguments +def json_cache(fname): + fname = os.path.join("cache", fname) + + def decorator(f): + def wrapper(): + # Check in memory cache first + if hasattr(f, "__cache"): + return f.__cache + is_json_file = 
os.path.isfile(fname) + if is_json_file: + with open(fname) as fp: + try: + res = json.load(fp) + except json.decoder.JSONDecodeError: + is_json_file = False + if not is_json_file: + res = list(f()) + try: + if settings.DASHBOARD_CREATE_CACHE_FILES: + with open(fname, "w") as fp: + json.dump(res, fp) + except django.core.exceptions.ImproperlyConfigured: + pass + f.__cache = res + return res + + return wrapper + + return decorator diff --git a/common.py b/dashboard/common.py similarity index 54% rename from common.py rename to dashboard/common.py index f139667215..162bb69ebe 100644 --- a/common.py +++ b/dashboard/common.py @@ -1,31 +1,32 @@ -# Script to define useful functions +"""Load IATI OrganisationType codelist into a global and provide function to get publisher type""" -import data import json +import data +import filepaths # Import organisation_type_codelist as a global, then delete when used to save memory -with open('data/IATI-Codelists-2/out/clv2/json/en/OrganisationType.json') as fh: +with open(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/OrganisationType.json")) as fh: organisation_type_codelist = json.load(fh) -organisation_type_dict = {c['code']:c['name'] for c in organisation_type_codelist['data']} +organisation_type_dict = {c["code"]: c["name"] for c in organisation_type_codelist["data"]} del organisation_type_codelist def get_publisher_type(publisher): - """Return a dictionary of publisher organisation information, based on what is stored - in CKAN for the given publisher registry ID. - Returns None if publisher is not found. + """Return a dictionary of publisher organisation information, based on what is stored + in CKAN for the given publisher registry ID. + Returns None if publisher is not found. 
""" - + # Check that the publisher is in the list of ckan_publishers if publisher not in data.ckan_publishers: return None # Get the code the organisation from CKAN data (this will be in line with the OrganisationType codelist) - organization_type_code = data.ckan_publishers[publisher]['result']['publisher_organization_type'] - - # Get the english language name of this organisation type, according to the codelist + organization_type_code = data.ckan_publishers[publisher]["result"]["publisher_organization_type"] + + # Get the english language name of this organisation type, according to the codelist organization_type_name = organisation_type_dict[organization_type_code] # Return a dictionary with code and name - return {'code': organization_type_code, 'name': organization_type_name} + return {"code": organization_type_code, "name": organization_type_name} diff --git a/dashboard/comprehensiveness.py b/dashboard/comprehensiveness.py new file mode 100644 index 0000000000..d740a45b9e --- /dev/null +++ b/dashboard/comprehensiveness.py @@ -0,0 +1,205 @@ +# This file converts raw comprehensiveness data to percentages, and calculates averages. + +from cache import json_cache +from data import get_publisher_stats, publisher_name, publishers_ordered_by_title +from ui.jinja2 import round_nicely + +columns = { + "summary": [ + # Format for elements within this list - and similar lists below ('core', 'financials', etc): + # slug, header, weighting when calculating average + ("core_average", "Core Average", 2), + ("financials_average", "Financials Average", 1), + ("valueadded_average", "Value Added Average", 1), + ( + "summary_average", + "Weighted Average", + 0, + ), # i.e. 
don't include the average within the calculation of the average + ], + "core": [ + ("version", "Version", 1), + ("reporting-org", "Reporting Organisation", 1), + ("iati-identifier", "IATI Identifier", 1), + ("participating-org", "Participating Organisation", 1), + ("title", "Title", 1), + ("description", "Description", 1), + ("activity-status", "Status", 1), + ("activity-date", "Activity Date", 1), + ("sector", "Sector", 1), + ("country_or_region", "Country or Region", 1), + ("core_average", "Average", 0), # i.e. don't include the average within the calculation of the average + ], + "financials": [ + ("transaction_commitment", "Transaction - Commitment", 1, "first_hierarchy_with_commitments"), + ("transaction_spend", "Transaction - Disbursement or Expenditure", 1, "bottom_hierarchy"), + ("transaction_traceability", "Transaction - Traceability", 1, "bottom_hierarchy"), + ("budget", "Budget", 1, "hierarchy_with_most_budgets"), + ("financials_average", "Average", 0), # i.e. don't include the average within the calculation of the average + ], + "valueadded": [ + ("contact-info", "Contacts", 1), + ("location", "Location Details", 1), + ("location_point_pos", "Geographic Coordinates", 1), + ("sector_dac", "DAC Sectors", 1), + ("capital-spend", "Capital Spend", 1), + ("document-link", "Activity Documents", 1), + ("aid_type", "Aid Type", 1), + ("recipient_language", "Recipient Language", 1), + ("result_indicator", "Result/ Indicator", 1), + ("valueadded_average", "Average", 0), # i.e. don't include the average within the calculation of the average + ], +} + +# Build dictionaries for all the column_headers and column_slugs defined above +column_headers = {tabname: [x[1] for x in values] for tabname, values in columns.items()} +column_slugs = {tabname: [x[0] for x in values] for tabname, values in columns.items()} + +# Build directory to lookup the hierarchy which should be used in the numerator +# e.g. {'activity-date': 'all', 'activity-status': 'all', [...] 
budget': 'hierarchy_with_most_budgets', [etc]} +column_base_lookup = { + col[0]: col[3] if len(col) > 3 else "all" + for col_group, col_components in columns.items() + for col in col_components +} + + +def denominator(key, stats): + """Return the appropriate denominator value for a given key. + Returns either the specifc demominator calculated, or a default denominator value. + """ + + # If stats not pased to this function, return zero + if not stats: + return 0 + + # If there is a specific denominator for the given key, return this + if key in stats["comprehensiveness_denominators"]: + return float(stats["comprehensiveness_denominators"][key]) + + # Otherwise, return the default denominator + else: + return float(stats["comprehensiveness_denominator_default"]) + + +def get_hierarchy_with_most_budgets(stats): + """Find the hierarchy which contains the greatest number of budgets. + Will only count hierarchies where the default denominator is greater than zero. + Input: + stats -- a JSONDir object of publisher stats + Returns: + Key of the hierarchy with greatest number of budgets, or None + """ + + try: + # Get the key with the largest number of budgets + budgets = max( + stats["by_hierarchy"], + key=( + lambda x: ( + stats["by_hierarchy"][x]["comprehensiveness"].get("budget", 0) + + stats["by_hierarchy"][x]["comprehensiveness"].get("budget_not_provided", 0) + if stats["by_hierarchy"][x]["comprehensiveness_denominator_default"] > 0 + else -1 + ) + ), + ) + return budgets + except KeyError: + # Return None if this publisher has no comprehensiveness data in any hierarchy - i.e. KeyError + return None + except ValueError: + # Some publishers have no data in 'by_hierarchy' at all - i.e. 
ValueError: max() arg is an empty sequence + return None + + +def get_first_hierarchy_with_commitments(stats): + """Return the number of the first hierarchy that contains at least 1 commitment + (according to the comprehensiveness counts) + Returns: + Number of first hierarchy with commitments or None if no commitments in any hierarchy + """ + hierarchies_with_commitments = { + x: y["comprehensiveness"]["transaction_commitment"] + for x, y in stats.get("by_hierarchy", {}).items() + if y["comprehensiveness"].get("transaction_commitment", 0) > 0 + } + return min(hierarchies_with_commitments) if len(hierarchies_with_commitments) else None + + +def generate_row(publisher): + """Generate comprehensiveness table data for a given publisher""" + + publisher_stats = get_publisher_stats(publisher) + + # Set an inital dictionary, which will later be populated further + row = {} + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] + + # Calculate percentages for publisher data populated with any data + for slug in column_slugs["core"] + column_slugs["financials"] + column_slugs["valueadded"]: + + # Set the stats base for calculating the numerator. 
This is based on the hierarchy set in the lookup + if column_base_lookup[slug] == "bottom_hierarchy": + publisher_base = publisher_stats.get("bottom_hierarchy", {}) + + elif column_base_lookup[slug] == "hierarchy_with_most_budgets": + publisher_base = publisher_stats["by_hierarchy"].get(get_hierarchy_with_most_budgets(publisher_stats), {}) + + elif column_base_lookup[slug] == "first_hierarchy_with_commitments": + if get_first_hierarchy_with_commitments(publisher_stats): + publisher_base = publisher_stats["by_hierarchy"].get( + get_first_hierarchy_with_commitments(publisher_stats), {} + ) + else: + publisher_base = publisher_stats.get("bottom_hierarchy", {}) + + else: + # Most common case will be column_base_lookup[slug] == 'all': + publisher_base = publisher_stats + + if slug == "budget": + budget_all = publisher_base.get("comprehensiveness", {}).get(slug, 0) + budget_not_provided_all = publisher_base.get("comprehensiveness", {}).get("budget_not_provided", 0) + row["flag"] = budget_not_provided_all > 0 + numerator_all = budget_all + budget_not_provided_all + budget_valid = publisher_base.get("comprehensiveness_with_validation", {}).get(slug, 0) + budget_not_provided_valid = publisher_base.get("comprehensiveness_with_validation", {}).get( + "budget_not_provided", 0 + ) + numerator_valid = budget_valid + budget_not_provided_valid + else: + numerator_all = publisher_base.get("comprehensiveness", {}).get(slug, 0) + numerator_valid = publisher_base.get("comprehensiveness_with_validation", {}).get(slug, 0) + + if denominator(slug, publisher_base) != 0: + # Populate the row with the %age + row[slug] = round_nicely(float(numerator_all) / denominator(slug, publisher_base) * 100) + row[slug + "_valid"] = round_nicely(float(numerator_valid) / denominator(slug, publisher_base) * 100) + + # Loop for averages + # Calculate the average for each grouping, and the overall 'summary' average + for page in ["core", "financials", "valueadded", "summary"]: + # Note that the summary must 
be last, so that it can use the average calculations from the other groupings + row[page + "_average"] = round_nicely( + sum((row.get(x[0]) or 0) * x[2] for x in columns[page]) / float(sum(x[2] for x in columns[page])) + ) + + row[page + "_average_valid"] = round_nicely( + sum((row.get(x[0] + "_valid") or 0) * x[2] for x in columns[page]) + / float(sum(x[2] for x in columns[page])) + ) + + return row + + +@json_cache("comprehensiveness.json") +def table(): + """Generate comprehensiveness table data for every publisher and return as a generator object""" + + # Loop over the data for each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Generate a row object + yield generate_row(publisher) diff --git a/dashboard/coverage.py b/dashboard/coverage.py new file mode 100644 index 0000000000..440067a60f --- /dev/null +++ b/dashboard/coverage.py @@ -0,0 +1,234 @@ +# This file converts a range coverage data to variables which can be outputted on the coverage page +import csv + +import filepaths +from data import ( + get_publisher_stats, + get_registry_id_matches, + publisher_name, + publishers_ordered_by_title, + secondary_publishers, +) + + +def is_number(s): + """Tests if a variable is a number. + Input: s - a variable + Return: True if v is a number + False if v is not a number + """ + try: + float(s) + return True + except ValueError: + return False + + +def convert_to_int(x): + """Converts a variable to an integer value, or 0 if it cannot be converted to an integer. 
+ Input: x - a variable + Return: x as an integer, or zero if x is not a number + """ + if is_number(x): + return int(x) + else: + return 0 + + +def generate_row(publisher): + """Generate coverage table data for a given publisher""" + + # Store the data for this publisher as new variables + publisher_stats = get_publisher_stats(publisher) + transactions_usd = publisher_stats["sum_transactions_by_type_by_year_usd"] + + # Create a list for publisher data, and populate it with basic data + row = {} + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] + row["no_data_flag_red"] = 0 + row["no_data_flag_amber"] = 0 + row["spend_data_error_reported_flag"] = 0 + row["sort_order"] = 0 + + # Compute 2014 IATI spend + iati_2014_spend_total = 0 + + if publisher in dfi_publishers: + # If this publisher is a DFI, then their 2014 spend total should be based on their + # commitment transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 + if "2014" in transactions_usd.get("2", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["2"]["USD"]["2014"] + + if "2014" in transactions_usd.get("C", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["C"]["USD"]["2014"] + + else: + # This is a non-DFI publisher + if "2014" in transactions_usd.get("3", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["3"]["USD"]["2014"] + + if "2014" in transactions_usd.get("D", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["D"]["USD"]["2014"] + + if "2014" in transactions_usd.get("4", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["4"]["USD"]["2014"] + + if "2014" in transactions_usd.get("E", {}).get("USD", {}): + iati_2014_spend_total += transactions_usd["E"]["USD"]["2014"] + + # Convert to millions USD + row["iati_spend_2014"] = round(float(iati_2014_spend_total / 1000000), 2) + + # Compute 2015 IATI spend + iati_2015_spend_total = 0 + + if publisher in dfi_publishers: + # If this 
publisher is a DFI, then their 2015 spend total should be based on their + # commitment transactions only. See https://github.com/IATI/IATI-Dashboard/issues/387 + if "2015" in transactions_usd.get("2", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["2"]["USD"]["2015"] + + if "2015" in transactions_usd.get("C", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["C"]["USD"]["2015"] + + else: + # This is a non-DFI publisher + if "2015" in transactions_usd.get("3", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["3"]["USD"]["2015"] + + if "2015" in transactions_usd.get("D", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["D"]["USD"]["2015"] + + if "2015" in transactions_usd.get("4", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["4"]["USD"]["2015"] + + if "2015" in transactions_usd.get("E", {}).get("USD", {}): + iati_2015_spend_total += transactions_usd["E"]["USD"]["2015"] + + # Convert to millions USD + row["iati_spend_2015"] = round(float(iati_2015_spend_total / 1000000), 2) + + # Compute 2016 IATI spend + iati_2016_spend_total = 0 + + if publisher in dfi_publishers: + # If this publisher is a DFI, then their 2016 spend total should be based on their + # commitment transactions only. 
See https://github.com/IATI/IATI-Dashboard/issues/387 + if "2016" in transactions_usd.get("2", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["2"]["USD"]["2016"] + + if "2016" in transactions_usd.get("C", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["C"]["USD"]["2016"] + + else: + # This is a non-DFI publisher + if "2016" in transactions_usd.get("3", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["3"]["USD"]["2016"] + + if "2016" in transactions_usd.get("D", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["D"]["USD"]["2016"] + + if "2016" in transactions_usd.get("4", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["4"]["USD"]["2016"] + + if "2016" in transactions_usd.get("E", {}).get("USD", {}): + iati_2016_spend_total += transactions_usd["E"]["USD"]["2016"] + + # Convert to millions USD + row["iati_spend_2016"] = round(float(iati_2016_spend_total / 1000000), 2) + + # Get reference data + # Get data from stats files. 
Set as empty stings if the IATI-Stats code did not find them in the reference data sheet + data_2014 = publisher_stats["reference_spend_data_usd"].get("2014", {"ref_spend": "", "not_in_sheet": True}) + data_2015 = publisher_stats["reference_spend_data_usd"].get( + "2015", {"ref_spend": "", "official_forecast": "", "not_in_sheet": True} + ) + + # Compute reference data as $USDm + row["reference_spend_2014"] = ( + round((float(data_2014["ref_spend"]) / 1000000), 2) if is_number(data_2014["ref_spend"]) else "-" + ) + row["reference_spend_2015"] = ( + round((float(data_2015["ref_spend"]) / 1000000), 2) if is_number(data_2015["ref_spend"]) else "-" + ) + row["official_forecast_2015"] = ( + round((float(data_2015["official_forecast"]) / 1000000), 2) + if is_number(data_2015["official_forecast"]) + else "-" + ) + + # Compute spend ratio score + # Compile a list of ratios for spend & reference data paired by year + spend_ratio_candidates = [ + ( + (row["iati_spend_2014"] / row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ), + ( + (row["iati_spend_2015"] / row["reference_spend_2015"]) + if (row["reference_spend_2015"] > 0) and is_number(row["reference_spend_2015"]) + else 0 + ), + ( + (row["iati_spend_2015"] / row["official_forecast_2015"]) + if (row["official_forecast_2015"] > 0) and is_number(row["official_forecast_2015"]) + else 0 + ), + ] + + # If there are no annual pairs, add the value of non-matching-year spend / reference data + if ( + (row["iati_spend_2014"] == 0 or row["reference_spend_2014"] == "-") + and (row["iati_spend_2015"] == 0 or row["reference_spend_2015"] == "-") + and (row["iati_spend_2015"] == 0 or row["official_forecast_2015"] == "-") + ): + spend_ratio_candidates.append( + (row["iati_spend_2015"] / row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ) + spend_ratio_candidates.append( + (row["iati_spend_2016"] 
/ row["reference_spend_2014"]) + if (row["reference_spend_2014"] > 0) and is_number(row["reference_spend_2014"]) + else 0 + ) + spend_ratio_candidates.append( + (row["iati_spend_2016"] / row["reference_spend_2015"]) + if (row["reference_spend_2015"] > 0) and is_number(row["reference_spend_2015"]) + else 0 + ) + + # Get the maximum value and convert to a percentage + row["spend_ratio"] = int(round(max(spend_ratio_candidates) * 100)) + + return row + + +def table(): + """Generate coverage table data for every publisher and return as a generator object""" + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + # Skip if all activities from this publisher are secondary reported + if publisher in secondary_publishers: + continue + + # Return a generator object + yield generate_row(publisher) + + +# Compile a list of Development finance institutions (DFIs) +with open(filepaths.join_base_path("dfi_publishers.csv"), "r") as csv_file: + reader = csv.reader(csv_file, delimiter=",") + dfi_publishers = [] + for line in reader: + + # Update the publisher registry ID, if this publisher has since updated their registry ID + if line[1] in get_registry_id_matches().keys(): + line[1] = get_registry_id_matches()[line[1]] + + # Append publisher ID to the list of dfi publishers, if they are found in the list of publisher IDs + if line[1] in [x[1] for x in publishers_ordered_by_title]: + dfi_publishers.append(line[1]) diff --git a/dashboard/create_caches.py b/dashboard/create_caches.py new file mode 100644 index 0000000000..fc402799fd --- /dev/null +++ b/dashboard/create_caches.py @@ -0,0 +1,16 @@ +from django.conf import settings + +import comprehensiveness +import forwardlooking +import humanitarian +import summary_stats +import timeliness + +settings.configure(DASHBOARD_CREATE_CACHE_FILES=True) + +timeliness.publisher_frequency() +timeliness.publisher_timelag() +forwardlooking.table() +comprehensiveness.table() +summary_stats.table() 
+humanitarian.table() diff --git a/dashboard/data.py b/dashboard/data.py new file mode 100644 index 0000000000..2f1ba23eb9 --- /dev/null +++ b/dashboard/data.py @@ -0,0 +1,344 @@ +import csv +import json +import os +import re +from collections import OrderedDict +from collections.abc import MutableMapping +from decimal import Decimal + +import xmlschema + +import filepaths + + +# Modified from: +# https://github.com/IATI/IATI-Stats/blob/1d20ed1e/stats/common/decorators.py#L5-L13 +def memoize(f): + def wrapper(self, key): + if not hasattr(self, "__cache"): + self.__cache = {} + if key in self.__cache: + return self.__cache[key] + res = f(self, key) + if type(res) is not JSONDir: + # don't cache JSONDirs + self.__cache[key] = res + return res + + return wrapper + + +PUBLISHER_LIST = None + + +class JSONDir(MutableMapping): + """Produces an object, to be used to access JSON-formatted publisher data and return + this as an ordered dictionary (with nested dictionaries, if appropriate). + Use of this class removes the need to load large amounts of data into memory. + """ + + def __init__(self, folder): + """Set the path of the folder being accessed as an attribute to an instance of + the object. + """ + self.folder = folder + + def __len__(self): + return len(self.keys()) + + def __delitem__(self, key): + try: + del self.folder[key] + except KeyError: + pass + + def __repr__(self): + return "{}, JSONDIR({})".format(super(JSONDir, self).__repr__(), self.__dict__) + + def __setitem__(self, key, value): + super(JSONDir, self).__setitem__(key, value) + + @memoize + def __getitem__(self, key): + """Define how variables are gathered from the raw JSON files and then parsed into + the OrderedDict that will be returned. 
+ + Note: + try-except should be used around file operations rather than checking before-hand + """ + + if os.path.exists(os.path.join(self.folder, key)): + # The data being sought is a directory + data = JSONDir(os.path.join(self.folder, key)) + elif os.path.exists(os.path.join(self.folder, key + ".json")): + # The data being sought is a json file + with open(os.path.join(self.folder, key + ".json")) as fp: + data = json.load(fp, object_pairs_hook=OrderedDict) + + # Deal with publishers who had an old registry ID + # If this publisher had at least one old ID in the past + if (self.get_publisher_name() in get_registry_id_matches().values()) and ("gitaggregate" in self.folder): + # Perform the merging + # Look over the set of changed registry IDs + for previous_id, current_id in get_registry_id_matches().items(): + folder = self.folder + previous_path = os.path.join(folder.replace(current_id, previous_id), key + ".json") + # If this publisher has had an old ID and there is data for it + if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): + # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher + with open(previous_path) as old_fp: + old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) + deep_merge(data, old_pub_data) + # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be an attempt to aggregate/average conflicting values? + else: + # No value found as either a folder or json file + raise KeyError(key) + + # Because these keys are used as filenames, they can never exceed 255 characters + if hasattr(data, "keys"): + for k in data.keys(): + if len(k) >= 255: + data.pop(k) + return data + + def keys(self): + """Method to return a list of keys that are contained within the data folder that + is being accessed within this instance. 
+ """ + return [x[:-5] if x.endswith(".json") else x for x in os.listdir(self.folder)] + + def __iter__(self): + """Custom iterable, to iterate over the keys that are contained within the data + folder that is being accessed within this instance. + """ + return iter(self.keys()) + + def get_publisher_name(self): + """Find the name of the publisher that this data relates to. + Note, this is a super hacky way to do this, prize available if a better way is found to do this! + """ + + global PUBLISHER_LIST + + if PUBLISHER_LIST is None: + PUBLISHER_LIST = JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).keys() + + # Get a list of the parts that are contained within this filepath + path = os.path.normpath(self.folder) + path_components = path.split(os.sep) + + # Loop over this list and return the publisher name if it is found within the historic list of publishers + for x in path_components: + if x in PUBLISHER_LIST: + return x + + # If got to the end of the loop and nothing found, this folder does not relate to a single publisher + return None + + +def get_publisher_stats(publisher, stats_type="aggregated"): + """Function to obtain current data for a given publisher. + Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher + is not found. 
+ """ + try: + return JSONDir(filepaths.join_stats_path("current/{0}-publisher/{1}".format(stats_type, publisher))) + except IOError: + return {} + + +def get_registry_id_matches(): + """Returns a dictionary of publishers who have modified their registry ID + Returns: Dictionary, where the key is the old registry ID, and the corresponding + value is the registry ID that data should be mapped to + """ + + # Load registry IDs for publishers who have changed their registry ID + with open(filepaths.join_base_path("registry_id_relationships.csv")) as f: + reader = csv.DictReader(f) + # Load this data into a dictonary + registry_matches = {row["previous_registry_id"]: row["current_registry_id"] for row in reader} + + return registry_matches + + +def deep_merge(obj1, obj2): + """Merges two OrderedDict objects with an unknown number of nested levels + Input: obj1 - OrderedDict to be used as the base object + Input: obj2 - OrderedDict to be merged into obj1 + Returns: Nothing, but obj1 will contain the full data + """ + + # Iterate through keys + for key in obj1.copy(): + # If this is value, we've hit the bottom, copy all of obj2 into obj1 + if type(obj1[key]) is not OrderedDict: + for key2 in obj2: + # If there exists a dict at that key, make sure it's not erased + if key2 in obj1: + if type(obj1[key2]) is not OrderedDict: + # You can change behavior here to determine + # How duplicate keys are handled + obj1[key2] = obj2[key2] + else: + obj1[key2] = obj2[key2] + + # If it's a dictionary we need to go deeper, by running this function recursively + else: + if key in obj2: + deep_merge(obj1[key], obj2[key]) + + +current_stats = { + "aggregated": JSONDir(filepaths.join_stats_path("current/aggregated")), + "aggregated_file": JSONDir(filepaths.join_stats_path("current/aggregated-file")), + "inverted_publisher": JSONDir(filepaths.join_stats_path("current/inverted-publisher")), + "inverted_file": JSONDir(filepaths.join_stats_path("current/inverted-file")), + 
"inverted_file_publisher": JSONDir(filepaths.join_stats_path("current/inverted-file-publisher")), + "download_errors": [], +} +ckan_publishers = JSONDir(filepaths.join_data_path("ckan_publishers")) +github_issues = JSONDir(filepaths.join_data_path("github/publishers")) +ckan = json.load(open(filepaths.join_stats_path("ckan.json")), object_pairs_hook=OrderedDict) +dataset_to_publisher_dict = { + dataset: publisher for publisher, publisher_dict in ckan.items() for dataset in publisher_dict.keys() +} +with open(filepaths.join_data_path("downloads/errors")) as fp: + for line in fp: + if line != ".\n": + current_stats["download_errors"].append(line.strip("\n").split(" ", 3)) + +sources105 = [ + filepaths.join_data_path("schemas/1.05/iati-activities-schema.xsd"), + filepaths.join_data_path("schemas/1.05/iati-organisations-schema.xsd"), +] +sources203 = [ + filepaths.join_data_path("schemas/2.03/iati-activities-schema.xsd"), + filepaths.join_data_path("schemas/2.03/iati-organisations-schema.xsd"), +] +schema105 = xmlschema.XMLSchema(sources105) +schema203 = xmlschema.XMLSchema(sources203) + + +def is_valid_element_or_attribute(path: str) -> bool: + """Checks to see if a path is in either the 2.03 or 1.05 schema + + Parameters + ---------- + path : str + Path to the element or attribute to find. + + Returns + ------- + bool + True if the path is a known element or attribute. 
+ """ + if isinstance(schema203.find(path), (xmlschema.XsdElement, xmlschema.XsdAttribute)): + return True + if isinstance(schema105.find(path), (xmlschema.XsdElement, xmlschema.XsdAttribute)): + return True + + return False + + +def transform_codelist_mapping_keys(codelist_mapping): + # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 + codelist_mapping = {k: v for k, v in codelist_mapping.items() if not k.startswith("//iati-organisation")} + codelist_mapping = {re.sub(r"^\/\/iati-activity", "./", k): v for k, v in codelist_mapping.items()} + codelist_mapping = {re.sub(r"^\/\/", ".//", k): v for k, v in codelist_mapping.items()} + return codelist_mapping + + +def create_codelist_mapping(major_version): + codelist_mapping = {} + for x in json.load( + open(filepaths.join_data_path("IATI-Codelists-{}/out/clv2/mapping.json".format(major_version))) + ): + if "condition" in x: + pref, attr = x["path"].rsplit("/", 1) + path = "{0}[{1}]/{2}".format(pref, x["condition"], attr) + else: + path = x["path"] + codelist_mapping[path] = x["codelist"] + return transform_codelist_mapping_keys(codelist_mapping) + + +MAJOR_VERSIONS = ["2", "1"] + +codelist_mapping = {v: create_codelist_mapping(v) for v in MAJOR_VERSIONS} + +# Create a big dictionary of all codelist values by version and codelist name +codelist_sets = { + major_version: { + cname: set(c["code"] for c in codelist["data"]) + for cname, codelist in JSONDir( + filepaths.join_data_path("IATI-Codelists-{}/out/clv2/json/en/".format(major_version)) + ).items() + } + for major_version in MAJOR_VERSIONS +} + +codelist_lookup = { + major_version: { + cname: {c["code"]: c for c in codelist["data"]} + for cname, codelist in JSONDir( + filepaths.join_data_path("IATI-Codelists-{}/out/clv2/json/en/".format(major_version)) + ).items() + } + for major_version in MAJOR_VERSIONS +} + +# Simple look up to map publisher id to a publishers given name (title) 
+publisher_name = { + publisher: publisher_json["result"]["title"] for publisher, publisher_json in ckan_publishers.items() +} +# Create a list of tuples ordered by publisher given name titles - this allows us to display lists of publishers in alphabetical order +publishers_ordered_by_title = [ + (publisher_name[publisher], publisher) + for publisher in current_stats["inverted_publisher"]["activities"] + if publisher in publisher_name +] +publishers_ordered_by_title.sort(key=lambda x: (x[0]).lower()) + +# List of publishers who report all their activities as a secondary publisher +secondary_publishers = [ + publisher + for publisher, stats in JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).items() + if int(stats["activities"]) == len(stats["activities_secondary_reported"]) and int(stats["activities"]) > 0 +] + +try: + dac2012 = {x[0]: Decimal(x[1].replace(",", "")) for x in csv.reader(open(filepaths.join_data_path("dac2012.csv")))} +except IOError: + dac2012 = {} + + +def make_slugs(keys): + out = {"by_slug": {}, "by_i": {}} + for i, key in enumerate(keys): + slug = re.sub( + r"[^a-zA-Z0-9:@\-_]", + "", + re.sub(r"{[^}]*}", "", key.replace("{http://www.w3.org/XML/1998/namespace}", "xml:").replace("/", "_")), + ).strip("_") + while slug in out["by_slug"]: + slug += "_" + out["by_slug"][slug] = i + out["by_i"][i] = slug + return out + + +slugs = { + "codelist": { + major_version: ( + make_slugs(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].keys()) + if major_version in current_stats["inverted_publisher"]["codelist_values_by_major_version"] + else make_slugs([]) + ) + for major_version in MAJOR_VERSIONS + }, + "element": make_slugs(current_stats["inverted_publisher"]["elements"].keys()), + "org_type": make_slugs( + ["accountable_org", "extending_org", "funding_org", "implementing_org", "provider_org", "receiver_org"] + ), +} diff --git a/dashboard/filepaths.py b/dashboard/filepaths.py new file mode 100644 
index 0000000000..b558a80845 --- /dev/null +++ b/dashboard/filepaths.py @@ -0,0 +1,29 @@ +"""These functions join path fragments to make directories to different input or output files + +Note: eventually these functions will probably become redundant or refactored into +a different module, but for now this helps in refactoring the code. +""" + +import os.path + +import ui.settings + + +def join_stats_path(p: str) -> str: + """Make a path to a file or directory within the downloaded stats directory""" + return os.path.join(ui.settings.DASHBOARD_STATS_DIRECTORY, p) + + +def join_data_path(p: str) -> str: + """Make a path to a file or directory within the downloaded data directory""" + return os.path.join(ui.settings.DASHBOARD_DATA_DIRECTORY, p) + + +def join_base_path(p: str) -> str: + """Make a path to a file or directory relative to the base of the dashboard directory""" + return os.path.join(ui.settings.DASHBOARD_BASE_DIRECTORY, p) + + +def join_out_path(p: str) -> str: + """Make a path to a file or directory relative to the base of the out directory""" + return os.path.join(ui.settings.DASHBOARD_OUT_DIRECTORY, p) diff --git a/dashboard/forwardlooking.py b/dashboard/forwardlooking.py new file mode 100644 index 0000000000..20a076cb25 --- /dev/null +++ b/dashboard/forwardlooking.py @@ -0,0 +1,113 @@ +# This file converts raw forward-looking data to percentages + +import datetime + +from cache import json_cache +from data import get_publisher_stats, publisher_name, publishers_ordered_by_title +from ui.jinja2 import round_nicely + +# Create a variable with the current year as an integer +this_year = datetime.date.today().year + +# Create a list containing three years: the current year and two following +years = list(map(str, range(this_year, this_year + 3))) + +# Set column groupings, to be displayed in the user output +column_headers = [ + "Current activities at the start of each year", + "Current activities with budgets for each year", + "Percentage of current 
activities with budgets", +] + + +def generate_row(publisher): + """Generate forward-looking table data for a given publisher""" + + # Store the data for this publisher as a new variable + publisher_stats = get_publisher_stats(publisher) + + # Create a list for publisher data, and populate it with basic data + row = {} + row["publisher"] = publisher + row["publisher_title"] = publisher_name[publisher] + row["year_columns"] = [{}, {}, {}] + row["budget_not_provided"] = False + # Work with hierarchies + by_hierarchy = publisher_stats["by_hierarchy"] + hierarchies_with_nonzero_budgets = [ + h + for h, stats in by_hierarchy.items() + if not all(x == 0 for x in stats["forwardlooking_activities_with_budgets"].values()) + ] + + # Flag if budgets on current activities are reported at more than one hierarchy + row["flag"] = len(hierarchies_with_nonzero_budgets) > 1 + + hierarchies_with_budget_not_provided = [ + h + for h, stats in by_hierarchy.items() + if not all(x == 0 for x in stats["forwardlooking_activities_with_budget_not_provided"].values()) + ] + + # Loop over each of the three years (i.e. 
this year and the following two years) to generate the statistics for the table + for year in years: + if len(hierarchies_with_budget_not_provided) > 0: + row["budget_not_provided"] = True + # If 'forwardlooking_activities_current' and 'forwardlooking_activities_with_budgets' or 'forwardlooking_activities_with_budget_not_provided' are in the bottom hierarchy + if "forwardlooking_activities_current" in publisher_stats["bottom_hierarchy"] and ( + "forwardlooking_activities_with_budgets" in publisher_stats["bottom_hierarchy"] + or "forwardlooking_activities_with_budget_not_provided" in publisher_stats["bottom_hierarchy"] + ): + if len(hierarchies_with_nonzero_budgets) != 1: + # If budgets are at more than one hierarchy (or no hierarchies), just use activities at all hierarchies + row["year_columns"][0][year] = publisher_stats["forwardlooking_activities_current"].get(year) or 0 + row["year_columns"][1][year] = publisher_stats["forwardlooking_activities_with_budgets"].get(year) or 0 + if row["budget_not_provided"]: + row["year_columns"][1][year] += ( + publisher_stats["forwardlooking_activities_with_budget_not_provided"].get(year) or 0 + ) + else: + # Else, use the hierarchy which they are reported at + row["year_columns"][0][year] = ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]]["forwardlooking_activities_current"].get(year) + or 0 + ) + row["year_columns"][1][year] = ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]]["forwardlooking_activities_with_budgets"].get( + year + ) + or 0 + ) + if row["budget_not_provided"]: + row["year_columns"][1][year] += ( + by_hierarchy[hierarchies_with_nonzero_budgets[0]][ + "forwardlooking_activities_with_budget_not_provided" + ].get(year) + or 0 + ) + + if not int(row["year_columns"][0][year]): + row["year_columns"][2][year] = "-" + else: + row["year_columns"][2][year] = round_nicely( + float(row["year_columns"][1][year]) / float(row["year_columns"][0][year]) * 100 + ) + else: + # Else if either 
'forwardlooking_activities_current' or 'forwardlooking_activities_with_budgets' are not in the bottom hierarchy, set data zero + # This should only occur if a publisher has 0 activities + row["year_columns"][0][year] = 0 + row["year_columns"][1][year] = 0 + row["year_columns"][2][year] = "-" + + return row + + +@json_cache("forwardlooking.json") +def table(): + """Generate forward-looking table data for every publisher and return as a generator object""" + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Return a generator object + yield generate_row(publisher) diff --git a/dashboard/humanitarian.py b/dashboard/humanitarian.py new file mode 100644 index 0000000000..ac10c7b736 --- /dev/null +++ b/dashboard/humanitarian.py @@ -0,0 +1,69 @@ +# This file builds a table to show humanitarian reporting for each publisher + +import common +from cache import json_cache +from data import get_publisher_stats, publishers_ordered_by_title + +# Set column groupings, to be displayed in the user output +columns = [ + # slug, header + ("publisher_type", "Publisher Type"), + ("num_activities", "Number of Activities"), + ("publishing_humanitarian", "Publishing Humanitarian?"), + ("humanitarian_attrib", "Using Humanitarian Attribute?"), + ("appeal_emergency", "Appeal or Emergency Details"), + ("clusters", "Clusters"), + ("average", "Average"), +] + + +@json_cache("humanitarian.json") +def table(): + """Generate data for the humanitarian table""" + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + # Store the data for this publisher as a new variable + publisher_stats = get_publisher_stats(publisher) + + # Create a list for publisher data, and populate it with basic data + row = {} + row["publisher"] = publisher + row["publisher_title"] = publisher_title + row["publisher_type"] = common.get_publisher_type(publisher)["name"] + + # Get data from IATI-Stats output + row["num_activities"] = 
publisher_stats.get("humanitarian", {}).get("is_humanitarian", "0") + row["publishing_humanitarian"] = 100 if int(row["num_activities"]) > 0 else 0 + + # Calculate percentage of all humanitarian activities that are defined using the @humanitarian attribute + row["humanitarian_attrib"] = ( + publisher_stats.get("humanitarian", {}).get("is_humanitarian_by_attrib", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 + ) * 100 + + # Calculate percentage of all humanitarian activities that use the element to define an appeal or emergency + row["appeal_emergency"] = ( + publisher_stats.get("humanitarian", {}).get("contains_humanitarian_scope", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 + ) * 100 + + # Calculate percentage of all humanitarian activities that use clusters + row["clusters"] = ( + publisher_stats.get("humanitarian", {}).get("uses_humanitarian_clusters_vocab", "0") + / float(row["num_activities"]) + if int(row["num_activities"]) > 0 + else 0.0 + ) * 100 + + # Calculate the mean average + row["average"] = ( + row["publishing_humanitarian"] + row["humanitarian_attrib"] + row["appeal_emergency"] + row["clusters"] + ) / float(4) + + # Return a generator object + yield row diff --git a/dashboard/make_csv.py b/dashboard/make_csv.py new file mode 100644 index 0000000000..e1abbba0aa --- /dev/null +++ b/dashboard/make_csv.py @@ -0,0 +1,278 @@ +"""Generates CSV files from data in the 'stats-calculated' folder and using additional logic +""" + +import argparse +import csv +import logging +import os +import sys + +import comprehensiveness +import data +import filepaths +import forwardlooking +import humanitarian +import summary_stats +import timeliness +from ui.jinja2 import round_nicely + +logger = logging.getLogger(__name__) + + +def publisher_dicts(): + publisher_name = { + publisher: publisher_json["result"]["title"] for publisher, publisher_json in data.ckan_publishers.items() + } + for 
publisher, activities in data.current_stats["inverted_publisher"]["activities"].items(): + if publisher not in data.ckan_publishers: + continue + publisher_stats = data.get_publisher_stats(publisher) + yield { + "Publisher Name": publisher_name[publisher], + "Publisher Registry Id": publisher, + "Activities": activities, + "Organisations": publisher_stats["organisations"], + "Files": publisher_stats["activity_files"] + publisher_stats["organisation_files"], + "Activity Files": publisher_stats["activity_files"], + "Organisation Files": publisher_stats["organisation_files"], + "Total File Size": publisher_stats["file_size"], + "Reporting Org on Registry": data.ckan_publishers[publisher]["result"]["publisher_iati_id"], + "Reporting Orgs in Data (count)": len(publisher_stats["reporting_orgs"]), + "Reporting Orgs in Data": ";".join(publisher_stats["reporting_orgs"]), + "Hierarchies (count)": len(publisher_stats["hierarchies"]), + "Hierarchies": ";".join(publisher_stats["hierarchies"]), + } + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", action="store_true", help="Output progress to stdout") + args = parser.parse_args() + + if args.verbose: + logger.setLevel(logging.INFO) + logger.addHandler(logging.StreamHandler(sys.stdout)) + + logger.info("Generating CSV files") + os.makedirs(filepaths.join_out_path("data/csv"), exist_ok=True) + + logger.info("Generating publishers.csv") + with open(filepaths.join_out_path("data/csv/publishers.csv"), "w") as fp: + writer = csv.DictWriter( + fp, + [ + "Publisher Name", + "Publisher Registry Id", + "Activities", + "Organisations", + "Files", + "Activity Files", + "Organisation Files", + "Total File Size", + "Reporting Org on Registry", + "Reporting Orgs in Data (count)", + "Reporting Orgs in Data", + "Hierarchies (count)", + "Hierarchies", + ], + ) + writer.writeheader() + for d in publisher_dicts(): + writer.writerow(d) + + logger.info("Generating elements.csv") + publishers = 
list(data.current_stats["inverted_publisher"]["activities"].keys()) + with open(filepaths.join_out_path("data/csv/elements.csv"), "w") as fp: + writer = csv.DictWriter(fp, ["Element"] + publishers) + writer.writeheader() + for element, publisher_dict in data.current_stats["inverted_publisher"]["elements"].items(): + publisher_dict["Element"] = element + writer.writerow(publisher_dict) + + logger.info("Generating elements_total.csv") + with open(filepaths.join_out_path("data/csv/elements_total.csv"), "w") as fp: + writer = csv.DictWriter(fp, ["Element"] + publishers) + writer.writeheader() + for element, publisher_dict in data.current_stats["inverted_publisher"]["elements_total"].items(): + publisher_dict["Element"] = element + writer.writerow(publisher_dict) + + logger.info("Generating registry.csv") + with open(filepaths.join_out_path("data/csv/registry.csv"), "w") as fp: + keys = [ + "name", + "title", + "publisher_frequency", + "publisher_frequency_select", + "publisher_implementation_schedule", + "publisher_ui", + "publisher_field_exclusions", + "publisher_contact", + "image_url", + "display_name", + "publisher_iati_id", + "publisher_units", + "publisher_record_exclusions", + "publisher_data_quality", + "publisher_country", + "publisher_description", + "publisher_refs", + "publisher_thresholds", "publisher_agencies", + "publisher_constraints", + "publisher_organization_type", + "publisher_segmentation", + "license_id", + "state", + "publisher_timeliness", + ] + writer = csv.DictWriter(fp, keys) + writer.writeheader() + for publisher_json in data.ckan_publishers.values(): + writer.writerow({x: publisher_json["result"].get(x) or 0 for x in keys}) + + logger.info("Generating timeliness_frequency.csv") + previous_months = timeliness.previous_months_reversed + with open(filepaths.join_out_path("data/csv/timeliness_frequency.csv"), "w") as fp: + writer = csv.writer(fp) + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + previous_months + ["Frequency",
"First published"] + ) + for ( + publisher, + publisher_title, + per_month, + assessment, + hft, + first_published_band, + ) in timeliness.publisher_frequency_sorted(): + writer.writerow( + [publisher_title, publisher] + + [per_month.get(x) or 0 for x in previous_months] + + [assessment, first_published_band] + ) + + logger.info("Generating timeliness_timelag.csv") + with open(filepaths.join_out_path("data/csv/timeliness_timelag.csv"), "w") as fp: + writer = csv.writer(fp) + writer.writerow(["Publisher Name", "Publisher Registry Id"] + previous_months + ["Time lag"]) + for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted(): + writer.writerow( + [publisher_title, publisher] + [per_month.get(x) or 0 for x in previous_months] + [assessment] + ) + + logger.info("Generating forwardlooking.csv") + with open(filepaths.join_out_path("data/csv/forwardlooking.csv"), "w") as fp: + writer = csv.writer(fp) + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [ + "{} ({})".format(header, year) + for header in forwardlooking.column_headers + for year in forwardlooking.years + ] + ) + for row in forwardlooking.table(): + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [ + round_nicely(year_column[year]) + for year_column in row["year_columns"] + for year in forwardlooking.years + ] + ) + + for tab in comprehensiveness.columns.keys(): + logger.info("Generating comprehensiveness_{}.csv".format(tab)) + with open(filepaths.join_out_path("data/csv/comprehensiveness_{}.csv".format(tab)), "w") as fp: + writer = csv.writer(fp) + if tab == "financials": + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [x + " (with valid data)" for x in comprehensiveness.column_headers[tab]] + + [x + " (with any data)" for x in comprehensiveness.column_headers[tab]] + + ["Using budget-not-provided"] + ) + for row in comprehensiveness.table(): + writer.writerow( + [row["publisher_title"], row["publisher"]] + + 
[ + round_nicely(row[slug + "_valid"]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [ + round_nicely(row[slug]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + ["Yes" if row["flag"] else "-"] + ) + else: + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + + [x + " (with valid data)" for x in comprehensiveness.column_headers[tab]] + + [x + " (with any data)" for x in comprehensiveness.column_headers[tab]] + ) + for row in comprehensiveness.table(): + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [ + round_nicely(row[slug + "_valid"]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + + [ + round_nicely(row[slug]) if slug in row else "-" + for slug in comprehensiveness.column_slugs[tab] + ] + ) + + logger.info("Generating summary_stats.csv") + with open(filepaths.join_out_path("data/csv/summary_stats.csv"), "w") as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow( + ["Publisher Name", "Publisher Registry Id"] + [header for slug, header in summary_stats.columns] + ) + for row in summary_stats.table(): + # Write each row + writer.writerow( + [row["publisher_title"], row["publisher"]] + + [ + row[slug] if header == "Publisher Type" else round_nicely(row[slug]) + for slug, header in summary_stats.columns + ] + ) + + logger.info("Generating humanitarian.csv") + with open(filepaths.join_out_path("data/csv/humanitarian.csv"), "w") as fp: + writer = csv.writer(fp) + # Add column headers + writer.writerow( + [ + "Publisher Name", + "Publisher Registry Id", + "Publisher Type", + "Number of Activities", + "Publishing Humanitarian", + "Using Humanitarian Attribute", + "Appeal or Emergency Details", + "Clusters", + "Humanitarian Score", + ] + ) + for row in humanitarian.table(): + writer.writerow( + [ + row["publisher_title"], + row["publisher"], + row["publisher_type"], + row["num_activities"], + 
round_nicely(row["publishing_humanitarian"]), + round_nicely(row["humanitarian_attrib"]), + round_nicely(row["appeal_emergency"]), + round_nicely(row["clusters"]), + round_nicely(row["average"]), + ] + ) + + +if __name__ == "__main__": + main() diff --git a/dashboard/make_html.py b/dashboard/make_html.py new file mode 100644 index 0000000000..ebe9d89c7e --- /dev/null +++ b/dashboard/make_html.py @@ -0,0 +1,458 @@ +# Script to generate static HTML pages +# This uses Jinja templating to render the HTML templates in the 'templates' folder +# Data is based on the files in the 'stats-calculated' folder, and extra logic in other files in this repository + +import argparse +import json +import re +import subprocess +from collections import defaultdict +from datetime import UTC, datetime + +import licenses +from dateutil import parser +from flask import Flask, Response, abort, render_template, send_from_directory + +import comprehensiveness +import forwardlooking +import humanitarian + +# import coverage +import summary_stats +import text +import timeliness +from data import ( + MAJOR_VERSIONS, + ckan, + ckan_publishers, + codelist_lookup, + codelist_mapping, + codelist_sets, + current_stats, + dataset_to_publisher_dict, + get_publisher_stats, + github_issues, + is_valid_element, + metadata, + publisher_name, + publishers_ordered_by_title, + slugs, +) +from vars import expected_versions + +app = Flask(__name__, static_url_path="") + + +def dictinvert(d): + inv = defaultdict(list) + for k, v in d.items(): + inv[v].append(k) + return inv + + +def nested_dictinvert(d): + inv = defaultdict(lambda: defaultdict(int)) + for k, v in d.items(): + for k2, v2 in v.items(): + inv[k2][k] += v2 + return inv + + +def dataset_to_publisher(dataset_slug): + """Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher + slug e.g. 
dfid""" + return dataset_to_publisher_dict.get(dataset_slug, "") + + +def firstint(s): + if s[0].startswith("<"): + return 0 + m = re.search(r"\d+", s[0]) + return int(m.group(0)) + + +def round_nicely(val, ndigits=2): + """Round a float, but remove the trailing .0 from integers that python insists on""" + if int(val) == float(val): + return int(val) + return round(float(val), ndigits) + + +def xpath_to_url(path): + path = path.strip("./") + # remove conditions + path = re.sub(r"\[[^]]+\]", "", path) + if path.startswith("iati-activity"): + url = "http://iatistandard.org/activity-standard/iati-activities/" + path.split("@")[0] + elif path.startswith("iati-organisation"): + url = "http://iatistandard.org/organisation-standard/iati-organisations/" + path.split("@")[0] + else: + url = "http://iatistandard.org/activity-standard/iati-activities/iati-activity/" + path.split("@")[0] + if "@" in path: + url += "#attributes" + return url + + +def registration_agency(orgid): + for code in codelist_sets["2"]["OrganisationRegistrationAgency"]: + if orgid.startswith(code): + return code + + +def get_codelist_values(codelist_values_for_element): + """Return a list of unique values present within a one-level nested dictionary. 
+ Envisaged usage is to gather the codelist values used by each publisher, as in + stats/current/inverted-publisher/codelist_values_by_major_version.json + Input: Set of codelist values for a given element (listed by publisher), for example: + current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] + """ + return list(set([y for x in codelist_values_for_element.items() for y in list(x[1].keys())])) + + +# Store data processing times +date_time_data_obj = parser.parse(metadata["created_at"]) + +# Custom Jinja filters +app.jinja_env.filters["xpath_to_url"] = xpath_to_url +app.jinja_env.filters["url_to_filename"] = lambda x: x.rstrip("/").split("/")[-1] +app.jinja_env.filters["has_future_transactions"] = timeliness.has_future_transactions +app.jinja_env.filters["round_nicely"] = round_nicely + +# Custom Jinja globals - NOTE: codeforIATI stats URLs have not been +# changed. +app.jinja_env.globals["dataset_to_publisher"] = dataset_to_publisher +app.jinja_env.globals["url"] = lambda x: "/" if x == "index.html" else x +app.jinja_env.globals["datetime_generated"] = lambda: datetime.now(UTC).strftime("%-d %B %Y (at %H:%M %Z)") +app.jinja_env.globals["datetime_data"] = date_time_data_obj.strftime("%-d %B %Y (at %H:%M %Z)") +app.jinja_env.globals["commit_hash"] = ( + subprocess.run("git show --format=%H --no-patch".split(), capture_output=True).stdout.decode().strip() +) +app.jinja_env.globals["stats_commit_hash"] = ( + subprocess.run("git -C stats-calculated show --format=%H --no-patch".split(), capture_output=True) + .stdout.decode() + .strip() +) +app.jinja_env.globals["stats_url"] = "https://stats.codeforiati.org" +app.jinja_env.globals["stats_gh_url"] = ( + "https://github.com/codeforIATI/IATI-Stats-public/tree/" + app.jinja_env.globals["stats_commit_hash"] +) +app.jinja_env.globals["sorted"] = sorted +app.jinja_env.globals["enumerate"] = enumerate +app.jinja_env.globals["top_titles"] = text.top_titles 
+app.jinja_env.globals["page_titles"] = text.page_titles +app.jinja_env.globals["short_page_titles"] = text.short_page_titles +app.jinja_env.globals["page_leads"] = text.page_leads +app.jinja_env.globals["page_sub_leads"] = text.page_sub_leads +app.jinja_env.globals["top_navigation"] = text.top_navigation +app.jinja_env.globals["navigation"] = text.navigation +app.jinja_env.globals["navigation_reverse"] = {page: k for k, pages in text.navigation.items() for page in pages} +app.jinja_env.globals["navigation_reverse"].update({k: k for k in text.navigation}) +app.jinja_env.globals["current_stats"] = current_stats +app.jinja_env.globals["ckan"] = ckan +app.jinja_env.globals["ckan_publishers"] = ckan_publishers +app.jinja_env.globals["github_issues"] = github_issues +app.jinja_env.globals["publisher_name"] = publisher_name +app.jinja_env.globals["publishers_ordered_by_title"] = publishers_ordered_by_title +app.jinja_env.globals["get_publisher_stats"] = get_publisher_stats +app.jinja_env.globals["set"] = set +app.jinja_env.globals["firstint"] = firstint +app.jinja_env.globals["expected_versions"] = expected_versions +app.jinja_env.globals["current_year"] = datetime.now(UTC).year +# Following variables set in coverage branch but not in master +# app.jinja_env.globals['float'] = float +# app.jinja_env.globals['dac2012'] = dac2012 +app.jinja_env.globals["MAJOR_VERSIONS"] = MAJOR_VERSIONS + +app.jinja_env.globals["slugs"] = slugs +app.jinja_env.globals["codelist_mapping"] = codelist_mapping +app.jinja_env.globals["codelist_sets"] = codelist_sets +app.jinja_env.globals["codelist_lookup"] = codelist_lookup +app.jinja_env.globals["get_codelist_values"] = get_codelist_values +app.jinja_env.globals["is_valid_element"] = is_valid_element + +basic_page_names = [ + "headlines", + "data_quality", + "exploring_data", + "publishers", + "publishing_stats", + "timeliness", + "timeliness_timelag", + "forwardlooking", + "comprehensiveness", + "comprehensiveness_core", + 
"comprehensiveness_financials", + "comprehensiveness_valueadded", + # 'coverage', + "summary_stats", + "humanitarian", + "files", + "activities", + "download", + "xml", + "validation", + "versions", + "organisation", + "identifiers", + "reporting_orgs", + "elements", + "codelists", + "booleans", + "dates", + "traceability", + "org_ids", + "faq", +] + + +@app.route("/<page_name>.html") +def basic_page(page_name): + if page_name in basic_page_names: + kwargs = {} + if page_name.startswith("timeliness"): + kwargs["timeliness"] = timeliness + parent_page_name = "timeliness" + elif page_name.startswith("forwardlooking"): + kwargs["forwardlooking"] = forwardlooking + parent_page_name = "forwardlooking" + elif page_name.startswith("comprehensiveness"): + kwargs["comprehensiveness"] = comprehensiveness + parent_page_name = "comprehensiveness" + elif page_name.startswith("coverage"): + # kwargs['coverage'] = coverage + parent_page_name = "coverage" + elif page_name.startswith("summary_stats"): + kwargs["summary_stats"] = summary_stats + parent_page_name = "summary_stats" + elif page_name.startswith("humanitarian"): + kwargs["humanitarian"] = humanitarian + parent_page_name = "humanitarian" + else: + parent_page_name = page_name + return render_template(page_name + ".html", page=parent_page_name, **kwargs) + else: + abort(404) + + +@app.route("/data/download_errors.json") +def download_errors_json(): + return Response(json.dumps(current_stats["download_errors"], indent=2), mimetype="application/json") + + +@app.route("/") +def homepage(): + return render_template("index.html", page="index") + + +app.add_url_rule("/licenses.html", "licenses", licenses.main) +app.add_url_rule("/license/<license>.html", "licenses_individual_license", licenses.individual_license) + + +@app.route("/publisher/<publisher>.html") +def publisher(publisher): + publisher_stats = get_publisher_stats(publisher) + try: + budget_table = [ + { + "year": "Total", + "count_total": sum(sum(x.values()) for x in 
publisher_stats["count_budgets_by_type_by_year"].values()), + "sum_total": { + currency: sum(sums.values()) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["1"].values()) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["2"].values()) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + ] + [ + { + "year": year, + "count_total": sum( + x[year] for x in publisher_stats["count_budgets_by_type_by_year"].values() if year in x + ), + "sum_total": { + currency: sums.get(year) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + publisher_stats["count_budgets_by_type_by_year"]["1"].get(year) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + publisher_stats["count_budgets_by_type_by_year"]["2"].get(year) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + for year in 
sorted( + set(sum((list(x.keys()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), [])) + ) + ] + failure_count = len(current_stats["inverted_file_publisher"][publisher]["validation"].get("fail", {})) + except KeyError: + abort(404) + return render_template( + "publisher.html", + publisher=publisher, + publisher_stats=publisher_stats, + failure_count=failure_count, + publisher_inverted=get_publisher_stats(publisher, "inverted-file"), + publisher_licenses=licenses.licenses_for_publisher(publisher), + budget_table=budget_table, + ) + + +@app.route("/codelist//.html") +def codelist(major_version, slug): + i = slugs["codelist"][major_version]["by_slug"][slug] + element = list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version])[i] + values = nested_dictinvert( + list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].values())[i] + ) + return render_template( + "codelist.html", + element=element, + values=values, + reverse_codelist_mapping={ + major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() + }, + major_version=major_version, + page="codelists", + ) + + +@app.route("/element/.html") +def element(slug): + i = slugs["element"]["by_slug"][slug] + element = list(current_stats["inverted_publisher"]["elements"])[i] + publishers = list(current_stats["inverted_publisher"]["elements"].values())[i] + return render_template( + "element.html", + element=element, + publishers=publishers, + element_or_attribute="attribute" if "@" in element else "element", + page="elements", + ) + + +@app.route("/org_type/.html") +def org_type(slug): + assert slug in slugs["org_type"]["by_slug"] + return render_template("org_type.html", slug=slug, page="org_ids") + + +@app.route("/registration_agencies.html") +def registration_agencies(): + registration_agencies = defaultdict(int) + registration_agencies_publishers = defaultdict(list) + nonmatching = [] + for orgid, 
publishers in current_stats["inverted_publisher"]["reporting_orgs"].items(): + reg_ag = registration_agency(orgid) + if reg_ag: + registration_agencies[reg_ag] += 1 + registration_agencies_publishers[reg_ag] += list(publishers) + else: + nonmatching.append((orgid, publishers)) + return render_template( + "registration_agencies.html", + page="registration_agencies", + registration_agencies=registration_agencies, + registration_agencies_publishers=registration_agencies_publishers, + nonmatching=nonmatching, + ) + + +@app.route( + '/' +) +def serve_images_development(filename): + """Serve static images through the development server (--live)""" + return send_from_directory("static/", filename) + + +@app.route('/') +def serve_css_development(filename): + """Serve static css through the development server (--live)""" + return send_from_directory("static/", filename) + + +@app.route("/favicon.ico") +def favicon_root(): + """Serve favicon from img folder when requested from root""" + return send_from_directory("static/img", "favicon.ico") + + +@app.route("/.csv") +def csv_development(name): + return send_from_directory("out", name + ".csv") + + +@app.route("/publisher_imgs/.png") +def image_development_publisher(image): + return send_from_directory("out/publisher_imgs", image + ".png") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--live", action="store_true", help="Run a development server") + args = parser.parse_args() + if args.live: + app.debug = True + app.run() + else: + from flask_frozen import Freezer + + app.config["FREEZER_DESTINATION"] = "out" + app.config["FREEZER_REMOVE_EXTRA_FILES"] = False + app.config["FREEZER_IGNORE_404_NOT_FOUND"] = True + app.debug = False # Comment to turn off debugging + app.testing = True # Comment to turn off debugging + freezer = Freezer(app) + + @freezer.register_generator + def url_generator(): + for page_name in basic_page_names: + yield "basic_page", {"page_name": page_name} + for 
publisher in current_stats["inverted_publisher"]["activities"].keys(): + yield "publisher", {"publisher": publisher} + for slug in slugs["element"]["by_slug"]: + yield "element", {"slug": slug} + for major_version, codelist_slugs in slugs["codelist"].items(): + for slug in codelist_slugs["by_slug"]: + yield "codelist", {"slug": slug, "major_version": major_version} + for slug in slugs["org_type"]["by_slug"]: + yield "org_type", {"slug": slug} + for license in set(licenses.licenses): + yield "licenses_individual_license", {"license": license} + + freezer.freeze() diff --git a/dashboard/make_plots.py b/dashboard/make_plots.py new file mode 100644 index 0000000000..a8e3e98f83 --- /dev/null +++ b/dashboard/make_plots.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python +""" Generates static images of stats graphs using matplotlib. +""" + +import argparse +import csv +import datetime +import logging +import os # noqa: F401 +from collections import defaultdict + +import matplotlib as mpl +import numpy as np # noqa: F401 +from tqdm import tqdm + +import common +import data +import filepaths +from vars import expected_versions # noqa: F401 + +mpl.use("Agg") +import matplotlib.dates as mdates # noqa: E402 +import matplotlib.pyplot as plt # noqa: E402 + +logger = logging.getLogger(__name__) + + +class AugmentedJSONDir(data.JSONDir): + def __init__(self, folder, failed_downloads, gitaggregate_publisher): + super().__init__(folder) + self.failed_downloads = failed_downloads + self.gitaggregate_publisher = gitaggregate_publisher + + def __getitem__(self, key): + if key == "failed_downloads": + return dict((row[0], row[1]) for row in self.failed_downloads) + elif key == "publisher_types": + out = defaultdict(lambda: defaultdict(int)) + for publisher, publisher_data in self.gitaggregate_publisher.items(): + if publisher in data.ckan_publishers: + organization_type = common.get_publisher_type(publisher)["name"] + for datestring, count in publisher_data["activities"].items(): + 
out[datestring][organization_type] += 1 + else: + logger.debug("Getting by publisher_type unmatched publisher <{}>".format(publisher)) + return out + elif key == "activities_per_publisher_type": + out = defaultdict(lambda: defaultdict(int)) + for publisher, publisher_data in self.gitaggregate_publisher.items(): + if publisher in data.ckan_publishers: + organization_type = common.get_publisher_type(publisher)["name"] + for datestring, count in publisher_data["activities"].items(): + out[datestring][organization_type] += count + else: + logger.debug("Getting by activities_per_publisher_type unmatched publisher <{}>".format(publisher)) + return out + else: + return super(AugmentedJSONDir, self).__getitem__(key) + + +def make_plot(stat_path, git_stats, img_prefix=""): + if type(stat_path) is tuple: + stat_name = stat_path[0] + else: + stat_name = stat_path + + stat_dict = git_stats.get(stat_name) + if not stat_dict: + return + items = sorted(stat_dict.items()) + x_values = [datetime.date(int(x[0:4]), int(x[5:7]), int(x[8:10])) for x, y in items] + if type(stat_path) is tuple: + y_values = [dict((k, v) for k, v in y.items() if stat_path[1](k)) for x, y in items] + else: + y_values = [float(y) for x, y in items] + + # years = mdates.YearLocator() # every year + # months = mdates.MonthLocator() # every month + datefmt = mdates.DateFormatter("%Y-%m-%d") + + fig, ax = plt.subplots() + ax.set_prop_cycle("color", ["b", "g", "r", "c", "m", "y", "k", "#00ff00", "#fc5ab8", "#af31f2"]) + fig_legend = plt.figure() + dpi = 96 + fig.set_size_inches(600.0 / dpi, 600.0 / dpi) + + if type(y_values[0]) is dict: + keys = set([tm for y in y_values for tm in y.keys()]) + plots = {} + for key in keys: + (plots[key],) = ax.plot(x_values, [y.get(key) or 0 for y in y_values]) + if stat_name in ["publisher_types", "activities_per_publisher_type"]: + # Sort by the most recent value for the key + sorted_items = sorted(plots.items(), key=lambda x: y_values[-1][x[0]], reverse=True) + 
fig_legend.legend([x[1] for x in sorted_items], [x[0] for x in sorted_items], loc="center", ncol=1) + fig_legend.set_size_inches(600.0 / dpi, 300.0 / dpi) + else: + fig_legend.legend(plots.values(), plots.keys(), loc="center", ncol=4) + fig_legend.set_size_inches(600.0 / dpi, 100.0 / dpi) + fig_legend.savefig(filepaths.join_out_path("{0}{1}{2}_legend.png".format(img_prefix, stat_name, stat_path[2]))) + else: + keys = None + ax.plot(x_values, y_values) + + # format the ticks + # ax.xaxis.set_major_locator(years) + ax.xaxis.set_major_formatter(datefmt) + # ax.xaxis.set_minor_locator(months) + + # datemin = datetime.date(r.date.min().year, 1, 1) + # datemax = datetime.date(r.date.max().year+1, 1, 1) + # ax.set_xlim(datemin, datemax) + + # format the coords message box + # def price(x): return '$%1.2f'%x + # ax.format_ydata = price + ax.xaxis_date() + ax.format_xdata = mdates.DateFormatter("%Y-%m-%d") + ax.grid(True) + + # rotates and right aligns the x labels, and moves the bottom of the + # axes up to make room for them + fig.autofmt_xdate() + + ax.ticklabel_format(axis="y", style="plain", useOffset=False) + + fig.savefig( + filepaths.join_out_path( + "{0}{1}{2}.png".format(img_prefix, stat_name, stat_path[2] if type(stat_path) is tuple else "") + ), + dpi=dpi, + ) + plt.close("all") + + fn = filepaths.join_out_path("{0}{1}.csv".format(img_prefix, stat_name)) + with open(fn, "w") as fp: + writer = csv.writer(fp) + if keys: + sorted_keys = sorted(list(keys)) + writer.writerow(["date"] + sorted_keys) + else: + writer.writerow(["date", "value"]) + for k, v in items: + if keys: + writer.writerow([k] + [v.get(key) for key in sorted_keys]) + else: + writer.writerow([k, v]) + del writer + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", action="store_true", help="Generate images verbosely to stdout") + args = parser.parse_args() + + # Load data required for loading stats. 
+ failed_downloads = csv.reader(open(filepaths.join_data_path("downloads/history.csv"))) + gitaggregate_publisher = data.JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")) + + # Generate plots for aggregated stats for all data. + logger.info("Generating plots for all aggregated data") + git_stats = AugmentedJSONDir( + filepaths.join_stats_path("gitaggregate-dated"), failed_downloads, gitaggregate_publisher + ) + os.makedirs(filepaths.join_out_path("img/aggregate"), exist_ok=True) + + _paths = [ + "activities", + "publishers", + "activity_files", + "organisation_files", + "file_size", + "failed_downloads", + "invalidxml", + "nonstandardroots", + "unique_identifiers", + ("validation", lambda x: x == "fail", ""), + ("publishers_validation", lambda x: x == "fail", ""), + ("publisher_has_org_file", lambda x: x == "no", ""), + ("versions", lambda x: x in expected_versions, "_expected"), + ("versions", lambda x: x not in expected_versions, "_other"), + ("publishers_per_version", lambda x: x in expected_versions, "_expected"), + ("publishers_per_version", lambda x: x not in expected_versions, "_other"), + ("file_size_bins", lambda x: True, ""), + ("publisher_types", lambda x: True, ""), + ("activities_per_publisher_type", lambda x: True, ""), + ] + with tqdm(total=len(_paths)) as pbar: + if args.verbose: + pbar.set_description("Generate aggregate plots") + for stat_path in _paths: + if args.verbose: + pbar.update() + make_plot(stat_path, git_stats, img_prefix="img/aggregate/") + + # Delete git_stats variable to save memory + del git_stats + + # Generate plots for each publisher. 
+ logger.info("Generating plots for all publishers") + git_stats_publishers = AugmentedJSONDir( + filepaths.join_stats_path("gitaggregate-publisher-dated/"), failed_downloads, gitaggregate_publisher + ) + os.makedirs(filepaths.join_out_path("img/publishers"), exist_ok=True) + + with tqdm(total=len(git_stats_publishers)) as pbar: + if args.verbose: + pbar.set_description("Generate plots for all publishers") + for publisher, git_stats_publisher in git_stats_publishers.items(): + if args.verbose: + pbar.update() + for stat_path in [ + "activities", + "activity_files", + "organisation_files", + "file_size", + "invalidxml", + "nonstandardroots", + "publisher_unique_identifiers", + ("validation", lambda x: x == "fail", ""), + ("versions", lambda x: True, ""), + ]: + make_plot(stat_path, git_stats_publisher, img_prefix="img/publishers/{0}_".format(publisher)) + + +if __name__ == "__main__": + main() diff --git a/dashboard/manage.py b/dashboard/manage.py new file mode 100755 index 0000000000..f0f848ecc1 --- /dev/null +++ b/dashboard/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" 
+ ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/dashboard/speakers_kit.py b/dashboard/speakers_kit.py new file mode 100644 index 0000000000..7529252ce5 --- /dev/null +++ b/dashboard/speakers_kit.py @@ -0,0 +1,109 @@ +import csv +import json +from collections import defaultdict +from itertools import zip_longest + +import data +import filepaths + + +def codelist_dict(codelist_path): + codelist_json = json.load(open(codelist_path)) + return {c["code"]: c["name"] for c in codelist_json["data"]} + + +organisation_type_dict = codelist_dict( + filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/OrganisationType.json") +) +country_dict = codelist_dict(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/Country.json")) +region_dict = codelist_dict(filepaths.join_data_path("IATI-Codelists-2/out/clv2/json/en/Region.json")) + +aggregated_publisher = data.JSONDir(filepaths.join_stats_path("current/aggregated-publisher/")) + +activities_by = defaultdict(lambda: defaultdict(int)) +publishers_by = defaultdict(lambda: defaultdict(int)) + +for publisher, publisher_data in aggregated_publisher.items(): + if publisher in data.ckan_publishers: + organization_type = data.ckan_publishers[publisher]["result"]["publisher_organization_type"] + # activities_by['type'][organisation_type_dict[organization_type]] += publisher_data['activities'] + publishers_by["type"][organisation_type_dict[organization_type]] += 1 + + publisher_country_code = data.ckan_publishers[publisher]["result"]["publisher_country"] + if publisher_country_code in country_dict or publisher_country_code in region_dict: + publishers_by["country"][ + country_dict.get(publisher_country_code) or region_dict.get(publisher_country_code) + ] += 1 + else: + print("Unrecognised registry publisher_country code: ", publisher_country_code) + activity_countries = publisher_data["codelist_values"].get(".//recipient-country/@code") + if activity_countries: + for 
code, count in activity_countries.items(): + if code and code in country_dict: + activities_by["country"][country_dict.get(code)] += count + activity_regions = publisher_data["codelist_values"].get(".//recipient-region/@code") + if activity_regions: + for code, count in activity_regions.items(): + if code and code in region_dict: + activities_by["region"][region_dict.get(code)] += count + else: + print("Publisher not matched:", publisher) + +fieldnames = [ + "publisher_type", + "publishers_by_type", + "", + "publisher_country", + "publishers_by_country", + "", + "date", + "publishers_quarterly", + "", + "activity_country", + "activities_by_country", + "", + "activity_region", + "activities_by_region", +] + +publishers_quarterly = [] +publishers_by_date = json.load(open(filepaths.join_stats_path("gitaggregate-dated/publishers.json"))) +for date, publishers in sorted(publishers_by_date.items()): + if (date[8:10] == "30" and date[5:7] in ["06", "09"]) or (date[8:10] == "31" and date[5:7] in ["03", "12"]): + publishers_quarterly.append((date, publishers)) + + +def sort_second(x): + return sorted(x, key=lambda y: y[1], reverse=True) + + +with open(filepaths.join_out_path("speakers_kit.csv"), "w") as fp: + writer = csv.DictWriter(fp, fieldnames) + writer.writeheader() + for ( + publishers_by_type, + publishers_by_country, + publishers_quarterly_, + activities_by_country, + activities_by_region, + ) in zip_longest( + sort_second(publishers_by["type"].items()), + sort_second(publishers_by["country"].items()), + publishers_quarterly, + sort_second(activities_by["country"].items()), + sort_second(activities_by["region"].items()), + ): + writer.writerow( + { + "publisher_type": publishers_by_type[0] if publishers_by_type else "", + "publishers_by_type": publishers_by_type[1] if publishers_by_type else "", + "publisher_country": publishers_by_country[0] if publishers_by_country else "", + "publishers_by_country": publishers_by_country[1] if publishers_by_country else "", + 
"date": publishers_quarterly_[0] if publishers_quarterly_ else "", + "publishers_quarterly": publishers_quarterly_[1] if publishers_quarterly_ else "", + "activity_country": activities_by_country[0] if activities_by_country else "", + "activities_by_country": activities_by_country[1] if activities_by_country else "", + "activity_region": activities_by_region[0] if activities_by_region else "", + "activities_by_region": activities_by_region[1] if activities_by_region else "", + } + ) diff --git a/dashboard/static/.nojekyll b/dashboard/static/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dashboard/static/img/favicon-16x16.png b/dashboard/static/img/favicon-16x16.png new file mode 100644 index 0000000000..dab68d1b0f Binary files /dev/null and b/dashboard/static/img/favicon-16x16.png differ diff --git a/dashboard/static/img/favicon-32x32.png b/dashboard/static/img/favicon-32x32.png new file mode 100644 index 0000000000..605e8f2e88 Binary files /dev/null and b/dashboard/static/img/favicon-32x32.png differ diff --git a/dashboard/static/img/favicon.ico b/dashboard/static/img/favicon.ico new file mode 100644 index 0000000000..40b5194de1 Binary files /dev/null and b/dashboard/static/img/favicon.ico differ diff --git a/static/img/tablesorter-icons.gif b/dashboard/static/img/tablesorter-icons.gif similarity index 100% rename from static/img/tablesorter-icons.gif rename to dashboard/static/img/tablesorter-icons.gif diff --git a/dashboard/static/style.css b/dashboard/static/style.css new file mode 100644 index 0000000000..231cfb2f4b --- /dev/null +++ b/dashboard/static/style.css @@ -0,0 +1,111 @@ +html { + scroll-behavior: smooth; +} + +em { + font-style: italic; +} + +.big-number { + font-size: 100px; +} + +.title-number { + font-weight: bold; + float: left; + width: 20%; +} + +.title-text { + float: left; +} + +.panel-heading { + clear: both; +} + +.break { + word-break:break-all; +} + +.panel-body h4, .panel-body h5 { + margin-top: 24px; +} + 
+.panel-body h4:first-of-type, .panel-body h5:first-of-type { + margin-top: 10px; +} + +.table > tbody > tr > td, +.table > tbody > tr > th, +.table > tfoot > tr > td, +.table > tfoot > tr > th, +.table > thead > tr > td, +.table > thead > tr > th { + vertical-align: middle; +} + +.tablesorter-default .tablesorter-header { + background-image: url(data:image/gif;base64,R0lGODlhFQAJAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAkAAAIXjI+AywnaYnhUMoqt3gZXPmVg94yJVQAAOw==); + background-position: center left; + background-repeat: no-repeat; + cursor: pointer; +} + +.tablesorter-default thead .headerSortUp, +.tablesorter-default thead .tablesorter-headerSortUp, +.tablesorter-default thead .tablesorter-headerAsc { + background-image: url(data:image/gif;base64,R0lGODlhFQAEAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAQAAAINjI8Bya2wnINUMopZAQA7); + border-bottom: #000 2px solid; +} + +.tablesorter-default thead .headerSortDown, +.tablesorter-default thead .tablesorter-headerSortDown, +.tablesorter-default thead .tablesorter-headerDesc { + background-image: url(data:image/gif;base64,R0lGODlhFQAEAIAAACMtMP///yH5BAEAAAEALAAAAAAVAAQAAAINjB+gC+jP2ptn0WskLQA7); + border-bottom: #000 2px solid; +} + +.tablesorter-header-inner { + margin-left:10px; +} + +.tablesorter-headerRow { + background-color:white; +} + +html,body { + height:100%; +} + +#wrap { + min-height:100%; + height:auto; + margin:0 auto -60px; + padding:0 0 60px; +} + +#footer { + background-color:#f5f5f5; + margin-top: 30px +} + +.container .text-muted { + margin: 20px 0; +} + +.navbar-default { + margin-bottom: 0px; +} + +.navbar-brand { + padding: 10px; +} + +.navbar-brand img { + height: 30px; +} + +a.popover-html:focus { + outline: none; +} diff --git a/dashboard/summary_stats.py b/dashboard/summary_stats.py new file mode 100644 index 0000000000..9c099c064c --- /dev/null +++ b/dashboard/summary_stats.py @@ -0,0 +1,120 @@ +# This file converts a range of transparency data to percentages + +import common +import comprehensiveness +import 
forwardlooking +import timeliness +from cache import json_cache +from data import get_publisher_stats, publishers_ordered_by_title, secondary_publishers +from ui.jinja2 import round_nicely + +# Set column groupings, to be displayed in the user output +columns = [ + # slug, header + ("publisher_type", "Publisher Type"), + ("timeliness", "Timeliness"), + ("forwardlooking", "Forward looking"), + ("comprehensiveness", "Comprehensiveness"), + ("score", "Score"), +] + + +def is_number(s): + """@todo Document this function""" + try: + float(s) + return True + except ValueError: + return False + + +def convert_to_float(x): + """@todo Document this function""" + if is_number(x): + return float(x) + else: + return 0 + + +@json_cache("summary_stats.json") +def table(): + """Generate data for the publisher forward-looking table""" + + # Store timeliness data in variable + timeliness_frequency_data = timeliness.publisher_frequency_dict() + timeliness_timelag_data = timeliness.publisher_timelag_dict() + + # Loop over each publisher + for publisher_title, publisher in publishers_ordered_by_title: + + # Store the data for this publisher as a new variable + get_publisher_stats(publisher) + + # Skip if all activities from this publisher are secondary reported + if publisher in secondary_publishers: + continue + + # Create a list for publisher data, and populate it with basic data + row = {} + row["publisher"] = publisher + row["publisher_title"] = publisher_title + row["publisher_type"] = common.get_publisher_type(publisher)["name"] + + # Compute timeliness statistic + # Assign frequency score + # Get initial frequency assessment, or use empty set in the case where the publisher is not found + frequency_assessment_data = timeliness_frequency_data.get(publisher, ()) + frequency_assessment = None if len(frequency_assessment_data) < 4 else frequency_assessment_data[3] + if frequency_assessment == "Monthly": + frequency_score = 4 + elif frequency_assessment == "Quarterly": + 
frequency_score = 3 + elif frequency_assessment == "Six-Monthly": + frequency_score = 2 + elif frequency_assessment == "Annual": + frequency_score = 1 + else: # frequency_assessment == 'Less than Annual' or something else! + frequency_score = 0 + + # Assign timelag score + # Get initial timelag assessment, or use empty set in the case where the publisher is not found + timelag_assessment_data = timeliness_timelag_data.get(publisher, ()) + timelag_assessment = None if len(timelag_assessment_data) < 4 else timelag_assessment_data[3] + if timelag_assessment == "One month": + timelag_score = 4 + elif timelag_assessment == "A quarter": + timelag_score = 3 + elif timelag_assessment == "Six months": + timelag_score = 2 + elif timelag_assessment == "One year": + timelag_score = 1 + else: # timelag_assessment == 'More than one year' or something else! + timelag_score = 0 + + # Compute the percentage + row["timeliness"] = round_nicely((float(frequency_score + timelag_score) / 8) * 100) + + # Compute forward-looking statistic + # Get the forward-looking data for this publisher + publisher_forwardlooking_data = forwardlooking.generate_row(publisher) + + # Convert the data for this publishers 'Percentage of current activities with budgets' fields into integers + numbers = [int(x) for x in publisher_forwardlooking_data["year_columns"][2].values() if is_number(x)] + + # Compute and store the mean average for these fields + row["forwardlooking"] = round_nicely( + sum(int(round(y)) for y in numbers) / len(publisher_forwardlooking_data["year_columns"][2]) + ) + + # Compute comprehensiveness statistic + # Get the comprehensiveness data for this publisher + publisher_comprehensiveness_data = comprehensiveness.generate_row(publisher) + + # Set the comprehensiveness value to be the summary average for valid data + row["comprehensiveness"] = convert_to_float(publisher_comprehensiveness_data["summary_average_valid"]) + + # Compute score + row["score"] = 
round_nicely(float(row["timeliness"] + row["forwardlooking"] + row["comprehensiveness"]) / 3) + + # Return a generator object + yield row diff --git a/dashboard/templates/_partials/boxes.html b/dashboard/templates/_partials/boxes.html new file mode 100644 index 0000000000..7132de6ed9 --- /dev/null +++ b/dashboard/templates/_partials/boxes.html @@ -0,0 +1,30 @@ +{% macro box(title, number, image, json, legend, folderextra, description) %} +
+
+
+

+ {% set title_id = title.replace(' ', '-').lower() %} + {% if number is number %}{{ "{:,}".format(number) }}{% else %}{{ number }}{% endif %} + {{ title }} +

+ {% if json %} + {% if folderextra %} + (J) + {% else %} + (J) + {% endif %} + {% endif %} +
+
+
+ {% if description %} +

{{ description|safe }}

+ {% endif %} + + {% if legend %} + + {% endif %} +
+
+
+{% endmacro %} diff --git a/dashboard/templates/_partials/org_id_table_cells.html b/dashboard/templates/_partials/org_id_table_cells.html new file mode 100644 index 0000000000..e572263206 --- /dev/null +++ b/dashboard/templates/_partials/org_id_table_cells.html @@ -0,0 +1,12 @@ +{{ '{:,}'.format(transaction_stats.total_orgs or 0) }} +{{ '{:,}'.format(transaction_stats.total_refs or 0) }} +{{ '{:,}'.format(transaction_stats.total_full_refs or 0) }} +{% set self_refs = (transaction_stats.total_full_refs or 0) - (transaction_stats.total_notself_refs or 0) %} +{{ '{:,}'.format(self_refs) }} +{% set total_org_elements_excluding_self_refs = (transaction_stats.total_orgs or 0) - self_refs %} +{{ '{:,}'.format(total_org_elements_excluding_self_refs) }} +{% set total_refs_excluding_self_refs = (transaction_stats.total_refs or 0) - self_refs %} +{{ '{:,}'.format(total_refs_excluding_self_refs) }} +{{ '{:,}'.format(transaction_stats.total_notself_refs or 0) }} +{{ '{:,}'.format(transaction_stats.total_valid_refs or 0) }} +{{ (((transaction_stats.total_valid_refs or 0) / total_org_elements_excluding_self_refs * 100) if total_org_elements_excluding_self_refs else 0) | round_nicely }} diff --git a/dashboard/templates/_partials/org_id_table_header.html b/dashboard/templates/_partials/org_id_table_header.html new file mode 100644 index 0000000000..a65d3c4856 --- /dev/null +++ b/dashboard/templates/_partials/org_id_table_header.html @@ -0,0 +1,16 @@ + Total + Self Refs + Excluding Self Refs + {% if extra_column %} + + {% endif %} + + + Org Elements + Refs + Non-Empty Refs + Org Elements + Refs + Non-Empty Refs + Valid Refs + Percentage of Org Elements with Valid Refs diff --git a/dashboard/templates/_partials/tablesorter_instructions.html b/dashboard/templates/_partials/tablesorter_instructions.html new file mode 100644 index 0000000000..aacea903f5 --- /dev/null +++ b/dashboard/templates/_partials/tablesorter_instructions.html @@ -0,0 +1 @@ +

Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary, etc.) sorting by the desired column(s).

\ No newline at end of file diff --git a/dashboard/templates/activities.html b/dashboard/templates/activities.html new file mode 100644 index 0000000000..b462c6104c --- /dev/null +++ b/dashboard/templates/activities.html @@ -0,0 +1,14 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
+ {{ boxes.box('Total activities', current_stats.aggregated.activities, 'img/aggregate/activities.png', 'activities.json', + description='Total count of activities across all publishers, over time. + Note: this includes activities with duplicate iati-identifier') }} + {{ boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'img/aggregate/unique_identifiers.png', 'unique_identifiers.json', + description='Total count of unique activities across all publishers, over time + Note: this excludes counts of duplicate iati-identifier') }} + {{ boxes.box('Activities by publisher type', '', 'img/aggregate/activities_per_publisher_type.png', None, 'img/aggregate/activities_per_publisher_type_legend.png', + description='Count of all activities, aggregated by publisher type, over time.') }} +
+{% endblock %} diff --git a/dashboard/templates/base.html b/dashboard/templates/base.html new file mode 100644 index 0000000000..18613ee4fb --- /dev/null +++ b/dashboard/templates/base.html @@ -0,0 +1,97 @@ + + + + + + + + + + IATI Dashboard – {% block title %}{{ page_titles[page] }}{% endblock %} + + + + + + {% block extrahead %}{% endblock %} + + +
+ + {% if page_titles[page] == "Data Quality" %} +
+ Organisations who publish IATI data are encouraged to use IATI’s new Validator tool to understand how to improve the quality of their data. See more information. +
+ {% endif %} +
+ {% block container %} + {% block page_header_div %} + + {% endblock %} + + {% block content %} + {% endblock %} + + {% endblock %} +
+ +
+ + + + + + + +{% block tablesorterscript %}{% endblock %} + {% block extrafooter %}{% endblock %} + + diff --git a/dashboard/templates/booleans.html b/dashboard/templates/booleans.html new file mode 100644 index 0000000000..8ad17c7e12 --- /dev/null +++ b/dashboard/templates/booleans.html @@ -0,0 +1,41 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context%} + +{% block content %} +
+
+

(This table as JSON)

+ +
+
+

List of values used by publishers for attributes that should be valid XML booleans (i.e. true, false, 0 or 1).

+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + {% for boolean, values in current_stats.inverted_publisher.boolean_values.items() %} + {% for value, publishers in values.items() %} + + + + + + {% endfor %} + {% endfor %} + +
ElementValuesPublishers
{{ boolean }}{{ value }}{% for publisher in publishers %}{{ publisher }} {% endfor %}
+
+
+
+{% endblock %} + +{% block tablesorteroptions %}{ + textExtraction:{ + 1: function(node,table,cellIndex) { + if (['1', 'true'].indexOf($(node).text()) !== -1) return 1; + if (['0', 'false'].indexOf($(node).text()) !== -1) return 0; + return -1; + } + } +}{% endblock %} diff --git a/dashboard/templates/codelist.html b/dashboard/templates/codelist.html new file mode 100644 index 0000000000..a90b8b40c9 --- /dev/null +++ b/dashboard/templates/codelist.html @@ -0,0 +1,88 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block page_header %} +

Codelist values used for {{ element }}

+

Who uses {{ codelist_mapping[major_version].get(element) }} in {{ element }}?

+

(for files published to version {{ major_version }}.xx of the standard)

+

(This page in JSON format)

+

Values should be on the {{ codelist_mapping[major_version].get(element) }} codelist.

+{% endblock %} + +{% block content %} +
+
+

+ {% with elements=reverse_codelist_mapping[major_version][codelist_mapping[element]] %} + {% if elements|count > 1 %} + Other elements/attributes on this codelist: +

    + {% for el in elements %} + {% if el in current_stats.inverted_publisher.codelist_values[major_version].keys() %} + {% if el!=element %}
  • {{ el }}
  • {% endif %} + {% endif %} + {% endfor %} +
+ {% endif %} + {% endwith %}

+
+
+ +
+
+
+
+

On Codelist

+
+
+

Codes that are on the {{ codelist_mapping[major_version].get(element) }} codelist.

+
+ + + + {% for value, publishers in values.items() %} + {% if value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} + + + + {% endif %} + {% endfor %} + +
ValueNamePublishers
{{ value }}{{ codelist_lookup[major_version].get(codelist_mapping[major_version].get(element))[value]['name'] }} + {{ publishers|length }} +
+
+
+ +
+
+
+

Not On Codelist

+
+
+

Codes that are not on the {{ codelist_mapping[major_version].get(element) }} codelist.

+
+ + + + {% for value, publishers in values.items() %} + {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} + + {% endif %} + {% endfor %} + +
ValuePublishers
{{ value }} + {{ publishers|length }} +
+
+
+
+{% endblock %} + +{% block extrafooter %} + +{% endblock %} diff --git a/dashboard/templates/codelists.html b/dashboard/templates/codelists.html new file mode 100644 index 0000000000..f1d516fbe6 --- /dev/null +++ b/dashboard/templates/codelists.html @@ -0,0 +1,50 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
+
+

Count of the different values used by all publishers, per codelist.

+

Note: some Elements/Attributes may use the same codelist. In each case, the values counted are different for each use.

+ {% include '_partials/tablesorter_instructions.html' %} + + {% for major_version in MAJOR_VERSIONS %} +
+
+

Codelists for version {{ major_version }}.xx

+
+ {% if major_version not in current_stats.inverted_publisher.codelist_values_by_major_version %} + There are no publishers using {{ major_version }}.xx codelists yet. + {% else %} + + + + + + + + + + + {% for i, (element, values) in func.enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} + + + + + + {% with codes=func.sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(func.get_codelist_values(values))) %} + + {% endwith %} + {% with codes=func.sorted(func.set(func.get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + + {% endwith %} + + {% endfor %} + +
Element/Attribute on codelistCodelistTotal Values Used (J)Total Values on CodelistValues used, on CodelistValues used, not on Codelist
{{ element }}{{ codelist_mapping[major_version].get(element) }}{{ values|length }}{{ codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length }}{{ codes|length }}{{ codes|length }}
+ {% endif %} +
+ {% endfor %} +
+
+ +{% endblock %} diff --git a/dashboard/templates/comprehensiveness.html b/dashboard/templates/comprehensiveness.html new file mode 100644 index 0000000000..a29025cdca --- /dev/null +++ b/dashboard/templates/comprehensiveness.html @@ -0,0 +1,47 @@ +{% set tab='summary' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Summary Table of Comprehensiveness Values +{% endblock %} + + +{% block heading_detail %} +

This tab summarises the average percentage of current activities where elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The columns show the average for each of the core, financial and value-added elements as defined in the associated tabs. The Weighted Average column provides an indication of the overall comprehensiveness of the data. It gives a double weighting to the Core Average as these elements are essential for IATI data to be valid for use.

+ +

{Weighted Average} = (({Core Average} * 2) + {Financials Average} + {Value Added Average}) / 4

+{% endblock %} + + +{% block narrative_text %} +

Overview

+

To assess comprehensiveness, publication of selected elements of the standard have been aggregated into three sections. "Core" are the mandatory fields specified by version 2.01 of the Activity Standard. Financials cover publishing of both financial transactions and budgets. Value Added are optional elements of widespread benefit to users.

+ +

Core Average

+

An average of the percentages assigned to the ten mandatory activity elements as specified on the Core tab.

+ +

Financials Average

+

An average of the percentages assigned to four financial elements as specified on the Financials tab.

+ +

Value Added Average

+

An average of the percentages assigned to the ten most useful recommended (non-mandatory) elements as specified on the Value Added tab.

+ +

Weighted Average

+

Twice the Core average plus the Financials average plus the Value-Added average, divided by 4.

+{% endblock %} + + +{% block assessment_text %} +

No assessments are currently employed.

+{% endblock %} + + +{% block exceptions_text %} +

See, where applicable, the exceptions for each individual element on the associated tabs.

+{% endblock %} + + +{% block comparison_text %} +

The original methodology counted all IATI elements. This approach is more selective, is fairer to publishers as it excludes fields not applicable to all types of organisation, and places more emphasis on mandatory fields.

+{% endblock %} diff --git a/dashboard/templates/comprehensiveness_base.html b/dashboard/templates/comprehensiveness_base.html new file mode 100644 index 0000000000..da550bc501 --- /dev/null +++ b/dashboard/templates/comprehensiveness_base.html @@ -0,0 +1,447 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block container %} + + {% block page_header_div %} + {{ super() }} + {% endblock %} + + + + + + {% block content %} +
+
+ (This table as CSV) +

{% block table_title %}Table of Comprehensiveness values{% endblock %}

+
+ + {% if self.heading_detail() %} +
+ {% block heading_detail %}{% endblock %} + {% include '_partials/tablesorter_instructions.html' %} +
+ {% endif %} + + + + + + {% for column_header in comprehensiveness.column_headers[tab] %} + + {% endfor %} + {% if tab == 'financials' %} + + {% endif %} + + + + {% for row in comprehensiveness.table() %} + + + {% for column_slug in comprehensiveness.column_slugs[tab] %} + + {% endif %} + {% else %}-{% endif %} + {% endfor %} + {% if tab == 'financials' %} + + {% endif %} + + {% endfor %} + +
Publisher Name{{ column_header }}
{{ row.publisher_title }}{% if column_slug in row %} + {{ row[column_slug+'_valid'] | round_nicely }} + {% if row[column_slug+'_valid'] != row[column_slug] %} + ({{ row[column_slug] | round_nicely }})
+
+ + + {% block narrative %} +
+
+

Narrative

+
+
+ {% block narrative_text %}{% endblock %} +
+
+ {% endblock %} + + + {% block assessment %} +
+
+

Assessment

+
+
+ {% block assessment_text %}{% endblock %} +
+
+ {% endblock %} + + + {% block exceptions %} +
+
+

Exceptions

+
+
+ {% block exceptions_text %}{% endblock %} +
+
+ {% endblock %} + + + {% block comparison %} +
+
+

Comparison with original Global Partnership Indicator methodology

+
+
+ {% block comparison_text %} +

These tests are more targeted than the original methodology which merely checked for the existence of all fields irrespective of their importance.

+ {% endblock %} +
+
+ {% endblock %} + + +
+
+

Pseudocode

+
+
+ +

For the purpose of this calculation, each iati-activity XML block is an activity.

+ +

To determine the lowest hierarchy:

+
+Lowest hierarchy =
+   Largest integer reported in the hierarchy attribute of any iati-activity element
+
+ +

To determine whether an activity is at the lowest hierarchical level:

+ +
+If the @hierarchy attribute is missing
+    If the lowest hierarchical level is 1
+        Activity is at lowest hierarchical level
+    Else
+        Activity is NOT at lowest hierarchical level
+Else if the @hierarchy attribute == lowest hierarchy
+    Activity is at lowest hierarchical level
+Else
+    Activity is NOT at lowest hierarchical level
+
+ +

To test whether an activity is current:

+ +
+end dates =
+    For each activity-date of type end-planned or end-actual
+        Parse activity-date/@iso-date as an iso date ('yyyy-mm-dd...')
+        If this does not work parse activity-date/text() as an iso date ('yyyy-mm-dd...')
+        If neither work, ignore this activity-date
+
+If activity-status/@code exists
+    If activity-status/@code is 2
+        The activity is current
+    Else
+        The activity is not current
+Else
+    If end dates (see above) is empty
+        The activity is current
+    Else
+        If there is an end date where (date year >= current year)
+            The activity is current
+        Else
+            The activity is not current
+
+
+ +

To determine whether we use an activity is relevant for a given comprehensiveness test.

+ +
+start date =
+    If activity-date[@type="start-actual"] exists
+        Parse activity-date[@type="start-actual"]/@iso_date as an iso date ('yyyy-mm-dd...')
+            If this works, we have the start date, else null
+    Else If activity-date[@type="start-planned"] exists
+        Parse activity-date[@type="start-planned"]/@iso_date as an iso date ('yyyy-mm-dd...')
+            If this works, we have the start date, else null
+    Else
+        null
+
+If the activity is current
+    If we are on the financials tab
+        If hierarchy = lowest level
+            If the comprehensiveness test is 'Transaction - Disbursement or Expenditure'
+                If (start date isn't null
+                        and start date < today
+                        and today - start date < 365 days)
+                    Use activity
+                Else
+                    Ignore activity
+            Else If the comprehensiveness test is 'Transaction - Traceability'
+                If transaction/transaction-type[@code="IF"] exists (1.xx) or transaction/transaction-type[@code="1"] exists (2.xx) or transaction/transaction-type[@code="11"] exists or transaction/transaction-type[@code="13"] exists
+                    Use activity
+                Else
+                    Ignore activity
+            Else
+                Use activity
+        Else
+            Ignore activity
+    Else
+        Use activity
+Else
+    Ignore activity
+
+ + {% block table_test_methodology_full %} + + {% block table_test_methodology_header %} + + + {% endblock %} + + {% block table_test_methodology_core %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + + {% block table_test_methodology_financials %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + + {% block table_test_methodology_valueadded %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% endblock %} + +
Tab + Comprehensiveness Test + Basic Methodology + Validation Methodology +
CoreVersion + iati-activities/@version must exist for the file the activity is in + iati-activities/@version must be on the Version codelist +
CoreReporting Organisationreporting-org/@ref must exist and reporting-org must have textual content
CoreIATI IdentifierActivity must contain a nonempty iati-identifierIf publishing at v2.xx, the iati-identifier must start with either a) the value in reporting-org/@ref or b) a value in other-identifier/@ref where @type="B1" (i.e. 'Previous Reporting Organisation Identifier'). No validation check is applied for v1.xx data, as it only became possible to add other-identifier/@type="B1" from v2.01.
CoreParticipating OrganisationActivity must contain at least 1 participating-orgAt least one participating-org must have @role Funding (1.xx) or 1 (2.xx)
CoreTitleActivity must contain a non-empty title element
CoreDescriptionActivity must contain a non-empty description element
CoreStatusActivity must contain an activity-status elementactivity-status/@code must be on the ActivityStatus codelist
CoreActivity DateActivity must contain at least 1 activity-date elementAt least 1 activity-date element in the activity must have @type equal to start-planned or start-actual (1.xx) or 1 or 2 (2.xx)
CoreSectorAt least one sector element present at activity level OR in all transactionsIf activity level AND more than 1 per vocab, percentage must add up per vocab
CoreCountry or Region(recipient-country OR recipient-region) at activity level OR (2.xx only) on all transactionsIf activity level AND more than 1 THEN percentages must add up
FinancialsTransaction - Commitment + Activity must have at least 1 transaction with @type of 'Commitment' (i.e. C (1.xx) or 2 (2.xx)) or 'Incoming commitment' (i.e. 11).All transactions of this @type must have a value that is a valid xs:decimal AND one of (transaction-date/@iso-date OR value/@value-date) that is a valid xs:date +
FinancialsTransaction - Disbursement or ExpenditureActivity must have at least 1 transaction with @type D or E (1.xx) or 3 or 4 (2.xx)All transactions of these @type's must have a value that is a valid xs:decimal AND one of (transaction-date/@iso-date OR value/@value-date) that is a valid xs:date +
FinancialsTransaction - CurrencyAll transactions must have value/@currency OR the activity must have a @default-currency attribute. All transactions must have value/@value-dateAll currency values must be on the Currency codelist. value/@value-date must be valid xs:date.
FinancialsTransaction - TraceabilityAll transactions with @type of 'Incoming Funds' (i.e. `IF` (1.xx) or `1` (2.xx)) or 'Incoming Commitment' (i.e. `11`) or 'Incoming Pledge' (i.e. `13`) must contain provider-org/@provider-activity-id
FinancialsBudgetActivity must have at least 1 budget element OR the activity must have the budget-not-provided attribute AND no budget elementsEach budget element must contain period-start/@iso-date and period-end/@iso-date and value/@value-date that are valid xs:dates AND a value element that is a valid xs:decimal OR the activity element must have a valid iati-activity/@budget-not-provided attribute under the BudgetNotProvided codelist AND no budget elements
Value addedContacts + Activity must contain 1 contact-info/email element + +
Value addedLocation DetailsActivity must contain location/name OR location/description OR location/location-administrative) OR location/point/pos
Value addedGeographic CoordinatesActivity must contain location/point/poslocation/point/pos must contain valid coordinates (two space separated decimals), and not be 0 0
Value addedDAC SectorsAt least 1 sector where @vocabulary is DAC or DAC-3 (1.xx) or 1 or 2 (2.xx), must be reported at activity level, unless there is no @vocabulary attribute, in which case DAC/1 is assumed. If there is no DAC sector element at activity level it must be reported within all transactions contained within that activity.Must be valid code on the DAC or DAC-3 lists as appropriate.
Value addedCapital SpendActivity must contain the capital-spend/@percentage attribute
Value addedActivity DocumentsActivity must contain at least 1 document-linkEach document-link must contain valid document category code and a url that is a valid xs:anyURI and contains the string //.
Value addedAid TypeActivity must contain either i) a value in default-aid-type/@code or ii) each transaction must contain a value in transaction/aid-type/@code.Must be valid code on the AidType codelist.
Value addedRecipient LanguageOnly activities containing only one recipient-country are assessed. Activity must contain title and description elements containing at least one of the official languages spoken for the defined recipient-country/@code.
Value addedResult/IndicatorActivity must contain the result/indicator element
+ {% endblock %} + +

Where the Validation Methodology column is blank, no extra conditions are imposed over the basic methodology.

+

The main percentage is the percentage of relevant activities that satisfy the basic and validation methodology for the given Comprehensiveness Test and publisher.

+

The bracketed percentage is the percentage of relevant activities that satisfy the basic methodology for the given Comprehensiveness Test and publisher. This is only shown if it is different to the main percentage.

+
+ + +
+ {% endblock %} +{% endblock %} + +{% block tablesorteroptions %}{ + widgets: ['stickyHeaders'], + textExtraction:{ + 1: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 2: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 3: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 4: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 5: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 6: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 7: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 8: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 9: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 10: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + }, + 11: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') >= 0) return '0'; + else return $(node).text(); + } + } +} +{% endblock %} +{% block tablesortertarget %}table#main_table{% endblock %} diff --git a/dashboard/templates/comprehensiveness_core.html b/dashboard/templates/comprehensiveness_core.html new file mode 100644 index 0000000000..0475e18d9c --- /dev/null +++ b/dashboard/templates/comprehensiveness_core.html @@ -0,0 +1,68 @@ +{% set tab='core' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block heading_detail %} +

Core elements are those that are mandatory in version 2.01 of the IATI Activity standard. The core elements are: Version, Reporting Organisation, IATI Identifier, Participating Organisation, Title, Description, Status, Activity Date, Sector, and Country or Region.

+ +

This table shows the percentage of current activities where the core elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.) The scoring for the Summary Stats page recognises the importance of the core by giving it double weighting in the overall comprehensiveness component.

+ +

Key:
+ Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities. +

+{% endblock %} + + +{% block narrative_text %} +

Overview

+

Version 2.01 introduced a more stringent approach to the way in which data can be validated (through the requirement that elements are published in a specified order) and as a result ten elements are mandatory for all activities under all conditions. This tab measures how comprehensively publishers are meeting these requirements, irrespective of whether they are using V2.01 or not.

+ +

Only current activities are assessed. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date.

+ +

Details

+
Version
+

Percentage of all current activities which contain a valid version number in the <iati-activities> file header element.

+ +
Reporting Organisation
+

Percentage of all current activities which contain both a name and an identifier for the reporting organisation. (In future this will also check that the identifier contains a valid prefix identifying a registration agency.)

+ +
IATI Identifier
+

Percentage of all current activities that contain a valid activity identifier. This MUST be prefixed with either the identifier reported for the reporting organisation, or (if publishing at v2.xx) an identifier reported in the <other-identifier> element. (In future this will also check that each identifier is globally unique.)

+ +
Participating Organisation
+

Percentage of all current activities that contain a participating organisation of type funding.

+ +
Title
+

Percentage of all current activities that contain a title.

+ +
Description
+

Percentage of all current activities that contain a description.

+ +
Status
+

Percentage of all current activities that contain a validly coded activity status.

+ +
Activity Date
+

Percentage of all current activities that contain a valid planned or actual start date.

+ +
Sector
+

Percentage of all current activities that EITHER contain at least one valid activity-level sector element OR all transactions contain a valid sector element. If multiple sectors are reported per vocabulary at activity level reported percentages must add up to 100% for the activity to be assessed as valid.

+ +
Country or Region
+

Percentage of all current activities that EITHER contain at least one valid activity level recipient country or region OR all transactions containing only one valid country or region. If more than one country and/or region is reported at activity level then they must all contain percentages adding up to 100%.

+{% endblock %} + + +{% block assessment_text %} +

Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

+{% endblock %} + + +{% block exceptions_text %} +

Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. Note that publisher rows consisting of dashes reflect those that have published to IATI in the past but whose portfolio contains no current activities.

+{% endblock %} + + +{% block table_test_methodology_financials %} +{% endblock %} +{% block table_test_methodology_valueadded %} +{% endblock %} diff --git a/dashboard/templates/comprehensiveness_financials.html b/dashboard/templates/comprehensiveness_financials.html new file mode 100644 index 0000000000..47376a84ec --- /dev/null +++ b/dashboard/templates/comprehensiveness_financials.html @@ -0,0 +1,56 @@ +{% set tab='financials' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Table of Financial values +{% endblock %} + + +{% block heading_detail %} +

Four aspects of financial reporting are tracked: the reporting of commitments and spend, the ability to track funds across activities and organisations, and the existence of activity budgets (which are also given added weight in the forward-looking component). Only current activities are assessed.

+ +

This table shows the percentage of current activities where these financial elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.)

+ +

The statistics on this page are calculated based on either i) the data in each publishers' lowest hierarchy, or ii) data in the hierarchy which contains the greatest number of budgets for the given publisher - see the narrative section for full details.

+ +

Key:
+ Dashes: Where a publisher has published to IATI in the past but whose portfolio contains no current activities.
+ Yellow flag: Publisher currently publishing the 'budget not provided' attribute for some or all activities. +

+{% endblock %} + + +{% block narrative_text %} +
Transaction - Commitment
+

For the data in the chosen hierarchy, the percentage of all current activities that contain at least one transaction of type Commitment or Incoming Commitment. The hierarchy chosen for this calculation is the highest hierarchy that contains an above-defined commitment transaction. Only one hierarchy is selected, in line with stated IATI rules on multi-level reporting.

Transaction - Disbursement or Expenditure
+

For the data at the publishers' lowest hierarchy, the percentage of current activities that contain at least one transaction of type Disbursement or Expenditure.

+ + +
Transaction - Traceability
+

For the data at the publishers' lowest hierarchy, the percentage of current activities containing a transaction of type Incoming Funds, Incoming Commitment or Incoming Pledge that also contain the IATI identifier for the funding organisation's activity. This links the funds disbursed by one organisation and received by another. (NB activities that do not contain incoming funds transactions are excluded from the calculation.) (In future the syntax of the provider-activity-id will also be validated.)

+ +

Donor publishers who list themselves as a participating-org of either 1 (i.e. 'Funding') or 3 (i.e. 'Extending') AND who are not listed as type 4 (i.e. 'Implementing') will be given credit for traceability, as they are at the top of the funding chain.

+ +
Budget
+

For the hierarchy which contains the greatest number of budgets, the percentage of all current activities that contain at least one valid budget entry. A valid budget entry must contain a valid period-start AND a valid period-end AND a valid value AND a valid value-date OR the activity has the @budget-not-provided attribute.

+{% endblock %} + + +{% block assessment_text %} +

Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

+

Activities with the budget-not-provided attribute will not be counted as valid if any budget elements are found.

+{% endblock %} + + +{% block exceptions_text %} +

Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. For disbursements and expenditure activities less than one year old (based on activity start date) are also excluded.

+{% endblock %} + + +{% block table_test_methodology_core %} +{% endblock %} +{% block table_test_methodology_valueadded %} +{% endblock %} diff --git a/dashboard/templates/comprehensiveness_valueadded.html b/dashboard/templates/comprehensiveness_valueadded.html new file mode 100644 index 0000000000..fb21e4649e --- /dev/null +++ b/dashboard/templates/comprehensiveness_valueadded.html @@ -0,0 +1,65 @@ +{% set tab='valueadded' %} +{% extends 'comprehensiveness_base.html' %} + + +{% block table_title %} +Table of Value-Added values +{% endblock %} + + +{% block heading_detail %} +

This table attempts to capture the breadth, richness and usefulness of high quality IATI data. The value added elements included in this table are: Contacts, Location Details, Geographic Coordinates, DAC Sectors, Capital Spend, Activity Documents, Activity Website, Conditions Attached, and Result/Indicator.

+ +

This table shows the percentage of current activities where these value added elements are populated with valid data. (Values in parentheses indicate percentage of activities where elements are populated with any data.)

+ +

Key:
+ Dashes (all fields except recipient language): Where a publisher has published to IATI in the past but whose portfolio contains no current activities.
Dashes (recipient language field): Where a publisher has no current activities targeted at only one recipient country.

+{% endblock %} + + +{% block narrative_text %} +
Contacts
+

The percentage of all current activities that contain at least one contact email address.

+ +
Location Details
+

The percentage of all current activities that contain at least one location name, location description, administrative area, or coordinates.

+ +
Geographic Coordinates
+

The importance of geocoded data is the reason why coordinates are assessed in addition to basic location details. The percentage of all current activities that contain at least one set of geographic coordinates.

+ +
DAC Sectors
+

Sector is a mandatory element and is assessed in the Core component. While it is not mandatory for publishers to utilise OECD DAC Sector/Purpose codes it is highly recommended that they do so as these codes are in widespread use and allow for comparison across activities. The percentage of all current activities that contain at least 1 valid DAC-CRS purpose code.

+ +
Capital Spend
+

For sustainable planning it is useful for developing countries to know how project funding is split between capital and recurrent expenditure. The percentage of all current activities that contain a capital spend percentage. (N.B. that 0% is a valid entry.)

+ +
Activity Documents
+

The percentage of all current activities that contain at least 1 document link.

+ +
Aid Type
+

The percentage of all current activities that contain details of the type of aid being supplied. This can be done at activity level using default-aid-type, or at transaction level using aid-type.

+ +
Recipient Language
+

The percentage of activities targeted at only one recipient-country that contain title and description elements with at least one of the official languages spoken in that country. These calculations are based on a list of official languages within each country.

+ +
Result/Indicator
+

The percentage of all current activities that contain at least one validly reported results indicator.

+{% endblock %} + + +{% block assessment_text %} +

Only elements containing valid data are counted. Where applicable a total including invalid data is provided in parentheses.

+{% endblock %} + + +{% block exceptions_text %} +

Only current activities are checked. A current activity is one with an activity status of implementing, a planned end date beyond today, or no end date. Publisher rows containing dashes indicate that no current activities are published.

+{% endblock %} + + +{% block table_test_methodology_core %} +{% endblock %} +{% block table_test_methodology_financials %} +{% endblock %} \ No newline at end of file diff --git a/static/templates/data_quality.html b/dashboard/templates/data_quality.html similarity index 91% rename from static/templates/data_quality.html rename to dashboard/templates/data_quality.html index 176738138f..6250d02f09 100644 --- a/static/templates/data_quality.html +++ b/dashboard/templates/data_quality.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

This section of the IATI Dashboard tracks published IATI data around a series of quality measures.

+

This section of the IATI Dashboard tracks published IATI data around a series of quality measures.

These are specifically technical measures - schema validation, download issues, XML formatting, etc - that can be easily rectified by publishers. No attempt is made to evaluate the actual content of the data - the dashboard should be used for sense checking and technical fixes.

{% endblock about %} {% block publisher_page_hash %}#h_dataquality{% endblock %} diff --git a/dashboard/templates/dates.html b/dashboard/templates/dates.html new file mode 100644 index 0000000000..b08997eb48 --- /dev/null +++ b/dashboard/templates/dates.html @@ -0,0 +1,34 @@ +{% extends 'base.html' %} +{% block content %} +
+
+
+

Activity Dates

+
+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + + + + + + + {% for publisher_title,publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + + + + + + + + {% endfor %} + +
PublisherEarliest DateLatest DateLatest Actual StartLatest Actual End
{{ publisher_title }}{% if publisher_stats.date_extremes.min.overall %}{{ publisher_stats.date_extremes.min.overall }}{% endif %}{% if publisher_stats.date_extremes.max.overall %}{{ publisher_stats.date_extremes.max.overall }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{ publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['end-actual'] %}{{ publisher_stats.date_extremes.max.by_type['end-actual'] }}{% endif %}
+
+
+
+{% endblock %} diff --git a/dashboard/templates/download.html b/dashboard/templates/download.html new file mode 100644 index 0000000000..7ec23d01d1 --- /dev/null +++ b/dashboard/templates/download.html @@ -0,0 +1,43 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
+ {{ boxes.box('Files that fail to download', current_stats.download_errors|length, 'img/aggregate/failed_downloads.png', + description='Count of files that fail to download, over time.') }} +
+ +
+
+

History of Download Errors

+ +

This table as JSON

+ +
+
+

List of files that fail to download.

+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + + + + + + + + {% for code, publisher, dataset, err_url in current_stats.download_errors %} + + + + + + + {% endfor %} + +
PublisherRegistry DatasetURLError Code
{{ publisher }}{{ dataset }}{{ err_url|url_to_filename }}{{ code }}
+
+
+
+{% endblock %} diff --git a/dashboard/templates/element.html b/dashboard/templates/element.html new file mode 100644 index 0000000000..6ac861d01b --- /dev/null +++ b/dashboard/templates/element.html @@ -0,0 +1,114 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block page_header %} +

Usage of {{ element }}

+

Who uses {{ element }}?

+

Checking usage of {{ element }} across publishers, files and activities.

+ + {% if element_or_attribute == 'attribute' %} +

An empty value for the attribute is treated the same as if the attribute is not present.

+ {% endif %} +{% endblock %} + +{% block content %} +
+
+

Publishers

+ +

(In JSON format)

+
+
+ +
+
+
+
+

Publishing this {{ element_or_attribute }}

+
+ + + + + + + + + + + {% for publisher in func.sorted(publishers) %} + + + {% with publisher_inverted=func.get_publisher_stats(publisher, 'inverted-file') %} + + {% endwith %} + + {% with publisher_stats=func.get_publisher_stats(publisher) %} + + + {% endwith %} + + + {% endfor %} + +
PublisherFiles with {{ element_or_attribute }}Total filesActivities/Orgs with {{ element_or_attribute }}Total instances of {{ element_or_attribute }}Total activities
{{ publisher }}{% if 'elements' in publisher_inverted %}{{ publisher_inverted.elements[element]|count }}{% endif %}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ publisher_stats.elements[element] }}{{ publisher_stats.elements_total[element] }}{{ current_stats.inverted_publisher.activities[publisher] }}
+
+
+ +
+
+
+

Not publishing this {{ element_or_attribute }}

+
+ + + + + + + + + {% for publisher in current_stats.inverted_publisher.publishers %} + {% if publisher not in publishers %} + + + + + + + {% endif %} + {% endfor %} + +
PublisherTotal filesTotal activitiesTotal organisations
{{ publisher }}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ current_stats.inverted_publisher.activities[publisher] }}{{ current_stats.inverted_publisher.organisations[publisher] }}
+
+
+
+ + +
+
+

Files

+
+ + + + + + + {% for publisher in current_stats.inverted_file_publisher %} + {% with datasets = current_stats.inverted_file_publisher[publisher].elements.get(element) %} + {% if datasets %} + + {% endif %} + {% endwith %} + {% endfor %} + +
PublisherFiles
{{ publisher }} + {% for dataset in datasets.keys() %} + {{ dataset[:-4] }} + {% endfor %} +
+
+
+
+{% endblock %} diff --git a/dashboard/templates/elements.html b/dashboard/templates/elements.html new file mode 100644 index 0000000000..78f867fc73 --- /dev/null +++ b/dashboard/templates/elements.html @@ -0,0 +1,54 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} + + +
+
+
+
+

Usage of IATI elements/attributes by: +

    +
  • Publishers: Total number of publishers that use this element/attribute (at least once)
  • +
  • Activities/Orgs: Total number of iati-activity or iati-organisation (where applicable) that use this element/attribute.
  • +
  • Total - Count of total instances in which this element/attribute is used, across all publishers.
  • +
+

+

Empty values for attributes are treated the same as if the attribute is not present.

+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + + + + + + {% for i, (element,publishers) in func.enumerate(current_stats.inverted_publisher.elements.items()) %} + {% if func.is_valid_element_or_attribute(element) %} + + + + + + + {% endif %} + {% endfor %} + +
Element/AttributePublishers (J)Activities/Orgs with element (J)Total Instances of Element (J)
{{ element }}{{ publishers|length }}{{ current_stats.aggregated.elements[element] }}{{ current_stats.aggregated.elements_total[element] }}
+
+
+
+{% endblock %} diff --git a/dashboard/templates/exploring_data.html b/dashboard/templates/exploring_data.html new file mode 100644 index 0000000000..5d4a99465c --- /dev/null +++ b/dashboard/templates/exploring_data.html @@ -0,0 +1,6 @@ +{% extends 'section_index.html' %} +{% block about %} +

This section tracks the various elements, attributes and codelists within published IATI data.

+

For every instance of these properties, a report is made available.

+{% endblock about %} +{% block publisher_page_hash %}#h_exploringdata{% endblock %} diff --git a/static/templates/faq.html b/dashboard/templates/faq.html similarity index 53% rename from static/templates/faq.html rename to dashboard/templates/faq.html index a5974691ac..9b1d9363b3 100644 --- a/static/templates/faq.html +++ b/dashboard/templates/faq.html @@ -2,29 +2,31 @@ {% block content %} -

Who runs the IATI Dashboard?

-

The IATI Dashboard and Publishing Statistics are maintained by the technical team of the IATI Secretariat. It has grown out of various projects to track IATI data around various facets and metrics.

+
+
+

Who runs the IATI Dashboard?

+

The IATI Dashboard and Publishing Statistics are maintained by the technical team of the IATI Secretariat and includes components developed by Code for IATI volunteers. The IATI Dashboard grew out of various projects to track and provide metrics for how IATI data is published and how the IATI Standard is used in practice.

-

When is the Dashboard updated?

+

When is the Dashboard updated?

The Dashboard and Publishing Statistics are generated through a daily process that happens each night.

This is a two step process:

    -
  1. The Dashboard and Publishing Statistics scripts first download the data that is linked to from the IATI Registry - see “data downloaded” date time at the footer of each page.
  2. -
  3. From this dataset, the relevant statistics are calculated, and the Dashboard and Publishing Statistics are then updated - see “generated” date/time at footer.
  4. +
  5. First the data that is linked to from the IATI Registry is downloaded — see the “data downloaded” timestamp in the footer of each page.
  6. +
  7. From this dataset, the relevant statistics are calculated, and this site is then updated — see the “generated” timestamp in footer.

The generation process usually completes within 12 hours.

-

Why is there a difference between the data download and Dashboard generation time?

+

Why is there a difference between the data download and Dashboard generation time?

The data is downloaded, and then there is a period of computing time to generate the statistics that inform the Dashboard and Publishing Statistics.

-

Usually, there is a small period of time between the two. However, we do track this as sometimes things break - and the site fails to regenerate. If you spot something, please also let us know via support@iatistandard.org.

+

Usually, there is a short period of time between the two. However, we do track this as sometimes things break and the site may fail to regenerate. If you spot something, please also let us know via support@iatistandard.org.

-

Does a graph going up or down mean something good?

-

No. There may be a number of reasons why a certain graph or number rises or falls.

-

In some cases, a fall in the graph may well be down to the fact that the Dashboard script failed to download the relevant data. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

-

Therefore, we stress to treat the graphs, numbers and statistics on the IATI Dashboard with caution, context and consideration.

+

Does a graph going up or down mean something good?

+

No. There may be a number of reasons why a certain graph or number rises or falls.

+

In some cases, a fall in the graph may well be down to the fact that the Dashboard script failed to download the relevant data. This could be due to any number of reasons — and not necessarily anything to do with the setup of the IATI publisher.

+

Therefore, we urge you to treat the graphs, numbers and statistics on this site with caution, context and consideration.

Should you think something is really wrong, please contact us via support@iatistandard.org.

-

What do the Publishing Statistics numbers mean?

+

What do the Publishing Statistics numbers mean?

Different tables capture how timely, forward looking and comprehensive a publisher’s IATI data is. A summary of this information is displayed on the Summary Statistics page, where each quality is marked out of 100.

The other tabs outline how this mark is calculated and the different assessments included.

In some cases, a low number may well be down to the fact that the Dashboard script failed to download the relevant data or the methodology for an element within IATI may need amending. This could be due to any number of reasons - and not necessarily anything to do with the setup of the IATI publisher.

@@ -32,35 +34,35 @@

What do the Publishing Statistics numbers mean?

Therefore, we stress to treat the tables and their corresponding marks with caution, context and consideration.

Should you think something is really wrong, please contact us via support@iatistandard.org.

-

Can I build my own version of this Dashboard?

-

Yes - the source code is all open source:

+

Can I build my own version of this Dashboard?

+

Yes — the source code is all open source:

  1. https://github.com/IATI/IATI-Dashboard/.
  2. -
  3. https://github.com/IATI/IATI-Publishing-Statistics

We advise you to check through the technical specifications.

-

How can I suggest a new function?

-

Ideally, we’d suggest to check through the list of issues we have logged in our Github repositories.

- +

How can I suggest a new function?

+

Ideally, we suggest checking through the list of issues we have logged in our GitHub repositories.

Alternatively, please email us via support@iatistandard.org

-

We’ve published/updated our IATI data, but the numbers haven’t updated.

+

We’ve published/updated our IATI data, but the numbers haven’t updated.

We’d suggest two initial checks:

  1. Is the data registered on the IATI Registry?
  2. -
  3. Has the dashboard updated since you published (check the times at the footer of this page)?
  4. +
  5. Has this site updated since you published (check the timestamps in the footer of this page)?
-

Should you still believe that data is missing from the Dashboard, we’d love to hear from you - please contact us on support@iatistandard.org

+

Should you still believe that data is missing, we’d love to hear from you — please email us at support@iatistandard.org.

-

I want to get to the raw data of a publisher - how can I do that?

+

I want to get to the raw data of a publisher — how can I do that?

Two ways:

    -
  1. Visit the IATI Registry and access the relevant links to the XML files - these links are often found on the relevant Dashboard page.
  2. +
  3. Visit the IATI Registry and access the relevant links to the XML files — these links are often found on the relevant Dashboard page.
  4. Try a query via the IATI Datastore
+
+
{% endblock %} diff --git a/dashboard/templates/files.html b/dashboard/templates/files.html new file mode 100644 index 0000000000..cd359cc2a6 --- /dev/null +++ b/dashboard/templates/files.html @@ -0,0 +1,68 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
+ {{ boxes.box('Total activity files', current_stats.aggregated.activity_files, 'img/aggregate/activity_files.png', 'activity_files.json', + description='Count of total number of activity files over time.') }} + {{ boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'img/aggregate/organisation_files.png', 'organisation_files.json', + description='Count of total number of organisation files, over time.') }} +
+
+ {{ boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'img/aggregate/file_size.png', 'file_size.json') }} +
+
+
+ (J) +

File Sizes

+
+ + + {% for bin,freq in func.sorted(current_stats.aggregated.file_size_bins.items(), key=func.firstint) %} + + + + + {% endfor %} + +
{{ bin }}{{ freq }}
+
+
+
+ +
+
+
+
+

Total file size by publisher

+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + + + + + + + + + {% for package, activities in current_stats.inverted_file.activities.items() %} + + {% set publisher=func.dataset_to_publisher(package) %} + + + + + + + {% endfor %} + +
PublisherPackageActivities (J)Organisations (J)File Size (J)
{{ publisher_name[func.dataset_to_publisher(package)] }}{{ package }}{{ activities }}{{ current_stats.inverted_file.organisations.get(package) }}{{ current_stats.inverted_file.file_size.get(package)|filesizeformat }}
+
+
+
+{% endblock %} +{% block tablesorteroptions %} +{textExtraction:{4: function(node,table,cellIndex) { return $(node).attr('data-bytes'); } }} +{% endblock %} diff --git a/dashboard/templates/forwardlooking.html b/dashboard/templates/forwardlooking.html new file mode 100644 index 0000000000..f1bcd51c58 --- /dev/null +++ b/dashboard/templates/forwardlooking.html @@ -0,0 +1,251 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} + + +
+ +
+ (This table as CSV) +

Activities with Forward Looking Budget Allocations

+
+ +
+

+ The number of activities with budgets reported for each year is compared against the total number of activities current at the start of each year. The first block shows the number of activities that are, or will be, current in this and the next two years. The second block shows how many of these activities contain a budget for the corresponding year. The third block expresses this as a percentage. +

+ +

+ Activities are excluded from forward looking calculations if they contain commitment transactions and 90% of the total commitment value has already been disbursed or expended in the corresponding year or previously. Additionally, activities are excluded if they have less than six months left to run. +

+ +

Key:
+ Dashes: Where a percentage cannot be calculated, because the denominator is zero.
+ Red flag: Publisher currently publishing forward looking budgets at more than one hierarchical level.
+ Yellow flag: Publisher currently publishing the 'budget not provided' attribute for some or all activities.
+

+ {% include '_partials/tablesorter_instructions.html' %} +
+ + + + + + {% for column_header in forwardlooking.column_headers %} + + {% endfor %} + + + {% for i in range(0,3) %} + {% for year in forwardlooking.years %} + + {% endfor %} + {% endfor %} + + + + {% for row in forwardlooking.table() %} + + + + {% for column in row.year_columns %} + {% for year in forwardlooking.years %} + + {% endfor %} + {% endfor %} + + + {% endfor %} + +
Publisher Name{{ column_header }} +
{{ year }}
{{ row.publisher_title }} + {% if column[year] != '-' %}{{ column[year] | round_nicely }}{% else %}-{% endif %} + * + {%- elif row['budget_not_provided'] -%} + background-color: #fcf8aa" data-severity="1">* + {%- elif row['flag'] -%} + background-color: #f2aaaa" data-severity="2">* + {%- else -%} + " data-severity="0"> + {%- endif -%} +
+
+ + + +
+
+

Narrative

+
+
+

Developing countries have, since 2008, been asking their development partners to provide forward-looking data which can be used for both planning and budget preparation. While aggregated country-level budgets have a certain political value it is activity-level data that is of greatest benefit, and which this dimension attempts to assess.

+

The standard asks publishers to break down their total commitment to an activity into annual or quarterly budgets - i.e. the sum of the reported budgets matches the sum of commitments. It is stressed that these budget breakdowns are indicative and are in no way binding.

+

This assessment counts the number of current activities for this and the next two years that contain budgets. It is based on a number of assumptions:

+
    +
  • For any given future year all current activities should contain a budget.
  • +
  • Activities are deemed to be current in any given year if their end date is reported to be in this year or beyond (or if there is no end date)
  • +
  • Counting the number of activities that contain budgets provides a fairer result than summing the value of these budgets. The proportion of a publisher's total commitment for a future year that has already been committed to existing projects may vary greatly (e.g. you may have earmarked an amount to spend in three-years’ time, but not yet agreed on how to spend it.)
  • +
  • For publishers reporting multiple hierarchical levels ONLY the level that budgets are reported at is used in this calculation. However if budgets are reported at multiple levels, all activities are counted, and the publisher is marked with a red flag.
  • +
+

As noted above, activities are excluded from forward looking calculations if they contain commitment transactions and 90% of the total commitment value has already been disbursed or expended in the corresponding year or previously. Additionally, activities are excluded if they have less than six months left to run (based on the reported actual or planned end date).

+
+
+ + +
+
+

Assessment

+
+
+

No separate assessment is provided as the percentage of current activities containing budgets for this and the next two years is the de facto assessment. No attempt is currently being made to turn these into a descriptive summary (as, for example, "Frequency = "Monthly"). The percentage for the middle year (i.e. 'next year') is of most relevance to developing countries.

+

Activities with any budget elements that are also found to contain the budget-not-provided attribute will not receive a forward looking score.

+
+
+ + + +
+
+

Exceptions

+
+
+

Dashes

+

Where a percentage can not be calculated, because the denominator is zero, a dash is used.

+

Red Flags

+

Publishers currently publishing forward looking budgets at more than one hierarchical level.

+

Yellow Flags

+

Publishers currently publishing the 'budget not provided' attribute for some or all activities.

+
+
+ + + +
+
+

Comparison with original Global Partnership Indicator methodology

+
+
+

This methodology differs substantially from the original GP Indicator in two ways.

+
    +
  • All current activities are assessed, NOT only those containing Country Programmable Aid. CPA is calculated by the Forward Spending Survey by excluding activities based on a complex filtering of purpose codes, finance types and aid types. Firstly this is difficult to explain. Secondly the provision, or lack thereof, of forward looking data is not believed to be determined by CPA status. Thirdly, as a multi-stakeholder standard many IATI publishers, in particular implementing agencies, do not necessarily report CRS-specific fields.
  • +
  • As explained above the number of activities with budgets, NOT the value of budgets is counted.
  • +
+
+
+ + +
+
+

Pseudocode

+
+
+ +

For the purpose of this calculation, each iati-activity XML block is an activity.

+ +

To test whether an activity is current in a given year:

+ +
+end dates =
+    For each activity-date that describes an end date (type is end-planned or end-actual in 1.xx, 3 or 4 in 2.xx)
+        Parse activity-date/@iso-date as an iso date ('yyyy-mm-dd...')
+        If this does not work parse activity-date/text() as an iso date ('yyyy-mm-dd...')
+        If neither work, ignore this activity-date
+If no end dates were successfully parsed
+    The activity is current
+Else
+    If there is an end date where (date year >= given year)
+        The activity is current
+    Else
+        The activity is not current
+
+ +

To find the year for a budget:

+ +
+start =
+    Parse period-start/@iso-date as an iso date ('yyyy-mm-dd...')
+    If this does not work parse period-start/text() as an iso date ('yyyy-mm-dd...')
+    Otherwise null
+end =
+    Parse period-end/@iso-date as an iso date ('yyyy-mm-dd...')
+    If this does not work parse period-end/text() as an iso date ('yyyy-mm-dd...')
+    Otherwise null
+
+If start and end are both not null
+    If (end - start <= 370 days)
+        If end year == start year
+            budget year = end year
+        Elif end month >= 7
+            budget year = end year
+        Else
+            budget year = end year - 1
+    Else ignore the budget
+Else budget year is null
+
+ +

The relevant hierarchical level is:

+ +
+If the @hierarchy attribute is missing the hierarchy value is 1
+Else the hierarchy value is the value of the @hierarchy attribute
+
+If all budgets for current activities in the given years have the same hierarchy value
+    Only activities with that hierarchy value are at a relevant hierarchical level
+Else
+    All activities are considered to be at a relevant hierarchical level
+
+ +

To calculate the "Current activities" column, count the number of activities that are:

+
    +
  • at a relevant hierarchical level (see above)
  • +
  • AND current
  • +
+

To calculate the "Current activities with budgets" column, count the number of activities that are: +

    +
  • at a relevant hierarchical level (see above)
  • +
  • AND current
  • +
  • AND contain at least one budget with a budget year (as described above) that matches the year of the column OR contains the budget-not-provided attribute
  • +
+

+ +
+
+{% endblock %} + +{% block tablesorteroptions %}{ + widgets: ['stickyHeaders'], + textExtraction:{ + 7: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + }, + 8: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + }, + 9: function(node,table,cellIndex) { + if ($(node).text().indexOf('-') > 0) return '0'; + else return $(node).text(); + }, + 10: function(node,table,cellIndex) { + return $(node).attr('data-severity'); + } + } +}{% endblock %} +{% block tablesortertarget %}table#main_table{% endblock %} diff --git a/static/templates/headlines.html b/dashboard/templates/headlines.html similarity index 69% rename from static/templates/headlines.html rename to dashboard/templates/headlines.html index 8771163829..efc23b667f 100644 --- a/static/templates/headlines.html +++ b/dashboard/templates/headlines.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

This section of the IATI dashboard tracks published IATI data and provides headline information.

+

This section tracks published IATI data and provides headline information.

This is useful to gain a quick insight into published IATI data - and can also be used as a starting point to explore specific focal points around IATI.

{% endblock about %} diff --git a/static/templates/humanitarian.html b/dashboard/templates/humanitarian.html similarity index 80% rename from static/templates/humanitarian.html rename to dashboard/templates/humanitarian.html index cc8f8cbe4d..d9f69c8d44 100644 --- a/static/templates/humanitarian.html +++ b/dashboard/templates/humanitarian.html @@ -1,5 +1,5 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
    @@ -10,7 +10,7 @@
    @@ -18,30 +18,30 @@

    Humanitarian

    This table assesses the extent to which IATI publishers are reporting on humanitarian attributes.

    -

    The statistics on this page do not form part of the Summary Statstics.

    +

    The statistics on this page do not form part of the Summary Statistics.

    - {% include 'tablesorter_instructions.html' %} + {% include '_partials/tablesorter_instructions.html' %}
    - + {% for _, column_header in humanitarian.columns %} - + {% endfor %} {% for row in humanitarian.table() %} - - + + {% for column_slug, _ in humanitarian.columns %} - {% endfor %} diff --git a/dashboard/templates/identifiers.html b/dashboard/templates/identifiers.html new file mode 100644 index 0000000000..90387a637e --- /dev/null +++ b/dashboard/templates/identifiers.html @@ -0,0 +1,43 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
    +
    +
    +
    +

    + Count of duplicates, per publisher +

    +
    +
    +

    + Duplicate identifiers: a count of the unique iati-identifier that are duplicated. + Instances of duplicate identifiers: the total number of activities that contain a duplicate iati-identifier, within a publisher dataset. + Example: two identifiers could be found as having duplicates. Across the dataset, these duplicates could account for 200 activities. +

    + {% include '_partials/tablesorter_instructions.html' %} +
    +
    Publisher NamePublisher Name{{column_header}}{{ column_header }}
    {{row.publisher_title}}
    {{ row.publisher_title }} + {%- if column_slug == 'publisher_type' -%} - {{row[column_slug]}} + {{ row[column_slug] }} {%- else -%} - {{row[column_slug]|int}} + {{ row[column_slug]|round_nicely }} {%- endif -%}
    + + + + + + + {% for publisher_title,publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + {% if publisher_stats.publisher_duplicate_identifiers|count != 0 %} + + + + + + {% endif %} + {% endfor %} + +
    PublisherDuplicate identifiersInstances of duplicate identifiers
    {{ publisher_title }}{{ publisher_stats.publisher_duplicate_identifiers|length }}{{ publisher_stats.publisher_duplicate_identifiers.values()|sum }}
    +
    + + +{% endblock %} + diff --git a/static/templates/index.html b/dashboard/templates/index.html similarity index 55% rename from static/templates/index.html rename to dashboard/templates/index.html index 739dfa442b..f3cd50ff50 100644 --- a/static/templates/index.html +++ b/dashboard/templates/index.html @@ -1,11 +1,12 @@ {% extends 'section_index.html' %} +{% block title %}IATI Dashboard{% endblock %} {% block about %} -

    The Dashboard is generated routinely, with the last update based on data in the IATI Registry on {{datetime_data_homepage}}. We expect the Dashboard to update every few days, for more information, see the FAQ.

    -

    Many of the tables in the Dashboard are sortable by clicking on the headers.

    -

    Many of the datasets in the Dashboard are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

    +

    These analytics are generated daily, with the last update based on data in the IATI Registry on {{ datetime_data }}. For more information, see the FAQ.

    +

    Many of the tables are sortable by clicking on the headers.

    +

    Many of the datasets are available in machine readable JSON format. Some links to JSON are abbreviated to (J).

    {% endblock %} {% block lhs_column %} -{{ super () }} +{{ super() }}
    @@ -17,102 +18,102 @@

    - - {{current_stats.aggregated.activities}} + + {{ current_stats.aggregated.activities }} Activities - - {{current_stats.aggregated.iati_identifiers|count}} + + {{ current_stats.aggregated.iati_identifiers|count }} Unique Activities - - {{current_stats.aggregated.publishers}} + + {{ current_stats.aggregated.publishers }} Publishers - - {{current_stats.aggregated.activity_files}} + + {{ current_stats.aggregated.activity_files }} Activity Files - - {{current_stats.aggregated.organisation_files}} + + {{ current_stats.aggregated.organisation_files }} Organisation Files - - {{current_stats.aggregated.file_size|filesizeformat}} + + {{ current_stats.aggregated.file_size|filesizeformat }} Total File Size - - {{current_stats.download_errors|length}} + + {{ current_stats.download_errors|length }} Files Fail to Download - - {{current_stats.aggregated.invalidxml}} + + {{ current_stats.aggregated.invalidxml }} Files where XML is not well-formed - - {{current_stats.aggregated.nonstandardroots}} + + {{ current_stats.aggregated.nonstandardroots }} Files with Nonstandard Roots - {{current_stats.aggregated.toolarge}} + {{ current_stats.aggregated.toolarge }} Files that are too large to be processed - - {{current_stats.aggregated.validation.fail}} + + {{ current_stats.aggregated.validation.fail }} Files don't validate against the schema - - {{current_stats.aggregated.publishers_validation.fail}} + + {{ current_stats.aggregated.publishers_validation.fail }} Publishers have non-validating files - - {{current_stats.aggregated.publisher_has_org_file.no}} + + {{ current_stats.aggregated.publisher_has_org_file.no }} Publishers with no organisation file diff --git a/dashboard/templates/license.html b/dashboard/templates/license.html new file mode 100644 index 0000000000..59c8ebfe2a --- /dev/null +++ b/dashboard/templates/license.html @@ -0,0 +1,34 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes %} +{% block page_header %} +

    {{ license_names[license] }}{% if license_urls[license]['url'] %} (source){% endif %}

    +

    + License ID: {{ license }} +

    +{% endblock %} + +{% block content %} + +
    +
    +
    + + + + + + + + + {% for publisher, files in publisher_counts %} + + + + + {% endfor %} + +
    PublisherFiles
    {{ publisher }}{{ files }}
    +
    +
    +
    +{% endblock %} diff --git a/dashboard/templates/licenses.html b/dashboard/templates/licenses.html new file mode 100644 index 0000000000..0908260759 --- /dev/null +++ b/dashboard/templates/licenses.html @@ -0,0 +1,34 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes %} +{% block content %} +
    +
    +
    +
    +

    Count of publishers per licences in use on the IATI Registry.

    + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + + + + + {% for license, files in func.sorted(license_count.items()) %} + + + + + + + {% endfor %} + +
    LicenseLicense IDFilesPublishers
    {% if license_urls[license]['url'] %}{{ license_names[license] }}{% else %}{{ license_names[license] }}{% endif %}{{ license }}{{ files }}{{ publisher_license_count[license] }}
    +
    +
    +
    +{% endblock %} diff --git a/dashboard/templates/org_ids.html b/dashboard/templates/org_ids.html new file mode 100644 index 0000000000..b36f51257c --- /dev/null +++ b/dashboard/templates/org_ids.html @@ -0,0 +1,30 @@ +{% extends 'base.html' %} +{% block content %} +
    +
    +
    +
    + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + {% include '_partials/org_id_table_header.html' %} + + + + {% for slug in slugs.org_type.by_slug %} + {% set transaction_stats = current_stats.aggregated[slug + '_transaction_stats'] %} + + + {% include '_partials/org_id_table_cells.html' %} + + {% endfor %} + +
    Org Type
    {{ slug.replace('_org', '') | capitalize }}
    +
    +
    +
    + +{% endblock %} diff --git a/dashboard/templates/org_type.html b/dashboard/templates/org_type.html new file mode 100644 index 0000000000..0602447245 --- /dev/null +++ b/dashboard/templates/org_type.html @@ -0,0 +1,41 @@ +{% extends 'base.html' %} + +{% block title %} +Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs +{% endblock %} + +{% block page_header %} +

    Organisation Identifiers: {{ slug.replace('_org', '') | capitalize }} Orgs

    +{% endblock %} + +{% block content %} +
    +
    +
    +
    + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + {% include '_partials/org_id_table_header.html' %} + + + + {% for publisher_title, publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} + + + + {% include '_partials/org_id_table_cells.html' %} + + {% endfor %} + +
    Publisher NamePublisher Registry Id
    {{ publisher_name[publisher] }}{{ publisher }}
    +
    +
    +
    +{% endblock %} diff --git a/static/templates/organisation.html b/dashboard/templates/organisation.html similarity index 52% rename from static/templates/organisation.html rename to dashboard/templates/organisation.html index c5e2097746..16bd858d36 100644 --- a/static/templates/organisation.html +++ b/dashboard/templates/organisation.html @@ -1,23 +1,23 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
    - {{boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'publisher_has_org_file.png', 'publisher_has_org_file.json', - description='Count of publishers without an organisation file, over time.')}} + {{ boxes.box('Publishers without an Organisation File', current_stats.aggregated.publisher_has_org_file.no, 'img/aggregate/publisher_has_org_file.png', 'publisher_has_org_file.json', + description='Count of publishers without an organisation file, over time.') }}

    List of publishers without an Organisation File

    - (J) + (J)

    The following publishers do not have an organisation file listed on the IATI Registry.

    diff --git a/dashboard/templates/publisher.html b/dashboard/templates/publisher.html new file mode 100644 index 0000000000..3daa664da1 --- /dev/null +++ b/dashboard/templates/publisher.html @@ -0,0 +1,446 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block title %} +Publisher: {{ publisher_name[publisher] }} {{ super() }} +{% endblock %} +{% block page_header %} +(Publisher Stats JSON) +

    Publisher: {{ publisher_name[publisher] }}

    +

    +{% endblock %} + +{% block content %} +
    +
    +
    +

    Table of Contents

    +
    + +
    +
    +
    +
    + +

    Headlines

    + +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    On the Registry{{ publisher }}
    Reporting Org on Registry{% if ckan_publishers and publisher in ckan_publishers %} + {{ ckan_publishers[publisher].result.publisher_iati_id }} + {% endif %} +
    Reporting Org(s) in Data + {% for org in publisher_stats.reporting_orgs %} + {{ org|replace(' ', ' ') }} + {% endfor %} +
    Activity Files{{ publisher_stats.activity_files }}
    Organisation Files{{ publisher_stats.organisation_files }}
    Total File Size{{ publisher_stats.file_size|filesizeformat }}
    Activities{{ publisher_stats.activities }}
    Unique Activities{{ publisher_stats.iati_identifiers|count }}
    Organisations{{ publisher_stats.organisations }}
    Versions + {% for version in publisher_stats.versions.keys() %} + {{ version|replace(' ', ' ')|safe }} + {% endfor %} +
    Hierarchies + {% for hierarchy in publisher_stats.hierarchies %} + {{ hierarchy }} + {% endfor %} +
    Licenses + {% for license in publisher_licenses %} + {{ license }} + {% endfor %} +
    Files failing validation + {{ failure_count }} +
    +
    +
    + {{ boxes.box( + 'Activities', publisher_stats.activities, 'img/publishers/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher') }} +
    + +
    + {{ boxes.box('Activity Files', publisher_stats.activity_files, 'img/publishers/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher') }} + {{ boxes.box('Organisation Files', publisher_stats.organisation_files, 'img/publishers/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher') }} +
    + +
    + {{ boxes.box('Files per version', '', 'img/publishers/'+publisher+'_versions.png', publisher+'/versions.json', 'img/publishers/'+publisher+'_versions_legend.png', '-publisher') }} + {{ boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, 'img/publishers/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher') }} +
    + +
    + {{ boxes.box('Files failing validation', publisher_stats.validation.get('fail',0), 'img/publishers/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher') }} + {{ boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, 'img/publishers/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher') }} +
    + +

    Data Quality

    + + {% set data_quality_issue = false %} + +
    + {% if current_stats.inverted_file_publisher[publisher].validation.fail %} + {% set data_quality_issue = true %} +
    +
    + +

    Files Failing Validation

    + + + + +
    +
    + {% endif %} + + {% if 1 in publisher_inverted.invalidxml.values() %} + {% set data_quality_issue = true %} +
    +
    +
    + (J) +

    Files where XML is not well-formed

    +
    + + + + + + + + {% for dataset, invalid in publisher_inverted.invalidxml.items() %} + {% if invalid %} + + + + {% endif %} + {% endfor %} + +
    Dataset
    {{ dataset[:-4] }}
    +
    +
    + {% endif %} + + {% if 1 in publisher_inverted.nonstandardroots.values() %} + {% set data_quality_issue = true %} +
    +
    + +
    + (J) +

    Files with non-standard roots

    +
    +
    + + + + + + + {% for dataset, nonstandard in publisher_inverted.nonstandardroots.items() %} + {% if nonstandard %} + + + + {% endif %} + {% endfor %} + +
    Dataset
    {{ dataset[:-4] }}
    +
    +
    + {% endif %} + +
    + + {% if not data_quality_issue %} +

    No issues were found.

    + {% endif %} + +

    Financial

    + + {% macro currency_value(d) %} + {% if d %} + {% for currency, value in d.items() %} + {% if value!=None %} + {{ value }} {{ currency }}
    + {% endif %} + {% endfor %} + {% endif %} + {% endmacro %} + +
    +
    +

    Budgets

    +
    +
    +

    The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

    +
    + + + + + + + + + + + + + + {% for row in budget_table %} + + + + + + + + + + {% endfor %} + +
    YearCount (all)Sum (all)Count (Original)Sum (Original)Count (Revised)Sum (Revised)
    {{ row.year }}{% if row.count_total %}{{ row.count_total }}{% endif %}{% if row.sum_total %}{{ currency_value(row.sum_total) }}{% endif %}{% if row.count_original %}{{ row.count_original }}{% endif %}{{ currency_value(row.sum_original) }}{% if row.count_revised %}{{ row.count_revised }}{% endif %}{{ currency_value(row.sum_revised) }}
    +
    + + +

    Exploring Data

    + +
    +
    +

    Files

    +
    + + + + + + + + + + + + {% for package, activities in publisher_inverted.activities.items() %} + + + + + + + + {% endfor %} + +
    PackageActivities (J)Organisations (J)File Size (J)Version (J)
    {{ package[:-4] }}{{ activities }}{{ current_stats.inverted_file.organisations.get(package) }}{{ current_stats.inverted_file.file_size.get(package)|filesizeformat }}{{ current_stats.aggregated_file[publisher][package]['versions'].keys()|first }}
    +
    + + {% for major_version in MAJOR_VERSIONS %} + {% if major_version in publisher_stats.codelist_values_by_major_version %} +
    + +
    + (J) +

    Codelist Values (version {{ major_version }}.xx)

    +
    +
    + + + + + + + + + + {% with element_list=current_stats.inverted_publisher.codelist_values_by_major_version[major_version].keys()|list %} + {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %} + + {% with element_i=element_list.index(element) %} + + + {% with codes=func.sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} + + {% endwith %} + {% with codes=func.sorted(func.set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} + + {% endwith %} + {% endwith %} + + {% endfor %} + {% endwith %} + +
    Element/AttributeCodelistValues on CodelistValues not on Codelist
    {{ element }}{{ codelist_mapping[major_version].get(element) }}{% if codes|count %} + {{ codes|count }} + {% else %}{{ codes|count }}{% endif %} + + {% if codes|count %} + + {{ codes|count }} + + {% else %} + {{ codes|count }} + {% endif %} +
    +
    + {% endif %} + {% endfor %} + +
    + +
    + (J) +

    Elements and Attributes Published

    +
    +
    + + + + + + + + + {% with element_list=current_stats.inverted_publisher.elements.keys()|list %} + {% for element, count in publisher_stats['elements'].items() %} + + {% with element_i=element_list.index(element) %} + + + + {% endwith %} + + {% endfor %} + {% endwith %} + +
    Element/AttributeActivities/OrganisationsFiles
    {{ element }}{{ count }}{{ publisher_inverted.elements[element]|count }}
    +
    + +
    + +
    +

    Organisation Identifiers

    +
    +
    + + + + {% set extra_column = true %} + {% include '_partials/org_id_table_header.html' with context %} + + + + {% for slug in slugs.org_type.by_slug %} + {% set transaction_stats = publisher_stats[slug + '_transaction_stats'] %} + + + {% include '_partials/org_id_table_cells.html' with context %} + + + {% endfor %} + +
    Org Type
    {{ slug.replace('_org', '') | capitalize }}(J)
    +
    + +{% endblock %} + +{% block extrafooter %} + +{% endblock %} + + +{% block tablesorterscript %} + + +{% endblock %} diff --git a/dashboard/templates/publishers.html b/dashboard/templates/publishers.html new file mode 100644 index 0000000000..99ce9fd797 --- /dev/null +++ b/dashboard/templates/publishers.html @@ -0,0 +1,56 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
    + {{ boxes.box('Publishers', current_stats.aggregated.publishers, 'img/aggregate/publishers.png', 'publishers.json', + description='This graph shows the number of organisations publishing IATI data over time.') }} + {{ boxes.box('Publishers by type', '', 'img/aggregate/publisher_types.png', None, 'img/aggregate/publisher_types_legend.png', + description='This graph shows the various types of organisations publishing IATI data.') }} +
    + + +
    +
    +
    +
    +

    (This table as CSV)

    +

    List of currently active IATI publishers. Click on the publisher name for more details.

    + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + + + + + + + + + {% for publisher_title,publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + + + + + + + + + + + {% endfor %} + +
    Publisher NamePublisher Registry IdActivities (J)Organisations (J)Files (*)Total File Size (J)Hierarchies (J)Reporting Orgs (J)
    {{ publisher_name[publisher] }}{{ publisher }}{{ current_stats.inverted_publisher.activities[publisher] }}{{ publisher_stats.organisations }}{{ current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher) }}{{ current_stats.inverted_publisher.file_size.get(publisher)|filesizeformat }}{{ publisher_stats.hierarchies|length }}{{ publisher_stats.reporting_orgs|length }}
    +
    +

    * Files is the sum of Activity Files (J) and Organisation Files (J).

    +
    +
    +{% endblock %} +{% block tablesorteroptions %} +{textExtraction:{5: function(node,table,cellIndex) { return $(node).attr('data-bytes'); } }} +{% endblock %} diff --git a/static/templates/publishing_stats.html b/dashboard/templates/publishing_stats.html similarity index 67% rename from static/templates/publishing_stats.html rename to dashboard/templates/publishing_stats.html index 3da3877e5a..6e42aba7d1 100644 --- a/static/templates/publishing_stats.html +++ b/dashboard/templates/publishing_stats.html @@ -1,6 +1,6 @@ {% extends 'section_index.html' %} {% block about %} -

    The statistics on IATI data that are calculated routinely and displayed on this dashboard are now a central part of the service that the Technical Team provides to publishers and users of IATI data alike: for publishers to better understand how to improve their data; for users to assess which data is likely to meet their particular needs; and for the technical team itself to prioritise its commitments to data quality.

    +

    The statistics on IATI data that are calculated routinely and displayed on this dashboard are now a central part of the service that the IATI Secretariat provides to publishers and users of IATI data alike: for publishers to better understand how to improve their data; for users to assess which data is likely to meet their particular needs; and for the IATI Secretariat itself to prioritise its commitments to data quality.

    It is also important that a consistent approach is developed in the presentation of statistics in IATI's annual report and the Dashboard's own calculations, as well as when IATI is asked to contribute to monitoring reports. This section of the Dashboard is focused to do just that.

    @@ -16,9 +16,6 @@

    The statistics on these pages are refreshed frequently. The methodology is open to inspection and will be changed in response to feedback and discussion.

    -

    The overriding concern of the Technical Team is that the methodologies being tested here gain buy-in from our members and publishers so that they can be used as a credible benchmark in improving the quality of IATI data.

    +

    The overriding concern of the IATI Secretariat is that the methodologies being tested here gain buy-in from our members and publishers so that they can be used as a credible benchmark in improving the quality of IATI data.

    {% endblock about %} - -{% block publishers %} -{% endblock publishers %} diff --git a/static/templates/registration_agencies.html b/dashboard/templates/registration_agencies.html similarity index 60% rename from static/templates/registration_agencies.html rename to dashboard/templates/registration_agencies.html index e85a2c5de1..67d0ee214e 100644 --- a/static/templates/registration_agencies.html +++ b/dashboard/templates/registration_agencies.html @@ -1,7 +1,9 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %} +
    +

    Looking up reporting org identifiers against the registration agency codelist.

    @@ -17,11 +19,11 @@

    Publishers -{% for registration_agency, count in sorted(registration_agencies.items()) %} +{% for registration_agency, count in func.sorted(registration_agencies.items()) %} - {{registration_agency}} - {{count}} - {{registration_agencies_publishers[registration_agency]|count}} + {{ registration_agency }} + {{ count }} + {{ registration_agencies_publishers[registration_agency]|count }} {% endfor %} @@ -48,15 +50,16 @@

    {% for orgid, publishers in nonmatching %} {% for publisher, count in publishers.items() %} - {{orgid|replace(' ', ' ')}} - {{publisher}} - {{publisher_name[publisher]}} - {{count}} + {{ orgid|replace(' ', ' ') }} + {{ publisher }} + {{ publisher_name[publisher] }} + {{ count }} {% endfor %} {% endfor %}

    - +
    +
    {% endblock %} diff --git a/dashboard/templates/reporting_orgs.html b/dashboard/templates/reporting_orgs.html new file mode 100644 index 0000000000..3d2702f9aa --- /dev/null +++ b/dashboard/templates/reporting_orgs.html @@ -0,0 +1,42 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
    +
    +
    +
    +

    + Inconsistent Reporting Org references +

    +
    +
    +

    List of Publishers where the reporting-org element does not match the reporting-org field in the IATI Registry.

    + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + + + {% for publisher_title,publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + {% set reporting_orgs_key = publisher_stats.reporting_orgs.keys()|first %} + {% if publisher_stats.reporting_orgs|count != 1 or reporting_orgs_key != ckan_publishers[publisher].result.publisher_iati_id %} + + + + + + + {% endif %} + {% endfor %} + +
    PublisherReporting Org on RegistryReporting Orgs in Data (count)Reporting Orgs in Data
    {{ publisher_title }}{{ ckan_publishers[publisher].result.publisher_iati_id }}{{ publisher_stats.reporting_orgs|length }}{% for ro in publisher_stats.reporting_orgs %}{{ ro }} {% endfor %}
    +
    +
    +
    +{% endblock %} + diff --git a/static/templates/section_index.html b/dashboard/templates/section_index.html similarity index 64% rename from static/templates/section_index.html rename to dashboard/templates/section_index.html index 8bfbae39a6..30c11da5b1 100644 --- a/static/templates/section_index.html +++ b/dashboard/templates/section_index.html @@ -27,24 +27,16 @@

      {% for item in (top_navigation if page=='index' else navigation[navigation_reverse[page]]) %} - {% if item!='index' %} -
    • -

      {{page_titles[item]}}

      -

      {{page_leads[item]|safe}}

      -
    • +
    • +

      {{ page_titles[item] }}

      + {% if item in page_leads %} +

      {{ page_leads[item]|safe }}

      {% endif %} +
    • {% endfor %}
    - -{% block publishers %} -

    Publishers

    -{% for publisher_title,publisher in publishers_ordered_by_title %} -
  • {{publisher_title}}
  • -{% endfor %} -{% endblock %} - {% endblock content %} diff --git a/dashboard/templates/summary_stats.html b/dashboard/templates/summary_stats.html new file mode 100644 index 0000000000..ac9703e84d --- /dev/null +++ b/dashboard/templates/summary_stats.html @@ -0,0 +1,164 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} + + + +
    +
    + (This table as CSV) +

    Summary Statistics

    +
    + + +
    +

    This table assesses all IATI publishers by scoring three dimensions – Timeliness, Forward-looking and Comprehensiveness. The methodology is explained below the table and in the related Publisher Statistics pages. In summary:

    + +

    {Score} = ( {Timeliness} + {Forward looking} + {Comprehensiveness} ) / 3  

    + + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + {% for column_slug, column_header in summary_stats.columns %} + + {% endfor %} + + + + {% for row in summary_stats.table() %} + + + {% for column_slug, column_header in summary_stats.columns %} + + + {% endfor %} + +
    Publisher Name{{ column_header }}
    {{ row.publisher_title }}{% if column_slug == "publisher_type" %}{{ row[column_slug] }}{% else %}{{ row[column_slug] | round_nicely }}{% endif %} + {% endfor %} +
    +
    + + +
    +
    +

    Narrative

    +
    +
    +

    Timeliness

    +

    This is calculated by scoring the assessments made on the + frequency and timelag pages on a scale of + 0 to 4 (as below), dividing the sum of the two scores by 8, and expressing the result as + a percentage. The methodology used in making the assessments is detailed on the frequency and timelag pages. +

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Frequency assessmentScore
    Monthly4
    Quarterly3
    Six-Monthly2
    Annual1
    Less than Annual0
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Time lag assessmentScore
    One month4
    A quarter3
    Six months2
    One year1
    More than one year0
    + + +

    Forward looking

    +

    The average percentage of current activities with budgets for each of the years {{ current_year }} - {{ current_year + 2 }}. + The component values and a detailed methodology are displayed on the forward looking page. +

    + + +

    Comprehensiveness

    +

    The average of comprehensiveness averages for core, financials and value-added. The core average has a double-weighting.

    + + +

    Score

    +

    The mean average of the three values above.

    + +

    {Score} = ( {Timeliness} + {Forward looking} + {Comprehensiveness} ) / 3

    + +
    +
    + + +
    +
    +

    Exceptions

    +
    +
    +
    Secondary reporters
    +

    Publishers who publish all of their activities as a secondary reporter do not appear in this table. + {% if summary_stats.secondary_publishers|length > 0 %} + Therefore, a total of {{ summary_stats.secondary_publishers|length }} publishers have been excluded in this regard: +

      + {% for publisher in summary_stats.secondary_publishers %} +
    • {{ publisher_name[publisher] }}
    • + {% endfor %} +
    + {% endif %} +

    +
    +
    + + +{% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'] +} +{% endblock %} diff --git a/dashboard/templates/timeliness.html b/dashboard/templates/timeliness.html new file mode 100644 index 0000000000..d4cc3f77cf --- /dev/null +++ b/dashboard/templates/timeliness.html @@ -0,0 +1,289 @@ +{% extends 'timeliness_base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block frequency_li %} class="active"{% endblock %} + +{% block content %} +
    + +
    + (This table as CSV) +

    Table of Frequency assessments

    +
    + + + +
    +

    This table seeks to measure how often a publisher updates their data. As transactions are the most numerous element reported in IATI the adopted methodology assumes that a publisher has updated their data if a transaction with a more recent transaction date than previously published is detected across the publisher's entire portfolio.

    + +

    The table records the number of days in each of the last twelve months on which the most recently recorded transaction date was observed to have changed. (The current month is also displayed for informational purposes, but is not used in the assessment.)

    + +

    Key:
    + Red flag: Publisher currently publishing future transaction dates.
    + Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

    + + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% for publisher, publisher_title, per_month, assessment, hft, first_published_band in timeliness.publisher_frequency_sorted() %} + + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% endfor %} + +
    Publisher Name + First published + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} + + Frequency +
    {{ timeliness.short_month(month) }}{{ timeliness.short_month(timeliness.this_month) }}
    {{ publisher_title }}{{ first_published_band }}{{ per_month[month] or 0 }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}
    +
    + + +
    +
    +

    Summary of Publisher Performance

    +
    + + + + + + + + + {% set summary = timeliness.publisher_frequency_summary() %} + {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.frequency_index) %} + + + + + {% endfor %} + + + + + + +
    FrequencyCount
    {{ assessment }}{{ count }}
    Total{{ summary.values()|sum }}
    +
    + + + +
    +
    +

    Narrative

    +
    +
    +

    The frequency statistics attempt to assess how often any part of a publisher's data is substantively updated.

    + +

    For the purposes of these statistics an update is assumed to have taken place on any given day when the most recently recorded transaction date across a publisher's entire portfolio is observed to have changed to a more recent date. This approach has been adopted as transactions are the most numerous and most frequently updated elements in the reporting of activities.

    + +

    The table of statistics records the number of days in each of the last twelve calendar months (the current month is also displayed for informational purposes, but is not used in the assessment) on which the most recently recorded transaction date was observed to have changed. A daily statistical snapshot is maintained, which allows for this data to be recalculated using historical recordings.

    +
    +
    + + + +
    +
    +

    Assessment

    +
    +
    +

    To assess these statistics one also has to take into account how long a publisher has been publishing to IATI. This is calculated based on when a publisher first appeared in the statistical snapshot.

    +

    These statistics are then assessed as follows:

    + +

    For publishers of 1 year or more

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    7 or more of the past 12 full months AND data updated at least once in last 2 full monthsMonthly
    3 or more of the past 12 full months AND data updated at least once in last 4 full monthsQuarterly
    2 of the last 6 month periodsSix-monthly
    1 of the last 12 monthsAnnual
    None of the last 12 monthsLess than annual
    + +

    For publishers of six months or more

    + + + + + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    4 of the last 6 monthsMonthly
    2 of the last 2 quartersQuarterly
    1 of the last 12 monthsAnnual
    + +

    For publishers of three months or more

    + + + + + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    3 of the last 3 monthsMonthly
    1 of the last 6 monthsAnnual
    + +

    For publishers of less than 3 months

    + + + + + + + + + + + +
    Updates reported in ... + Assessment +
    1 of the last 3 monthsAnnual
    +
    +
    + + + +
    +
    +

    Exceptions

    +
    +
    +

    Future transaction dates disrupt these statistics. For example a publisher might today report a transaction date for each month for the next year and never refresh their data. Over the next year, as each of these future dates move into the past, the statistics would incorrectly give the publisher a frequency assessment of monthly, even though they did not refresh their data.

    + +

    Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

    + +

    We plan to exclude activities in which future transactions dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

    +
    +
    + + + +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    +

    This methodology is substantially different. + +

    In the original Indicator methodology the IATI Registry log dates were analysed to assess when updates had been made. This approach was flawed as the Registry logs record any change, no matter how trivial. A spelling correction, for example, would count as an update. Similarly if a publisher's file was inaccessible, its reappearance would count as an update.

    +
    +
    + + + +
    +
    +

    Pseudocode

    +
    +
    +

    To get a count of updates by calendar month (for a given publisher):

    +
    +For data captured each day over the past year
    +    For each transaction (of any type) in the publisher's data
    +        Get the transaction date as follows:
    +            If transaction-date exists
    +                If transaction-date/@iso-date exists
    +                    Use transaction-date/@iso-date
    +                Else
    +                    Use transaction-date/text()
    +            Else if value/@value-date exists
    +                Use value/@value-date
    +            Else the transaction is ignored
    +        Parse the start of the transaction date as an iso date (yyyy-mm-dd...).
    +            If it does not match, the transaction is ignored.
    +        Store a record of this transaction date.
    +    Of the recorded dates, find the latest date that is on or before the date the data was captured.
    +    Record this date against the date of data capture
    +previous transaction date = 0001-01-01
    +Loop over the list of dates
    +    If transaction date > previous transaction date
    +        previous transaction date = transaction date
    +        Record an update as having happened on this day
    +Count the updates by calendar month
    +
    +
    +
    +{% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { + 1: function(node, table, cellIndex) { + return $(node).attr('data-index'); + }, + 15: function(node, table, cellIndex) { + return $(node).attr('data-severity'); + }, + 16: function(node, table, cellIndex) { + return $(node).attr('data-index'); + } + } +} +{% endblock %} diff --git a/dashboard/templates/timeliness_base.html b/dashboard/templates/timeliness_base.html new file mode 100644 index 0000000000..48369492df --- /dev/null +++ b/dashboard/templates/timeliness_base.html @@ -0,0 +1,35 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block container %} + + {% block page_header_div %} + {{ super() }} + {% endblock %} + + + + + + {% block content %} + {% endblock %} +{% endblock %} + +{% block tablesortertarget %}table#main_table{% endblock %} diff --git a/dashboard/templates/timeliness_timelag.html b/dashboard/templates/timeliness_timelag.html new file mode 100644 index 0000000000..44fef92aca --- /dev/null +++ b/dashboard/templates/timeliness_timelag.html @@ -0,0 +1,219 @@ +{% extends 'timeliness_base.html' %} +{% import '_partials/boxes.html' as boxes with context %} + +{% block timelag_li %} class="active"{% endblock %} + +{% block content %} +
    + +
    + (This table as CSV) +

    Table of Time lag assessments

    +
    + + +
    +

    The time-lag statistics attempt to assess how up to date the data is at the point that it is refreshed. For instance a publisher may refresh their data monthly, but the refreshed data is in fact three months old. Alternatively a publisher may only refresh their data once a year, but when they do it contains current data that is less than one month out of date. Transactions are the most numerous and most regularly refreshed elements in reported IATI activities and they are therefore used to make this assessment. The table of statistics shows the number of transaction dates reported in each of the last twelve calendar months. The current month is shown for informational purposes, but excluded from the assessment.

    + +

    Key:
    + Red flag: Publisher currently publishing future transaction dates.
    + Yellow flag: Publisher not currently publishing future transaction dates, but did report future transactions at some point in the last twelve calendar months (See exceptions).

    + + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% for publisher, publisher_title, per_month, assessment, hft in timeliness.publisher_timelag_sorted() %} + + + {% for month in timeliness.previous_months_reversed %} + + {% endfor %} + + + + + {% endfor %} + +
    Publisher Name + {{ timeliness.this_year-1 }} + {{ timeliness.this_year }} + + Time lag +
    {{ timeliness.short_month(month) }}{{ timeliness.short_month(timeliness.this_month) }}
    {{ publisher_title }}{{ per_month[month] or 0 }}{{ per_month[timeliness.this_month] or 0 }}{% if hft %}*{% endif %}{{ assessment }}
    +
    + + + + +
    +
    +

    Summary of Publisher Performance

    +
    + + + + + + + + + {% set summary = timeliness.publisher_timelag_summary() %} + {% for assessment, count in timeliness.sort_first(summary.items(), timeliness.timelag_index) %} + + + + + {% endfor %} + + + + + + +
    FrequencyCount
    {{ assessment }}{{ count }}
    Total{{ summary.values()|sum }}
    +
    + + + + +
    +
    +

    Narrative

    +
    +
    +

    The time-lag statistics attempt to assess how up to date the data is at the point that it is refreshed. For instance a publisher may refresh their data monthly, but the refreshed data is in fact three months old. Alternatively a publisher may only refresh their data once a year, but when they do it contains current data that is less than one month out of date.

    + +

    Transactions are the most numerous and most regularly refreshed elements in reported IATI activities and they are therefore used to make this assessment.

    + +

    The table of statistics shows the number of transaction dates reported in each of the last twelve calendar months. The current month is shown for informational purposes, but excluded from the assessment.

    +
    +
    + + + + + +
    +
    +

    Assessment

    +
    +
    +

    These statistics are assessed as follows:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Transactions reported for ...Assessment
    2 of the last 3 monthsOne month in arrears
    1 of the last 3 monthsA quarter in arrears
    Any of the last 6 monthsSix months in arrears
    1 of the last 12 monthsOne year in arrears
    None of the last 12 monthsMore than one year in arrears
    +
    +
    + + + + +
    +
    +

    Exceptions

    +
    +
    +

    Future transaction dates disrupt these statistics. For example a publisher might today report a transaction date for each month for the next year and never refresh their data. Over the next year, as each of these future dates move into the past, the statistics would incorrectly give the publisher a time-lag assessment of one month in arrears, even though they did not refresh their data.

    + +

    Future transaction dates may affect the assessments on this page. Publishers who currently have future transaction dates have a red flag next to their assessment. A yellow flag indicates that although a publisher does not currently have future transactions, they did report future transactions at some point over the last twelve calendar months.

    + +

    We plan to exclude activities in which future transaction dates are reported from these assessments until such time that a publisher's entire portfolio no longer contains any future dates.

    +
    +
    + + + + +
    +
    +

    Comparison with original Global Partnership Indicator methodology

    +
    +
    +

    No change.

    +
    +
    + + + +
    +
    +

    Pseudocode

    +
    +
    +

    To get a count of transactions by calendar month (for a given publisher):

    +
    +Using most recently captured data.
    +For each transaction (of any type) in the publisher's data:
    +    Get the transaction date as follows:
    +        If transaction-date exists
    +            If transaction-date/@iso-date exists
    +                Use transaction-date/@iso-date
    +            Else
    +                Use transaction-date/text()
    +        Else if value/@value-date exists
    +            Use value/@value-date
    +        Else the transaction is ignored
    +    Parse the start of the transaction date as an iso date (yyyy-mm-dd...).
    +        If it does not match, the transaction is ignored.
    +    Record a count of the transaction date against the calendar year and month
    +
    +
    +
    +{% endblock %} + +{% block tablesorteroptions %} +{ + widgets: ['stickyHeaders'], + textExtraction: { + 4: function(node, table, cellIndex) { + return $(node).attr('data-index'); + }, + 14: function(node, table, cellIndex) { + return $(node).attr('data-severity'); + }, + 15: function(node, table, cellIndex) { + return $(node).attr('data-index'); + } + } +} +{% endblock %} diff --git a/dashboard/templates/traceability.html b/dashboard/templates/traceability.html new file mode 100644 index 0000000000..19eb3a6259 --- /dev/null +++ b/dashboard/templates/traceability.html @@ -0,0 +1,79 @@ +{% extends 'base.html' %} +{% block content %} +
    +
    +
    +

    Traceability

    +
    +

    This page calculates the percentage of publishers’ spending that is traceable.

    +

    This has been assessed based on other publishers’ activities that reference the activity in transaction/provider-org/@provider-activity-id. If an activity is referenced, all of its spending is counted.

    +

    Two calculations are made on this page: Counting the activities, and adding up all of the activities’ spend.

    +

    Spend is the sum of commitments and disbursements.

    + + {% include '_partials/tablesorter_instructions.html' %} +
    + + + + + + + + + + + + {% for publisher_title,publisher in publishers_ordered_by_title %} + {% set publisher_stats = func.get_publisher_stats(publisher) %} + + + + + + + + + + {% endfor %} + +
    PublisherTraceable ActivitiesTotal ActivitiesPercentage Traceable ActivitiesTraceable SpendTotal SpendPercentage Traceable Spend
    {{ publisher_title }} + {%- if publisher_stats.traceable_activities_by_publisher_id -%} + {{ '{:,}'.format(publisher_stats.traceable_activities_by_publisher_id) }} + {%- else -%} + 0 + {%- endif -%} + + {%- if publisher_stats.traceable_activities_by_publisher_id_denominator -%} + {{ '{:,}'.format(publisher_stats.traceable_activities_by_publisher_id_denominator) }} + {%- else -%} + 0 + {%- endif -%} + + {%- if publisher_stats.traceable_activities_by_publisher_id and publisher_stats.traceable_activities_by_publisher_id_denominator -%} + {{ (publisher_stats.traceable_activities_by_publisher_id / publisher_stats.traceable_activities_by_publisher_id_denominator * 100) | round_nicely }} + {%- elif publisher_stats.traceable_activities_by_publisher_id_denominator -%} + 0 + {%- endif -%} + + {%- if publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id -%} + {{ '{:,.2f}'.format(publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id) }} + {%- else -%} + 0.00 + {%- endif -%} + + {%- if publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id_denominator -%} + {{ '{:,.2f}'.format(publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id_denominator) }} + {%- else -%} + 0.00 + {%- endif -%} + + {%- if publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id and publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id_denominator -%} + {{ (publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id / publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id_denominator * 100) | round_nicely }} + {%- elif publisher_stats.traceable_sum_commitments_and_disbursements_by_publisher_id_denominator -%} + 0 + {%- endif -%} +
    +
    +
    +
    +{% endblock %} diff --git a/dashboard/templates/validation.html b/dashboard/templates/validation.html new file mode 100644 index 0000000000..ee09ecd54a --- /dev/null +++ b/dashboard/templates/validation.html @@ -0,0 +1,86 @@ +{% extends 'base.html' %} +{% import '_partials/boxes.html' as boxes with context %} +{% block content %} +
    + {{ boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'img/aggregate/validation.png', 'validation.json', + description='Count of files that do not validate against the relevant schema, over time.') }} + {{ boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'img/aggregate/publishers_validation.png', 'publishers_validation.json', + description='Count of publishers that have at least one invalid file, over time') }} +
    + +
    +
    +

    Breakdown By Publisher

    +
    +
    + +
    +
    + +

    List of files that fail validation, grouped by publisher

    + + {% for publisher in current_stats.inverted_file_publisher %} + {% with datasets = current_stats.inverted_file_publisher[publisher].validation.get('fail', {}) %} + {% if datasets %} +
    + +
    {{ publisher_name[publisher ] }} ({{ datasets|length }})
    + + + + +
    + {% endif %} + {% endwith %} + {% endfor %} +
    + +
    + +

    Count of files that fail validation, per publisher.

    + +
    + + + + + + + {% for publisher in current_stats.inverted_file_publisher %} + {% if 'fail' in current_stats.inverted_file_publisher[publisher].validation %} + + + + + {% endif %} + {% endfor %} + +
    Publisher (J)Failing files (J)
    {{ publisher_name[publisher ] }}{{ current_stats.inverted_file_publisher[publisher].validation.fail|length }}
    +
    +
    +
    +{% endblock %} diff --git a/static/templates/versions.html b/dashboard/templates/versions.html similarity index 52% rename from static/templates/versions.html rename to dashboard/templates/versions.html index 953146fc9f..b6bb50c840 100644 --- a/static/templates/versions.html +++ b/dashboard/templates/versions.html @@ -1,23 +1,20 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %}
    - {{boxes.box('Files per version (expected)', '', 'versions_expected.png', 'versions.json', - description='Count of files per IATI version, over time. Expected: these are actual versions of the IATI Standard.')}} - {{boxes.box('Files per version (other)', '', 'versions_other.png', 'versions.json', - description='Count of files per other versions, over time. These values do not actually exist as IATI versions.')}} + {{ boxes.box('Files per version (expected)', '', 'img/aggregate/versions_expected.png', 'versions.json', 'img/aggregate/versions_expected_legend.png', description='Count of files per IATI version, over time. Expected: these are actual versions of the IATI Standard.') }} + {{ boxes.box('Files per version (other)', '', 'img/aggregate/versions_other.png', 'versions.json', 'img/aggregate/versions_other_legend.png', description='Count of files per other versions, over time. These values do not actually exist as IATI versions.') }}
    - {{boxes.box('Versions legend (expected)', '', 'versions_expected_legend.png')}} - {{boxes.box('Versions legend (other)', '', 'versions_other_legend.png')}} -
    -
    - {{boxes.box('Publishers per version (expected)', '', 'publishers_per_version_expected.png', 'publishers_per_version.json', - description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.')}} - {{boxes.box('Publishers per version (other)', '', 'publishers_per_version_other.png', 'publishers_per_version.json', - description='Count of publishers per other version, over time')}} + {{ boxes.box('Publishers per version (expected)', '', 'img/aggregate/publishers_per_version_expected.png', 'publishers_per_version.json', 'img/aggregate/versions_expected_legend.png', + description='Count of publishers per IATI version, over time. Note: If a publisher utilises two or more versions, they are counted for each.') }} + {{ boxes.box('Publishers per version (other)', '', 'img/aggregate/publishers_per_version_other.png', 'publishers_per_version.json', 'img/aggregate/versions_other_legend.png', + description='Count of publishers per other version, over time') }}
    + {% if 'true' in current_stats.aggregated.version_mismatch %} +
    +

    Inconsistent versions

    Files where the iati-activities/@version does not match iati-activity/@version

    @@ -29,25 +26,34 @@

    Inconsistent versions

    Files - {% if 'true' in current_stats.inverted_file_grouped.version_mismatch %} - {% for publisher,files in current_stats.inverted_file_grouped.version_mismatch.true.items() %} - {{publisher_name[publisher]}} - {% for file in files %} - {{file}} + {% for publisher in current_stats.inverted_file_publisher %} + {% with datasets = current_stats.inverted_file_publisher[publisher].version_mismatch.get('true', {}) %} + {% if datasets %} + {{ publisher_name[publisher] }} + {% for dataset in datasets.keys() %} + {{ dataset[:-4] }} {% endfor %} + {% endif %} + {% endwith %} {% endfor %} - {% endif %}
    +
    +
    + {% endif %} +
    +

    Publishers by version

    -

    (In JSON format)

    +

    (In JSON format)

    +
    +
    -
    +

    Expected versions

    @@ -58,11 +64,11 @@

    Expected versions

    {% for version, publishers in current_stats.inverted_publisher.versions.items() %} {% if version in expected_versions %}
    - {{version|replace(' ', ' ')|safe}} + {{ version|replace(' ', ' ')|safe }} {% for publisher in publishers %} - + {% endfor %}
    {{publisher_name[publisher]}}{{ publisher_name[publisher] }}
    @@ -74,7 +80,7 @@

    Expected versions

    -
    +

    Other versions

    @@ -85,11 +91,11 @@

    Other versions

    {% for version, publishers in current_stats.inverted_publisher.versions.items() %} {% if version not in expected_versions %}
    - {{version|replace(' ', ' ')|safe}} + {{ version|replace(' ', ' ')|safe }} {% for publisher in publishers %} - + {% endfor %}
    {{publisher_name[publisher]}}{{ publisher_name[publisher] }}
    diff --git a/static/templates/xml.html b/dashboard/templates/xml.html similarity index 54% rename from static/templates/xml.html rename to dashboard/templates/xml.html index 01033fbebf..a89979002d 100644 --- a/static/templates/xml.html +++ b/dashboard/templates/xml.html @@ -1,26 +1,20 @@ {% extends 'base.html' %} -{% import 'boxes.html' as boxes %} - -{% block page_header %} -{{ super () }} - -{% endblock %} +{% import '_partials/boxes.html' as boxes with context %} {% block content %} -
    - {{boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'invalidxml.png', 'invalidxml.json', - description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.')}} - {{boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'nonstandardroots.png', 'nonstandardroots.json', - description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

    ')}} + {{ boxes.box('Files where XML is not well-formed', current_stats.aggregated.invalidxml, 'img/aggregate/invalidxml.png', 'invalidxml.json', + description='Count of files where the XML that is not well-formed, over time. Note: this is different from validation against the schema.'.format( url('dash-dataquality-validation'))) }} + {{ boxes.box('Files with non-standard roots', current_stats.aggregated.nonstandardroots, 'img/aggregate/nonstandardroots.png', 'nonstandardroots.json', + description='Count of files with non-standard root, over time. Note: Files with non-standard roots are those where the root XML element is not iati-activities or iati-organisation as we would expect.

    ') }}
    - +
    - (J) + (J)

    Files where XML is not well-formed

    @@ -34,8 +28,9 @@

    Files where XML is not well-formed

    {% for dataset, invalid in current_stats.inverted_file.invalidxml.items() %} {% if invalid %} - - + {% set publisher=func.dataset_to_publisher(package) %} + + {% endif %} {% endfor %} @@ -48,7 +43,7 @@

    Files where XML is not well-formed

    {{dataset|dataset_to_publisher}}{{dataset}}{{ publisher }}{{ dataset }}
    - (J) + (J)

    Files with non-standard roots

    @@ -61,8 +56,9 @@

    Files with non-standard roots

    {% for dataset, nonstandard in current_stats.inverted_file.nonstandardroots.items() %} {% if nonstandard %} - - + {% set publisher=func.dataset_to_publisher(package) %} + + {% endif %} {% endfor %} diff --git a/dashboard/tests/__init__.py b/dashboard/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dashboard/tests/test_comprehensiveness.py b/dashboard/tests/test_comprehensiveness.py new file mode 100644 index 0000000000..c35b9ff066 --- /dev/null +++ b/dashboard/tests/test_comprehensiveness.py @@ -0,0 +1,28 @@ +import os +from os import path + +os.chdir(path.normpath(path.join(path.abspath(__file__), "../.."))) + +import comprehensiveness # noqa: E402 + +mock_stats = { + "comprehensiveness": { + "activity-date": 2, + "activity-status": 2, + "recipient_language": 0, + "transaction_spend": 1, + }, + "comprehensiveness_denominator_default": 2, + "comprehensiveness_denominators": {"recipient_language": 0, "transaction_spend": 1, "transaction_traceability": 1}, +} + + +def test_denominator(): + assert comprehensiveness.denominator("activity-date", mock_stats) == 2 + assert comprehensiveness.denominator("transaction_spend", mock_stats) == 1 + assert ( + comprehensiveness.denominator("non_existant_key", mock_stats) == 2 + ) # Passing a non existant key will return the default denominator + assert ( + comprehensiveness.denominator("activity-date", None) == 0 + ) # Passing a 'Falsey' value as the stats param will return 0 diff --git a/dashboard/tests/test_timeliness.py b/dashboard/tests/test_timeliness.py new file mode 100644 index 0000000000..5a9d4207f0 --- /dev/null +++ b/dashboard/tests/test_timeliness.py @@ -0,0 +1,32 @@ +"""Testing of functions in timeliness.py +""" + +import os +from os import path + +os.chdir(path.normpath(path.join(path.abspath(__file__), "../.."))) + +import timeliness # noqa: E402 + + +def test_short_month(): + month_strings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + 
for index, s in enumerate(month_strings): + assert timeliness.short_month("01-{:02d}-2024".format(index + 1)) == s + + +def test_parse_iso_date(): + test_date_1 = timeliness.parse_iso_date("2024-01-01") + assert test_date_1.year == 2024 + assert test_date_1.month == 1 + assert test_date_1.day == 1 + + test_date_2 = timeliness.parse_iso_date("2024-02-29") + assert test_date_2.year == 2024 + assert test_date_2.month == 2 + assert test_date_2.day == 29 + + test_date_3 = timeliness.parse_iso_date("2024-04-01") + assert test_date_3.year == 2024 + assert test_date_3.month == 4 + assert test_date_3.day == 1 diff --git a/dashboard/text.py b/dashboard/text.py new file mode 100644 index 0000000000..47e256f1c4 --- /dev/null +++ b/dashboard/text.py @@ -0,0 +1,200 @@ +"""Text fragments for page titles, navigation and page leaders/sub-leaders +""" + +import copy + +top_titles = { + "index": "Home", + "headlines": "Headlines", + "data_quality": "Data Quality", + "publishing_stats": "Publishing Statistics", + "exploring_data": "Exploring Data", + "faq": "FAQ", +} + +page_titles = { + "index": "Dashboard Home", + "headlines": "Headlines", + "data_quality": "Data Quality", + "exploring_data": "Exploring Data", + "faq": "Frequently Asked Questions", + "publishers": "IATI Publishers", + "files": "IATI Files", + "activities": "IATI Activities", + "download": "Download Errors", + "xml": "XML Errors", + "validation": "Validation Against the Schema", + "versions": "Versions", + "rulesets": "Rulesets", + "licenses": "Licenses listed on the Registry", + "organisation": "Organisation XML Files", + "identifiers": "Duplicate Activity Identifiers", + "registration_agencies": "Registration Agencies", + "reporting_orgs": "Reporting Orgs", + "elements": "Elements", + "codelists": "Codelists", + "booleans": "Booleans", + "dates": "Dates", + "traceability": "Traceability", + "org_ids": "Organisation Identifiers", + "publishing_stats": "Publishing Statistics", + "timeliness": "Timeliness", + 
"forwardlooking": "Forward Looking", + "comprehensiveness": "Comprehensiveness", + "summary_stats": "Summary Statistics", + "humanitarian": "Humanitarian Reporting", +} + +page_leads = { + "index": "Statistics, charts and metrics on data accessed via the IATI Registry.", + "data_quality": "What needs fixing in IATI data?", + "exploring_data": "Which parts of the IATI Standard are being used?", + "headlines": "What is the size, scope and scale of published IATI data?", + "publishers": "How many organisations are publishing IATI data?", + "files": "How many IATI files are published?", + "activities": "How many IATI activities are published?", + "download": "How many files failed to download?", + "xml": "Which files have XML errors?", + "validation": "Which files fail schema validation?", + "versions": 'Which versions of the IATI Standard are being used?', + "rulesets": "How does IATI data test against rulesets?", + "licenses": "Which licences are used by IATI publishers?", + "organisation": "Who is publishing IATI Organisation files?", + "identifiers": "Where are there duplicate IATI identifiers?", + "reporting_orgs": "Where are reporting organisation identifiers inconsistent with the IATI Registry?", + "elements": "How are the IATI Standard elements used by publishers?", + "codelists": "How are codelists used in IATI data?", + "booleans": "How are booleans used in IATI data?", + "dates": "What date ranges do publishers publish data for?", + "traceability": "How much of a publisher’s spending is traceable to other publishers’ activities?", + "org_ids": "Are organisation identifiers being used correctly?", +} +page_sub_leads = { + "publishers": "Publishers represent organisation accounts in the IATI Registry.", + "files": "Files are logged on the IATI Registry by publishers The files contain data on activities and the organisation. 
A publisher may have multiple files, which can contain multiple activities.", + "activities": "Activities are the individual projects found in files. A file can contain one or many activities, from a publisher.", + "download": "Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher's server returns an error message (e.g. because there is no file at that location). Some files that failed to download when last checked may since have become available.", + "xml": "This page shows files that are not well-formed XML, accessed via the IATI Registry.", + "validation": 'IATI files are validated against the appropriate IATI Schema. Note: this is based on the version declared in the file and whether it\'s an activity/organisation file.', + "versions": "Files are reported against a specific version of the IATI Standard, using the version attribute in the iati-activities element.", + "rulesets": "The IATI Ruleset describe constraints, conditions and logics that are additional to the IATI schema. Note: Currently, on the IATI Standard Ruleset is tested.", + "licenses": "Licences are applied to files by publishers on the IATI Registry, and explain how data can be used.", + "organisation": "Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain.", + "identifiers": "Checking the iati-identifier element for duplicate values per publisher. A duplicate appears if a publisher creates two activities with the same identifier.", + "reporting_orgs": "Checking the reporting-org identifiers in IATI data.", + "elements": "Checking usage of all elements/attributes within the IATI Standard.", + "codelists": "Checking usage of codelists across IATI data files.", + "booleans": "Checking usage of booleans across IATI data files. 
Booleans are values that are either true or false. In XML true or 1 can be used for true and false or 0 can be used for false.", +} + +short_page_titles = copy.copy(page_titles) +short_page_titles.update( + { + "publishers": "Publishers", + "files": "Files", + "activities": "Activities", + "validation": "Validation", + "licenses": "Licenses", + "organisation": "Organisation XML", + "identifiers": "Duplicate Identifiers", + } +) + +top_navigation = ["headlines", "data_quality", "publishing_stats", "exploring_data", "faq"] +navigation = { + "headlines": ["publishers", "files", "activities"], + "data_quality": [ + "download", + "xml", + "validation", + "versions", + "licenses", + "organisation", + "identifiers", + "reporting_orgs", + ], + "exploring_data": ["elements", "codelists", "booleans", "dates", "traceability", "org_ids"], + "publishing_stats": ["timeliness", "forwardlooking", "comprehensiveness", "summary_stats", "humanitarian"], +} + +LICENSE_NAMES = { + "notspecified": "Other::License Not Specified", + "odc-pddl": "OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)", + "odc-odbl": "OKD Compliant::Open Data Commons Open Database License (ODbL)", + "odc-by": "OKD Compliant::Open Data Commons Attribution Licence", + "cc-zero": "OKD Compliant::Creative Commons CCZero", + "cc-by": "OKD Compliant::Creative Commons Attribution", + "cc-by-sa": "OKD Compliant::Creative Commons Attribution Share-Alike", + "gfdl": "OKD Compliant::GNU Free Documentation License", + "ukclickusepsi": "OKD Compliant::UK Click Use PSI", + "other-open": "OKD Compliant::Other (Open)", + "other-pd": "OKD Compliant::Other (Public Domain)", + "other-at": "OKD Compliant::Other (Attribution)", + "ukcrown-withrights": "OKD Compliant::UK Crown Copyright with data.gov.uk rights", + "hesa-withrights": "OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights", + "localauth-withrights": "OKD Compliant::Local Authority Copyright with data.gov.uk 
rights", + "uk-ogl": "OKD Compliant::UK Open Government Licence (OGL)", + "met-office-cp": "Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement", + "cc-nc": "Non-OKD Compliant::Creative Commons Non-Commercial (Any)", + "ukcrown": "Non-OKD Compliant::UK Crown Copyright", + "other-nc": "Non-OKD Compliant::Other (Non-Commercial)", + "other-closed": "Non-OKD Compliant::Other (Not Open)", + "bsd-license": "OSI Approved::New and Simplified BSD licenses", + "gpl-2.0": "OSI Approved::GNU General Public License (GPL)", + "gpl-3.0": "OSI Approved::GNU General Public License version 3.0 (GPLv3)", + "lgpl-2.1": 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', + "mit-license": "OSI Approved::MIT license", + "afl-3.0": "OSI Approved::Academic Free License 3.0 (AFL 3.0)", + "apl1.0": "OSI Approved::Adaptive Public License", + "apache": "OSI Approved::Apache Software License", + "apache2.0": "OSI Approved::Apache License, 2.0", + "apsl-2.0": "OSI Approved::Apple Public Source License", + "artistic-license-2.0": "OSI Approved::Artistic license 2.0", + "attribution": "OSI Approved::Attribution Assurance Licenses", + "ca-tosl1.1": "OSI Approved::Computer Associates Trusted Open Source License 1.1", + "cddl1": "OSI Approved::Common Development and Distribution License", + "cpal_1.0": "OSI Approved::Common Public Attribution License 1.0 (CPAL)", + "cuaoffice": "OSI Approved::CUA Office Public License Version 1.0", + "eudatagrid": "OSI Approved::EU DataGrid Software License", + "eclipse-1.0": "OSI Approved::Eclipse Public License", + "ecl2": "OSI Approved::Educational Community License, Version 2.0", + "eiffel": "OSI Approved::Eiffel Forum License", + "ver2_eiffel": "OSI Approved::Eiffel Forum License V2.0", + "entessa": "OSI Approved::Entessa Public License", + "fair": "OSI Approved::Fair License", + "frameworx": "OSI Approved::Frameworx License", + "ibmpl": "OSI Approved::IBM Public License", + "intel-osl": "OSI Approved::Intel Open Source 
License", + "jabber-osl": "OSI Approved::Jabber Open Source License", + "lucent-plan9": "OSI Approved::Lucent Public License (Plan9)", + "lucent1.02": "OSI Approved::Lucent Public License Version 1.02", + "mitre": "OSI Approved::MITRE Collaborative Virtual Workspace License (CVW License)", + "motosoto": "OSI Approved::Motosoto License", + "mozilla": "OSI Approved::Mozilla Public License 1.0 (MPL)", + "mozilla1.1": "OSI Approved::Mozilla Public License 1.1 (MPL)", + "nasa1.3": "OSI Approved::NASA Open Source Agreement 1.3", + "naumen": "OSI Approved::Naumen Public License", + "nethack": "OSI Approved::Nethack General Public License", + "nokia": "OSI Approved::Nokia Open Source License", + "oclc2": "OSI Approved::OCLC Research Public License 2.0", + "opengroup": "OSI Approved::Open Group Test Suite License", + "osl-3.0": "OSI Approved::Open Software License 3.0 (OSL 3.0)", + "php": "OSI Approved::PHP License", + "pythonpl": "OSI Approved::Python license", + "PythonSoftFoundation": "OSI Approved::Python Software Foundation License", + "qtpl": "OSI Approved::Qt Public License (QPL)", + "real": "OSI Approved::RealNetworks Public Source License V1.0", + "rpl1.5": "OSI Approved::Reciprocal Public License 1.5 (RPL1.5)", + "ricohpl": "OSI Approved::Ricoh Source Code Public License", + "sleepycat": "OSI Approved::Sleepycat License", + "sun-issl": "OSI Approved::Sun Industry Standards Source License (SISSL)", + "sunpublic": "OSI Approved::Sun Public License", + "sybase": "OSI Approved::Sybase Open Watcom Public License 1.0", + "UoI-NCSA": "OSI Approved::University of Illinois/NCSA Open Source License", + "vovidapl": "OSI Approved::Vovida Software License v. 
1.0", + "W3C": "OSI Approved::W3C License", + "wxwindows": "OSI Approved::wxWindows Library License", + "xnet": "OSI Approved::X.Net License", + "zpl": "OSI Approved::Zope Public License", + "zlib-license": "OSI Approved::zlib/libpng license", +} diff --git a/dashboard/timeliness.py b/dashboard/timeliness.py new file mode 100644 index 0000000000..059a1850d1 --- /dev/null +++ b/dashboard/timeliness.py @@ -0,0 +1,245 @@ +# This file converts raw timeliness data into the associated Publishing Statistics assessments + +import datetime +from collections import Counter, defaultdict + +from dateutil.relativedelta import relativedelta + +import filepaths +from cache import json_cache +from data import JSONDir, get_publisher_stats, get_registry_id_matches, publisher_name + + +def short_month(month_str): + """Return the 'short month' represeentation of a date which is inputted as a string, seperated with dashes + For example '01-03-2012' returns 'Mar' + """ + short_months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + return short_months[int(month_str.split("-")[1]) - 1] + + +def parse_iso_date(d): + """Parse a string representation of a date into a datetime object""" + try: + return datetime.date(int(d[:4]), int(d[5:7]), int(d[8:10])) + except (ValueError, TypeError): + return None + + +def previous_months_generator(d): + """Returns a generator object with the previous month for a given datetime object""" + year = d.year + month = d.month + for i in range(0, 12): + month -= 1 + if month <= 0: + year -= 1 + month = 12 + yield year, month + + +# Store lists of previous months +previous_months = [ + "{}-{}".format(year, str(month).zfill(2)) for year, month in previous_months_generator(datetime.date.today()) +] +previous_months_reversed = list(reversed(previous_months)) + +# Store the current month as a string +today = datetime.date.today() +this_month = "{}-{}".format(today.year, str(today.month).zfill(2)) + +# Store a list of the past 
# Dates for each of the previous 12 months (plus today) and previous 5 years,
# used as band boundaries by the frequency assessment below.
previous_month_days = [today - relativedelta(months=x) for x in range(13)]
previous_year_days = [today - relativedelta(years=x) for x in range(6)]

# Store the current month and year numbers
this_month_number = datetime.datetime.today().month
this_year = datetime.datetime.today().year


@json_cache("timeliness_frequency.json")
def publisher_frequency():
    """Generate the publisher frequency data.

    Generator yielding one 6-tuple per current publisher:
    (publisher_id, publisher_title, updates_per_month, frequency,
     has_future_transactions_flag, first_published_band).
    """

    # Load all the data from 'gitaggregate-publisher-dated' into memory
    gitaggregate_publisher = JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated"))

    # Loop over each publisher - i.e. a publisher folder within 'gitaggregate-publisher-dated'
    for publisher, agg in gitaggregate_publisher.items():

        # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher
        if "most_recent_transaction_date" not in agg:
            continue

        # Skip if this publisher appears in the list of publishers who have since changed their Registry ID
        if publisher in get_registry_id_matches().keys():
            continue

        # updates_per_month maps 'YYYY-MM' -> number of observed data updates in that month
        updates_per_month = defaultdict(int)
        # Sentinel: earliest representable date, so any real date compares greater
        previous_transaction_date = datetime.date(1, 1, 1)

        # Find the most recent transaction date and parse into a datetime object.
        # Iterating the git snapshot dates in order, count a month as "updated"
        # whenever the most recent transaction date strictly increases.
        for gitdate, transaction_date_str in sorted(agg["most_recent_transaction_date"].items()):
            transaction_date = parse_iso_date(transaction_date_str)

            # If transaction date has increased
            if transaction_date is not None and transaction_date > previous_transaction_date:
                previous_transaction_date = transaction_date
                # gitdate[:7] is the 'YYYY-MM' prefix of the ISO date string
                updates_per_month[gitdate[:7]] += 1

        # Find the first date that this publisher made data available, and parse into a datetime object
        first_published_string = sorted(agg["most_recent_transaction_date"])[0]
        first_published = parse_iso_date(first_published_string)

        hft = has_future_transactions(publisher)

        # Implement the assessment logic on https://analytics.codeforiati.org/timeliness.html#h_assesment
        # NOTE(review): if parse_iso_date returned None here, the comparisons
        # below would raise TypeError — presumably the dates always parse;
        # confirm against the stats-generation pipeline.

        if first_published >= previous_month_days[3]:
            # This is a publisher of less than 3 months
            first_published_band = "Less than 3 months ago"
            frequency = "Annual"
        elif first_published >= previous_month_days[6]:
            # This is a publisher of less than 6 months
            first_published_band = "3-6 months ago"
            # Monthly only if every one of the last 3 full months saw an update
            if all([x in updates_per_month for x in previous_months[:3]]):
                frequency = "Monthly"
            else:
                frequency = "Annual"
        elif first_published >= previous_month_days[12]:
            # This is a publisher of less than 12 months
            first_published_band = "6-12 months ago"
            # Updated in at least 4 of the last 6 full months
            if [x in updates_per_month for x in previous_months[:6]].count(True) >= 4:
                frequency = "Monthly"
            # Updated at least once in each of the two most recent quarters
            elif any([x in updates_per_month for x in previous_months[:3]]) and any(
                [x in updates_per_month for x in previous_months[3:6]]
            ):
                frequency = "Quarterly"
            else:
                frequency = "Annual"
        else:
            if first_published >= previous_year_days[3]:
                first_published_band = "1-3 years ago"
            elif first_published >= previous_year_days[5]:
                first_published_band = "3-5 years ago"
            else:
                first_published_band = "More than 5 years ago"
            # This is a publisher of 1 year or more
            if ([x in updates_per_month for x in previous_months[:12]].count(True) >= 7) and (
                [x in updates_per_month for x in previous_months[:2]].count(True) >= 1
            ):
                # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months.
                frequency = "Monthly"
            elif ([x in updates_per_month for x in previous_months[:12]].count(True) >= 3) and (
                [x in updates_per_month for x in previous_months[:4]].count(True) >= 1
            ):
                # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months.
                frequency = "Quarterly"
            elif any([x in updates_per_month for x in previous_months[:6]]) and any(
                [x in updates_per_month for x in previous_months[6:12]]
            ):
                # There has been an update in 2 of the last 6 month periods
                frequency = "Six-Monthly"
            elif any([x in updates_per_month for x in previous_months[:12]]):
                # There has been an update in 1 of the last 12 months
                frequency = "Annual"
            else:
                # There has been an update in none of the last 12 months
                frequency = "Less than Annual"

        # If the publisher is in the list of current publishers, return a generator object
        if publisher in publisher_name:
            yield publisher, publisher_name.get(publisher), updates_per_month, frequency, hft, first_published_band


def frequency_index(frequency):
    """Rank a frequency label for sorting, most frequent first."""
    return ["Monthly", "Quarterly", "Six-Monthly", "Annual", "Less than Annual"].index(frequency)


def publisher_frequency_sorted():
    """Publisher frequency tuples ordered by frequency rank, then publisher title."""
    return sorted(publisher_frequency(), key=lambda tup: (frequency_index(tup[3]), tup[1]))


def publisher_frequency_dict():
    """Publisher frequency tuples keyed by publisher id, in publisher-id order."""
    publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0])
    data = {}
    for v in publisher_data_list:
        data[v[0]] = v
    return data


def publisher_frequency_summary():
    """Count of publishers per frequency label."""
    return Counter(frequency for _, _, _, frequency, _, _ in publisher_frequency())


def timelag_index(timelag):
    """Rank a timelag label for sorting, shortest lag first."""
    return ["One month", "A quarter", "Six months", "One year", "More than one year"].index(timelag)


def first_published_band_index(first_published_band):
    """Rank a first-published band for sorting, oldest band first."""
    return [
        "More than 5 years ago",
        "3-5 years ago",
        "1-3 years ago",
        "6-12 months ago",
        "3-6 months ago",
        "Less than 3 months ago",
    ].index(first_published_band)


@json_cache("timeliness_timelag.json")
def publisher_timelag():
    """5-tuples of (publisher, title, transaction_months_with_year, timelag, hft) for current publishers."""
    return [
        (
            publisher,
            publisher_name.get(publisher, ""),
            agg["transaction_months_with_year"],
            agg["timelag"],
            has_future_transactions(publisher),
        )
        for publisher, agg in JSONDir(filepaths.join_stats_path("current/aggregated-publisher")).items()
    ]


def publisher_timelag_sorted():
    """Timelag tuples ordered by timelag rank, then publisher title."""
    return sorted(publisher_timelag(), key=lambda tup: (timelag_index(tup[3]), tup[1]))


def publisher_timelag_dict():
    """Timelag tuples keyed by publisher id."""
    return {v[0]: v for v in publisher_timelag()}


def publisher_timelag_summary():
    """Count of publishers per timelag label."""
    return Counter(timelag for _, _, _, timelag, _ in publisher_timelag_sorted())


def has_future_transactions(publisher):
    """
    returns 0, 1 or 2
    Returns 2 if the most recent data for a publisher has future transactions.
    Returns 1 if the publisher has ever had future transactions.
    Returns 0 otherwise.
    """
    today = datetime.date.today()
    publisher_stats = get_publisher_stats(publisher)
    if "transaction_dates" in publisher_stats:
        for transaction_type, transaction_counts in publisher_stats["transaction_dates"].items():
            for transaction_date_string, count in transaction_counts.items():
                transaction_date = parse_iso_date(transaction_date_string)
                # NOTE(review): `today` is already computed above; this re-calls
                # datetime.date.today() — harmless but redundant.
                if transaction_date and transaction_date > datetime.date.today():
                    return 2

    # Fall back to the dated git aggregates: look for any snapshot within the
    # last year whose latest transaction date lay in that snapshot's future.
    gitaggregate_publisher = JSONDir(filepaths.join_stats_path("gitaggregate-publisher-dated")).get(publisher, {})
    mindate = datetime.date(today.year - 1, today.month, 1)
    for date_string, latest_transaction_date_string in gitaggregate_publisher.get(
        "latest_transaction_date", {}
    ).items():
        date = parse_iso_date(date_string)
        latest_transaction_date = parse_iso_date(latest_transaction_date_string)
        if date >= mindate and latest_transaction_date and latest_transaction_date > date:
            return 1
    return 0


def sort_first(list_, key):
    """Sort a list of sequences by applying `key` to each element's first item."""
    return sorted(list_, key=lambda x: key(x[0]))
"""Jinja2 template configuration
"""

import re


def round_nicely(val, ndigits=0):
    """Round a float, but remove the trailing .0 from integers that python insists on.

    `val` may be a number or a numeric string; the sentinel string "-" is
    passed through unchanged.
    """
    if val == "-":
        return val
    val = round(float(val), ndigits)
    if val == int(val):
        return int(val)
    # BUG FIX: the original implementation fell off the end here and
    # implicitly returned None for any value that is not a whole number
    # after rounding (e.g. round_nicely(2.5, 1)). Return the rounded value.
    return val


def xpath_to_url(path):
    """Map an IATI element/attribute XPath to its reference URL on iatistandard.org."""
    path = path.strip("./")
    # remove conditions
    path = re.sub(r"\[[^]]+\]", "", path)
    if path.startswith("iati-activity"):
        url = "http://iatistandard.org/activity-standard/iati-activities/" + path.split("@")[0]
    elif path.startswith("iati-organisation"):
        url = "http://iatistandard.org/organisation-standard/iati-organisations/" + path.split("@")[0]
    else:
        url = "http://iatistandard.org/activity-standard/iati-activities/iati-activity/" + path.split("@")[0]
    if "@" in path:
        url += "#attributes"
    return url


def environment(**options):
    """Build the Jinja2 Environment used by Django's Jinja2 template backend.

    Referenced from settings as "ui.jinja2.environment". Registers the
    `static` and `url` globals plus the Dashboard's custom filters.
    """
    # Imports are deferred so this module can be imported (and the pure
    # helpers above unit-tested) without Django/Jinja2 being importable.
    from django.templatetags.static import static
    from django.urls import reverse
    from jinja2 import Environment

    import timeliness

    env = Environment(**options)
    env.globals.update(
        {
            "static": static,
            "url": reverse,
        }
    )
    env.filters["url_to_filename"] = lambda x: x.rstrip("/").split("/")[-1]
    env.filters["has_future_transactions"] = timeliness.has_future_transactions
    env.filters["xpath_to_url"] = xpath_to_url
    env.filters["round_nicely"] = round_nicely
    return env
"""
Django settings for IATI Dashboard project.

Generated by 'django-admin startproject' using Django 5.1.1.

For more information on this file, see
https://docs.djangoproject.com/en/5.1/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.1/ref/settings/
"""

from pathlib import Path

import environ
from django.utils.crypto import get_random_string

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Fallback SECRET_KEY generated fresh on every process start.
# NOTE(review): with no SECRET_KEY in the environment, each restart gets a new
# key, which invalidates anything signed with it (sessions, CSRF cookies) —
# presumably acceptable for this read-only dashboard; confirm for deployments.
chars = "abcdefghijklmnopqrstuvwxyz0123456789!@#%^&*(-_=+)"
secret_key = get_random_string(50, chars)

env = environ.Env(  # set default values and casting
    DEBUG=(bool, False),
    SECRET_KEY=(str, secret_key),
)


SECRET_KEY = env("SECRET_KEY")

DEBUG = env("DEBUG")

ALLOWED_HOSTS = [".dashboard.iatistandard.org", "testserver", "localhost"]


# Application definition

INSTALLED_APPS = [
    "django.contrib.admin",
    "django.contrib.auth",
    "django.contrib.contenttypes",
    "django.contrib.sessions",
    "django.contrib.messages",
    "django.contrib.staticfiles",
]

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "django.contrib.sessions.middleware.SessionMiddleware",
    "django.middleware.common.CommonMiddleware",
    "django.middleware.csrf.CsrfViewMiddleware",
    "django.contrib.auth.middleware.AuthenticationMiddleware",
    "django.contrib.messages.middleware.MessageMiddleware",
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
]

ROOT_URLCONF = "ui.urls"

# Two template engines: stock Django templates plus a Jinja2 backend whose
# environment factory (globals and custom filters) lives in ui.jinja2.
TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
        },
    },
    {
        "BACKEND": "django.template.backends.jinja2.Jinja2",
        "DIRS": ["templates/"],
        "APP_DIRS": True,
        "OPTIONS": {
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
                "django.contrib.auth.context_processors.auth",
                "django.contrib.messages.context_processors.messages",
            ],
            "environment": "ui.jinja2.environment",
        },
    },
]

WSGI_APPLICATION = "ui.wsgi.application"


# Database
# https://docs.djangoproject.com/en/5.1/ref/settings/#databases

DATABASES = {
    "default": {
        "ENGINE": "django.db.backends.sqlite3",
        "NAME": BASE_DIR / "db.sqlite3",
    }
}


# Password validation
# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
    },
    {
        "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
    },
]


# Internationalization
# https://docs.djangoproject.com/en/5.1/topics/i18n/

LANGUAGE_CODE = "en-us"

TIME_ZONE = "UTC"

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.1/howto/static-files/

STATIC_URL = "static/"
STATICFILES_DIRS = [
    BASE_DIR / "static",
]
# NOTE(review): STATIC_ROOT is relative to the process working directory,
# unlike the BASE_DIR-anchored paths above — confirm collectstatic is always
# run from the dashboard/ directory.
STATIC_ROOT = "../static"

# Default primary key field type
# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

#
# Relative (to dashboard/) paths to IATI data and output directories.
#
DASHBOARD_STATS_DIRECTORY = "../stats-calculated"
DASHBOARD_DATA_DIRECTORY = "../data"
DASHBOARD_BASE_DIRECTORY = "../"
DASHBOARD_OUT_DIRECTORY = "../out"

DASHBOARD_CREATE_CACHE_FILES = False

import re


def firstint(s):
    """Return the first integer found in s[0], or 0 when s[0] starts with "<"."""
    head = s[0]
    if head.startswith("<"):
        return 0
    return int(re.search(r"\d+", head).group(0))


def get_codelist_values(codelist_values_for_element):
    """Return a list of unique values present within a one-level nested dictionary.

    Envisaged usage is to gather the codelist values used by each publisher, as in
    stats/current/inverted-publisher/codelist_values_by_major_version.json
    Input: Set of codelist values for a given element (listed by publisher), for example:
    current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang']
    """
    unique_values = set()
    for per_publisher_values in codelist_values_for_element.values():
        unique_values.update(per_publisher_values)
    return list(unique_values)
+ """ + + def test_top_pages(self): + """Test the index and top hierarchy pages return a 200 status code""" + + self.assertEqual(self.client.get(reverse("dash-index")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-faq")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-registrationagencies")).status_code, 200) + + def test_headlines(self): + """Test the headlines pages""" + + self.assertEqual(self.client.get(reverse("dash-headlines-files")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-activities")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publishers")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-headlines-publisher-detail", args=("undp",))).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-headlines-publisher-detail", args=("not-a-valid-publisher",))).status_code, + 404, + ) + + def test_dataquality(self): + """Test the data quality pages""" + + self.assertEqual(self.client.get(reverse("dash-dataquality-download")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-download-json")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-xml")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-validation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-versions")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-organisation")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-identifiers")).status_code, 200) + 
self.assertEqual(self.client.get(reverse("dash-dataquality-reportingorgs")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-dataquality-licenses")).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-dataquality-licenses-detail", args=("cc-by",))).status_code, 200 + ) + self.assertEqual( + self.client.get(reverse("dash-dataquality-licenses-detail", args=("not-a-valid-license",))).status_code, + 404, + ) + + def test_publishingstats_timeliness(self): + """Test timeliness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-timeliness-timelag")).status_code, 200) + + def test_publishingstats_comprehensiveness(self): + """Test comprehensiveness pages in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-publishingstats-comprehensiveness-core")).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-publishingstats-comprehensiveness-financials")).status_code, 200 + ) + self.assertEqual( + self.client.get(reverse("dash-publishingstats-comprehensiveness-valueadded")).status_code, 200 + ) + + def test_publishingstats_forwardlooking(self): + """Test the forward looking page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-forwardlooking")).status_code, 200) + + def test_publishingstats_summarystats(self): + """Test the summary statistics page in the publishing statistics section""" + + self.assertEqual(self.client.get(reverse("dash-publishingstats-summarystats")).status_code, 200) + + def test_publishingstats_humanitarian(self): + """Test the humanitarian page in the publishing statistics section""" + + 
self.assertEqual(self.client.get(reverse("dash-publishingstats-humanitarian")).status_code, 200) + + def test_exploringdata(self): + """Test the exploring data pages""" + self.assertEqual(self.client.get(reverse("dash-exploringdata-booleans")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-codelists")).status_code, 200) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "2", + "budget_@type", + ), + ) + ).status_code, + 200, + ) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "2", + "not-a-valid-slug", + ), + ) + ).status_code, + 404, + ) + self.assertEqual( + self.client.get( + reverse( + "dash-exploringdata-codelists-detail", + args=( + "3", + "budget_@type", + ), + ) + ).status_code, + 404, + ) + self.assertEqual(self.client.get(reverse("dash-exploringdata-dates")).status_code, 200) + self.assertEqual(self.client.get(reverse("dash-exploringdata-elements")).status_code, 200) + self.assertEqual( + self.client.get( + reverse("dash-exploringdata-elements-detail", args=("iati-activity_activity-date_narrative",)) + ).status_code, + 200, + ) + self.assertEqual( + self.client.get(reverse("dash-exploringdata-elements-detail", args=("not-a-valid-element",))).status_code, + 404, + ) + self.assertEqual(self.client.get(reverse("dash-exploringdata-orgids")).status_code, 200) + self.assertEqual( + self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("funding_org",))).status_code, 200 + ) + self.assertEqual( + self.client.get(reverse("dash-exploringdata-orgtypes-detail", args=("not-a-valid-org-type",))).status_code, + 404, + ) + self.assertEqual(self.client.get(reverse("dash-exploringdata-traceability")).status_code, 200) + + +class OriginalDashboardRedirectTests(TestCase): + """Perform basic HTTP 301 redirection checks on the Dashboard pages + + These are split up into a number of functions because some can + take some time 
to run and so running with the "-v 2" flag will + list the tests as they run. + """ + + def _url_and_view_helper(self, urls_and_views_to_check): + """Checks that a set of URLs redirect to matching view functions""" + + for url, view_name in urls_and_views_to_check.items(): + self.assertRedirects(self.client.get(f"/{url}.html"), reverse(view_name), status_code=301) + + def test_headlines_and_misc(self): + """Test headlines and miscellaneous pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper( + { + "index": "dash-index", + "headlines": "dash-headlines", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publishers": "dash-headlines-publishers", + "faq": "dash-faq", + "registration_agencies": "dash-registrationagencies", + } + ) + + def test_dataquality(self): + """Test data quality pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. 
+ self._url_and_view_helper( + { + "data_quality": "dash-dataquality", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "licenses": "dash-dataquality-licenses", + } + ) + + def test_publishingstats(self): + """Test publishing stats pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. + self._url_and_view_helper( + { + "publishing_stats": "dash-publishingstats", + "timeliness": "dash-publishingstats-timeliness", + "timeliness_timelag": "dash-publishingstats-timeliness-timelag", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "comprehensiveness_core": "dash-publishingstats-comprehensiveness-core", + "comprehensiveness_financials": "dash-publishingstats-comprehensiveness-financials", + "comprehensiveness_valueadded": "dash-publishingstats-comprehensiveness-valueadded", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian", + } + ) + + def test_exploringdata(self): + """Test exploring data pages redirect to their new locations""" + + # This is not particularly DRY as a similar dictionary is created in views.py + # but I think this is minor as that may disappear from views.py in a future + # refactor of what goes into the context. 
+ self._url_and_view_helper( + { + "exploring_data": "dash-exploringdata-elements", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids", + } + ) + + def test_slug_page_redirects(self): + """Test pages with slugs redirect to the section page""" + + self.assertRedirects( + self.client.get(r"/publisher/undp.html"), reverse("dash-headlines-publishers"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/license/cc-by.html"), reverse("dash-dataquality-licenses"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/codelist/2/budget_@type.html"), reverse("dash-exploringdata-codelists"), status_code=301 + ) + self.assertRedirects( + self.client.get(r"/element/iati-activity_activity-date_narrative.html"), + reverse("dash-exploringdata-elements"), + status_code=301, + ) + self.assertRedirects( + self.client.get(r"/org_type/funding_org.html"), reverse("dash-exploringdata-orgids"), status_code=301 + ) diff --git a/dashboard/ui/urls.py b/dashboard/ui/urls.py new file mode 100644 index 0000000000..9ed48452b9 --- /dev/null +++ b/dashboard/ui/urls.py @@ -0,0 +1,213 @@ +""" +URL configuration for IATI Dashboard project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.1/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" + +from django.conf.urls.static import static +from django.contrib import admin +from django.urls import path, re_path +from django.views.generic.base import RedirectView + +import ui.views + +urlpatterns = ( + [ + path("admin/", admin.site.urls), + # Top level dashboard pages. + path("", ui.views.index, name="dash-index"), + path("headlines", ui.views.headlines, name="dash-headlines"), + path("data-quality", ui.views.data_quality, name="dash-dataquality"), + path("publishing-statistics", ui.views.publishing_stats, name="dash-publishingstats"), + path("exploring-data", ui.views.exploring_data, name="dash-exploringdata"), + path("faq", ui.views.faq, name="dash-faq"), + # Headlines pages and detail pages - placeholders for now. + path("headlines/publishers", ui.views.headlines_publishers, name="dash-headlines-publishers"), + path("headlines/files", ui.views.headlines_files, name="dash-headlines-files"), + path("headlines/activities", ui.views.headlines_activities, name="dash-headlines-activities"), + path( + "headlines/publishers/", + ui.views.headlines_publisher_detail, + name="dash-headlines-publisher-detail", + ), + # Data quality pages. 
+ path("data-quality/download-errors", ui.views.dataquality_download, name="dash-dataquality-download"), + path( + "data/download_errors.json", + ui.views.dataquality_download_errorsjson, + name="dash-dataquality-download-json", + ), + path("data-quality/xml-errors", ui.views.dataquality_xml, name="dash-dataquality-xml"), + path("data-quality/validation", ui.views.dataquality_validation, name="dash-dataquality-validation"), + path("data-quality/versions", ui.views.dataquality_versions, name="dash-dataquality-versions"), + path("data-quality/organisation", ui.views.dataquality_orgxml, name="dash-dataquality-organisation"), + path("data-quality/licenses", ui.views.dataquality_licenses, name="dash-dataquality-licenses"), + path( + "data-quality/licenses/", + ui.views.dataquality_licenses_detail, + name="dash-dataquality-licenses-detail", + ), + path("data-quality/identifiers", ui.views.dataquality_identifiers, name="dash-dataquality-identifiers"), + path("data-quality/reporting-orgs", ui.views.dataquality_reportingorgs, name="dash-dataquality-reportingorgs"), + # Exploring data pages. 
+ path("exploring-data/elements", ui.views.exploringdata_elements, name="dash-exploringdata-elements"), + path( + "exploring-data/elements/", + ui.views.exploringdata_element_detail, + name="dash-exploringdata-elements-detail", + ), + path("exploring-data/codelists", ui.views.exploringdata_codelists, name="dash-exploringdata-codelists"), + path( + "exploring-data/codelists//", + ui.views.exploringdata_codelists_detail, + name="dash-exploringdata-codelists-detail", + ), + path("exploring-data/booleans", ui.views.exploringdata_booleans, name="dash-exploringdata-booleans"), + path("exploring-data/dates", ui.views.exploringdata_dates, name="dash-exploringdata-dates"), + path( + "exploring-data/traceability", ui.views.exploringdata_traceability, name="dash-exploringdata-traceability" + ), + path( + "exploring-data/organisation-identifiers", ui.views.exploringdata_orgids, name="dash-exploringdata-orgids" + ), + path( + "exploring-data/organisation-type/", + ui.views.exploringdata_orgtypes_detail, + name="dash-exploringdata-orgtypes-detail", + ), + # Publishing statistics pages. 
+ path("publishing-statistics/timeliness", ui.views.pubstats_timeliness, name="dash-publishingstats-timeliness"), + path( + "publishing-statistics/timeliness-timelag", + ui.views.pubstats_timeliness_timelag, + name="dash-publishingstats-timeliness-timelag", + ), + path( + "publishing-statistics/forward-looking", + ui.views.pubstats_forwardlooking, + name="dash-publishingstats-forwardlooking", + ), + path( + "publishing-statistics/comprehensiveness", + ui.views.pubstats_comprehensiveness, + name="dash-publishingstats-comprehensiveness", + ), + path( + "publishing-statistics/comprehensiveness/core", + ui.views.pubstats_comprehensiveness_core, + name="dash-publishingstats-comprehensiveness-core", + ), + path( + "publishing-statistics/comprehensiveness/financials", + ui.views.pubstats_comprehensiveness_financials, + name="dash-publishingstats-comprehensiveness-financials", + ), + path( + "publishing-statistics/comprehensiveness/value-added", + ui.views.pubstats_comprehensiveness_valueadded, + name="dash-publishingstats-comprehensiveness-valueadded", + ), + path( + "publishing-statistics/summary-statistics", + ui.views.pubstats_summarystats, + name="dash-publishingstats-summarystats", + ), + path( + "publishing-statistics/humanitarian-reporting", + ui.views.pubstats_humanitarian, + name="dash-publishingstats-humanitarian", + ), + # Registration agencies. + path("registration-agencies", ui.views.registration_agencies, name="dash-registrationagencies"), + path( + "registration_agencies.html", + RedirectView.as_view(pattern_name="dash-registrationagencies", permanent=True), + ), + # Redirects to support any users with bookmarks to pages on the old Dashboard. 
+ path("index.html", RedirectView.as_view(pattern_name="dash-index", permanent=True)), + path("headlines.html", RedirectView.as_view(pattern_name="dash-headlines", permanent=True)), + path("data_quality.html", RedirectView.as_view(pattern_name="dash-dataquality", permanent=True)), + path("exploring_data.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("publishers.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True)), + path("publishing_stats.html", RedirectView.as_view(pattern_name="dash-publishingstats", permanent=True)), + path("timeliness.html", RedirectView.as_view(pattern_name="dash-publishingstats-timeliness", permanent=True)), + path( + "timeliness_timelag.html", + RedirectView.as_view(pattern_name="dash-publishingstats-timeliness-timelag", permanent=True), + ), + path( + "forwardlooking.html", + RedirectView.as_view(pattern_name="dash-publishingstats-forwardlooking", permanent=True), + ), + path( + "comprehensiveness.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness", permanent=True), + ), + path( + "comprehensiveness_core.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-core", permanent=True), + ), + path( + "comprehensiveness_financials.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-financials", permanent=True), + ), + path( + "comprehensiveness_valueadded.html", + RedirectView.as_view(pattern_name="dash-publishingstats-comprehensiveness-valueadded", permanent=True), + ), + path( + "summary_stats.html", + RedirectView.as_view(pattern_name="dash-publishingstats-summarystats", permanent=True), + ), + path( + "humanitarian.html", RedirectView.as_view(pattern_name="dash-publishingstats-humanitarian", permanent=True) + ), + path("files.html", RedirectView.as_view(pattern_name="dash-headlines-files", permanent=True)), + path("activities.html", 
RedirectView.as_view(pattern_name="dash-headlines-activities", permanent=True)), + path("download.html", RedirectView.as_view(pattern_name="dash-dataquality-download", permanent=True)), + path("xml.html", RedirectView.as_view(pattern_name="dash-dataquality-xml", permanent=True)), + path("validation.html", RedirectView.as_view(pattern_name="dash-dataquality-validation", permanent=True)), + path("versions.html", RedirectView.as_view(pattern_name="dash-dataquality-versions", permanent=True)), + path("organisation.html", RedirectView.as_view(pattern_name="dash-dataquality-organisation", permanent=True)), + path("identifiers.html", RedirectView.as_view(pattern_name="dash-dataquality-identifiers", permanent=True)), + path( + "reporting_orgs.html", RedirectView.as_view(pattern_name="dash-dataquality-reportingorgs", permanent=True) + ), + path("elements.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True)), + path("codelists.html", RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True)), + path("booleans.html", RedirectView.as_view(pattern_name="dash-exploringdata-booleans", permanent=True)), + path("dates.html", RedirectView.as_view(pattern_name="dash-exploringdata-dates", permanent=True)), + path( + "traceability.html", RedirectView.as_view(pattern_name="dash-exploringdata-traceability", permanent=True) + ), + path("org_ids.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + path("faq.html", RedirectView.as_view(pattern_name="dash-faq", permanent=True)), + path("licenses.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path(r"license\/\S*.html", RedirectView.as_view(pattern_name="dash-dataquality-licenses", permanent=True)), + re_path( + r"publisher\/\S*.html", RedirectView.as_view(pattern_name="dash-headlines-publishers", permanent=True) + ), + re_path( + r"codelist\/\d\/\S*.html", + 
RedirectView.as_view(pattern_name="dash-exploringdata-codelists", permanent=True), + ), + re_path( + r"element\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-elements", permanent=True) + ), + re_path(r"org_type\/\S*.html", RedirectView.as_view(pattern_name="dash-exploringdata-orgids", permanent=True)), + ] + + static("generated", document_root="../out") + + static("stats", document_root="../stats-calculated") +) +# ^ Serve generated files when using runserver for development diff --git a/dashboard/ui/views.py b/dashboard/ui/views.py new file mode 100644 index 0000000000..f61fe75afd --- /dev/null +++ b/dashboard/ui/views.py @@ -0,0 +1,579 @@ +"""Views for the IATI Dashboard""" + +import collections +import datetime +import json +import subprocess + +import dateutil.parser +from django.http import Http404, HttpResponse +from django.template import loader + +import comprehensiveness +import filepaths +import forwardlooking +import humanitarian +import summary_stats +import text +import timeliness +import ui.template_funcs +import vars +from data import ( + MAJOR_VERSIONS, + ckan, + ckan_publishers, + codelist_lookup, + codelist_mapping, + codelist_sets, + current_stats, + dataset_to_publisher_dict, + get_publisher_stats, + github_issues, + is_valid_element_or_attribute, + publisher_name, + publishers_ordered_by_title, + slugs, +) + +COMMIT_HASH = ( + subprocess.run("git show --format=%H --no-patch".split(), cwd=filepaths.join_base_path(""), capture_output=True) + .stdout.decode() + .strip() +) +STATS_COMMIT_HASH = ( + subprocess.run( + "git -C stats-calculated show --format=%H --no-patch".split(), + cwd=filepaths.join_base_path(""), + capture_output=True, + ) + .stdout.decode() + .strip() +) + +# Load all the licenses and generate data for each licence and publisher. 
+with open(filepaths.join_stats_path("licenses.json")) as handler: + LICENSE_URLS = json.load(handler) + +LICENSES = [ + package["license_id"] if package["license_id"] is not None else "notspecified" + for _, publisher in ckan.items() + for _, package in publisher.items() +] + +LICENCE_COUNT = dict((x, LICENSES.count(x)) for x in set(LICENSES)) + +LICENSES_AND_PUBLISHER = set( + [ + (package["license_id"] if package["license_id"] is not None else "notspecified", publisher_name) + for publisher_name, publisher in ckan.items() + for package_name, package in publisher.items() + ] +) + +LICENSES_PER_PUBLISHER = [license for license, publisher in LICENSES_AND_PUBLISHER] +PUBLISHER_LICENSE_COUNT = dict((x, LICENSES_PER_PUBLISHER.count(x)) for x in set(LICENSES_PER_PUBLISHER)) + + +def _get_licenses_for_publisher(publisher_name): + # Check publisher is in the compiled list of CKAN data + # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 + if publisher_name not in ckan.keys(): + return set() + + # Return unique licenses used + return set( + [ + package["license_id"] if package["license_id"] is not None else "notspecified" + for package in ckan[publisher_name].values() + ] + ) + + +def _registration_agency(orgid): + for code in codelist_sets["2"]["OrganisationRegistrationAgency"]: + if orgid.startswith(code): + return code + + +def dictinvert(d): + inv = collections.defaultdict(list) + for k, v in d.items(): + inv[v].append(k) + return inv + + +def nested_dictinvert(d): + inv = collections.defaultdict(lambda: collections.defaultdict(int)) + for k, v in d.items(): + for k2, v2 in v.items(): + inv[k2][k] += v2 + return inv + + +def _make_context(page_name: str): + """Make a basic context dictionary for a given page""" + + with open(filepaths.join_stats_path("gitdate.json")) as fp: + date_time_data_str = max(json.load(fp).values()) + date_time_data_obj = dateutil.parser.parse(date_time_data_str) + + context = dict( + page=page_name, + top_titles=text.top_titles, 
+ page_titles=text.page_titles, + short_page_titles=text.short_page_titles, + page_leads=text.page_leads, + page_sub_leads=text.page_sub_leads, + top_navigation=text.top_navigation, + navigation=text.navigation, + navigation_reverse={page: k for k, pages in text.navigation.items() for page in pages}, + page_view_names={ + "index": "dash-index", + "headlines": "dash-headlines", + "data_quality": "dash-dataquality", + "publishing_stats": "dash-publishingstats", + "exploring_data": "dash-exploringdata", + "faq": "dash-faq", + "publishers": "dash-headlines-publishers", + "files": "dash-headlines-files", + "activities": "dash-headlines-activities", + "publisher": "dash-headlines-publisher-detail", + "download": "dash-dataquality-download", + "xml": "dash-dataquality-xml", + "validation": "dash-dataquality-validation", + "versions": "dash-dataquality-versions", + "organisation": "dash-dataquality-organisation", + "licenses": "dash-dataquality-licenses", + "identifiers": "dash-dataquality-identifiers", + "reporting_orgs": "dash-dataquality-reportingorgs", + "elements": "dash-exploringdata-elements", + "codelists": "dash-exploringdata-codelists", + "booleans": "dash-exploringdata-booleans", + "dates": "dash-exploringdata-dates", + "traceability": "dash-exploringdata-traceability", + "org_ids": "dash-exploringdata-orgids", + "timeliness": "dash-publishingstats-timeliness", + "forwardlooking": "dash-publishingstats-forwardlooking", + "comprehensiveness": "dash-publishingstats-comprehensiveness", + "coverage": "dash-publishingstats-coverage", + "summary_stats": "dash-publishingstats-summarystats", + "humanitarian": "dash-publishingstats-humanitarian", + }, + current_stats=current_stats, + publisher_name=publisher_name, + publishers_ordered_by_title=publishers_ordered_by_title, + ckan_publishers=ckan_publishers, + ckan=ckan, + codelist_lookup=codelist_lookup, + codelist_mapping=codelist_mapping, + codelist_sets=codelist_sets, + github_issues=github_issues, + 
MAJOR_VERSIONS=MAJOR_VERSIONS, + expected_versions=vars.expected_versions, + slugs=slugs, + datetime_data=date_time_data_obj.strftime("%-d %B %Y (at %H:%M %Z)"), + current_year=datetime.datetime.now(datetime.UTC).year, + stats_url="/stats", + generated_url="/generated", + commit_hash=COMMIT_HASH, + stats_commit_hash=STATS_COMMIT_HASH, + func={ + "sorted": sorted, + "firstint": ui.template_funcs.firstint, + "get_codelist_values": ui.template_funcs.get_codelist_values, + "dataset_to_publisher": lambda x: dataset_to_publisher_dict.get(x, ""), + "get_publisher_stats": get_publisher_stats, + "is_valid_element_or_attribute": is_valid_element_or_attribute, + "set": set, + "enumerate": enumerate, + }, + ) + context["navigation_reverse"].update({k: k for k in text.navigation}) + + return context + + +# +# Top level navigation pages. +# +def index(request): + template = loader.get_template("index.html") + return HttpResponse(template.render(_make_context("index"), request)) + + +def headlines(request): + template = loader.get_template("headlines.html") + return HttpResponse(template.render(_make_context("headlines"), request)) + + +def data_quality(request): + template = loader.get_template("data_quality.html") + return HttpResponse(template.render(_make_context("data_quality"), request)) + + +def publishing_stats(request): + template = loader.get_template("publishing_stats.html") + return HttpResponse(template.render(_make_context("publishing_stats"), request)) + + +def exploring_data(request): + template = loader.get_template("exploring_data.html") + return HttpResponse(template.render(_make_context("exploring_data"), request)) + + +def faq(request): + template = loader.get_template("faq.html") + return HttpResponse(template.render(_make_context("faq"), request)) + + +# +# Headline pages. 
+# +def headlines_publishers(request): + template = loader.get_template("publishers.html") + return HttpResponse(template.render(_make_context("publishers"), request)) + + +def headlines_activities(request): + template = loader.get_template("activities.html") + return HttpResponse(template.render(_make_context("activities"), request)) + + +def headlines_files(request): + template = loader.get_template("files.html") + return HttpResponse(template.render(_make_context("files"), request)) + + +def headlines_publisher_detail(request, publisher=None): + template = loader.get_template("publisher.html") + + context = _make_context("publishers") + context["publisher"] = publisher + context["publisher_inverted"] = get_publisher_stats(publisher, "inverted-file") + context["publisher_licenses"] = _get_licenses_for_publisher(publisher) + publisher_stats = get_publisher_stats(publisher) + context["publisher_stats"] = publisher_stats + + try: + context["budget_table"] = [ + { + "year": "Total", + "count_total": sum(sum(x.values()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), + "sum_total": { + currency: sum(sums.values()) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["1"].values()) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + sum(publisher_stats["count_budgets_by_type_by_year"]["2"].values()) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: sum(v.values()) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } 
+ ] + [ + { + "year": year, + "count_total": sum( + x[year] for x in publisher_stats["count_budgets_by_type_by_year"].values() if year in x + ), + "sum_total": { + currency: sums.get(year) + for by_currency in publisher_stats["sum_budgets_by_type_by_year"].values() + for currency, sums in by_currency.items() + }, + "count_original": ( + publisher_stats["count_budgets_by_type_by_year"]["1"].get(year) + if "1" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_original": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["1"].items()} + if "1" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + "count_revised": ( + publisher_stats["count_budgets_by_type_by_year"]["2"].get(year) + if "2" in publisher_stats["count_budgets_by_type_by_year"] + else None + ), + "sum_revised": ( + {k: v.get(year) for k, v in publisher_stats["sum_budgets_by_type_by_year"]["2"].items()} + if "2" in publisher_stats["sum_budgets_by_type_by_year"] + else None + ), + } + for year in sorted( + set(sum((list(x.keys()) for x in publisher_stats["count_budgets_by_type_by_year"].values()), [])) + ) + ] + context["failure_count"] = len( + current_stats["inverted_file_publisher"][publisher]["validation"].get("fail", {}) + ) + except KeyError: + raise Http404("Publisher does not exist") + + return HttpResponse(template.render(context, request)) + + +# +# Views to generate data quality pages. 
+# +def dataquality_download(request): + template = loader.get_template("download.html") + context = _make_context("download") + return HttpResponse(template.render(context, request)) + + +def dataquality_download_errorsjson(request): + return HttpResponse(json.dumps(current_stats["download_errors"], indent=2), content_type="application/json") + + +def dataquality_xml(request): + template = loader.get_template("xml.html") + context = _make_context("xml") + return HttpResponse(template.render(context, request)) + + +def dataquality_validation(request): + template = loader.get_template("validation.html") + context = _make_context("validation") + return HttpResponse(template.render(context, request)) + + +def dataquality_versions(request): + template = loader.get_template("versions.html") + context = _make_context("versions") + return HttpResponse(template.render(context, request)) + + +def dataquality_licenses(request): + template = loader.get_template("licenses.html") + context = _make_context("licenses") + context["license_urls"] = LICENSE_URLS + context["license_names"] = text.LICENSE_NAMES + context["licenses"] = True + context["license_count"] = LICENCE_COUNT + context["publisher_license_count"] = PUBLISHER_LICENSE_COUNT + return HttpResponse(template.render(context, request)) + + +def dataquality_licenses_detail(request, license_id=None): + template = loader.get_template("license.html") + + if license_id not in LICENSE_URLS: + raise Http404("Unknown license") + + publishers = [ + publisher_name + for publisher_name, publisher in ckan.items() + for _, package in publisher.items() + if package["license_id"] == license_id or (license_id == "notspecified" and package["license_id"] is None) + ] + context = _make_context("licenses") + context["license_urls"] = LICENSE_URLS + context["license_names"] = text.LICENSE_NAMES + context["licenses"] = True + context["license"] = license_id + context["publisher_counts"] = [(publisher, publishers.count(publisher)) for 
publisher in set(publishers)] + return HttpResponse(template.render(context, request)) + + +def dataquality_orgxml(request): + template = loader.get_template("organisation.html") + context = _make_context("organisation") + return HttpResponse(template.render(context, request)) + + +def dataquality_identifiers(request): + template = loader.get_template("identifiers.html") + context = _make_context("identifiers") + return HttpResponse(template.render(context, request)) + + +def dataquality_reportingorgs(request): + template = loader.get_template("reporting_orgs.html") + context = _make_context("reporting_orgs") + return HttpResponse(template.render(context, request)) + + +# +# Exploring data pages. +# +def exploringdata_elements(request): + template = loader.get_template("elements.html") + return HttpResponse(template.render(_make_context("elements"), request)) + + +def exploringdata_element_detail(request, element=None): + template = loader.get_template("element.html") + context = _make_context("elements") + + if element not in slugs["element"]["by_slug"]: + raise Http404("Unknown element or attribute") + + i = slugs["element"]["by_slug"][element] + context["element"] = list(current_stats["inverted_publisher"]["elements"])[i] + context["publishers"] = list(current_stats["inverted_publisher"]["elements"].values())[i] + context["element_or_attribute"] = "attribute" if "@" in context["element"] else "element" + return HttpResponse(template.render(context, request)) + + +def exploringdata_orgids(request): + template = loader.get_template("org_ids.html") + return HttpResponse(template.render(_make_context("org_ids"), request)) + + +def exploringdata_orgtypes_detail(request, org_type=None): + if org_type not in slugs["org_type"]["by_slug"]: + raise Http404("Unknown organisation type") + + template = loader.get_template("org_type.html") + context = _make_context("org_ids") + context["slug"] = org_type + return HttpResponse(template.render(context, request)) + + +def 
exploringdata_codelists(request): + template = loader.get_template("codelists.html") + return HttpResponse(template.render(_make_context("codelists"), request)) + + +def exploringdata_codelists_detail(request, major_version=None, attribute=None): + template = loader.get_template("codelist.html") + + if major_version not in slugs["codelist"]: + raise Http404("Unknown major version of the IATI standard") + if attribute not in slugs["codelist"][major_version]["by_slug"]: + raise Http404("Unknown attribute") + + context = _make_context("codelists") + i = slugs["codelist"][major_version]["by_slug"][attribute] + element = list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version])[i] + values = nested_dictinvert( + list(current_stats["inverted_publisher"]["codelist_values_by_major_version"][major_version].values())[i] + ) + context["element"] = element + context["values"] = values + context["reverse_codelist_mapping"] = { + major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() + } + context["major_version"] = major_version + + return HttpResponse(template.render(context, request)) + + +def exploringdata_booleans(request): + template = loader.get_template("booleans.html") + return HttpResponse(template.render(_make_context("booleans"), request)) + + +def exploringdata_dates(request): + template = loader.get_template("dates.html") + return HttpResponse(template.render(_make_context("dates"), request)) + + +def exploringdata_traceability(request): + template = loader.get_template("traceability.html") + return HttpResponse(template.render(_make_context("traceability"), request)) + + +# +# Publishing statistics pages. 
+# +def pubstats_comprehensiveness(request): + template = loader.get_template("comprehensiveness.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + + +def pubstats_comprehensiveness_core(request): + template = loader.get_template("comprehensiveness_core.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + + +def pubstats_comprehensiveness_financials(request): + template = loader.get_template("comprehensiveness_financials.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + + +def pubstats_comprehensiveness_valueadded(request): + template = loader.get_template("comprehensiveness_valueadded.html") + context = _make_context("comprehensiveness") + context["comprehensiveness"] = comprehensiveness + return HttpResponse(template.render(context, request)) + + +def pubstats_timeliness(request): + template = loader.get_template("timeliness.html") + context = _make_context("timeliness") + context["timeliness"] = timeliness + return HttpResponse(template.render(context, request)) + + +def pubstats_timeliness_timelag(request): + template = loader.get_template("timeliness_timelag.html") + context = _make_context("timeliness") + context["timeliness"] = timeliness + return HttpResponse(template.render(context, request)) + + +def pubstats_summarystats(request): + template = loader.get_template("summary_stats.html") + context = _make_context("summary_stats") + context["summary_stats"] = summary_stats + return HttpResponse(template.render(context, request)) + + +def pubstats_forwardlooking(request): + template = loader.get_template("forwardlooking.html") + context = _make_context("forwardlooking") + context["forwardlooking"] = forwardlooking + return 
HttpResponse(template.render(context, request)) + + +def pubstats_humanitarian(request): + template = loader.get_template("humanitarian.html") + context = _make_context("humanitarian") + context["humanitarian"] = humanitarian + return HttpResponse(template.render(context, request)) + + +# +# Registration agencies page. +# +def registration_agencies(request): + template = loader.get_template("registration_agencies.html") + + context = _make_context("registration_agencies") + context["registration_agencies"] = collections.defaultdict(int) + context["registration_agencies_publishers"] = collections.defaultdict(list) + context["nonmatching"] = [] + for orgid, publishers in current_stats["inverted_publisher"]["reporting_orgs"].items(): + reg_ag = _registration_agency(orgid) + if reg_ag: + context["registration_agencies"][reg_ag] += 1 + context["registration_agencies_publishers"][reg_ag] += list(publishers) + else: + context["nonmatching"].append((orgid, publishers)) + + return HttpResponse(template.render(context, request)) diff --git a/dashboard/ui/wsgi.py b/dashboard/ui/wsgi.py new file mode 100644 index 0000000000..179c825598 --- /dev/null +++ b/dashboard/ui/wsgi.py @@ -0,0 +1,19 @@ +""" +WSGI config for IATI Dashboard project. + +It exposes the WSGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +# Import data here so it's in gunicorn's preload +import data # noqa F401 + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ui.settings") + +application = get_wsgi_application() diff --git a/dashboard/vars.py b/dashboard/vars.py new file mode 100644 index 0000000000..5ffa420bc9 --- /dev/null +++ b/dashboard/vars.py @@ -0,0 +1 @@ +expected_versions = ["null", "1.01", "1.02", "1.03", "1.04", "1.05", "2.01", "2.02", "2.03"] diff --git a/data.py b/data.py deleted file mode 100644 index ba2545f4c2..0000000000 --- a/data.py +++ /dev/null @@ -1,277 +0,0 @@ -import json -from collections import OrderedDict, defaultdict -import sys, os, re, copy, datetime, unicodecsv -import UserDict -import csv - -publisher_re = re.compile('(.*)\-[^\-]') - - -# Modified from: -# https://github.com/IATI/IATI-Stats/blob/1d20ed1e/stats/common/decorators.py#L5-L13 -def memoize(f): - def wrapper(self, key): - if not hasattr(self, '__cache'): - self.__cache = {} - if key in self.__cache: - return self.__cache[key] - res = f(self, key) - if type(res) is not JSONDir: - # don't cache JSONDirs - self.__cache[key] = res - return res - return wrapper - - -class GroupFiles(object, UserDict.DictMixin): - def __init__(self, inputdict): - self.inputdict = inputdict - self.cache = {} - - def __getitem__(self, key): - if key in self.cache: return self.cache[key] - self.inputdict[key] - out = OrderedDict() - for k2,v2 in self.inputdict[key].items(): - if type(v2) == OrderedDict: - out[k2] = OrderedDict() - for listitem, v3 in v2.items(): - m = publisher_re.match(listitem) - if m: - publisher = m.group(1) - if not publisher in out[k2]: - out[k2][publisher] = OrderedDict() - out[k2][publisher][listitem] = v3 - else: - pass # FIXME - else: - out[k2] = v2 - - self.cache[key] = out - return out - - -class JSONDir(object, 
UserDict.DictMixin): - """Produces an object, to be used to access JSON-formatted publisher data and return - this as an ordered dictionary (with nested dictionaries, if appropriate). - Use of this class removes the need to load large amounts of data into memory. - """ - - def __init__(self, folder): - """Set the path of the folder being accessed as an attribute to an instance of - the object. - """ - self.folder = folder - - @memoize - def __getitem__(self, key): - """Define how variables are gathered from the raw JSON files and then parsed into - the OrderedDict that will be returned. - - Note: - try-except should be used around file operations rather than checking before-hand - """ - - if os.path.exists(os.path.join(self.folder, key)): - # The data being sought is a directory - data = JSONDir(os.path.join(self.folder, key)) - elif os.path.exists(os.path.join(self.folder, key+'.json')): - # The data being sought is a json file - with open(os.path.join(self.folder, key+'.json')) as fp: - data = json.load(fp, object_pairs_hook=OrderedDict) - - # Deal with publishers who had an old registry ID - # If this publisher had at least one old ID in the past - if (self.get_publisher_name() in get_registry_id_matches().values()) and ('gitaggregate' in self.folder): - # Perform the merging - # Look over the set of changed registry IDs - for previous_id, current_id in get_registry_id_matches().items(): - folder = self.folder - previous_path = os.path.join(folder.replace(current_id,previous_id), key+'.json') - # If this publisher has had an old ID and there is data for it - if (current_id == self.get_publisher_name()) and os.path.exists(previous_path): - # Get the corresponding value for the old publisher ID, and merge with the existing value for this publisher - with open(previous_path) as old_fp: - old_pub_data = json.load(old_fp, object_pairs_hook=OrderedDict) - deep_merge(data, old_pub_data) - # FIXME i) Should deep_merge attempt to sort this ordereddict ii) Should there be 
an attempt to aggregate/average conflicting values? - else: - # No value found as either a folder or json file - raise KeyError, key - - # Because these keys are used as filenames, they can never exceed 255 characters - if hasattr(data, "keys"): - for k in data.keys(): - if len(k) >= 255: - data.pop(k) - return data - - def keys(self): - """Method to return a list of keys that are contained within the data folder that - is being accessed within this instance. - """ - return [ x[:-5] if x.endswith('.json') else x for x in os.listdir(self.folder) ] - - def __iter__(self): - """Custom iterable, to iterate over the keys that are contained within the data - folder that is being accessed within this instance. - """ - return iter(self.keys()) - - def get_publisher_name(self): - """Find the name of the publisher that this data relates to. - Note, this is a super hacky way to do this, prize available if a better way is found to do this! - """ - - # Get a list of the parts that are contained within this filepath - path = os.path.normpath(self.folder) - path_components = path.split(os.sep) - - # Loop over this list and return the publisher name if it is found within the historic list of publishers - for x in path_components: - if x in JSONDir('./stats-calculated/gitaggregate-publisher-dated').keys(): - return x - - # If got to the end of the loop and nothing found, this folder does not relate to a single publisher - return None - - - -def get_publisher_stats(publisher, stats_type='aggregated'): - """Function to obtain current data for a given publisher. - Returns: A JSONDir object for the publisher, or an empty dictionary if the publisher - is not found. 
- """ - try: - return JSONDir('./stats-calculated/current/{0}-publisher/{1}'.format(stats_type, publisher)) - except IOError: - return {} - - -def get_registry_id_matches(): - """Returns a dictionary of publishers who have modified their registry ID - Returns: Dictionary, where the key is the old registry ID, and the corresponding - value is the registry ID that data should be mapped to - """ - - # Load registry IDs for publishers who have changed their registry ID - reader = csv.DictReader(open('registry_id_relationships.csv', 'rU'), delimiter=',') - - # Load this data into a dictonary - registry_matches = {} - for row in reader: - registry_matches[row['previous_registry_id']] = row['current_registry_id'] - - return registry_matches - - -def deep_merge(obj1, obj2): - """Merges two OrderedDict objects with an unknown number of nested levels - Input: obj1 - OrderedDict to be used as the base object - Input: obj2 - OrderedDict to be merged into obj1 - Returns: Nothing, but obj1 will contain the full data - """ - - # Iterate through keys - for key in obj1: - # If this is value, we've hit the bottom, copy all of obj2 into obj1 - if type(obj1[key]) is not OrderedDict: - for key2 in obj2: - # If there exists a dict at that key, make sure it's not erased - if key2 in obj1: - if type(obj1[key2]) is not OrderedDict: - # You can change behavior here to determine - # How duplicate keys are handled - obj1[key2] = obj2[key2] - else: - obj1[key2] = obj2[key2] - - # If it's a dictionary we need to go deeper, by running this function recursively - else: - if key in obj2: - deep_merge(obj1[key],obj2[key]) - - -current_stats = { - 'aggregated': JSONDir('./stats-calculated/current/aggregated'), - 'aggregated_file': JSONDir('./stats-calculated/current/aggregated-file'), - 'inverted_publisher': JSONDir('./stats-calculated/current/inverted-publisher'), - 'inverted_file': JSONDir('./stats-calculated/current/inverted-file'), - 'download_errors': [] -} 
-current_stats['inverted_file_grouped'] = GroupFiles(current_stats['inverted_file']) -ckan_publishers = JSONDir('./data/ckan_publishers') -ckan = json.load(open('./stats-calculated/ckan.json'), object_pairs_hook=OrderedDict) -gitdate = json.load(open('./stats-calculated/gitdate.json'), object_pairs_hook=OrderedDict) -with open('./data/downloads/errors') as fp: - for line in fp: - if line != '.\n': - current_stats['download_errors'].append(line.strip('\n').split(' ', 3)) - -def transform_codelist_mapping_keys(codelist_mapping): - # Perform the same transformation as https://github.com/IATI/IATI-Stats/blob/d622f8e88af4d33b1161f906ec1b53c63f2f0936/stats.py#L12 - codelist_mapping = {k:v for k,v in codelist_mapping.items() if not k.startswith('//iati-organisation') } - codelist_mapping = {re.sub('^\/\/iati-activity', './', k):v for k,v in codelist_mapping.items() } - codelist_mapping = {re.sub('^\/\/', './/', k):v for k,v, in codelist_mapping.items() } - return codelist_mapping - -def create_codelist_mapping(major_version): - codelist_mapping = {x['path']:x['codelist'] for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))} - return transform_codelist_mapping_keys(codelist_mapping) - -MAJOR_VERSIONS = ['2', '1'] - -codelist_mapping = { v:create_codelist_mapping(v) for v in MAJOR_VERSIONS } -codelist_conditions = { - major_version: transform_codelist_mapping_keys({ x['path']:x.get('condition') for x in json.load(open('data/IATI-Codelists-{}/out/clv2/mapping.json'.format(major_version)))}) - for major_version in MAJOR_VERSIONS } - -# Create a big dictionary of all codelist values by version and codelist name -codelist_sets = { - major_version: { - cname:set(c['code'] for c in codelist['data']) for cname, codelist in JSONDir('data/IATI-Codelists-{}/out/clv2/json/en/'.format(major_version)).items() - } for major_version in MAJOR_VERSIONS } - - -#Simple look up to map publisher id to a publishers given name (title) 
-publisher_name={publisher:publisher_json['result']['title'] for publisher,publisher_json in ckan_publishers.items()} -#Create a list of tuples ordered by publisher given name titles - this allows us to display lists of publishers in alphabetical order -publishers_ordered_by_title = [ - (publisher_name[publisher], publisher) - for publisher in current_stats['inverted_publisher']['activities'] - if publisher in publisher_name] -publishers_ordered_by_title.sort(key=lambda x: unicode.lower(x[0])) - -# List of publishers who report all their activities as a secondary publisher -secondary_publishers = [publisher for publisher, stats in JSONDir('./stats-calculated/current/aggregated-publisher').items() - if int(stats['activities']) == len(stats['activities_secondary_reported']) - and int(stats['activities']) > 0] - -import csv -from decimal import Decimal -try: - dac2012 = {x[0]:Decimal(x[1].replace(',','')) for x in csv.reader(open('data/dac2012.csv'))} -except IOError: - dac2012 = {} - - - - -def make_slugs(keys): - out = {'by_slug':{}, 'by_i':{}} - for i,key in enumerate(keys): - slug = re.sub('[^a-zA-Z0-9:@\-_]', '', re.sub('{[^}]*}', '', key.replace('{http://www.w3.org/XML/1998/namespace}','xml:').replace('/','_'))).strip('_') - while slug in out['by_slug']: - slug += '_' - out['by_slug'][slug] = i - out['by_i'][i] = slug - return out - -slugs = { - 'codelist': { major_version:( - make_slugs(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()) - if major_version in current_stats['inverted_publisher']['codelist_values_by_major_version'] - else make_slugs([]) - ) for major_version in MAJOR_VERSIONS }, - 'element': make_slugs(current_stats['inverted_publisher']['elements'].keys()) -} diff --git a/fetch_data.py b/fetch_data.py index 331ec5ab67..705ebb5228 100644 --- a/fetch_data.py +++ b/fetch_data.py @@ -1,44 +1,30 @@ """ Uses the CKAN API on the IATI Registry to fetch data about publishers -Makes a call to get a list of all 
organisations, then +Makes a call to get a list of all publishers, then grabs data about each individual publisher and stores the information in one file per publisher. We're particulary looking for information such as name, organisation type, and the link back to the registry """ +import json +from os import makedirs +from os.path import join +from pathlib import Path import requests -import os -import json -#Make a directory to save the data about each publisher -try: - os.makedirs(os.path.join('data','ckan_publishers')) -except OSError: - pass +# Make a directory to save the data about each publisher +output_path = Path('data/ckan_publishers') +makedirs(output_path, exist_ok=True) -page_size = 50 -url = 'https://iatiregistry.org/api/3/action/organization_list' -params = { - 'all_fields': 'true', - 'include_extras': 'true', - 'include_tags': 'true', - 'include_groups': 'true', - 'include_users': 'true', - 'limit': page_size, -} +res = requests.get('https://registry.codeforiati.org/publisher_list.json') +res.raise_for_status() +publishers = res.json()['result'] -# Loop through the organisation list json, saving a file of information about each publisher -page = 0 -while True: - params['offset'] = page_size * page - res = requests.get(url, params=params).json()['result'] - if res == []: - break - for publisher in res: - name = publisher.get('name') - output = {'result': publisher} - with open(os.path.join('data', 'ckan_publishers', name + '.json'), 'w') as fp: - _ = json.dump(output, fp) - page += 1 +# Loop through the publisher list, saving a file of information about each publisher +for publisher in publishers: + name = publisher.get('name') + output = {'result': publisher} + with open(join(output_path, name + '.json'), 'w') as fp: + _ = json.dump(output, fp) diff --git a/fetch_data.sh b/fetch_data.sh index 83bd950103..9d72cb8794 100755 --- a/fetch_data.sh +++ b/fetch_data.sh @@ -4,9 +4,14 @@ mkdir -p data/downloads/ wget 
"https://gist.githubusercontent.com/codeforIATIbot/f117c9be138aa94c9762d57affc51a64/raw/errors" -O data/downloads/errors -# Get CKAN (IATI Registry) data +# Get CKAN (IATI Registry) +rm -rf data/ckan_publishers/ python fetch_data.py +# Get GitHub data +rm -rf data/github/ +python fetch_github_issues.py + # Generate a csv file with the number of download errors logged since 2013 cd data/downloads echo "cloning download errors" @@ -27,28 +32,29 @@ echo "cloned and checked out download errors" cd ../../../ # Get codelists for versions v1.x and v2.x of the IATI Standard -cd data +rm -rf data/IATI-Codelists-1 echo "cloning Codelists-1" -if [ ! -d IATI-Codelists-1 ]; then - git clone https://github.com/IATI/IATI-Codelists.git IATI-Codelists-1 -fi -cd IATI-Codelists-1 -echo "checking out Codelists-1" -git checkout version-1.05 > /dev/null -git pull > /dev/null +git clone --branch version-1.05 https://github.com/IATI/IATI-Codelists.git data/IATI-Codelists-1 +cd data/IATI-Codelists-1 echo "running gen.sh for Codelist-1" ./gen.sh +cd ../.. + +echo "Fetching Codelists-2" +rm -rf data/IATI-Codelists-2 +python fetch_v2_codelists.py +echo "Fetching schemas" +mkdir data/schemas +cd data/schemas +# for v in 1.01 1.02 1.03 1.04 1.05 2.01 2.02 2.03; do +for v in 1.05 2.03; do + git clone https://github.com/IATI/IATI-Schemas.git $v + cd $v + git checkout version-$v + git pull + cd .. +done cd .. -echo "cloning Codelists-2" -if [ ! 
-d IATI-Codelists-2 ]; then - git clone https://github.com/IATI/IATI-Codelists.git IATI-Codelists-2 -fi -cd IATI-Codelists-2 -echo "checking out Codelists-1" -git checkout version-2.03 > /dev/null -git pull > /dev/null -echo "running gen.sh for Codelist-2" -./gen.sh echo "completed fetching data" diff --git a/fetch_github_issues.py b/fetch_github_issues.py new file mode 100644 index 0000000000..6fc068e65f --- /dev/null +++ b/fetch_github_issues.py @@ -0,0 +1,45 @@ +""" +Fetch codeforIATI/iati-data-bugtracker github issues +""" +import json +import os +from collections import defaultdict +from pathlib import Path + +import requests + +# Make a directory to save github issue data +os.makedirs(Path('data/github/publishers'), exist_ok=True) + +j = requests.get( + 'https://api.github.com/repos/codeforIATI/iati-data-bugtracker/issues', + params={'per_page': 100, 'state': 'open'}).json() +with open('data/github/issues.json', 'w') as fp: + json.dump(j, fp) + +publishers = defaultdict(list) +with open(Path('data/github/issues.json')) as f: + issues = json.load(f) +for issue in issues: + awaiting_triage = [ + x for x in issue['labels'] + if x['name'] == 'awaiting triage'] + if awaiting_triage: + # ignore these + continue + pub_ids = [ + x['name'].split(': ', 1)[1] + for x in issue['labels'] + if x['name'].startswith('publisher: ')] + for pub_id in pub_ids: + publishers[pub_id].append({ + 'title': issue['title'], + 'html_url': issue['html_url'], + 'created_at': issue['created_at'], + 'updated_at': issue['updated_at'], + 'state': issue['state'], + 'labels': [x for x in issue['labels'] if not x['name'].startswith('publisher: ')], + }) +for pub_id, issues in publishers.items(): + with open(Path(f'data/github/publishers/{pub_id}.json'), 'w') as f: + json.dump(issues, f) diff --git a/fetch_v2_codelists.py b/fetch_v2_codelists.py new file mode 100644 index 0000000000..d6060a4fb8 --- /dev/null +++ b/fetch_v2_codelists.py @@ -0,0 +1,44 @@ +import json +from os import makedirs +from 
os.path import join + +import requests +from lxml import etree as ET + +output_path = join( + 'data', 'IATI-Codelists-2', 'out', 'clv2', 'json', 'en') +makedirs(output_path) +resp = requests.get('https://codelists.codeforiati.org/api/') +codelists = resp.json()['formats']['json']['languages']['en'] +for codelist_name, codelist_url in codelists.items(): + r = requests.get("http://dev.iatistandard.org/reference_downloads/203/codelists/downloads/clv3/json/en/" + codelist_url.split("/")[-1]) + if r.status_code == 404: + continue + codelist_json = r.json() + with open(join(output_path, codelist_name + '.json'), 'w') as f: + json.dump(codelist_json, f) + + +def mapping_to_json(mappings): + for mapping in mappings.xpath('//mapping'): + out = { + 'path': mapping.find('path').text, + 'codelist': mapping.find('codelist').attrib['ref'] + } + if mapping.find('condition') is not None: + out['condition'] = mapping.find('condition').text + yield out + + +mapping_urls = [ + 'https://raw.githubusercontent.com/IATI/IATI-Codelists/version-2.03/mapping.xml', + ] +# 'https://raw.githubusercontent.com/codeforIATI/Unofficial-Codelists/master/mapping.xml'] +mappings = [] +for mapping_url in mapping_urls: + resp = requests.get(mapping_url) + doc = ET.fromstring(resp.content) + mappings += mapping_to_json(doc) + +with open(join('data', 'IATI-Codelists-2', 'out', 'clv2', 'mapping.json'), 'w') as f: + json.dump(sorted(mappings, key=lambda x: x['path']), f) diff --git a/get_stats.sh b/get_stats.sh index eb2601626e..bfa3296eec 100755 --- a/get_stats.sh +++ b/get_stats.sh @@ -1,24 +1,15 @@ -# This script is the same as https://github.com/IATI/IATI-Stats/blob/master/get_stats.sh -# but with only the dated historical aggregates. 
+set -eux +# ^ https://explainshell.com/explain?cmd=set+-eux + mkdir stats-calculated -for f in ckan gitdate; do - curl --compressed "http://dashboard.iatistandard.org/stats/${f}.json" > stats-calculated/${f}.json +for f in ckan gitdate licenses; do + curl --compressed "https://dev.merged.dashboard.iatistandard.org/stats/${f}.json" > stats-calculated/${f}.json done -mkdir stats-blacklist -cd stats-blacklist -wget "http://dashboard.iatistandard.org/stats-blacklist/current.tar.gz" -O current.tar.gz -wget "http://dashboard.iatistandard.org/stats-blacklist/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz -wget "http://dashboard.iatistandard.org/stats-blacklist/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz -tar -xf current.tar.gz -tar -xf gitaggregate-dated.tar.gz -tar -xf gitaggregate-publisher-dated.tar.gz -cd .. - cd stats-calculated -wget "http://dashboard.iatistandard.org/stats/current.tar.gz" -O current.tar.gz -wget "http://dashboard.iatistandard.org/stats/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz -wget "http://dashboard.iatistandard.org/stats/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz +wget "https://dev.merged.dashboard.iatistandard.org/stats/current.tar.gz" -O current.tar.gz +wget "https://dev.merged.dashboard.iatistandard.org/stats/gitaggregate-dated.tar.gz" -O gitaggregate-dated.tar.gz +wget "https://dev.merged.dashboard.iatistandard.org/stats/gitaggregate-publisher-dated.tar.gz" -O gitaggregate-publisher-dated.tar.gz tar -xf current.tar.gz tar -xf gitaggregate-dated.tar.gz tar -xf gitaggregate-publisher-dated.tar.gz diff --git a/git.sh b/git.sh index c7b6c780a5..34dd8c8f5b 100755 --- a/git.sh +++ b/git.sh @@ -9,21 +9,22 @@ mkdir out echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Fetching data" ./fetch_data.sh &> fetch_data.log || exit 1 +cd dashboard + -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running plots.py" +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_plots.py" -python plots.py || exit 1 +python make_plots.py || exit 1 echo "LOG: 
`date '+%Y-%m-%d %H:%M:%S'` - Running make_csv.py" python make_csv.py || exit 1 -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers kit.py" +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running speakers_kit.py" python speakers_kit.py || exit 1 -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running make_html.py" -python make_html.py $1 $2|| exit 1 +echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Running create_caches.py" +rm -r cache +python create_caches.py || exit 1 -echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Copying static elements" -cp static/img/favicon.png out/ -cp static/img/tablesorter-icons.gif out/ +cd .. echo "LOG: `date '+%Y-%m-%d %H:%M:%S'` - Make a backup of the old web directory and make new content live" rsync -a --delete web web.bk diff --git a/licenses.py b/licenses.py deleted file mode 100644 index 949e950753..0000000000 --- a/licenses.py +++ /dev/null @@ -1,123 +0,0 @@ -license_names = { -'notspecified': 'Other::License Not Specified', -'odc-pddl': 'OKD Compliant::Open Data Commons Public Domain Dedication and Licence (PDDL)', -'odc-odbl': 'OKD Compliant::Open Data Commons Open Database License (ODbL)', -'odc-by': 'OKD Compliant::Open Data Commons Attribution Licence', -'cc-zero': 'OKD Compliant::Creative Commons CCZero', -'cc-by': 'OKD Compliant::Creative Commons Attribution', -'cc-by-sa': 'OKD Compliant::Creative Commons Attribution Share-Alike', -'gfdl': 'OKD Compliant::GNU Free Documentation License', -'ukclickusepsi': 'OKD Compliant::UK Click Use PSI', -'other-open': 'OKD Compliant::Other (Open)', -'other-pd': 'OKD Compliant::Other (Public Domain)', -'other-at': 'OKD Compliant::Other (Attribution)', -'ukcrown-withrights': 'OKD Compliant::UK Crown Copyright with data.gov.uk rights', -'hesa-withrights': 'OKD Compliant::Higher Education Statistics Agency Copyright with data.gov.uk rights', -'localauth-withrights': 'OKD Compliant::Local Authority Copyright with data.gov.uk rights', -'uk-ogl': 'OKD Compliant::UK Open Government Licence (OGL)', 
-'met-office-cp': 'Non-OKD Compliant::Met Office UK Climate Projections Licence Agreement', -'cc-nc': 'Non-OKD Compliant::Creative Commons Non-Commercial (Any)', -'ukcrown': 'Non-OKD Compliant::UK Crown Copyright', -'other-nc': 'Non-OKD Compliant::Other (Non-Commercial)', -'other-closed': 'Non-OKD Compliant::Other (Not Open)', -'bsd-license': 'OSI Approved::New and Simplified BSD licenses', -'gpl-2.0': 'OSI Approved::GNU General Public License (GPL)', -'gpl-3.0': 'OSI Approved::GNU General Public License version 3.0 (GPLv3)', -'lgpl-2.1': 'OSI Approved::GNU Library or "Lesser" General Public License (LGPL)', -'mit-license': 'OSI Approved::MIT license', -'afl-3.0': 'OSI Approved::Academic Free License 3.0 (AFL 3.0)', -'apl1.0': 'OSI Approved::Adaptive Public License', -'apache': 'OSI Approved::Apache Software License', -'apache2.0': 'OSI Approved::Apache License, 2.0', -'apsl-2.0': 'OSI Approved::Apple Public Source License', -'artistic-license-2.0': 'OSI Approved::Artistic license 2.0', -'attribution': 'OSI Approved::Attribution Assurance Licenses', -'ca-tosl1.1': 'OSI Approved::Computer Associates Trusted Open Source License 1.1', -'cddl1': 'OSI Approved::Common Development and Distribution License', -'cpal_1.0': 'OSI Approved::Common Public Attribution License 1.0 (CPAL)', -'cuaoffice': 'OSI Approved::CUA Office Public License Version 1.0', -'eudatagrid': 'OSI Approved::EU DataGrid Software License', -'eclipse-1.0': 'OSI Approved::Eclipse Public License', -'ecl2': 'OSI Approved::Educational Community License, Version 2.0', -'eiffel': 'OSI Approved::Eiffel Forum License', -'ver2_eiffel': 'OSI Approved::Eiffel Forum License V2.0', -'entessa': 'OSI Approved::Entessa Public License', -'fair': 'OSI Approved::Fair License', -'frameworx': 'OSI Approved::Frameworx License', -'ibmpl': 'OSI Approved::IBM Public License', -'intel-osl': 'OSI Approved::Intel Open Source License', -'jabber-osl': 'OSI Approved::Jabber Open Source License', -'lucent-plan9': 'OSI Approved::Lucent 
Public License (Plan9)', -'lucent1.02': 'OSI Approved::Lucent Public License Version 1.02', -'mitre': 'OSI Approved::MITRE Collaborative Virtual Workspace License (CVW License)', -'motosoto': 'OSI Approved::Motosoto License', -'mozilla': 'OSI Approved::Mozilla Public License 1.0 (MPL)', -'mozilla1.1': 'OSI Approved::Mozilla Public License 1.1 (MPL)', -'nasa1.3': 'OSI Approved::NASA Open Source Agreement 1.3', -'naumen': 'OSI Approved::Naumen Public License', -'nethack': 'OSI Approved::Nethack General Public License', -'nokia': 'OSI Approved::Nokia Open Source License', -'oclc2': 'OSI Approved::OCLC Research Public License 2.0', -'opengroup': 'OSI Approved::Open Group Test Suite License', -'osl-3.0': 'OSI Approved::Open Software License 3.0 (OSL 3.0)', -'php': 'OSI Approved::PHP License', -'pythonpl': 'OSI Approved::Python license', -'PythonSoftFoundation': 'OSI Approved::Python Software Foundation License', -'qtpl': 'OSI Approved::Qt Public License (QPL)', -'real': 'OSI Approved::RealNetworks Public Source License V1.0', -'rpl1.5': 'OSI Approved::Reciprocal Public License 1.5 (RPL1.5)', -'ricohpl': 'OSI Approved::Ricoh Source Code Public License', -'sleepycat': 'OSI Approved::Sleepycat License', -'sun-issl': 'OSI Approved::Sun Industry Standards Source License (SISSL)', -'sunpublic': 'OSI Approved::Sun Public License', -'sybase': 'OSI Approved::Sybase Open Watcom Public License 1.0', -'UoI-NCSA': 'OSI Approved::University of Illinois/NCSA Open Source License', -'vovidapl': 'OSI Approved::Vovida Software License v. 
1.0', -'W3C': 'OSI Approved::W3C License', -'wxwindows': 'OSI Approved::wxWindows Library License', -'xnet': 'OSI Approved::X.Net License', -'zpl': 'OSI Approved::Zope Public License', -'zlib-license': 'OSI Approved::zlib/libpng license'} - -import json -from collections import OrderedDict -from flask import render_template - -with open('./stats-calculated/ckan.json') as handler: - ckan = json.load(handler, object_pairs_hook=OrderedDict) - -licenses = [ package.get('license_id') for _, publisher in ckan.items() for _, package in publisher.items() ] - -def licenses_for_publisher(publisher_name): - # Check publisher is in the compiled list of CKAN data - # Arises from https://github.com/IATI/IATI-Dashboard/issues/408 - if publisher_name not in ckan.keys(): - return set() - - # Return unique licenses used - return set([ package.get('license_id') for package in ckan[publisher_name].values() ]) - -def main(): - licenses_and_publisher = set([ (package.get('license_id'), publisher_name) for publisher_name, publisher in ckan.items() for package_name, package in publisher.items() ]) - licenses_per_publisher = [ license for license, publisher in licenses_and_publisher ] - - return render_template('licenses.html', - license_names=license_names, - license_count = dict((x,licenses.count(x)) for x in set(licenses)), - publisher_license_count = dict((x,licenses_per_publisher.count(x)) for x in set(licenses_per_publisher)), - sorted=sorted, - page='licenses', - licenses=True) - -def individual_license(license): - if license == 'None': - license = None - publishers = [ publisher_name for publisher_name, publisher in ckan.items() for _, package in publisher.items() if package.get('license_id') == license ] - publisher_counts = [ (publisher, publishers.count(publisher)) for publisher in set(publishers) ] - return render_template('license.html', - url=lambda x: '../'+x, - license=license, - license_names=license_names, - publisher_counts=publisher_counts, - page='licenses', - 
licenses=True) diff --git a/make_csv.py b/make_csv.py deleted file mode 100644 index b145dcf304..0000000000 --- a/make_csv.py +++ /dev/null @@ -1,75 +0,0 @@ -# Script to generate CSV files from data in the 'stats-calculated' folder, -# and extra logic in other files in this repository - -import unicodecsv -import os -import data -from collections import OrderedDict - -publisher_name={publisher:publisher_json['result']['title'] for publisher,publisher_json in data.ckan_publishers.items()} - -def publisher_dicts(): - for publisher, activities in data.current_stats['inverted_publisher']['activities'].items(): - if publisher not in data.ckan_publishers: - continue - publisher_stats = data.get_publisher_stats(publisher) - yield { - 'Publisher Name': publisher_name[publisher], - 'Publisher Registry Id': publisher, - 'Activities': activities, - 'Organisations': publisher_stats['organisations'], - 'Files': publisher_stats['activity_files']+publisher_stats['organisation_files'], - 'Activity Files': publisher_stats['activity_files'], - 'Organisation Files': publisher_stats['organisation_files'], - 'Total File Size': publisher_stats['file_size'], - 'Reporting Org on Registry': data.ckan_publishers[publisher]['result']['publisher_iati_id'], - 'Reporting Orgs in Data (count)': len(publisher_stats['reporting_orgs']), - 'Reporting Orgs in Data': ';'.join(publisher_stats['reporting_orgs']), - 'Hierarchies (count)': len(publisher_stats['hierarchies']), - 'Hierarchies': ';'.join(publisher_stats['hierarchies']), - } - -with open(os.path.join('out', 'publishers.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ - 'Publisher Name', - 'Publisher Registry Id', - 'Activities', - 'Organisations', - 'Files', - 'Activity Files', - 'Organisation Files', - 'Total File Size', - 'Reporting Org on Registry', - 'Reporting Orgs in Data (count)', - 'Reporting Orgs in Data', - 'Hierarchies (count)', - 'Hierarchies', - ]) - writer.writeheader() - for d in publisher_dicts(): - writer.writerow(d) 
- - - -publishers = data.current_stats['inverted_publisher']['activities'].keys() - -with open(os.path.join('out', 'elements.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ 'Element' ] + publishers ) - writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements'].items(): - publisher_dict['Element'] = element - writer.writerow(publisher_dict) - -with open(os.path.join('out', 'elements_total.csv'), 'w') as fp: - writer = unicodecsv.DictWriter(fp, [ 'Element' ] + publishers ) - writer.writeheader() - for element, publisher_dict in data.current_stats['inverted_publisher']['elements_total'].items(): - publisher_dict['Element'] = element - writer.writerow(publisher_dict) - -with open(os.path.join('out', 'registry.csv'), 'w') as fp: - keys = ['name', 'title', 'publisher_frequency', 'publisher_frequency_select', 'publisher_implementation_schedule', 'publisher_ui', 'publisher_field_exclusions', 'publisher_contact', 'image_url', 'display_name', 'publisher_iati_id', 'publisher_units', 'publisher_record_exclusions', 'publisher_data_quality', 'publisher_country', 'publisher_description', 'publisher_refs', 'publisher_thresholds' 'publisher_agencies', 'publisher_constraints', 'publisher_organization_type', 'publisher_segmentation', 'license_id', 'state', 'publisher_timeliness'] - writer = unicodecsv.DictWriter(fp, keys) - writer.writeheader() - for publisher_json in data.ckan_publishers.values(): - writer.writerow({x:publisher_json['result'].get(x) or 0 for x in keys}) diff --git a/make_html.py b/make_html.py deleted file mode 100644 index 94d2d7a602..0000000000 --- a/make_html.py +++ /dev/null @@ -1,282 +0,0 @@ -# Script to generate static HTML pages -# This uses Jinja templating to render the HTML templates in the 'templates' folder -# Data is based on the files in the 'stats-calculated' folder, and extra logic in other files in this repository - -from __future__ import print_function -import argparse -import os -import re 
-import subprocess -from collections import defaultdict - -from flask import Flask, render_template, redirect, abort, Response -app = Flask(__name__, template_folder="static/templates") - -import licenses -from vars import expected_versions -import text -import datetime - -print('Doing initial data import') -from data import * -print('Initial data import finished') - - -def dictinvert(d): - inv = defaultdict(list) - for k, v in d.iteritems(): - inv[v].append(k) - return inv - -def nested_dictinvert(d): - inv = defaultdict(lambda: defaultdict(int)) - for k, v in d.iteritems(): - for k2, v2 in v.iteritems(): - inv[k2][k] += v2 - return inv - -def dataset_to_publisher(publisher_slug): - """ Converts a dataset (package) slug e.g. dfid-bd to the corresponding publisher - slug e.g. dfid """ - return publisher_slug.rsplit('-', 1)[0] - - -def firstint(s): - if s[0].startswith('<'): return 0 - m = re.search('\d+', s[0]) - return int(m.group(0)) - -def xpath_to_url(path): - path = path.strip('./') - if path.startswith('iati-activity'): - return 'http://iatistandard.org/activity-standard/iati-activities/'+path.split('@')[0] - elif path.startswith('iati-organisation'): - return 'http://iatistandard.org/organisation-standard/iati-organisations/'+path.split('@')[0] - else: - return 'http://iatistandard.org/activity-standard/iati-activities/iati-activity/'+path.split('@')[0] - -def registration_agency(orgid): - for code in codelist_sets['2']['OrganisationRegistrationAgency']: - if orgid.startswith(code): - return code - -def get_codelist_values(codelist_values_for_element): - """Return a list of unique values present within a one-level nested dictionary. 
- Envisaged usage is to gather the codelist values used by each publisher, as in - stats/current/inverted-publisher/codelist_values_by_major_version.json - Input: Set of codelist values for a given element (listed by publisher), for example: - current_stats['inverted_publisher']['codelist_values_by_major_version']['1']['.//@xml:lang'] - """ - return list(set([y for x in codelist_values_for_element.items() for y in x[1].keys()])) - -# Store data processing times -date_time_data_str = max(gitdate.values()) -date_time_data_obj = datetime.datetime.strptime(date_time_data_str[:19], '%Y-%m-%d %H:%M:%S') # Ignores timezone as this is unhelpful for user output - -# Custom Jinja filters -app.jinja_env.filters['xpath_to_url'] = xpath_to_url -app.jinja_env.filters['url_to_filename'] = lambda x: x.split('/')[-1] -app.jinja_env.filters['dataset_to_publisher'] = dataset_to_publisher - -# Custom Jinja globals -app.jinja_env.globals['url'] = lambda x: x -app.jinja_env.globals['datetime_generated'] = subprocess.check_output(['date', '+%Y-%m-%d %H:%M:%S %z']).strip() -app.jinja_env.globals['datetime_data'] = date_time_data_str -app.jinja_env.globals['datetime_data_homepage'] = date_time_data_obj.strftime('%d %B %Y (at %H:%M)') -app.jinja_env.globals['stats_url'] = 'http://dashboard.iatistandard.org/stats' -app.jinja_env.globals['sorted'] = sorted -app.jinja_env.globals['enumerate'] = enumerate -app.jinja_env.globals['top_titles'] = text.top_titles -app.jinja_env.globals['page_titles'] = text.page_titles -app.jinja_env.globals['short_page_titles'] = text.short_page_titles -app.jinja_env.globals['page_leads'] = text.page_leads -app.jinja_env.globals['page_sub_leads'] = text.page_sub_leads -app.jinja_env.globals['top_navigation'] = text.top_navigation -app.jinja_env.globals['navigation'] = text.navigation -app.jinja_env.globals['navigation_reverse'] = {page: k for k, pages in text.navigation.items() for page in pages } -app.jinja_env.globals['navigation_reverse'].update({k: k for k in 
text.navigation}) -app.jinja_env.globals['current_stats'] = current_stats -app.jinja_env.globals['ckan'] = ckan -app.jinja_env.globals['ckan_publishers'] = ckan_publishers -app.jinja_env.globals['publisher_name'] = publisher_name -app.jinja_env.globals['publishers_ordered_by_title'] = publishers_ordered_by_title -app.jinja_env.globals['get_publisher_stats'] = get_publisher_stats -app.jinja_env.globals['set'] = set -app.jinja_env.globals['firstint'] = firstint -app.jinja_env.globals['expected_versions'] = expected_versions -app.jinja_env.globals['current_year'] = datetime.datetime.now().year -# Following variables set in coverage branch but not in master -# app.jinja_env.globals['float'] = float -# app.jinja_env.globals['dac2012'] = dac2012 -app.jinja_env.globals['MAJOR_VERSIONS'] = MAJOR_VERSIONS - -app.jinja_env.globals['slugs'] = slugs -app.jinja_env.globals['codelist_mapping'] = codelist_mapping -app.jinja_env.globals['codelist_conditions'] = codelist_conditions -app.jinja_env.globals['codelist_sets'] = codelist_sets -app.jinja_env.globals['get_codelist_values'] = get_codelist_values - -basic_page_names = [ - 'index', - 'headlines', - 'data_quality', - 'exploring_data', - 'publishers', - 'files', - 'activities', - 'download', - 'xml', - 'validation', - 'versions', - 'organisation', - 'identifiers', - 'reporting_orgs', - 'elements', - 'codelists', - 'booleans', - 'dates', - 'faq', -] - -@app.route('/.html') -def basic_page(page_name): - if page_name in basic_page_names: - kwargs = {} - parent_page_name = page_name - return render_template(page_name + '.html', page=parent_page_name, **kwargs) - else: - abort(404) - -@app.route('/data/download_errors.json') -def download_errors_json(): - return Response(json.dumps(current_stats['download_errors'], indent=2), mimetype='application/json'), - -app.add_url_rule('/', 'index_redirect', lambda: redirect('index.html')) -app.add_url_rule('/licenses.html', 'licenses', licenses.main) -app.add_url_rule('/license/.html', 
'licenses_individual_license', licenses.individual_license) - -@app.route('/publisher/.html') -def publisher(publisher): - publisher_stats = get_publisher_stats(publisher) - budget_table = [{ - 'year': 'Total', - 'count_total': sum(sum(x.values()) for x in publisher_stats['count_budgets_by_type_by_year'].values()), - 'sum_total': {currency: sum(sums.values()) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, - 'count_original': sum(publisher_stats['count_budgets_by_type_by_year']['1'].values()) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': sum(publisher_stats['count_budgets_by_type_by_year']['2'].values()) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: sum(v.values()) for k, v in publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - }] + [ - { - 'year': year, - 'count_total': sum(x[year] for x in publisher_stats['count_budgets_by_type_by_year'].values() if year in x), - 'sum_total': {currency: sums.get(year) for by_currency in publisher_stats['sum_budgets_by_type_by_year'].values() for currency,sums in by_currency.items()}, - 'count_original': publisher_stats['count_budgets_by_type_by_year']['1'].get(year) if '1' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_original': {k: v.get(year) for k, v in publisher_stats['sum_budgets_by_type_by_year']['1'].items()} if '1' in publisher_stats['sum_budgets_by_type_by_year'] else None, - 'count_revised': publisher_stats['count_budgets_by_type_by_year']['2'].get(year) if '2' in publisher_stats['count_budgets_by_type_by_year'] else None, - 'sum_revised': {k: v.get(year) for k, v in 
publisher_stats['sum_budgets_by_type_by_year']['2'].items()} if '2' in publisher_stats['sum_budgets_by_type_by_year'] else None - } for year in sorted(set(sum((x.keys() for x in publisher_stats['count_budgets_by_type_by_year'].values()), []))) - ] - return render_template('publisher.html', - url=lambda x: '../' + x, - publisher=publisher, - publisher_stats=publisher_stats, - publisher_inverted=get_publisher_stats(publisher, 'inverted-file'), - publisher_licenses=licenses.licenses_for_publisher(publisher), - budget_table=budget_table - ) - -@app.route('/codelist//.html') -def codelist(major_version, slug): - i = slugs['codelist'][major_version]['by_slug'][slug] - element = current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].keys()[i] - values = nested_dictinvert(current_stats['inverted_publisher']['codelist_values_by_major_version'][major_version].values()[i]) - return render_template('codelist.html', - element=element, - values=values, - reverse_codelist_mapping={major_version: dictinvert(mapping) for major_version, mapping in codelist_mapping.items() }, - url=lambda x: '../../' + x, - major_version=major_version, - page='codelists') - -@app.route('/element/.html') -def element(slug): - i = slugs['element']['by_slug'][slug] - element = current_stats['inverted_publisher']['elements'].keys()[i] - publishers = current_stats['inverted_publisher']['elements'].values()[i] - file_grouped = current_stats['inverted_file_grouped']['elements'].values()[i] - return render_template('element.html', - element=element, - publishers=publishers, - file_grouped=file_grouped, - url=lambda x: '../' + x, - page='elements') - - -@app.route('/registration_agencies.html') -def registration_agencies(): - registration_agencies = defaultdict(int) - registration_agencies_publishers = defaultdict(list) - nonmatching = [] - for orgid, publishers in current_stats['inverted_publisher']['reporting_orgs'].items(): - reg_ag = registration_agency(orgid) - if reg_ag: 
- registration_agencies[reg_ag] += 1 - registration_agencies_publishers[reg_ag] += publishers.keys() - else: - nonmatching.append((orgid, publishers)) - return render_template('registration_agencies.html', - page='registration_agencies', - registration_agencies=registration_agencies, - registration_agencies_publishers=registration_agencies_publishers, - nonmatching=nonmatching) - -# Server an image through the development server (--live) -@app.route('/.png') -def image_development(image): - return Response(open(os.path.join('out', image + '.png')).read(), mimetype='image/png') - -@app.route('/.csv') -def csv_development(name): - return Response(open(os.path.join('out', name + '.csv')).read(), mimetype='text/csv') - -@app.route('/publisher_imgs/.png') -def image_development_publisher(image): - print(image) - return Response(open(os.path.join('out', 'publisher_imgs', image + '.png')).read(), mimetype='image/png') - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--url", - help="Link to connect dashboard to publishing stats", - default="") - args = parser.parse_args() - app.jinja_env.globals['pubstats_url'] = args.url - from flask_frozen import Freezer - app.config['FREEZER_DESTINATION'] = 'out' - app.config['FREEZER_REMOVE_EXTRA_FILES'] = False - app.debug = False # Comment to turn off debugging - app.testing = True # Comment to turn off debugging - freezer = Freezer(app) - - @freezer.register_generator - def url_generator(): - for page_name in basic_page_names: - yield 'basic_page', {'page_name': page_name} - for publisher in current_stats['inverted_publisher']['activities'].keys(): - yield 'publisher', {'publisher': publisher} - for slug in slugs['element']['by_slug']: - yield 'element', {'slug': slug} - for major_version, codelist_slugs in slugs['codelist'].items(): - for slug in codelist_slugs['by_slug']: - yield 'codelist', { - 'slug': slug, - 'major_version': major_version - } - for license in licenses.licenses: - if 
license is None: - license = 'None' - yield 'licenses_individual_license', {'license': license} - - - freezer.freeze() diff --git a/mock_stats_for_unit_tests.sh b/mock_stats_for_unit_tests.sh new file mode 100755 index 0000000000..26b6d0b10c --- /dev/null +++ b/mock_stats_for_unit_tests.sh @@ -0,0 +1,9 @@ +set -eux + +mkdir stats-calculated +curl --compressed "https://dev.merged.dashboard.iatistandard.org/stats/ckan.json" > stats-calculated/ckan.json +mkdir -p stats-calculated/current/aggregated-publisher +mkdir -p stats-calculated/current/inverted-publisher +for f in activities codelist_values_by_major_version elements; do + echo "{}" > stats-calculated/current/inverted-publisher/$f.json +done diff --git a/plots.py b/plots.py deleted file mode 100644 index 6fcf8b7175..0000000000 --- a/plots.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python -""" -Show how to make date plots in matplotlib using date tick locators and -formatters. See major_minor_demo1.py for more information on -controlling major and minor ticks - -All matplotlib date plotting is done by converting date instances into -days since the 0001-01-01 UTC. The conversion, tick locating and -formatting is done behind the scenes so this is most transparent to -you. 
The dates module provides several converter functions date2num -and num2date - -""" -from __future__ import print_function -import datetime -import numpy as np -import matplotlib as mpl -mpl.use('Agg') -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -from collections import defaultdict - -import os -import unicodecsv -import common -import data - -# Import failed_downloads as a global -failed_downloads = unicodecsv.reader(open('data/downloads/history.csv')) - -gitaggregate_publisher = data.JSONDir('./stats-calculated/gitaggregate-publisher-dated') - -class AugmentedJSONDir(data.JSONDir): - def __getitem__(self, key): - if key == 'failed_downloads': - return dict((row[0],row[1]) for row in failed_downloads) - elif key == 'publisher_types': - out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.iteritems(): - if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring,count in publisher_data['activities'].iteritems(): - out[datestring][organization_type] += 1 - else: - print('Publisher not matched:', publisher) - return out - elif key == 'activities_per_publisher_type': - out = defaultdict(lambda: defaultdict(int)) - for publisher, publisher_data in gitaggregate_publisher.iteritems(): - if publisher in data.ckan_publishers: - organization_type = common.get_publisher_type(publisher)['name'] - for datestring,count in publisher_data['activities'].iteritems(): - out[datestring][organization_type] += count - else: - print('Publisher not matched:', publisher) - return out - else: - return super(AugmentedJSONDir, self).__getitem__(key) - - -from vars import expected_versions - -def make_plot(stat_path, git_stats, img_prefix=''): - if type(stat_path) == tuple: - stat_name = stat_path[0] - else: - stat_name = stat_path - - stat_dict = git_stats.get(stat_name) - if not stat_dict: - return - items = sorted(stat_dict.items()) - x_values = [ 
datetime.date(int(x[0:4]), int(x[5:7]), int(x[8:10])).toordinal() for x,y in items ] - if type(stat_path) == tuple: - y_values = [ dict((k,v) for k,v in y.iteritems() if stat_path[1](k)) for x,y in items ] - else: - y_values = [ y for x,y in items ] - - #years = mdates.YearLocator() # every year - #months = mdates.MonthLocator() # every month - dateFmt = mdates.DateFormatter('%Y-%m-%d') - - fig, ax = plt.subplots() - ax.set_color_cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k', '#00ff00', '#fc5ab8', '#af31f2']) - fig_legend = plt.figure() - dpi = 96 - fig.set_size_inches(600.0/dpi, 600.0/dpi) - if type(y_values[0]) == dict: - keys = set([ tm for y in y_values for tm in y.keys() ]) - plots = {} - for key in keys: - plots[key], = ax.plot(x_values, [ y.get(key) or 0 for y in y_values ]) - if stat_name in ['publisher_types', 'activities_per_publisher_type']: - # Sort by the most recent value for the key - sorted_items = sorted(plots.items(), key=lambda x: y_values[-1][x[0]], reverse=True) - fig_legend.legend([x[1] for x in sorted_items], [x[0] for x in sorted_items], 'center', ncol=1) - fig_legend.set_size_inches(600.0/dpi, 300.0/dpi) - else: - fig_legend.legend(plots.values(), plots.keys(), 'center', ncol=4) - fig_legend.set_size_inches(600.0/dpi, 100.0/dpi) - fig_legend.savefig('out/{0}{1}{2}_legend.png'.format(img_prefix,stat_name,stat_path[2])) - else: - keys = None - ax.plot(x_values, y_values) - - - # format the ticks - #ax.xaxis.set_major_locator(years) - ax.xaxis.set_major_formatter(dateFmt) - #ax.xaxis.set_minor_locator(months) - - #datemin = datetime.date(r.date.min().year, 1, 1) - #datemax = datetime.date(r.date.max().year+1, 1, 1) - #ax.set_xlim(datemin, datemax) - - # format the coords message box - #def price(x): return '$%1.2f'%x - #ax.format_ydata = price - ax.xaxis_date() - ax.format_xdata = mdates.DateFormatter('%Y-%m-%d') - ax.grid(True) - - # rotates and right aligns the x labels, and moves the bottom of the - # axes up to make room for them - 
fig.autofmt_xdate() - - fig.savefig('out/{0}{1}{2}.png'.format(img_prefix,stat_name,stat_path[2] if type(stat_path) == tuple else ''), dpi=dpi) - plt.close('all') - - fn = 'out/{0}{1}.csv'.format(img_prefix,stat_name) - with open(fn, 'w') as fp: - writer = unicodecsv.writer(fp) - if keys: - sorted_keys = sorted(list(keys)) - writer.writerow(['date'] + sorted_keys) - else: - writer.writerow(['date', 'value']) - for k,v in items: - if keys: - writer.writerow([k] + [ v.get(key) for key in sorted_keys ]) - else: - writer.writerow([k,v]) - del writer - - -# Load aggregated stats for all data -print("All data") -git_stats = AugmentedJSONDir('./stats-calculated/gitaggregate-dated') - -for stat_path in [ - 'activities', - 'publishers', - 'activity_files', - 'organisation_files', - 'file_size', - 'failed_downloads', - 'invalidxml', - 'nonstandardroots', - 'unique_identifiers', - ('validation', lambda x: x=='fail', ''), - ('publishers_validation', lambda x: x=='fail', ''), - ('publisher_has_org_file', lambda x: x=='no', ''), - ('versions', lambda x: x in expected_versions, '_expected'), - ('versions', lambda x: x not in expected_versions, '_other'), - ('publishers_per_version', lambda x: x in expected_versions, '_expected'), - ('publishers_per_version', lambda x: x not in expected_versions, '_other'), - ('file_size_bins', lambda x: True, ''), - ('publisher_types', lambda x: True, '' ), - ('activities_per_publisher_type', lambda x: True, '' ) - ]: - make_plot(stat_path, git_stats) - -# Delete git_stats variable to save memory -del git_stats - -try: - os.makedirs('out/publisher_imgs') -except OSError: - pass - -git_stats_publishers = AugmentedJSONDir('./stats-calculated/gitaggregate-publisher-dated/') -for publisher, git_stats_publisher in git_stats_publishers.iteritems(): - for stat_path in [ - 'activities', - 'activity_files', - 'organisation_files', - 'file_size', - 'invalidxml', - 'nonstandardroots', - 'publisher_unique_identifiers', - ('validation', lambda x: x=='fail', 
''), - ('versions', lambda x: True, ''), - ]: - make_plot(stat_path, git_stats_publisher, 'publisher_imgs/{0}_'.format(publisher)) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..1a352dc140 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,19 @@ +[tool.isort] +py_version=312 +extend_skip = ["__pycache__", ".mypy_cache", ".ve", ".venv", ".vagrant-ve"] +skip_gitignore = true +src_paths = ["dashboard"] +line_length = 119 +profile = "black" + + +[tool.flake8] +max-line-length = 119 +extend_ignore = ["E203", "W503", "E275", "E501", "E721"] +exclude = ["data", "gitout", "helpers/IATI-Rulesets", "helpers/schemas", "__pycache__", ".mypy_cache", ".pytest_cache", ".ve", ".venv", ".vagrant-ve"] + + +[tool.black] +line-length = 119 +target-version = ["py312"] +include = "^[^/]*\\.py|dashboard/.*\\.py$" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..20843432d9 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +testpaths = dashboard/tests +norecursedirs = *__pycache__* *.pytest_cache* +console_output_style = count diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000000..98d9f59faf --- /dev/null +++ b/requirements.in @@ -0,0 +1,16 @@ +django +django-environ +gunicorn +flask +frozen-flask +jinja2 +python-dateutil +pytz +matplotlib +werkzeug +xmlschema +lxml +requests +markupsafe +itsdangerous +tqdm diff --git a/requirements.txt b/requirements.txt index 93eda8ebe6..5f9bb2cb13 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,91 @@ -Jinja2==2.9.5 -matplotlib==2.0.0 -requests==2.20.0 -Frozen-Flask==0.13 -unicodecsv==0.14.1 -lxml==3.7.2 -pytz==2016.10 -Flask==0.12.3 -MarkupSafe==0.23 -Werkzeug==0.11.15 -argparse==1.4.0 -itsdangerous==0.24 -mock==2.0.0 -nose==1.3.7 -numpy==1.12.0 -pyparsing==2.1.10 -python-dateutil==2.6.0 -six==1.10.0 -wsgiref==0.1.2 +# +# This file is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile +# 
+asgiref==3.8.1 + # via django +blinker==1.8.2 + # via flask +certifi==2024.8.30 + # via requests +charset-normalizer==3.4.0 + # via requests +click==8.1.7 + # via flask +contourpy==1.3.0 + # via matplotlib +cycler==0.12.1 + # via matplotlib +django==5.1.4 + # via -r requirements.in +django-environ==0.11.2 + # via -r requirements.in +elementpath==4.6.0 + # via xmlschema +flask==3.0.3 + # via + # -r requirements.in + # frozen-flask +fonttools==4.54.1 + # via matplotlib +frozen-flask==1.0.2 + # via -r requirements.in +gunicorn==23.0.0 + # via -r requirements.in +idna==3.10 + # via requests +itsdangerous==2.2.0 + # via + # -r requirements.in + # flask +jinja2==3.1.4 + # via + # -r requirements.in + # flask +kiwisolver==1.4.7 + # via matplotlib +lxml==5.3.0 + # via -r requirements.in +markupsafe==3.0.2 + # via + # -r requirements.in + # jinja2 + # werkzeug +matplotlib==3.9.2 + # via -r requirements.in +numpy==2.1.3 + # via + # contourpy + # matplotlib +packaging==24.1 + # via + # gunicorn + # matplotlib +pillow==11.0.0 + # via matplotlib +pyparsing==3.2.0 + # via matplotlib +python-dateutil==2.9.0.post0 + # via + # -r requirements.in + # matplotlib +pytz==2024.2 + # via -r requirements.in +requests==2.32.3 + # via -r requirements.in +six==1.16.0 + # via python-dateutil +sqlparse==0.5.1 + # via django +tqdm==4.67.0 + # via -r requirements.in +urllib3==2.2.3 + # via requests +werkzeug==3.1.2 + # via + # -r requirements.in + # flask +xmlschema==3.4.3 + # via -r requirements.in diff --git a/requirements_dev.in b/requirements_dev.in new file mode 100644 index 0000000000..e81b4f6610 --- /dev/null +++ b/requirements_dev.in @@ -0,0 +1,8 @@ +-r requirements.txt +pytest +pytest-cov +coveralls +flake8 +flake8-pyproject +isort +black diff --git a/requirements_dev.txt b/requirements_dev.txt index a32183852f..d238804c5f 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,4 +1,170 @@ --r requirements.txt -pytest==3.0.6 -pytest-cov==2.4.0 -coveralls==1.1 +# +# This file 
is autogenerated by pip-compile with Python 3.12 +# by the following command: +# +# pip-compile requirements_dev.in +# +asgiref==3.8.1 + # via + # -r requirements.txt + # django +black==24.10.0 + # via -r requirements_dev.in +blinker==1.8.2 + # via + # -r requirements.txt + # flask +certifi==2024.8.30 + # via + # -r requirements.txt + # requests +charset-normalizer==3.4.0 + # via + # -r requirements.txt + # requests +click==8.1.7 + # via + # -r requirements.txt + # black + # flask +contourpy==1.3.0 + # via + # -r requirements.txt + # matplotlib +coverage[toml]==7.6.4 + # via + # coveralls + # pytest-cov +coveralls==4.0.1 + # via -r requirements_dev.in +cycler==0.12.1 + # via + # -r requirements.txt + # matplotlib +django==5.1.4 + # via -r requirements.txt +django-environ==0.11.2 + # via -r requirements.txt +docopt==0.6.2 + # via coveralls +elementpath==4.6.0 + # via + # -r requirements.txt + # xmlschema +flake8==7.1.1 + # via + # -r requirements_dev.in + # flake8-pyproject +flake8-pyproject==1.2.3 + # via -r requirements_dev.in +flask==3.0.3 + # via + # -r requirements.txt + # frozen-flask +fonttools==4.54.1 + # via + # -r requirements.txt + # matplotlib +frozen-flask==1.0.2 + # via -r requirements.txt +gunicorn==23.0.0 + # via -r requirements.txt +idna==3.10 + # via + # -r requirements.txt + # requests +iniconfig==2.0.0 + # via pytest +isort==5.13.2 + # via -r requirements_dev.in +itsdangerous==2.2.0 + # via + # -r requirements.txt + # flask +jinja2==3.1.4 + # via + # -r requirements.txt + # flask +kiwisolver==1.4.7 + # via + # -r requirements.txt + # matplotlib +lxml==5.3.0 + # via -r requirements.txt +markupsafe==3.0.2 + # via + # -r requirements.txt + # jinja2 + # werkzeug +matplotlib==3.9.2 + # via -r requirements.txt +mccabe==0.7.0 + # via flake8 +mypy-extensions==1.0.0 + # via black +numpy==2.1.3 + # via + # -r requirements.txt + # contourpy + # matplotlib +packaging==24.1 + # via + # -r requirements.txt + # black + # gunicorn + # matplotlib + # pytest 
+pathspec==0.12.1 + # via black +pillow==11.0.0 + # via + # -r requirements.txt + # matplotlib +platformdirs==4.3.6 + # via black +pluggy==1.5.0 + # via pytest +pycodestyle==2.12.1 + # via flake8 +pyflakes==3.2.0 + # via flake8 +pyparsing==3.2.0 + # via + # -r requirements.txt + # matplotlib +pytest==8.3.3 + # via + # -r requirements_dev.in + # pytest-cov +pytest-cov==6.0.0 + # via -r requirements_dev.in +python-dateutil==2.9.0.post0 + # via + # -r requirements.txt + # matplotlib +pytz==2024.2 + # via -r requirements.txt +requests==2.32.3 + # via + # -r requirements.txt + # coveralls +six==1.16.0 + # via + # -r requirements.txt + # python-dateutil +sqlparse==0.5.1 + # via + # -r requirements.txt + # django +tqdm==4.67.0 + # via -r requirements.txt +urllib3==2.2.3 + # via + # -r requirements.txt + # requests +werkzeug==3.1.2 + # via + # -r requirements.txt + # flask +xmlschema==3.4.3 + # via -r requirements.txt diff --git a/speakers_kit.py b/speakers_kit.py deleted file mode 100644 index f126bbad94..0000000000 --- a/speakers_kit.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import print_function -import json -import data -import unicodecsv -from collections import defaultdict -from itertools import izip_longest - -def codelist_dict(codelist_path): - codelist_json = json.load(open(codelist_path)) - return {c['code']:c['name'] for c in codelist_json['data']} - -organisation_type_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/OrganisationType.json') -country_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/Country.json') -region_dict = codelist_dict('data/IATI-Codelists-2/out/clv2/json/en/Region.json') - -aggregated_publisher = data.JSONDir('./stats-calculated/current/aggregated-publisher/') - -activities_by = defaultdict(lambda: defaultdict(int)) -publishers_by = defaultdict(lambda: defaultdict(int)) - -for publisher, publisher_data in aggregated_publisher.items(): - if publisher in data.ckan_publishers: - organization_type = 
data.ckan_publishers[publisher]['result']['publisher_organization_type'] - #activities_by['type'][organisation_type_dict[organization_type]] += publisher_data['activities'] - publishers_by['type'][organisation_type_dict[organization_type]] += 1 - - publisher_country_code = data.ckan_publishers[publisher]['result']['publisher_country'] - if publisher_country_code in country_dict or publisher_country_code in region_dict: - publishers_by['country'][country_dict.get(publisher_country_code) or region_dict.get(publisher_country_code)] += 1 - else: - print('Unrecognised registry publisher_country code: ', publisher_country_code) - activity_countries = publisher_data['codelist_values'].get('.//recipient-country/@code') - if activity_countries: - for code, count in activity_countries.items(): - if code and code in country_dict: - activities_by['country'][country_dict.get(code)] += count - activity_regions = publisher_data['codelist_values'].get('.//recipient-region/@code') - if activity_regions: - for code, count in activity_regions.items(): - if code and code in region_dict: - activities_by['region'][region_dict.get(code)] += count - else: - print('Publisher not matched:', publisher) - -fieldnames = ['publisher_type', 'publishers_by_type', '', 'publisher_country', 'publishers_by_country', '', 'date', 'publishers_quarterly', '', 'activity_country', 'activities_by_country', '', 'activity_region', 'activities_by_region' ] - -publishers_quarterly = [] -publishers_by_date = json.load(open('./stats-calculated/gitaggregate-dated/publishers.json')) -for date, publishers in sorted(publishers_by_date.items()): - if (date[8:10] == '30' and date[5:7] in ['06','09']) or (date[8:10] == '31' and date[5:7] in ['03','12']): - publishers_quarterly.append((date, publishers)) - -with open('out/speakers_kit.csv', 'w') as fp: - writer = unicodecsv.DictWriter(fp, fieldnames) - writer.writeheader() - sort_second = lambda x: sorted(x, key=lambda y: y[1], reverse=True) - for publishers_by_type, 
publishers_by_country, publishers_quarterly_, activities_by_country, activities_by_region in izip_longest( - sort_second(publishers_by['type'].items()), - sort_second(publishers_by['country'].items()), - publishers_quarterly, - sort_second(activities_by['country'].items()), - sort_second(activities_by['region'].items()), - ): - writer.writerow({ - 'publisher_type': publishers_by_type[0] if publishers_by_type else '', - 'publishers_by_type': publishers_by_type[1] if publishers_by_type else '', - 'publisher_country': publishers_by_country[0] if publishers_by_country else '', - 'publishers_by_country': publishers_by_country[1] if publishers_by_country else '', - 'date': publishers_quarterly_[0] if publishers_quarterly_ else '', - 'publishers_quarterly': publishers_quarterly_[1] if publishers_quarterly_ else '', - 'activity_country': activities_by_country[0] if activities_by_country else '', - 'activities_by_country': activities_by_country[1] if activities_by_country else '', - 'activity_region': activities_by_region[0] if activities_by_region else '', - 'activities_by_region': activities_by_region[1] if activities_by_region else '', - }) diff --git a/static/img/favicon.png b/static/img/favicon.png deleted file mode 100644 index f70593250e..0000000000 Binary files a/static/img/favicon.png and /dev/null differ diff --git a/static/templates/activities.html b/static/templates/activities.html deleted file mode 100644 index c135c2de88..0000000000 --- a/static/templates/activities.html +++ /dev/null @@ -1,14 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    - {{boxes.box('Total activities', current_stats.aggregated.activities, 'activities.png', 'activities.json', - description='Total count of activities across all publishers, over time. - Note: this includes activities with duplicate iati-identifier')}} - {{boxes.box('Unique Activities', current_stats.aggregated.unique_identifiers, 'unique_identifiers.png', 'unique_identifiers.json', - description='Total count of unique activities across all publishers, over time - Note: this excludes counts of duplicate iati-identifier')}} - {{boxes.box('Activities by publisher type', '', 'activities_per_publisher_type.png', None, 'activities_per_publisher_type_legend.png', - description='Count of all activities, aggregated by publisher type, over time.')}} -
    -{% endblock %} diff --git a/static/templates/base.html b/static/templates/base.html deleted file mode 100644 index cae05353ad..0000000000 --- a/static/templates/base.html +++ /dev/null @@ -1,237 +0,0 @@ - - - - - - - - - - {% block title %}IATI Dashboard - {{page_titles[page]}}{% endblock %} - - - - - - - - - - - {% block extrahead %}{% endblock %} - - -
    - - -
    - IATI Publishing Statistics now has a new home: http://publishingstats.iatistandard.org. -
    - {% if page_titles[page] == "Data Quality" %} -
    - Organisations who publish IATI data are encouraged to use IATI’s new Validator tool to understand how to improve the quality of their data. See more information. -
    - {% endif %} - -
    - {% block container %} - {% block page_header_div %} - - {% endblock %} - - {% block content %} - {% endblock %} - - {% endblock %} -
    - -
    - - - - - - - -{% block tablesorterscript %}{% endblock %} - - - {% block extrafooter %}{% endblock %} - - diff --git a/static/templates/booleans.html b/static/templates/booleans.html deleted file mode 100644 index d0b37f2702..0000000000 --- a/static/templates/booleans.html +++ /dev/null @@ -1,28 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} - -{% block content %} -

    (This table as JSON)

    - -
    -
    -

    List of values used by publishers for attributes that should be valid XML booleans (ie. true, false, 0 or 1).

    - {% include 'tablesorter_instructions.html' %} -
    -
    {{dataset|dataset_to_publisher}}{{dataset}}{{ publisher }}{{ dataset }}
    - - - {% for boolean, values in current_stats.inverted_publisher.boolean_values.items() %} - {% for value, publishers in values.items()%} - - - - - - {% endfor %} - {% endfor %} - -
    ElementValuesPublishers
    {{boolean}}{{value}}{%for publisher in publishers%}{{publisher}} {%endfor%}
    -
    - -{% endblock %} diff --git a/static/templates/boxes.html b/static/templates/boxes.html deleted file mode 100644 index 4e7c6b930c..0000000000 --- a/static/templates/boxes.html +++ /dev/null @@ -1,24 +0,0 @@ -{% macro box(title, number, image, json, legend, folderextra, description) %} -
    -
    -
    -

    - {% set title_id = title.replace(' ', '-').lower() %} - {{number}} - {{title}} -

    - {% if json %} - (J) - {% endif %} -
    -
    -
    -

    {{description|safe}}

    - - {% if legend %} - - {% endif %} -
    -
    -
    -{% endmacro %} diff --git a/static/templates/codelist.html b/static/templates/codelist.html deleted file mode 100644 index fc4ee5c588..0000000000 --- a/static/templates/codelist.html +++ /dev/null @@ -1,90 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block title %} -{{ super () }} - Codelists -{% endblock %} - -{% block page_header %} -

    Codelist values used for {{element}}

    -

    Who uses {{codelist_mapping[major_version].get(element)}} in {{element}}?

    -

    (for files published to version {{major_version}}.xx of the standard)

    -{% if codelist_conditions[major_version][element] %} -
    Note: this attribute can be on multiple codelists (specified by the @vocabulary attribute) but the dashboard only currently checks against the default codelist - see issue #174. Therefore some publishers may incorrectly show up as "Not On Codelist".
    -{% endif %} -

    (This page in JSON format)

    -

    Values should be on the {{codelist_mapping[major_version].get(element)}} codelist.

    -{% endblock %} - -{% block content %} - -

    - {% with elements=reverse_codelist_mapping[major_version][codelist_mapping[element]] %} - {% if elements|count > 1 %} - Other elements/attributes on this codelist: -

      - {% for el in elements%} - {% if el in current_stats.inverted_publisher.codelist_values[major_version].keys() %} - {% if el!=element%}
    • {{el}}
    • {% endif %} - {% endif %} - {% endfor %} -
    - {% endif %} - {% endwith %}

    - -
    -
    -
    -
    -

    On Codelist

    -
    -
    -

    Codes that are on the {{codelist_mapping[major_version].get(element)}} codelist.

    -
    - - - - {% for value, publishers in values.items() %} - {% if value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - - {% endif %} - {% endfor %} - -
    ValuePublishers
    {{value}} - {{publishers|length}} -
    -
    -
    - -
    -
    -
    -

    Not On Codelist

    -
    -
    -

    Codes that are not on the {{codelist_mapping[major_version].get(element)}} codelist.

    -
    - - - - {% for value, publishers in values.items() %} - {% if not value in codelist_sets[major_version].get(codelist_mapping[major_version].get(element)) %} - - {% endif %} - {% endfor %} - -
    ValuePublishers
    {{value}} - {{publishers|length}} -
    -
    -
    -
    - -{% endblock %} - -{% block extrafooter %} - -{% endblock %} diff --git a/static/templates/codelists.html b/static/templates/codelists.html deleted file mode 100644 index 6bac7a6616..0000000000 --- a/static/templates/codelists.html +++ /dev/null @@ -1,46 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -

    Count of the different values used by all publishers, per codelist.

    -

    Note: some Elements/Attributes may use the same codelist. In each case, the values counted are different for each use.

    - {% include 'tablesorter_instructions.html' %} - - {% for major_version in MAJOR_VERSIONS %} -
    -
    -

    Codelists for version {{major_version}}.xx

    -
    - {% if major_version not in current_stats.inverted_publisher.codelist_values_by_major_version %} - There are no publishers using {{ major_version }}.xx codelists yet. - {% else %} - - - - - - - - - - - {% for i, (element, values) in enumerate(current_stats.inverted_publisher.codelist_values_by_major_version[major_version].items()) %} - - - - - - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(get_codelist_values(values))) %} - - {% endwith %} - {% with codes=sorted(set(get_codelist_values(values)).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - - {% endwith %} - - {% endfor %} - -
    Element/Attribute on codelistCodelistTotal Values Used (J)Total Values on CodelistValues used, on CodelistValues used, not on Codelist
    {{element}}{{codelist_mapping[major_version].get(element)}}{{values|length}}{{codelist_sets[major_version].get(codelist_mapping[major_version].get(element))|length}}{{codes|length}}{{codes|length}}
    -
    - {% endif %} - {% endfor %} - -{% endblock %} diff --git a/static/templates/comment.html b/static/templates/comment.html deleted file mode 100644 index ad7b878fb8..0000000000 --- a/static/templates/comment.html +++ /dev/null @@ -1,8 +0,0 @@ -
    -
    -

    Comment

    -
    -
    -

    To comment on this methodology, please use this page on the support forum.

    -
    -
    \ No newline at end of file diff --git a/static/templates/dates.html b/static/templates/dates.html deleted file mode 100644 index 6fc9792ed7..0000000000 --- a/static/templates/dates.html +++ /dev/null @@ -1,30 +0,0 @@ -{% extends 'base.html' %} -{% block content %} -
    -

    Activity Dates

    -
    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - - {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} - - - - - - - - {% endfor %} - -
    PublisherEarliest DateLatest DateLatest Actual StartLatest Actual End
    {{publisher_title}}{% if publisher_stats.date_extremes.min.overall %}{{publisher_stats.date_extremes.min.overall}}{% endif %}{% if publisher_stats.date_extremes.max.overall %}{{publisher_stats.date_extremes.max.overall}}{% endif %}{% if publisher_stats.date_extremes.max.by_type['start-actual'] %}{{publisher_stats.date_extremes.max.by_type['start-actual'] }}{% endif %}{% if publisher_stats.date_extremes.max.by_type['end-actual'] %}{{publisher_stats.date_extremes.max.by_type['end-actual'] }}{% endif %}
    -
    -{% endblock %} diff --git a/static/templates/download.html b/static/templates/download.html deleted file mode 100644 index 24c3a1ad07..0000000000 --- a/static/templates/download.html +++ /dev/null @@ -1,39 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    - {{boxes.box('Files that fail to download', current_stats.download_errors|length, 'failed_downloads.png', - description='Count of files that fail to download, over time.')}} -
    - -

    History of Download Errors

    - -

    This table as JSON

    - -
    -
    -

    List of files that fail to download.

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - - - {% for code, publisher, dataset, url in current_stats.download_errors %} - - - - - - - {% endfor %} - -
    PublisherRegistry DatasetURLError Code
    {{publisher}}{{dataset}}{{url|url_to_filename}}{{code}}
    -
    -{% endblock %} diff --git a/static/templates/element.html b/static/templates/element.html deleted file mode 100644 index 198fadeec2..0000000000 --- a/static/templates/element.html +++ /dev/null @@ -1,101 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block title %} -{{ super () }} - Elements -{% endblock %} - -{% block page_header %} -

    Usage of {{element}}

    -

    Who uses {{element}}?

    -

    Checking usage of {{element}} across publishers, files and activities.

    -{% endblock %} - -{% block content %} -

    Publishers

    - -

    (In JSON format)

    - -
    -
    -
    -
    -

    Publishing this element

    -
    - - - - - - - - - - - {% for publisher in sorted(publishers) %} - - - {% with publisher_inverted=get_publisher_stats(publisher, 'inverted-file')%} - - {% endwith %} - - {% with publisher_stats=get_publisher_stats(publisher)%} - - - {% endwith %} - - - {% endfor%} - -
    PublisherFiles with ElementTotal FilesActivities/Orgs with ElementTotal Instances of ElementTotal Activities
    {{publisher}}{% if 'elements' in publisher_inverted %}{{publisher_inverted.elements[element]|count}}{% endif %}{{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}}{{publisher_stats.elements[element]}}{{publisher_stats.elements_total[element]}}{{current_stats.inverted_publisher.activities[publisher]}}
    -
    -
    - -
    -
    -
    -

    Not publishing this element

    -
    - - - - - - - - - {% for publisher in current_stats.inverted_publisher.publishers %} - {% if publisher not in publishers %} - - - - - - - {% endif %} - {% endfor%} - -
    PublisherTotal FilesTotal ActivitiesTotal Organisations
    {{publisher}}{{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}}{{current_stats.inverted_publisher.activities[publisher]}}{{current_stats.inverted_publisher.organisations[publisher]}}
    -
    -
    -
    - - -

    Files

    -
    - - - - - - - {% for publisher, files in file_grouped.items() %} - - {% endfor %} - -
    PublisherFiles
    {{publisher}} - {% for file in files %} - {{file}} - {% endfor %} -
    -
    -{% endblock %} diff --git a/static/templates/elements.html b/static/templates/elements.html deleted file mode 100644 index d5e6081ad2..0000000000 --- a/static/templates/elements.html +++ /dev/null @@ -1,49 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} - - - -
    -
    -

    Usage of an IATI Element by: -

      -
    • Publisher: Total number of publishers that use this element (at least once)
    • -
    • Activities / Orgs: total number of iati-activity or iati-organisation (where applicable) that use this element.
    • -
    • Total - Count of total instances in which this element is used, across all publishers.
    • -
    -

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - {% for i, (element,publishers) in enumerate(current_stats.inverted_publisher.elements.items()) %} - - - - - - - {% endfor%} - -
    Element/AttributePublishers (J)Activities/Orgs with element (J)Total Instances of Element (J)
    {{element}}{{publishers|length}}{{current_stats.aggregated.elements[element]}}{{current_stats.aggregated.elements_total[element]}}
    -
    - -{% endblock %} diff --git a/static/templates/exploring_data.html b/static/templates/exploring_data.html deleted file mode 100644 index 7cd53a2a1b..0000000000 --- a/static/templates/exploring_data.html +++ /dev/null @@ -1,6 +0,0 @@ -{% extends 'section_index.html' %} -{% block about %} -This section tracks the various elements, attributes and codelists within published IATI data. -For every instance of these properties, a report is made available. -{% endblock about %} -{% block publisher_page_hash %}#h_exploringdata{% endblock %} diff --git a/static/templates/files.html b/static/templates/files.html deleted file mode 100644 index 65cebddc9b..0000000000 --- a/static/templates/files.html +++ /dev/null @@ -1,60 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    - {{boxes.box('Total activity files', current_stats.aggregated.activity_files, 'activity_files.png', 'activity_files.json', - description='Count of total number of activity files over time.')}} - {{boxes.box('Total organisation files', current_stats.aggregated.organisation_files, 'organisation_files.png', 'organisation_files.json', - description='Count of total number of organisation files, over time.')}} -
    -
    - {{boxes.box('Total File Size', current_stats.aggregated.file_size|filesizeformat, 'file_size.png', 'file_size.json')}} -
    -
    -
    - (J) -

    File Sizes

    -
    - {% include 'tablesorter_instructions.html' %} - - - {% for bin,freq in sorted(current_stats.aggregated.file_size_bins.items(), key=firstint) %} - - - - - {% endfor %} - -
    {{bin}}{{freq}}
    -
    -
    -
    - -
    - - - - - - - - - - - - {% for package, activities in current_stats.inverted_file.activities.items() %} - - - - - - - - {% endfor %} - -
    PublisherPackageActivities (J)Organisations (J)File Size (J)
    {{publisher_name[package|dataset_to_publisher]}}{{package}}{{activities}}{{current_stats.inverted_file.organisations.get(package)}}{{current_stats.inverted_file.file_size.get(package)|filesizeformat}}
    -
    -{% endblock %} -{% block tablesorteroptions %} -{textExtraction:{4: function(node,table,cellIndex) { return $(node).attr('data-bytes'); }}} -{% endblock %} diff --git a/static/templates/identifiers.html b/static/templates/identifiers.html deleted file mode 100644 index 3a4215e5c9..0000000000 --- a/static/templates/identifiers.html +++ /dev/null @@ -1,39 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    -
    -

    - Count of duplicates, per publisher -

    -
    -
    -

    - Duplicate identifiers: a count of the unique iati-identifier that are duplicated. - Instances of duplicate identifiers: the total number of activities that contain a duplicate iati-identifier, within a publisher dataset. - Example: two identifiers could be found as having duplicates. Across the dataset, these duplicates could account for 200 activities. -

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} - {% if publisher_stats.publisher_duplicate_identifiers|count != 0 %} - - - - - - {% endif %} - {% endfor %} - -
    PublisherDuplicate identifiersInstances of duplicate identifiers
    {{publisher_title}}{{publisher_stats.publisher_duplicate_identifiers|length}}{{publisher_stats.publisher_duplicate_identifiers.values()|sum}}
    -
    -{% endblock %} - diff --git a/static/templates/license.html b/static/templates/license.html deleted file mode 100644 index 27d646ae46..0000000000 --- a/static/templates/license.html +++ /dev/null @@ -1,28 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block page_header %} -

    {{license_names[license]}}

    -

    License id: {{license}}

    -{% endblock %} - -{% block content %} - -
    - - - - - - - - - {% for publisher, files in publisher_counts %} - - - - - {% endfor %} - -
    PublisherFiles
    {{publisher}}{{files}}
    -
    -{% endblock %} diff --git a/static/templates/licenses.html b/static/templates/licenses.html deleted file mode 100644 index 66876082ff..0000000000 --- a/static/templates/licenses.html +++ /dev/null @@ -1,30 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    -
    -

    Count of publishers per licences in use on the IATI Registry.

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - - - {% for license, files in sorted(license_count.items()) %} - - - - - - - {% endfor %} - -
    License NameLicense IdFilesPublishers
    {{license_names[license]}}{{license}}{{files}}{{publisher_license_count[license]}}
    -
    -{% endblock %} diff --git a/static/templates/publisher.html b/static/templates/publisher.html deleted file mode 100644 index ab605b7395..0000000000 --- a/static/templates/publisher.html +++ /dev/null @@ -1,415 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block title %} -{{ super () }} Publisher: {{publisher_name[publisher]}} -{% endblock %} -{% block page_header %} -(Publisher Stats JSON) -

    Publisher: {{publisher_name[publisher]}}

    -

    -{% endblock %} - -{% block content %} -
    -
    -
    -

    Table of Contents

    -
    - -
    -
    -
    -
    - -

    Headlines

    - -
    -
    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    On the Registry{{publisher}}
    Reporting Org on Registry{% if ckan_publishers and publisher in ckan_publishers %} - {{ckan_publishers[publisher].result.publisher_iati_id}} - {% endif %} -
    Reporting Org(s) in Data - {% for org in publisher_stats.reporting_orgs%} - {{org|replace(' ', ' ')}} - {% endfor %} -
    Activity Files{{publisher_stats.activity_files}}
    Organisation Files{{publisher_stats.organisation_files}}
    Total File Size{{publisher_stats.file_size|filesizeformat}}
    Activities{{publisher_stats.activities}}
    Unique Activities{{publisher_stats.iati_identifiers|count}}
    Organisations{{publisher_stats.organisations}}
    Versions - {%for version in publisher_stats.versions.keys() %} - {{version|replace(' ', ' ')|safe}} - {%endfor%} -
    Hierarchies - {%for hierarchy in publisher_stats.hierarchies %} - {{hierarchy}} - {%endfor%} -
    Licenses - {%for license in publisher_licenses %} - {{license}} - {%endfor%} -
    Files failing validation - {% if publisher in current_stats.inverted_file_grouped.validation.fail %} - {{current_stats.inverted_file_grouped.validation.fail[publisher]|length}} - {% else %} - 0 - {% endif %} -
    -
    -
    - {{boxes.box('Activities', publisher_stats.activities, '../publisher_imgs/'+publisher+'_activities.png', publisher+'/activities.json', '', '-publisher')}} -
    - -
    - {{boxes.box('Activity Files', publisher_stats.activity_files, '../publisher_imgs/'+publisher+'_activity_files.png', publisher+'/activity_files.json', '', '-publisher')}} - {{boxes.box('Organisation Files', publisher_stats.organisation_files, '../publisher_imgs/'+publisher+'_organisation_files.png', publisher+'/organisation_files.json', '', '-publisher')}} -
    - -
    - {{boxes.box('Files per version', '', '../publisher_imgs/'+publisher+'_versions.png', publisher+'/versions.json', '../publisher_imgs/'+publisher+'_versions_legend.png', '-publisher')}} - {{boxes.box('Total File Size', publisher_stats.file_size|filesizeformat, '../publisher_imgs/'+publisher+'_file_size.png', publisher+'/file_size.json', '', '-publisher')}} -
    - -
    - {{boxes.box('Files failing validation', publisher_stats.validation.fail, '../publisher_imgs/'+publisher+'_validation.png', publisher+'/validation.json', '', '-publisher')}} - {{boxes.box('Files where XML is not well-formed', publisher_stats.invalidxml, '../publisher_imgs/'+publisher+'_invalidxml.png', publisher+'/invalidxml.json', '', '-publisher')}} -
    - -

    Data Quality

    - - {% if publisher in current_stats.inverted_file_grouped.validation.fail %} -
    -
    -
    - -

    Files Failing Validation

    - - - - -
    -
    - {% else %} - -

    This section will be blank if no issues were found.

    - -
    - {% endif %} - - {% if 1 in publisher_inverted.invalidxml.values() %} -
    -
    -
    - (J) -

    Files where XML is not well-formed

    -
    - - - - - - - - {% for dataset, invalid in publisher_inverted.invalidxml.items() %} - {% if invalid %} - - - - {% endif %} - {% endfor %} - -
    Dataset
    {{dataset}}
    -
    -
    - {% endif %} - - {% if 1 in publisher_inverted.nonstandardroots.values() %} -
    -
    - -
    - (J) -

    Files with non-standard roots

    -
    -
    - - - - - - - {% for dataset, nonstandard in publisher_inverted.nonstandardroots.items() %} - {% if nonstandard %} - - - - {% endif %} - {% endfor %} - -
    Dataset
    {{dataset}}
    -
    -
    - {% endif %} - -
    - -

    Financial

    - - {% macro currency_value(d) %} - {% if d %} - {% for currency, value in d.items() %} - {% if value!=None %} - {{value}} {{currency}}
    - {% endif %} - {% endfor %} - {% endif %} - {% endmacro %} - -
    -
    -

    Budgets

    -
    -
    -

    The below figures are calculated based on the data contained within the <budget> element for each reported activity. Original and revised elements are based on the value declared in the budget/@type attribute. Where budgets fall across two calendar years, the month of the <period-end> date is used to determine annual groupings, with budgets for periods ending January-June added to the previous calendar year.

    -
    - - - - - - - - - - - - - - {% for row in budget_table %} - - - - - - - - - - {% endfor %} - -
    YearCount (all)Sum (all)Count (Original)Sum (Original)Count (Revised)Sum (Revised)
    {{row.year}}{% if row.count_total %}{{row.count_total}}{% endif %}{% if row.sum_total %}{{currency_value(row.sum_total)}}{% endif %}{% if row.count_original %}{{row.count_original}}{% endif %}{{currency_value(row.sum_original)}}{% if row.count_revised %}{{row.count_revised}}{% endif %}{{currency_value(row.sum_revised)}}
    -
    - - -

    Exploring Data

    - -
    -
    -

    Files

    -
    - - - - - - - - - - - - {% for package, activities in publisher_inverted.activities.items() %} - - - - - - - - {% endfor %} - -
    PackageActivities (J)Organisations (J)File Size (J)Version (J)
    {{package}}{{activities}}{{current_stats.inverted_file.organisations.get(package)}}{{current_stats.inverted_file.file_size.get(package)|filesizeformat}}{{current_stats.aggregated_file[publisher][package]['versions'].keys()[0]}}
    -
    - - {% for major_version in MAJOR_VERSIONS %} - {% if major_version in publisher_stats.codelist_values_by_major_version %} -
    - -
    - (J) -

    Codelist Values (version {{major_version}}.xx)

    -
    -
    - - - - - - - - - - {% for element, values in publisher_stats.codelist_values_by_major_version[major_version].items() %} - - {% with element_i=current_stats.inverted_publisher.codelist_values_by_major_version[major_version].keys().index(element) %} - - - {% with codes=sorted(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)).intersection(values.keys())) %} - - {% endwith %} - {% with codes=sorted(set(values.keys()).difference(codelist_sets[major_version].get(codelist_mapping[major_version].get(element)))) %} - - {% endwith %} - {% endwith %} - - {% endfor %} - -
    Element/AttributeCodelistValues on CodelistValues not on Codelist
    {{element}}{{codelist_mapping[major_version].get(element)}}{% if codes|count %} - {{codes|count}} - {%else%}{{codes|count}}{%endif%} - - {% if codes|count %} - - {{codes|count}} - - {% if codelist_conditions[major_version][element] %} -
    Note: this attribute can be on multiple codelists (specified by the @vocabulary attribute) but the Dashboard only currently checks against the default codelist - see issue #174. Therefore some publishers may incorrectly show up as "Not On Codelist".
    - {% endif %} - {%else%} - {{codes|count}} - {%endif%} -
    -
    - {% endif %} - {% endfor %} - -
    - -
    - (J) -

    Elements and Attributes Published

    -
    -
    - - - - - - - - - {% for element, count in publisher_stats.elements.items() %} - - {% with element_i=current_stats.inverted_publisher.elements.keys().index(element) %} - - - - {% endwith %} - - {% endfor %} - -
    Element/AttributeActivities/OrganisationsFiles
    {{element}}{{count}}{{publisher_inverted.elements[element]|count}}
    -
    - -{% endblock %} - -{% block extrafooter %} - -{% endblock %} - - -{% block tablesorterscript %} - - -{% endblock %} diff --git a/static/templates/publishers.html b/static/templates/publishers.html deleted file mode 100644 index ffecec08b0..0000000000 --- a/static/templates/publishers.html +++ /dev/null @@ -1,52 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    - {{boxes.box('Publishers', current_stats.aggregated.publishers, 'publishers.png', 'publishers.json', - description='This graph shows the number of organisations publishing IATI data over time.')}} - {{boxes.box('Publishers by type', '', 'publisher_types.png', None, 'publisher_types_legend.png', - description='This graph show the various types of organisations publishing IATI data.')}} -
    - - -
    -
    -

    (This table as CSV)

    -

    List of current active IATI publishers, Click on the publisher name for more details.

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - - - - - - - {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} - - - - - - - - - - - {% endfor %} - -
    Publisher NamePublisher Registry IdActivities (J)Organisations (J)Files (*)Total File Size (J)Hierarchies (J)Reporting Orgs (J)
    {{publisher_name[publisher]}}{{publisher}}{{current_stats.inverted_publisher.activities[publisher]}}{{publisher_stats.organisations}}{{current_stats.inverted_publisher.activity_files.get(publisher)+current_stats.inverted_publisher.organisation_files.get(publisher)}}{{current_stats.inverted_publisher.file_size.get(publisher)|filesizeformat}}{{publisher_stats.hierarchies|length}}{{publisher_stats.reporting_orgs|length}}
    -
    -

    * Files is the sum of Activity Files (J) and Organisation Files (J).

    -{% endblock %} -{% block tablesorteroptions %} -{textExtraction:{5: function(node,table,cellIndex) { return $(node).attr('data-bytes'); }}} -{% endblock %} diff --git a/static/templates/reporting_orgs.html b/static/templates/reporting_orgs.html deleted file mode 100644 index 2fe6f7465a..0000000000 --- a/static/templates/reporting_orgs.html +++ /dev/null @@ -1,37 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    -
    -

    - Inconsistent Reporting Org references -

    -
    -
    -

    List of Publishers where the reporting-org element does not match the reporting-org field in the IATI Registry.

    - {% include 'tablesorter_instructions.html' %} -
    - - - - - - - - - {% for publisher_title,publisher in publishers_ordered_by_title %} - {% set publisher_stats = get_publisher_stats(publisher) %} - {% if publisher_stats.reporting_orgs|count != 1 or publisher_stats.reporting_orgs.keys()[0] != ckan_publishers[publisher].result.publisher_iati_id %} - - - - - - - {% endif %} - {% endfor %} - -
    PublisherReporting Org on RegistryReporting Orgs in Data (count)Reporting Orgs in Data
    {{publisher_title}}{{ckan_publishers[publisher].result.publisher_iati_id}}{{publisher_stats.reporting_orgs|length}}{% for ro in publisher_stats.reporting_orgs%}{{ro}} {% endfor %}
    -
    -{% endblock %} - diff --git a/static/templates/sparkwise.html b/static/templates/sparkwise.html deleted file mode 100644 index 5965b629d9..0000000000 --- a/static/templates/sparkwise.html +++ /dev/null @@ -1,23 +0,0 @@ -{% extends 'base.html' %} -{% block extrahead %} - - -{% endblock %} -{% block content %} -
    - -{% endblock %} diff --git a/static/templates/tablesorter_instructions.html b/static/templates/tablesorter_instructions.html deleted file mode 100644 index 35c9ac0131..0000000000 --- a/static/templates/tablesorter_instructions.html +++ /dev/null @@ -1 +0,0 @@ -

    Click the icons to sort the table by a column. Selecting further columns whilst holding the shift key will enable secondary (tertiary etc) sorting by the desired column/s.

    \ No newline at end of file diff --git a/static/templates/validation.html b/static/templates/validation.html deleted file mode 100644 index 34c6c7685b..0000000000 --- a/static/templates/validation.html +++ /dev/null @@ -1,72 +0,0 @@ -{% extends 'base.html' %} -{% import 'boxes.html' as boxes %} -{% block content %} -
    - {{boxes.box('Invalid files', current_stats.aggregated.validation.fail, 'validation.png', 'validation.json', - description='Count of files that do not validate against the relevant schema, over time.')}} - {{boxes.box('Publishers with invalid files', current_stats.aggregated.publishers_validation.fail, 'publishers_validation.png', 'publishers_validation.json', - description='Count of publishers that have at least one invalid file, over time')}} -
    - -

    Breakdown By Publisher

    - -
    - -

    List of files that fail validation, grouped by publisher

    - - {% for publisher,datasets in current_stats.inverted_file_grouped.validation.fail.items() %} -
    - -
    {{publisher_name[publisher]}} ({{datasets|length}})
    - - - - -
    - {% endfor%} -
    - -
    - -

    Count of files that fail validation, per publisher.

    - -
    - - - - - - - {% for publisher,datasets in current_stats.inverted_file_grouped.validation.fail.items() %} - - - - - {% endfor%} - -
    Publisher (J)Failing files (J)
    {{publisher_name[publisher]}}{{datasets|length}}
    -
    -
    -{% endblock %} diff --git a/text.py b/text.py deleted file mode 100644 index 004b10212d..0000000000 --- a/text.py +++ /dev/null @@ -1,102 +0,0 @@ -import copy - -top_titles = { - 'index': 'Dashboard Home', - 'headlines': 'Headlines', - 'data_quality': 'Data Quality', - 'exploring_data': 'Exploring Data', - 'publishing_stats': 'Publishing Statistics', - 'faq': 'FAQ' -} - -page_titles = { - 'index': 'IATI Dashboard', - 'headlines': 'Headlines', - 'data_quality': 'Data Quality', - 'exploring_data': 'Exploring Data', - 'faq': 'IATI Dashboard Frequently Asked Questions', - 'publishers': 'IATI Publishers', - 'files': 'IATI Files', - 'activities': 'IATI Activities', - 'download': 'Download Errors', - 'xml': 'XML Errors', - 'validation': 'Validation Against the Schema', - 'versions': 'Versions', - 'rulesets': 'Rulesets', - 'licenses': 'Licenses listed on the Registry', - 'organisation': 'Organisation XML Files', - 'identifiers': 'Duplicate Activity Identifiers', - 'registration_agencies': 'Registration Agencies', - 'reporting_orgs': 'Reporting Orgs', - 'elements': 'Elements', - 'codelists': 'Codelists', - 'booleans': 'Booleans', - 'dates': 'Dates', - 'publishing_stats': 'Publishing Statistics', - 'coverage': 'Coverage', - 'timeliness': 'Timeliness', - 'forwardlooking': 'Forward Looking', - 'comprehensiveness': 'Comprehensiveness', - 'coverage': 'Coverage', - 'summary_stats': 'Summary Statistics', - 'humanitarian': 'Humanitarian Reporting' -} - -page_leads = { - 'index': 'The IATI Dashboard provides statistics, charts and metrics on data accessed via the IATI Registry.', - 'data_quality': 'What needs fixing in IATI data?', - 'exploring_data': 'Which parts of the IATI Standard are being used?', - 'headlines': 'What is the size, scope and scale of published IATI data?', - 'publishers': 'How many organisations are publishing IATI data?', - 'files': 'How many IATI files are published?', - 'activities': 'How many IATI activities are published?', - 'download': 'How many 
files failed to download?', - 'xml': 'Which files have XML errors?', - 'validation': 'Which files fail schema validation?', - 'versions': 'Which versions of the IATI Standard are being used?', - 'rulesets': 'How does IATI data test against rulesets?', - 'licenses': 'Which licences are used by IATI publishers?', - 'organisation': 'Who is publishing IATI Organisation files?', - 'identifiers': 'Where are there duplicate IATI identifiers?', - 'reporting_orgs': 'Where are reporting organisation identifiers inconsistent with the IATI Registry?', - 'elements': 'How are the IATI Standard elements used by publishers?', - 'codelists': 'How are codelists used in IATI data?', - 'booleans': 'How are booleans used in IATI data?', - 'dates': 'What date ranges do publishers publish data for?', -} -page_sub_leads = { - 'publishers': 'Publishers represent organisation accounts in the IATI Registry.', - 'files': 'Files are logged on the IATI Registry by publishers The files contain data on activities and the organisation. A publisher may have multiple files, which can contain multiple activities.', - 'activities': 'Activities are the individual projects found in files. A file can contain one or many activities, from a publisher.', - 'download': 'Files that failed to download, when accessed via the IATI Registry. Note: This may because no URL is listed on the registry, or when requesting the URL the publisher\'s server returns an error message (e.g. because there is no file at that location). Since the dashboard\'s download occurs routinely, some files that failed to download may now be available.', - 'xml': 'This page shows files that are not well-formed XML, accessed via the IATI Registry. ', - 'validation': 'IATI files are validated against the appropriate IATI Schema. 
Note: this is based on the version declared in the file and whether it\'s an activity/organisation file.', - 'versions': 'Files are reported against a specific version of the IATI Standard, using the version attribute in the iati-activities element.', - 'rulesets': 'The IATI Ruleset describe constraints, conditions and logics that are additional to the IATI schema. Note: Currently, on the IATI Standard Ruleset is tested.', - 'licenses': 'Licences are applied to files by publishers on the IATI Registry, and explain how data can be used. ', - 'organisation': 'Checking the IATI Registry for files that have iati-organisations as the root element. IATI Organisation files contain general information about the organisations in the delivery chain. ', - 'identifiers': 'Checking the iati-identifier element for duplicate values per publisher. A duplicate appears if a publisher creates two activities with the same identifier.', - 'reporting_orgs': 'Checking the reporting-org identifiers in IATI data.', - 'elements': 'Checking usage of all elements within the IATI Standard.', - 'codelists': 'Checking usage of codelists across IATI data files.', - 'booleans': 'Checking usage of booleans across IATI data files. Booleans are values that are either true or false. 
In XML true or 1 can be used for true and false or 0 can be used for false.', -} - -short_page_titles = copy.copy(page_titles) -short_page_titles.update({ - 'publishers': 'Publishers', - 'files': 'Files', - 'activities': 'Activities', - 'validation': 'Validation', - 'licenses': 'Licenses', - 'organisation': 'Organisation XML', - 'identifiers': 'Duplicate Identifiers', -}) - -top_navigation = ['index', 'headlines', 'data_quality', 'exploring_data', 'publishing_stats', 'faq'] -navigation = { - 'headlines': ['publishers', 'files', 'activities'], - 'data_quality': ['download', 'xml', 'validation', 'versions', 'licenses', 'organisation', 'identifiers', 'reporting_orgs'], - 'exploring_data': ['elements', 'codelists', 'booleans', 'dates'], - 'publishing_stats': ['timeliness', 'forwardlooking', 'comprehensiveness', 'coverage', 'summary_stats', 'humanitarian' ] -} diff --git a/timeliness.py b/timeliness.py deleted file mode 100644 index 4775bfeb3e..0000000000 --- a/timeliness.py +++ /dev/null @@ -1,198 +0,0 @@ -# This file converts raw timeliness data into the associated Dashboard assessments - -from __future__ import print_function -from data import JSONDir, publisher_name, get_publisher_stats, get_registry_id_matches -import datetime -from dateutil.relativedelta import relativedelta -from collections import defaultdict, Counter - - -def short_month(month_str): - """Return the 'short month' represeentation of a date which is inputted as a string, seperated with dashes - For example '01-03-2012' returns 'Mar' - """ - short_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - return short_months[int(month_str.split('-')[1]) - 1] - - -def parse_iso_date(d): - """Parse a string representation of a date into a datetime object - """ - try: - return datetime.date(int(d[:4]), int(d[5:7]), int(d[8:10])) - except (ValueError, TypeError): - return None - - -def previous_months_generator(d): - """Returns a generator object with the previous 
month for a given datetime object - """ - year = d.year - month = d.month - for i in range(0,12): - month -= 1 - if month <= 0: - year -= 1 - month = 12 - yield year,month - -# Store lists of previous months -previous_months = ['{}-{}'.format(year,str(month).zfill(2)) for year,month in previous_months_generator(datetime.date.today())] -previous_months_reversed=list(reversed(previous_months)) - -# Store the current month as a string -today = datetime.date.today() -this_month = '{}-{}'.format(today.year, str(today.month).zfill(2)) - -# Store a list of the past 12 months from today -previous_month_days = [today - relativedelta(months=x) for x in range(1, 13)] - -# Store the current month and year numbers -this_month_number = datetime.datetime.today().month -this_year = datetime.datetime.today().year - - -def publisher_frequency(): - """Generate the publisher frequency data - """ - - # Load all the data from 'gitaggregate-publisher-dated' into memory - gitaggregate_publisher = JSONDir('./stats-calculated/gitaggregate-publisher-dated') - - # Loop over each publisher - i.e. 
a publisher folder within 'gitaggregate-publisher-dated' - for publisher, agg in gitaggregate_publisher.items(): - - # Skip to the next publisher if there is no data for 'most_recent_transaction_date' for this publisher - if not 'most_recent_transaction_date' in agg: - continue - - # Skip if this publisher appears in the list of publishers who have since changed their Registry ID - if publisher in get_registry_id_matches().keys(): - continue - - updates_per_month = defaultdict(int) - previous_transaction_date = datetime.date(1,1,1) - - # Find the most recent transaction date and parse into a datetime object - for gitdate, transaction_date_str in sorted(agg['most_recent_transaction_date'].items()): - transaction_date = parse_iso_date(transaction_date_str) - - # If transaction date has increased - if transaction_date is not None and transaction_date > previous_transaction_date: - previous_transaction_date = transaction_date - updates_per_month[gitdate[:7]] += 1 - - # Find the first date that this publisher made data available, and parse into a datetime object - first_published_string = sorted(agg['most_recent_transaction_date'])[0] - first_published = parse_iso_date(first_published_string) - - # Implement the assessment logic on http://dashboard.iatistandard.org/timeliness.html#h_assesment - - if first_published >= previous_month_days[2]: - # This is a publisher of less than 3 months - #if True in [ x in updates_per_month for x in previous_months[:3] ]: - frequency = 'Annual' - elif first_published >= previous_month_days[5]: - # This is a publisher of less than 6 months - if all([ x in updates_per_month for x in previous_months[:3] ]): - frequency = 'Monthly' - else: - frequency = 'Annual' - elif first_published >= previous_month_days[11]: - # This is a publisher of less than 12 months - if [ x in updates_per_month for x in previous_months[:6] ].count(True) >= 4: - frequency = 'Monthly' - elif any([ x in updates_per_month for x in previous_months[:3] ]) and any([ x 
in updates_per_month for x in previous_months[3:6] ]): - frequency = 'Quarterly' - else: - frequency = 'Annual' - else: - # This is a publisher of 1 year or more - if ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 7) and ([ x in updates_per_month for x in previous_months[:2] ].count(True) >= 1): - # Data updated in 7 or more of past 12 full months AND data updated at least once in last 2 full months. - frequency = 'Monthly' - elif ([ x in updates_per_month for x in previous_months[:12] ].count(True) >= 3) and ([ x in updates_per_month for x in previous_months[:4] ].count(True) >= 1): - # Data updated in 3 or more of past 12 full months AND data updated at least once in last 4 full months. - frequency = 'Quarterly' - elif any([ x in updates_per_month for x in previous_months[:6] ]) and any([ x in updates_per_month for x in previous_months[6:12] ]): - # There has been an update in 2 of the last 6 month periods - frequency = 'Six-Monthly' - elif any([ x in updates_per_month for x in previous_months[:12] ]): - # There has been an update in 1 of the last 12 months - frequency = 'Annual' - else: - # There has been an update in none of the last 12 months - frequency = 'Less than Annual' - - # If the publisher is in the list of current publishers, return a generator object - if publisher in publisher_name: - yield publisher, publisher_name.get(publisher), updates_per_month, frequency - - -def frequency_index(frequency): - return ['Monthly', 'Quarterly', 'Six-Monthly', 'Annual', 'Less than Annual'].index(frequency) - -def publisher_frequency_sorted(): - return sorted(publisher_frequency(), key=lambda (publisher, publisher_title , _, frequency): ( - frequency_index(frequency), - publisher_title - )) - -def publisher_frequency_dict(): - publisher_data_list = sorted(publisher_frequency(), key=lambda publisher: publisher[0] ) - data = {} - for v in publisher_data_list: - data[v[0]] = v - return data - -def publisher_frequency_summary(): - return 
Counter(frequency for _,_,_,frequency in publisher_frequency()) - -def timelag_index(timelag): - return ['One month', 'A quarter', 'Six months', 'One year', 'More than one year'].index(timelag) - -def publisher_timelag_sorted(): - publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] - return sorted(publisher_timelags, key=lambda (publisher, publisher_title, _, timelag): ( - timelag_index(timelag), - publisher_title - )) - -def publisher_timelag_dict(): - publisher_timelags = [ (publisher, publisher_name.get(publisher), agg['transaction_months_with_year'], agg['timelag']) for publisher,agg in JSONDir('./stats-calculated/current/aggregated-publisher').items() ] - data = {} - for v in publisher_timelags: - data[v[0]] = v - return data - -def publisher_timelag_summary(): - return Counter(timelag for _,_,_,timelag in publisher_timelag_sorted()) - -blacklist_publisher = JSONDir('./stats-blacklist/gitaggregate-publisher-dated') - -def has_future_transactions(publisher): - """ - returns 0, 1 or 2 - Returns 2 if the most recent data for a publisher has future transactions. - Returns 1 if the publisher has ever had future transactions. - Returns -1 if the publisher has not been checked for some reason. - Returns 0 otherwise. 
- """ - publisher_stats = get_publisher_stats(publisher) - if 'transaction_dates' in publisher_stats: - for transaction_type, transaction_counts in publisher_stats['transaction_dates'].items(): - for transaction_date_string, count in transaction_counts.items(): - transaction_date = parse_iso_date(transaction_date_string) - if transaction_date and transaction_date > datetime.date.today(): - return 2 - if publisher not in blacklist_publisher: - return -1 - today = datetime.date.today() - mindate = datetime.date(today.year-1, today.month, 1) - for date, activity_blacklist in blacklist_publisher[publisher]['activities_with_future_transactions'].items(): - if parse_iso_date(date) >= mindate and activity_blacklist: - return 1 - return 0 - -def sort_first(list_, key): - return sorted(list_, key=lambda x: key(x[0])) diff --git a/vars.py b/vars.py deleted file mode 100644 index 00063ecd1d..0000000000 --- a/vars.py +++ /dev/null @@ -1 +0,0 @@ -expected_versions = [ 'null', '1.01', '1.02', '1.03', '1.04', '1.05', '2.01', '2.02', '2.03' ]