diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..fce062d --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1 @@ +service_name: github diff --git a/.cruft.json b/.cruft.json new file mode 100644 index 0000000..4ab1cbf --- /dev/null +++ b/.cruft.json @@ -0,0 +1,28 @@ +{ + "template": "https://github.com/Ouranosinc/cookiecutter-pypackage", + "commit": "64eceda7d95aeb8937fa9961989d3d617a525c04", + "checkout": null, + "context": { + "cookiecutter": { + "full_name": "Sebastien Langlois", + "email": "sebastien.langlois62@gmail.com", + "github_username": "sebastienlanglois", + "project_name": "xdatasets", + "project_slug": "xdatasets", + "project_short_description": "Easy access to Earth observation datasets with xarray.", + "pypi_username": "sebastienlanglois", + "version": "0.3.0", + "use_pytest": "y", + "use_black": "y", + "use_conda": "y", + "add_pyup_badge": "n", + "make_docs": "y", + "command_line_interface": "No command-line interface", + "create_author_file": "y", + "open_source_license": "MIT license", + "generated_with_cruft": "y", + "_template": "https://github.com/Ouranosinc/cookiecutter-pypackage" + } + }, + "directory": null +} diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..70b8725 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,24 @@ +# http://editorconfig.org + +root = true + +[*] +indent_style = space +indent_size = 4 +trim_trailing_whitespace = true +insert_final_newline = true +charset = utf-8 +end_of_line = lf + +[*.{yaml,yml}] +indent_size = 2 + +[*.bat] +indent_style = tab +end_of_line = crlf + +[LICENSE] +insert_final_newline = false + +[Makefile] +indent_style = tab diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..1116575 --- /dev/null +++ b/.flake8 @@ -0,0 +1,30 @@ +[flake8] +exclude = + .eggs, + .git, + build, + docs, + tests +ignore = + AZ100, + AZ200, + AZ300, + C, + D, + E, + F, + W503 +per-file-ignores = +rst-roles = + doc, + mod, + py:attr, + py:attribute, + py:class, + py:const, + py:data, + py:func, + py:meth, + py:mod, + py:obj, + py:ref diff --git a/.gitattributes b/.gitattributes index 505c778..7a83941 100644 --- a/.gitattributes +++ b/.gitattributes @@ -113,4 +113,4 @@ # Jupyter Notebooks # ================= -*.ipynb text \ No newline at end of file +*.ipynb text diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 0000000..76233ee --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,15 @@ +* xdatasets version: +* Python version: +* Operating System: + +### Description + +Describe what you were trying to get done. +Tell us what happened, what went wrong, and what you expected to happen. + +### What I Did + +``` +Paste the command(s) you ran and the output. +If there was a crash, please include the traceback here. +``` diff --git a/.github/ISSUE_TEMPLATE/0001-GENERIC-ISSUE-TEMPLATE.yml b/.github/ISSUE_TEMPLATE/0001-GENERIC-ISSUE-TEMPLATE.yml new file mode 100644 index 0000000..c22a3e8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/0001-GENERIC-ISSUE-TEMPLATE.yml @@ -0,0 +1,34 @@ +name: Generic issue template +description: For detailing generic/uncategorized issues in xdatasets + +body: + - type: textarea + id: generic-issue + attributes: + label: Generic Issue + description: Please fill in the following information fields as needed. 
+ value: | + * xdatasets version: + * Python version: + * Operating System: + + ### Description + + + ### What I Did + + ``` + $ pip install foo --bar + ``` + + ### What I Received + + ``` + Traceback (most recent call last): + File "/path/to/file/script.py", line 3326, in run_code + exec(code_obj, self.user_global_ns, self.user_ns) + File "", line 1, in + 1/0 + ZeroDivisionError: division by zero diff --git a/.github/ISSUE_TEMPLATE/0002-BUG-REPORT.yml b/.github/ISSUE_TEMPLATE/0002-BUG-REPORT.yml new file mode 100644 index 0000000..360e8d4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/0002-BUG-REPORT.yml @@ -0,0 +1,44 @@ +name: Bug report +description: Help us improve xdatasets +labels: [ "bug" ] + +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! + - type: textarea + id: setup-information + attributes: + label: Setup Information + description: | + What software versions are you running? Example: + - xdatasets version: 0.55.0-gamma + - Python version: 4.2 + - Operating System: Nutmeg Linux 12.34 | macOS 11.0 "Redmond" + value: | + - xdatasets version: + - Python version: + - Operating System: + - type: textarea + id: description + attributes: + label: Description + description: Describe what you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. + - type: textarea + id: steps-to-reproduce + attributes: + label: Steps To Reproduce + description: Paste the command(s) you ran and the output. If there was a crash, please include the traceback below. + - type: textarea + id: additional-context + attributes: + label: Additional context + description: Add any other context about the problem here. + - type: checkboxes + id: submit-pr + attributes: + label: Contribution + description: Do you intend to submit a fix for this bug? (The xdatasets developers will help with code compliance) + options: + - label: I would be willing/able to open a Pull Request to address this bug. diff --git a/.github/ISSUE_TEMPLATE/0003-FEATURE-REQUEST.yml b/.github/ISSUE_TEMPLATE/0003-FEATURE-REQUEST.yml new file mode 100644 index 0000000..2115ba4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/0003-FEATURE-REQUEST.yml @@ -0,0 +1,31 @@ +name: Feature request +description: Suggest an idea for xdatasets +labels: [ "enhancement" ] + +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this feature request! + - type: textarea + id: problem + attributes: + label: Addressing a Problem? + description: Is your feature request related to a problem? Please describe it. + - type: textarea + id: potential-solution + attributes: + label: Potential Solution + description: Describe the solution you'd like to see implemented. + - type: textarea + id: additional-context + attributes: + label: Additional context + description: Add any other context about the feature request here. + - type: checkboxes + id: submit-pr + attributes: + label: Contribution + description: Do you intend to submit a fix for this bug? (The xdatasets developers will help with code compliance) + options: + - label: I would be willing/able to open a Pull Request to contribute this feature. 
diff --git a/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml b/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml new file mode 100644 index 0000000..f3c776a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/0004-QUESTION-SUPPORT.yml @@ -0,0 +1,23 @@ +name: Question/Support +description: Ask for help from the developers +labels: [ "support" ] + +body: + - type: textarea + id: setup-information + attributes: + label: Setup Information + description: | + What software versions are you running? Example: + - xdatasets version: 0.55.0-gamma + - Python version: 4.2 + - Operating System: Nutmeg Linux 12.34 | macOS 11.0 "Redmond" + value: | + - xdatasets version: + - Python version: + - Operating System: + - type: textarea + id: description + attributes: + label: Context + description: Describe what you were trying to get done. Tell us what happened, what went wrong, and what you expected to happen. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..0086358 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..dc5f20e --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,18 @@ + + +### Pull Request Checklist: +- [ ] This PR addresses an already opened issue (for bug fixes / features) + - This PR fixes #xyz +- [ ] (If applicable) Documentation has been added / updated (for bug fixes / features). +- [ ] (If applicable) Tests have been added. +- [ ] CHANGES.rst has been updated (with summary of main changes). + - [ ] Link to issue (:issue:`number`) and pull request (:pull:`number`) has been added. + +### What kind of change does this PR introduce? + +* ... + +### Does this PR introduce a breaking change? 
+ + +### Other information: diff --git a/.github/workflows/actions-versions-updater.yml b/.github/workflows/actions-versions-updater.yml new file mode 100644 index 0000000..bb05b24 --- /dev/null +++ b/.github/workflows/actions-versions-updater.yml @@ -0,0 +1,24 @@ +name: GitHub Actions Version Updater + +on: + schedule: + # 12:00 AM on the first of every month + - cron: '0 0 1 * *' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4.1.1 + with: + # This requires a personal access token with the privileges to push directly to `main` + token: ${{ secrets.WORKFLOW_TOKEN }} + persist-credentials: true + - name: Run GitHub Actions Version Updater + uses: saadmk11/github-actions-version-updater@v0.8.1 + with: + token: ${{ secrets.WORKFLOW_TOKEN }} + committer_email: 'bumpversion[bot]@ouranos.ca' + committer_username: 'update-github-actions[bot]' + pull_request_title: '[bot] Update GitHub Action Versions' diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index 3bcc24c..6a29a0e 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -48,13 +48,13 @@ jobs: run: echo "current_version=$(grep -E '__version__' xdatasets/__init__.py | cut -d ' ' -f3)" - name: Bump Patch Version run: | - pip install bump2version + pip install bump-my-version echo "Bumping version" - bump2version patch + bump-my-version bump --tag patch echo "new_version=$(grep -E '__version__' xdatasets/__init__.py | cut -d ' ' -f3)" - name: Push Changes uses: ad-m/github-push-action@master with: force: false github_token: ${{ secrets.GITHUB_TOKEN }} - branch: ${{ github.ref }} \ No newline at end of file + branch: ${{ github.ref }} diff --git a/.github/workflows/first_pull_request.yml b/.github/workflows/first_pull_request.yml new file mode 100644 index 0000000..adc51fe --- /dev/null +++ b/.github/workflows/first_pull_request.yml @@ -0,0 +1,46 @@ +name: First Pull Request + +on: + pull_request_target: + types: + - opened + +jobs: + welcome: + name: Welcome + runs-on: ubuntu-latest + steps: + - uses: actions/github-script@v6 + with: + script: | + // Get a list of all issues created by the PR opener + // See: https://octokit.github.io/rest.js/#pagination + const creator = context.payload.sender.login + const opts = github.rest.issues.listForRepo.endpoint.merge({ + ...context.issue, + creator, + state: 'all' + }) + const issues = await github.paginate(opts) + + for (const issue of issues) { + if (issue.number === context.issue.number) { + continue + } + + if (issue.pull_request) { + return // Creator is already a contributor. + } + } + + await github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `**Welcome**, new contributor! + + It appears that this is your first Pull Request. To give credit where it's due, we ask that you add your information to the \`AUTHORS.rst\` and \`.zenodo.json\`.: + - [ ] The relevant author information has been added to \`AUTHORS.rst\` and \`.zenodo.json\`. + + Please make sure you've read our [contributing guide](CONTRIBUTING.rst). 
We look forward to reviewing your Pull Request shortly ✨` + }) diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 3c542f5..c0a9805 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -1,4 +1,5 @@ name: Docs + on: push: branches: @@ -6,31 +7,40 @@ on: pull_request: branches: - master + permissions: contents: write + jobs: build-and-deploy: runs-on: ubuntu-latest + name: Build and deploy + defaults: + run: + shell: bash -l {0} steps: - name: Checkout πŸ›ŽοΈ - uses: actions/checkout@v2.3.1 # If you're using actions/checkout@v2 you must set persist-credentials to false in most cases for the deployment to work correctly. + uses: actions/checkout@v3 with: persist-credentials: false - - uses: conda-incubator/setup-miniconda@v2 + - name: Setup Conda (Micromamba) + uses: mamba-org/setup-micromamba@v1 with: - activate-environment: xdatasets - environment-file: environment.yml - mamba-version: "*" - python-version: 3.9 - auto-activate-base: false - - name: Deploy docs - shell: bash -l {0} + cache-downloads: true + environment-file: environment-docs.yml + - name: Install xdatasets (docs) run: | python -m ipykernel install --user --name=xdatasets - pip install -e . - make -C doc html + pip install --editable ".[docs]" + - name: Check versions + run: | + pip check || true + - name: Build docs + shell: bash -l {0} + run: | + make docs - name: Deploy πŸš€ uses: JamesIves/github-pages-deploy-action@v4.2.2 with: branch: gh-pages # The branch the action should deploy to. - folder: doc/_build/html # The folder the action should deploy. + folder: docs/_build/html # The folder the action should deploy. diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..0169fc9 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,133 @@ +name: xdatasets Testing Suite + +on: + push: + branches: + - main + paths-ignore: + - CHANGES.rst + - README.rst + - pyproject.toml + - setup.cfg + - xdatasets/__init__.py + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + name: Lint (Python${{ matrix.python-version }}) + runs-on: ubuntu-latest + strategy: + matrix: + python-version: + - "3.x" + steps: + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ secrets.GITHUB_TOKEN }} + - uses: actions/checkout@v4 + - name: Set up Python${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: | + python -m pip install tox + - name: Run linting suite + run: | + python -m tox -e lint + + test: + name: test-${{ matrix.tox-env }} (Python${{ matrix.python-version }}) + needs: lint + runs-on: ubuntu-latest + strategy: + matrix: + include: + - tox-env: "py38" + python-version: "3.8" + - tox-env: "py39" + python-version: "3.9" + - tox-env: "py310" + python-version: "3.10" + - tox-env: "py311" + python-version: "3.11" + steps: + - uses: actions/checkout@v3 + - name: Set up Python${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: | + pip install tox + - name: Test with tox + run: | + tox -e ${{ matrix.tox-env }} +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# COVERALLS_FLAG_NAME: run-${{ matrix.tox-env }} +# COVERALLS_PARALLEL: true +# COVERALLS_SERVICE_NAME: github + +# test-conda: +# name: Test with Python${{ 
matrix.python-version }} (Anaconda) +# needs: lint +# runs-on: ubuntu-latest +# strategy: +# matrix: +# python-version: ["3.9", "3.10", "3.11"] +# defaults: +# run: +# shell: bash -l {0} +# steps: +# - uses: actions/checkout@v4 +# - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }} +# uses: mamba-org/setup-micromamba@v1 +# with: +# cache-downloads: true +# environment-file: environment-dev.yml +# create-args: >- +# mamba +# python=${{ matrix.python-version }} +# - name: Conda and Mamba versions +# run: | +# mamba --version +# echo "micromamba $(micromamba --version)" +# - name: Install xdatasets +# run: | +# python -m pip install --no-deps . +# - name: Check versions +# run: | +# conda list +# python -m pip check || true +# - name: Test with pytest +# run: | +# python -m pytest --cov xdatasets +# - name: Report coverage +# run: | +# python -m coveralls +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# COVERALLS_FLAG_NAME: run-Python${{ matrix.python-version }}-conda +# COVERALLS_PARALLEL: true +# COVERALLS_SERVICE_NAME: github + +# finish: +# needs: +# - test-pypi +# - test-conda +# runs-on: ubuntu-latest +# container: python:3-slim +# steps: +# - name: Coveralls Finished +# run: | +# pip install --upgrade coveralls +# coveralls --finish +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# COVERALLS_SERVICE_NAME: github diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index b7e54d4..cbdad39 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -9,18 +9,21 @@ jobs: build-n-publish-pypi: name: Build and publish Python 🐍 distributions πŸ“¦ to PyPI runs-on: ubuntu-latest + environment: production + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python3 uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install packaging libraries - run: pip install setuptools wheel + run: | + python -m pip install flit - name: Build a binary wheel and a source tarball - run: python setup.py sdist bdist_wheel + run: | + python -m flit build - name: Publish distribution πŸ“¦ to PyPI uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml index 782f0fd..036d9a1 100644 --- a/.github/workflows/tag-testpypi.yml +++ b/.github/workflows/tag-testpypi.yml @@ -3,26 +3,49 @@ name: "Publish Python 🐍 distributions πŸ“¦ to TestPyPI" on: push: tags: - - '*' + - 'v*.*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 jobs: - build-n-publish-testpypi: + + release: + name: Create Release from tag + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/v') && endsWith(github.ref, '.0') + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Create Release + uses: softprops/action-gh-release@v1 + env: + # This token is provided by Actions, you do not need to create your own token + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + name: Release ${{ github.ref }} + draft: true + prerelease: false + + deploy-testpypi: name: Build and publish Python 🐍 distributions πŸ“¦ to TestPyPI runs-on: ubuntu-latest + environment: staging + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python3 uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install packaging libraries - run: pip install setuptools wheel + run: | + python -m pip install flit - name: Build a binary wheel and a source tarball - run: python setup.py sdist bdist_wheel + run: | + python -m flit build - name: Publish distribution πŸ“¦ to Test PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository_url: https://test.pypi.org/legacy/ - skip_existing: true \ No newline at end of file + skip_existing: true diff --git a/.gitignore b/.gitignore index f7573be..f369552 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,6 @@ docs/source/packages docs/bin docs/source/readme.rst - ##--------------------------------------------------- # Continuous Integration .gitignore files ##--------------------------------------------------- @@ -18,7 +17,6 @@ docs/source/readme.rst testresults.xml coverage.xml - ##--------------------------------------------------- # Python default .gitignore ##--------------------------------------------------- @@ -150,6 +148,11 @@ dmypy.json # pytype static type analyzer .pytype/ +# IDE settings +.vscode/ +.idea/ +.vs/ +*.sublime-workspace ##--------------------------------------------------- # Windows default .gitignore @@ -179,7 +182,6 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk - ##--------------------------------------------------- # Linux default .gitignore ##--------------------------------------------------- @@ -199,7 +201,6 @@ $RECYCLE.BIN/ # .nfs files are created when an open file is removed but is still being accessed .nfs* - ##--------------------------------------------------- # Mac OSX default .gitignore ##--------------------------------------------------- @@ -229,4 +230,4 @@ Icon .AppleDesktop Network Trash Folder Temporary Items -.apdisk \ No newline at end of file +.apdisk diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..da4247f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,89 @@ +default_language_version: + python: python3 + +repos: + - repo: https://github.com/asottile/pyupgrade + rev: v3.14.0 + hooks: + - id: pyupgrade + args: [ '--py38-plus' ] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: debug-statements + - id: check-json + - id: check-toml + - id: pretty-format-json + args: [ '--autofix', '--no-ensure-ascii', '--no-sort-keys' ] + exclude: .ipynb + - id: check-yaml + args: [ '--allow-multiple-documents' ] + - repo: https://github.com/pappasam/toml-sort + rev: v0.23.1 + hooks: + - 
id: toml-sort-fix + - repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: rst-inline-touching-normal + - repo: https://github.com/psf/black + rev: 23.11.0 + hooks: + - id: black + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.4 + hooks: + - id: ruff + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + additional_dependencies: [ 'flake8-alphabetize', 'flake8-rst-docstrings' ] + args: ['--config=.flake8'] + - repo: https://github.com/nbQA-dev/nbQA + rev: 1.7.0 + hooks: + - id: nbqa-pyupgrade + args: [ '--py38-plus' ] + additional_dependencies: [ 'pyupgrade==3.14.0' ] + - id: nbqa-black + additional_dependencies: [ 'black==23.11.0' ] + - id: nbqa-isort + additional_dependencies: [ 'isort==5.12.0' ] + - repo: https://github.com/keewis/blackdoc + rev: v0.3.8 + hooks: + - id: blackdoc + additional_dependencies: [ 'black==23.11.0' ] + - repo: https://github.com/adrienverge/yamllint.git + rev: v1.32.0 + hooks: + - id: yamllint + args: [ '--config-file=.yamllint.yaml' ] + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.27.1 + hooks: + - id: check-github-workflows + - id: check-readthedocs + - repo: meta + hooks: + - id: check-hooks-apply + - id: check-useless-excludes + +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit.com hooks + + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..fbf80e0 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,26 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +sphinx: + configuration: docs/conf.py + # fail_on_warning might generate hard to fix error, in this case it can be + # disabled but this also means those errors will fail silently, choose wisely. + fail_on_warning: true + +build: + os: ubuntu-22.04 + tools: + python: "mambaforge-22.9" + +conda: + environment: environment-docs.yml + +python: + install: + - method: pip + path: . 
+ extra_requirements: + - dev diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 0000000..2f3b4a3 --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,8 @@ +--- + +rules: + document-start: disable + line-length: + max: 120 + level: warning + truthy: disable diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 0000000..5184707 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,22 @@ +{ + "title": "xdatasets", + "creators": [ + { + "name": "Langlois, Sebastien" + }, + { + "name": "Smith, Trevor James", + "affiliation": "Ouranos, MontrΓ©al, QuΓ©bec, Canada", + "orcid": "0000-0001-5393-8359" + } + ], + "keywords": [ + "datasets", + "xarray" + ], + "license": "MIT", + "language": "eng", + "communities": [], + "upload_type": "software", + "access_right": "open" +} diff --git a/AUTHORS.rst b/AUTHORS.rst index 8e9f886..12c9f09 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -5,7 +5,12 @@ Credits Development Lead ---------------- -* Sebastien Langlois +* Sebastien Langlois `@sebastienlanglois `_ + +Co-Developers +------------- + +* Trevor James Smith `@Zeitsperre `_ Contributors ------------ diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 0000000..2947dc4 --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,20 @@ +========= +Changelog +========= + +0.3.0 (unreleased) +------------------- + +* `xdatasets` now adheres to PEPs 517/518/621 using the `flit` backend for building and packaging. +* The `cookiecutter` template has been updated to the latest commit via `cruft`. (:pull:`28`): + * `Manifest.in` and `setup.py` have been removed. + * `pyproject.toml` has been added, with most package configurations migrated into it. + * `HISTORY.rst` has been renamed to `CHANGES.rst`. + * `actions-version-updater.yml` has been added to automate the versioning of the package. + * `bump-version.yml` has been added to automate patch versioning of the package. + * `pre-commit` hooks have been updated to the latest versions; `check-toml` and `toml-sort` have been added to cleanup the `pyproject.toml` file. + * `ruff` has been added to the linting tools to replace most `flake8` and `pydocstyle` verifications. + +0.1.2-alpha (2023-01-13) +--------------------------- +First release on PyPI. diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 3cb9f8c..9ba3cc0 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -4,8 +4,7 @@ Contributing ============ -Contributions are welcome, and they are greatly appreciated! Every little bit -helps, and credit will always be given. +Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. You can contribute in many ways: @@ -15,7 +14,7 @@ Types of Contributions Report Bugs ~~~~~~~~~~~ -Report bugs at https://github.com/sebastienlanglois/xdatasets/issues. +Report bugs at https://github.com/hydrologie/xdatasets/issues. If you are reporting a bug, please include: @@ -26,26 +25,22 @@ If you are reporting a bug, please include: Fix Bugs ~~~~~~~~ -Look through the GitHub issues for bugs. Anything tagged with "bug" and "help -wanted" is open to whoever wants to implement it. +Look through the GitHub issues for bugs. Anything tagged with "bug" and "help wanted" is open to whoever wants to implement it. Implement Features ~~~~~~~~~~~~~~~~~~ -Look through the GitHub issues for features. Anything tagged with "enhancement" -and "help wanted" is open to whoever wants to implement it. +Look through the GitHub issues for features. 
Anything tagged with "enhancement" and "help wanted" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ -Xhydro could always use more documentation, whether as part of the -official Xhydro docs, in docstrings, or even on the web in blog posts, -articles, and such. +xdatasets could always use more documentation, whether as part of the official xdatasets docs, in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ -The best way to send feedback is to file an issue at https://github.com/sebastienlanglois/xhydro/issues. +The best way to send feedback is to file an issue at https://github.com/hydrologie/xdatasets/issues. If you are proposing a feature: @@ -57,72 +52,221 @@ If you are proposing a feature: Get Started! ------------ -Ready to contribute? Here's how to set up `xhydro` for local development. +.. note:: -1. Fork the `xhydro` repo on GitHub. -2. Clone your fork locally:: + If you are new to using GitHub and `git`, please read `this guide `_ first. - $ git clone git@github.com:your_name_here/xhydro.git +.. warning:: -3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: + Anaconda Python users: Due to the complexity of some packages, the default dependency solver can take a long time to resolve the environment. Consider running the following commands in order to speed up the process:: - $ mkvirtualenv xhydro - $ cd xhydro/ - $ python setup.py develop + $ conda install -n base conda-libmamba-solver + $ conda config --set solver libmamba -4. Create a branch for local development:: + For more information, please see the following link: https://www.anaconda.com/blog/a-faster-conda-for-a-growing-community + + Alternatively, you can use the `mamba `_ package manager, which is a drop-in replacement for ``conda``. If you are already using `mamba`, replace the following commands with ``mamba`` instead of ``conda``. + +Ready to contribute? Here's how to set up ``xdatasets`` for local development. + +#. Fork the ``xdatasets`` repo on GitHub. +#. Clone your fork locally:: + + $ git clone git@github.com:your_name_here/xdatasets.git + +#. Install your local copy into a development environment. You can create a new Anaconda development environment with:: + + $ conda env create -f environment-dev.yml + $ conda activate xdatasets + $ flit install --symlink + + This installs ``xdatasets`` in an "editable" state, meaning that changes to the code are immediately seen by the environment. + +#. To ensure a consistent coding style, install the ``pre-commit`` hooks to your local clone:: + + $ pre-commit install + + On commit, ``pre-commit`` will check that ``black``, ``blackdoc``, ``isort``, ``flake8``, and ``ruff`` checks are passing, perform automatic fixes if possible, and warn of violations that require intervention. If your commit fails the checks initially, simply fix the errors, re-add the files, and re-commit. + + You can also run the hooks manually with:: + + $ pre-commit run -a + + If you want to skip the ``pre-commit`` hooks temporarily, you can pass the ``--no-verify`` flag to `$ git commit`. + +#. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature - Now you can make your changes locally. + Now you can make your changes locally. -5. When you're done making changes, check that your changes pass flake8 and the - tests, including testing other Python versions with tox:: +#. 
When you're done making changes, we **strongly** suggest running the tests in your environment or with the help of ``tox``:: - $ flake8 xhydro tests - $ python setup.py test or pytest + $ python -m pytest + # Or, to run multiple build tests $ tox - To get flake8 and tox, just pip install them into your virtualenv. - -6. Commit your changes and push your branch to GitHub:: +#. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature -7. Submit a pull request through the GitHub website. + If ``pre-commit`` hooks fail, try re-committing your changes (or, if need be, you can skip them with `$ git commit --no-verify`). + +#. Submit a `Pull Request `_ through the GitHub website. + +#. When pushing your changes to your branch on GitHub, the documentation will automatically be tested to reflect the changes in your Pull Request. This build process can take several minutes at times. If you are actively making changes that affect the documentation and wish to save time, you can compile and test your changes beforehand locally with:: + + # To generate the html and open it in your browser + $ make docs + # To only generate the html + $ make autodoc + $ make -C docs html + # To simply test that the docs pass build checks + $ tox -e docs + +#. Once your Pull Request has been accepted and merged to the ``main`` branch, several automated workflows will be triggered: + + - The ``bump-version.yml`` workflow will automatically bump the patch version when pull requests are pushed to the ``main`` branch on GitHub. **It is not recommended to manually bump the version in your branch when merging (non-release) pull requests (this will cause the version to be bumped twice).** + - `ReadTheDocs` will automatically build the documentation and publish it to the `latest` branch of `xdatasets` documentation website. + - If your branch is not a fork (ie: you are a maintainer), your branch will be automatically deleted. + + You will have contributed your first changes to ``xdatasets``! Pull Request Guidelines ----------------------- Before you submit a pull request, check that it meets these guidelines: -1. The pull request should include tests. -2. If the pull request adds functionality, the docs should be updated. Put - your new functionality into a function with a docstring, and add the - feature to the list in README.rst. -3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check - https://travis-ci.com/sebastienlanglois/xhydro/pull_requests - and make sure that the tests pass for all supported Python versions. +#. The pull request should include tests and should aim to provide `code coverage `_ for all new lines of code. You can use the ``--cov-report html --cov xdatasets`` flags during the call to ``pytest`` to generate an HTML report and analyse the current test coverage. + +#. If the pull request adds functionality, the docs should also be updated. Put your new functionality into a function with a docstring, and add the feature to the list in ``README.rst``. + +#. The pull request should work for Python 3.8, 3.9, 3.10, and 3.11. Check that the tests pass for all supported Python versions. 
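For the coverage guideline above, a typical local run (a sketch only, assuming ``pytest`` and ``pytest-cov`` are installed in your environment) looks like::

    $ python -m pytest --cov xdatasets --cov-report html
    # The HTML report is written to the htmlcov/ directory; open htmlcov/index.html in a browser to review per-line coverage.
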
Tips ---- To run a subset of tests:: -$ pytest tests.test_xhydro +$ pytest tests.test_xdatasets + +To run specific code style checks:: + + $ black --check xdatasets tests + $ isort --check xdatasets tests + $ blackdoc --check xdatasets docs + $ ruff xdatasets tests + $ flake8 xdatasets tests + +To get ``black``, ``isort ``blackdoc``, ``ruff``, and ``flake8`` (with plugins ``flake8-alphabetize`` and ``flake8-rst-docstrings``) simply install them with `pip` (or `conda`) into your environment. + +Versioning/Tagging +------------------ + +A reminder for the **maintainers** on how to deploy. This section is only relevant when producing a new point release for the package. + +.. warning:: + + It is important to be aware that any changes to files found within the ``xdatasets`` folder (with the exception of ``xdatasets/__init__.py``) will trigger the ``bump-version.yml`` workflow. Be careful not to commit changes to files in this folder when preparing a new release. + +#. Create a new branch from `main` (e.g. `release-0.2.0`). +#. Update the `CHANGES.rst` file to change the `Unreleased` section to the current date. +#. Bump the version in your branch to the next version (e.g. `v0.1.0 -> v0.2.0`):: + + .. code-block:: shell + + $ bump-my-version bump minor # In most cases, we will be releasing a minor version + $ git push +#. Create a pull request from your branch to `main`. +#. Once the pull request is merged, create a new release on GitHub. On the main branch, run: -Deploying + .. code-block:: shell + + $ git tag v0.2.0 + $ git push --tags + + This will trigger a GitHub workflow to build the package and upload it to TestPyPI. At the same time, the GitHub workflow will create a draft release on GitHub. Assuming that the workflow passes, the final release can then be published on GitHub by finalizing the draft release. + +#. Once the release is published, the `publish-pypi.yml` workflow will go into an `awaiting approval` mode on Github Actions. Only authorized users may approve this workflow (notifications will be sent) to trigger the upload to PyPI. + +.. warning:: + + Uploads to PyPI can **never** be overwritten. If you make a mistake, you will need to bump the version and re-release the package. If the package uploaded to PyPI is broken, you should modify the GitHub release to mark the package as broken, as well as yank the package (mark the version "broken") on PyPI. + +Packaging --------- -A reminder for the maintainers on how to deploy. -Make sure all your changes are committed (including an entry in HISTORY.rst). -Then run:: +When a new version has been minted (features have been successfully integrated test coverage and stability is adequate), maintainers should update the pip-installable package (wheel and source release) on PyPI as well as the binary on conda-forge. + +The simple approach +~~~~~~~~~~~~~~~~~~~ + +The simplest approach to packaging for general support (pip wheels) requires that ``flit`` be installed:: + + $ python -m pip install flit + +From the command line on your Linux distribution, simply run the following from the clone's main dev branch:: + + # To build the packages (sources and wheel) + $ python -m flit build + + # To upload to PyPI + $ python -m flit publish dist/* + +The new version based off of the version checked out will now be available via `pip` (`$ pip install xdatasets`). 
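Before publishing, it can help to inspect the built artifacts locally; the file names below are illustrative and depend on the version being released::

    $ ls dist/
    xdatasets-0.3.0-py3-none-any.whl  xdatasets-0.3.0.tar.gz
    # Optionally, smoke-test the wheel in a clean virtual environment before uploading
    $ python -m pip install dist/xdatasets-0.3.0-py3-none-any.whl
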
+ +Releasing on conda-forge +~~~~~~~~~~~~~~~~~~~~~~~~ + +Initial Release +^^^^^^^^^^^^^^^ + +Before preparing an initial release on conda-forge, we *strongly* suggest consulting the following links: + * https://conda-forge.org/docs/maintainer/adding_pkgs.html + * https://github.com/conda-forge/staged-recipes + +In order to create a new conda build recipe, to be used when proposing packages to the conda-forge repository, we strongly suggest using the ``grayskull`` tool:: + + $ python -m pip install grayskull + $ grayskull pypi xdatasets + +For more information on ``grayskull``, please see the following link: https://github.com/conda/grayskull + +Before updating the main conda-forge recipe, we echo the conda-forge documentation and *strongly* suggest performing the following checks: + * Ensure that dependencies and dependency versions correspond with those of the tagged version, with open or pinned versions for the `host` requirements. + * If possible, configure tests within the conda-forge build CI (e.g. `imports: xdatasets`, `commands: pytest xdatasets`). + +Subsequent releases +^^^^^^^^^^^^^^^^^^^ + +If the conda-forge feedstock recipe is built from PyPI, then when a new release is published on PyPI, `regro-cf-autotick-bot` will open Pull Requests automatically on the conda-forge feedstock. It is up to the conda-forge feedstock maintainers to verify that the package is building properly before merging the Pull Request to the main branch. + +Building sources for wide support with `manylinux` image +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + This section is for building source files that link to or provide links to C/C++ dependencies. + It is not necessary to perform the following when building pure Python packages. + +In order to do ensure best compatibility across architectures, we suggest building wheels using the `PyPA`'s `manylinux` +docker images (at time of writing, we endorse using `manylinux_2_24_x86_64`). + +With `docker` installed and running, begin by pulling the image:: + + $ sudo docker pull quay.io/pypa/manylinux_2_24_x86_64 + +From the xdatasets source folder we can enter into the docker container, providing access to the `xdatasets` source files by linking them to the running image:: + + $ sudo docker run --rm -ti -v $(pwd):/xdatasets -w /xdatasets quay.io/pypa/manylinux_2_24_x86_64 bash + +Finally, to build the wheel, we run it against the provided Python3.9 binary:: + + $ /opt/python/cp39-cp39m/bin/python -m build --sdist --wheel -$ bump2version patch # possible: major / minor / patch -$ git push -$ git push --tags +This will then place two files in `xdatasets/dist/` ("xdatasets-1.2.3-py3-none-any.whl" and "xdatasets-1.2.3.tar.gz"). +We can now leave our docker container (`$ exit`) and continue with uploading the files to PyPI:: -Travis will then deploy to PyPI if tests pass. + $ twine upload dist/* diff --git a/HISTORY.rst b/HISTORY.rst deleted file mode 100644 index 346225f..0000000 --- a/HISTORY.rst +++ /dev/null @@ -1,8 +0,0 @@ -======= -History -======= - -0.1.2-alpha (2023-01-13) ------------------- - -* First release on PyPI. diff --git a/LICENSE b/LICENSE index cf11d07..06b47cb 100644 --- a/LICENSE +++ b/LICENSE @@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 965b2dd..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -include AUTHORS.rst -include CONTRIBUTING.rst -include HISTORY.rst -include LICENSE -include README.rst - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] - -recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/Makefile b/Makefile index c4a344d..86a806c 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,10 @@ for line in sys.stdin: endef export PRINT_HELP_PYSCRIPT +ifdef READTHEDOCS + export CI=true +endif + BROWSER := python -c "$$BROWSER_PYSCRIPT" help: @@ -35,6 +39,11 @@ clean-build: ## remove build artifacts find . -name '*.egg-info' -exec rm -fr {} + find . -name '*.egg' -exec rm -f {} + +clean-docs: ## remove docs artifacts + rm -f docs/apidoc/xdatasets*.rst + rm -f docs/apidoc/modules.rst + $(MAKE) -C docs clean + clean-pyc: ## remove Python file artifacts find . -name '*.pyc' -exec rm -f {} + find . -name '*.pyo' -exec rm -f {} + @@ -48,14 +57,18 @@ clean-test: ## remove test and coverage artifacts rm -fr .pytest_cache lint/flake8: ## check style with flake8 - flake8 xhydro tests + ruff xdatasets tests + flake8 --config=.flake8 xdatasets tests + lint/black: ## check style with black black --check xdatasets tests + blackdoc --check xdatasets docs + isort --check xdatasets tests lint: lint/flake8 lint/black ## check style test: ## run tests quickly with the default Python - pytest + python -m pytest test-all: ## run tests on every Python version with tox tox @@ -66,24 +79,30 @@ coverage: ## check code coverage quickly with the default Python coverage html $(BROWSER) htmlcov/index.html -docs: ## generate Sphinx HTML documentation, including API docs - rm -f docs/xdatasets.rst - rm -f docs/modules.rst - sphinx-apidoc -o docs/ xdatasets - $(MAKE) -C docs clean +autodoc: clean-docs ## create sphinx-apidoc files: + sphinx-apidoc -o docs/apidoc --private --module-first xdatasets + +linkcheck: autodoc ## run checks over all external links found throughout the documentation + $(MAKE) -C docs linkcheck + +docs: autodoc ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs html +ifndef CI $(BROWSER) docs/_build/html/index.html +endif servedocs: docs ## compile the docs watching for changes watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . -release: dist ## package and upload a release - twine upload dist/* - dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel + python -m flit build ls -l dist +release: dist ## package and upload a release + python -m flit publish dist/* + install: clean ## install the package to the active Python's site-packages - python setup.py install + python -m flit install + +dev: clean ## install the package to the active Python's site-packages + python -m flit install --symlink diff --git a/README.rst b/README.rst index 69f8132..b5aa735 100644 --- a/README.rst +++ b/README.rst @@ -1,25 +1,28 @@ -====== +========= Xdatasets -====== - +========= .. image:: https://img.shields.io/pypi/v/xdatasets.svg :target: https://pypi.python.org/pypi/xdatasets + :alt: PyPI +.. image:: https://github.com/hydrologie/xdatasets/actions/workflows/main.yml/badge.svg + :target: https://github.com/hydrologie/xdatasets/actions + :alt: Build Status -.. 
image:: https://pyup.io/repos/github/sebastienlanglois/xdatasets/shield.svg - :target: https://pyup.io/repos/github/sebastienlanglois/xdatasets/ - :alt: Updates - +.. image:: https://readthedocs.org/projects/xdatasets/badge/?version=latest + :target: https://xdatasets.readthedocs.io/en/latest/?version=latest + :alt: Documentation Status +.. image:: https://img.shields.io/github/license/hydrologie/xdatasets.svg + :target: https://github.com/hydrologie/xdatasets/blob/master/LICENSE + :alt: License -Xdatasets. - +Easy access to Earth observation datasets with xarray. * Free software: MIT license * Documentation: https://xdatasets.github.io/xdatasets - Features -------- @@ -28,7 +31,7 @@ Features Credits ------- -This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template. +This package was created with Cookiecutter_ and the `Ouranosinc/cookiecutter-pypackage`_ project template. -.. _Cookiecutter: https://github.com/audreyr/cookiecutter -.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage +.. _Cookiecutter: https://github.com/cookiecutter/cookiecutter +.. _`Ouranosinc/cookiecutter-pypackage`: https://github.com/Ouranosinc/cookiecutter-pypackage diff --git a/doc/history.rst b/doc/history.rst deleted file mode 100644 index 2506499..0000000 --- a/doc/history.rst +++ /dev/null @@ -1 +0,0 @@ -.. include:: ../HISTORY.rst diff --git a/doc/notebooks/getting_started.ipynb b/doc/notebooks/getting_started.ipynb deleted file mode 100644 index 19d7282..0000000 --- a/doc/notebooks/getting_started.ipynb +++ /dev/null @@ -1,7400 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Getting started" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `xdatasets` library enables users to effortlessly access a vast collection of earth observation datasets that are compatible with `xarray` formats.\n", - "\n", - "The library adopts an opinionated approach to data querying and caters to the specific needs of certain user groups, such as hydrologists, climate scientists, and engineers. One of the functionalities of `xdatasets` is the ability to extract data at a specific location or within a designated region, such as a watershed or municipality, while also enabling spatial and temporal operations." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use `xdatasets`, users must employ a query. For instance, a straightforward query to extract the variables `t2m` (*2m temperature*) and `tp` (*Total precipitation*) from the `era5_reanalysis_single_levels` dataset at two geographical positions (Montreal and Toronto) could be as follows:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "query = {\n", - " \"datasets\": {\"era5_reanalysis_single_levels_dev\": {'variables': [\"t2m\", \"tp\"]}},\n", - " \"space\": {\n", - " \"clip\": \"point\", # bbox, point or polygon\n", - " \"geometry\": {'Montreal' : (45.508888, -73.561668),\n", - " 'Toronto' : (43.651070, -79.347015)\n", - " }\n", - " }\n", - "}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "An example of a more complex query would look like the one below. \n", - "\n", - ".. note::\n", - " Don't worry! Below, you'll find additional examples that will assist in understanding each parameter in the query, as well as the possible combinations.\n", - "\n", - "This query calls the same variables as above. 
However, instead of specifying geographical positions, a GeoPandas.DataFrame is used to provide features (such as shapefiles or geojson) for extracting data within each of them. Each polygon is identified using the unique identifier `Station`, and a spatial average is computed within each one `(aggregation: True)`. The dataset, initially at an hourly time step, is converted into a daily time step while applying one or more temporal aggregations for each variable as prescribed in the query. `xdatasets` ultimately returns the dataset for the specified date range and time zone." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "```python\n", - "query = {\n", - " \"datasets\": {\"era5_reanalysis_single_levels_dev\": {'variables': [\"t2m\", \"tp\"]}},\n", - " \"space\": {\n", - " \"clip\": \"polygon\", # bbox, point or polygon\n", - " \"aggregation\": True, # spatial average of the variables within each polygon\n", - " \"geometry\": gdf,\n", - " \"unique_id\": \"Station\" # unique column name in geodataframe\n", - " },\n", - " \"time\": {\n", - " \"timestep\": \"D\",\n", - " \"aggregation\": {\"tp\": np.nansum, \n", - " \"t2m\": [np.nanmax, np.nanmin]},\n", - " \n", - " \"start\": '2000-01-01',\n", - " \"end\": '2020-05-31',\n", - " \"timezone\": 'America/Montreal',\n", - " },\n", - "}\n", - "```\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query climate datasets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to use `xdatasets`, you must import at least `xdatasets`, `pandas`, `geopandas`, and `numpy`. Additionally, we import `pathlib` to interact with files." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ['USE_PYGEOS'] = '0'\n", - "\n", - "import xdatasets as xd\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "import numpy as np\n", - "\n", - "# Visualization\n", - "import hvplot.xarray\n", - "import panel as pn\n", - "\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clip by points (sites)\n", - "\n", - "\n", - "To begin with, we need to create a dictionary of sites and their corresponding geographical coordinates." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "sites = {\n", - " 'Montreal' : (45.508888, -73.561668),\n", - " 'New York': (40.730610, -73.935242),\n", - " 'Miami': (25.761681, -80.191788)\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will then extract the `tp` (*total precipitation*) and `t2m` (*2m temperature*) from the `era5_reanalysis_single_levels` dataset for the designated sites. Afterward, we will convert the time step to daily and adjust the timezone to Eastern Time. Finally, we will limit the temporal interval." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Before proceeding with this first query, let's quickly outline the role of each parameter:\n", - "\n", - "- **datasets**: A dictionary where datasets serve as keys and desired variables as values.\n", - "- **space**: A dictionary that defines the necessary spatial operations to apply on user-supplied geographic features.\n", - "- **time**: A dictionary that defines the necessary temporal operations to apply on the datasets\n", - "\n", - "For more information on each parameter, consult the API documentation.\n", - "\n", - "This is what the requested query looks like :" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Temporal operations: processing tp with era5_reanalysis_single_levels: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:07<00:00, 3.96s/it]\n" - ] - } - ], - "source": [ - "query = {\n", - " \"datasets\": 'era5_reanalysis_single_levels',\n", - " \n", - " \"space\": {\n", - " \"clip\": \"point\", # bbox, point or polygon\n", - " \"geometry\": sites\n", - " },\n", - " \"time\": { \n", - " \"timestep\": \"D\", # http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases\n", - " \"aggregation\": {\"tp\": np.nansum,\n", - " \"t2m\": np.nanmean},\n", - " \"start\": '1959-01-01',\n", - " \"timezone\": 'America/Montreal',\n", - " },\n", - "}\n", - "xds = xd.Query(**query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By accessing the `data` attribute, you can view the data obtained from the query. It's worth noting that the variable name `tp` has been updated to `tp_nansum` to reflect the reduction operation (`np.nansum`) that was utilized to convert the time step from hourly to daily. Likewise, `t2m` was updated to `t2m_nanmean`. " - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
-       "<xarray.Dataset>\n",
-       "Dimensions:      (time_agg: 2, spatial_agg: 1, timestep: 1, site: 3,\n",
-       "                  time: 23491, source: 1)\n",
-       "Coordinates:\n",
-       "  * time_agg     (time_agg) object 'nanmean' 'nansum'\n",
-       "  * spatial_agg  (spatial_agg) object 'point'\n",
-       "  * timestep     (timestep) object 'D'\n",
-       "  * site         (site) <U8 'Montreal' 'New York' 'Miami'\n",
-       "  * time         (time) datetime64[ns] 1959-01-01 1959-01-02 ... 2023-04-25\n",
-       "    latitude     (site) float64 45.5 40.75 25.75\n",
-       "    longitude    (site) float64 -73.5 -74.0 -80.25\n",
-       "  * source       (source) <U29 'era5_reanalysis_single_levels'\n",
-       "Data variables:\n",
-       "    t2m          (time_agg, spatial_agg, timestep, time, site, source) float32 ...\n",
-       "    tp           (time_agg, spatial_agg, timestep, time, site, source) float32 ...\n",
-       "Attributes: (12/31)\n",
-       "    GRIB_NV:                                  0\n",
-       "    GRIB_Nx:                                  1440\n",
-       "    GRIB_Ny:                                  721\n",
-       "    GRIB_cfName:                              unknown\n",
-       "    GRIB_cfVarName:                           t2m\n",
-       "    GRIB_dataType:                            an\n",
-       "    ...                                       ...\n",
-       "    GRIB_typeOfLevel:                         surface\n",
-       "    GRIB_units:                               K\n",
-       "    coordinates:                              number time step surface latitu...\n",
-       "    long_name:                                2 metre temperature\n",
-       "    standard_name:                            unknown\n",
-       "    units:                                    K
" - ], - "text/plain": [ - "\n", - "Dimensions: (time_agg: 2, spatial_agg: 1, timestep: 1, site: 3,\n", - " time: 23491, source: 1)\n", - "Coordinates:\n", - " * time_agg (time_agg) object 'nanmean' 'nansum'\n", - " * spatial_agg (spatial_agg) object 'point'\n", - " * timestep (timestep) object 'D'\n", - " * site (site) \n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "HoloViews(NdOverlay, height=450, sizing_mode='fixed', widget_location='bottom', width=750)" - ] - }, - "execution_count": 19, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "5897" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "title = f\"Comparison of total precipitation across three cities in North America from \\\n", - "{xds.data.time.dt.year.min().values} to {xds.data.time.dt.year.max().values}\"\n", - "\n", - "xds.data \\\n", - ".sel(\n", - " timestep='D', \n", - " spatial_agg='point', \n", - " time_agg='nansum',\n", - " source='era5_reanalysis_single_levels') \\\n", - ".hvplot(\n", - " title=title,\n", - " x=\"time\",\n", - " y=\"tp\",\n", - " grid=True,\n", - " width=750,\n", - " height=450,\n", - " by=\"site\",\n", - " legend=\"top\",\n", - " widget_location=\"bottom\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "" - ], - "text/plain": [ - "HoloViews(NdOverlay, height=450, sizing_mode='fixed', widget_location='bottom', width=750)" - ] - }, - "execution_count": 20, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "6289" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "title = f\"Comparison of 2m temperature across three cities in North America from \\\n", - "{xds.data.time.dt.year.min().values} to {xds.data.time.dt.year.max().values}\"\n", - "\n", - "xds.data \\\n", - ".sel(\n", - " timestep='D', \n", - " spatial_agg='point', \n", - " time_agg='nanmean',\n", - " source='era5_reanalysis_single_levels') \\\n", - ".hvplot(\n", - " title=title,\n", - " x=\"time\",\n", - " y=\"t2m\",\n", - " grid=True,\n", - " width=750,\n", - " height=450,\n", - " by=\"site\",\n", - " legend=\"top\",\n", - " widget_location=\"bottom\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clip on polygons with no averaging in space\n", - "\n", - "Let's first access certain polygon features, which can be in the form of shapefiles, geojson, or any other format compatible with `geopandas`. In this example, we are using `JSON` (geojson) files." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "bucket = Path('https://s3.us-east-2.wasabisys.com/watersheds-polygons/MELCC/json')\n", - "\n", - "paths = [bucket.joinpath('023003/023003.json'),\n", - " bucket.joinpath('031101/031101.json'),\n", - " bucket.joinpath('040111/040111.json')]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Subsequently, all of the files can be opened and consolidated into a single `geopandas.GeoDataFrame` object." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StationSuperficiegeometry
0023003208.4591919813271POLYGON ((-70.82601 46.81658, -70.82728 46.815...
1031101111.7131058782722POLYGON ((-73.98519 45.21072, -73.98795 45.209...
2040111433.440893903503POLYGON ((-74.06645 46.02253, -74.06647 46.022...
\n", - "
" - ], - "text/plain": [ - " Station Superficie \\\n", - "0 023003 208.4591919813271 \n", - "1 031101 111.7131058782722 \n", - "2 040111 433.440893903503 \n", - "\n", - " geometry \n", - "0 POLYGON ((-70.82601 46.81658, -70.82728 46.815... \n", - "1 POLYGON ((-73.98519 45.21072, -73.98795 45.209... \n", - "2 POLYGON ((-74.06645 46.02253, -74.06647 46.022... " - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf = pd.concat([gpd.read_file(path) for path in paths]).reset_index(drop=True)\n", - "gdf" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's examine the geographic locations of the polygon features." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - ":Overlay\n", - " .Tiles.I :Tiles [x,y]\n", - " .Polygons.I :Polygons [Longitude,Latitude] (Station,Superficie)" - ] - }, - "execution_count": 23, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "6681" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.hvplot(geo=True, \n", - " tiles='ESRI',\n", - " color='Station',\n", - " alpha=0.8, \n", - " width=750,\n", - " height=450,\n", - " legend='top',\n", - " hover_cols=['Station','Superficie'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The following query seeks the variables `t2m` and `tp` from the `era5_reanalysis_single_levels` dataset, covering the period between January 1, 1959, and September 30, 1961, for the three polygons mentioned earlier. It is important to note that as `aggregation` is set to `False`, no spatial averaging will be conducted, and a mask (raster) will be returned for each polygon." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Spatial operations: processing polygon 040111 with era5_reanalysis_single_levels: : 3it [00:00, 5.53it/s]\n" - ] - } - ], - "source": [ - "query = {\n", - " \"datasets\": {\"era5_reanalysis_single_levels\": {'variables': [\"t2m\", \"tp\"]}},\n", - " \"space\": {\n", - " \"clip\": \"polygon\", # bbox, point or polygon\n", - " \"averaging\": False, # spatial average of the variables within each polygon\n", - " \"geometry\": gdf,\n", - " \"unique_id\": \"Station\" # unique column name in geodataframe\n", - " },\n", - " \"time\": {\n", - " \"start\": '1959-01-01',\n", - " \"end\": '1963-08-31',\n", - " },\n", - "}\n", - "\n", - "xds = xd.Query(**query)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By accessing the `data` attribute, you can view the data obtained from the query. For each variable, the dimensions of `time`, `latitude`, `longitude`, and `Station` (the unique ID) are included. In addition, there is another variable called `weights` that is returned. This variable specifies the weight that should be assigned to each pixel if spatial averaging is conducted over a mask (polygon)." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:    (latitude: 6, longitude: 5, time: 40896, Station: 3, source: 1)\n",
-       "Coordinates:\n",
-       "  * latitude   (latitude) float64 45.0 45.25 46.0 46.25 46.5 46.75\n",
-       "  * longitude  (longitude) float64 -74.5 -74.25 -74.0 -71.0 -70.75\n",
-       "  * time       (time) datetime64[ns] 1959-01-01 ... 1963-08-31T23:00:00\n",
-       "  * Station    (Station) object '023003' '031101' '040111'\n",
-       "  * source     (source) <U29 'era5_reanalysis_single_levels'\n",
-       "Data variables:\n",
-       "    t2m        (Station, time, latitude, longitude, source) float32 nan ... nan\n",
-       "    tp         (Station, time, latitude, longitude, source) float32 nan ... nan\n",
-       "    weights    (Station, latitude, longitude, source) float64 nan nan ... nan\n",
-       "Attributes:\n",
-       "    Conventions:               CF-1.6\n",
-       "    history:                   2022-11-10 02:03:41 GMT by grib_to_netcdf-2.25...\n",
-       "    pangeo-forge:inputs_hash:  9423abd3198a0f0de3aa8368c73629d26c8207570bf035...\n",
-       "    pangeo-forge:recipe_hash:  4b4eead2724b1cf53d6ddbf112d17dbbc49ecc83610af6...\n",
-       "    pangeo-forge:version:      0.9.4
" - ], - "text/plain": [ - "\n", - "Dimensions: (latitude: 6, longitude: 5, time: 40896, Station: 3, source: 1)\n", - "Coordinates:\n", - " * latitude (latitude) float64 45.0 45.25 46.0 46.25 46.5 46.75\n", - " * longitude (longitude) float64 -74.5 -74.25 -74.0 -71.0 -70.75\n", - " * time (time) datetime64[ns] 1959-01-01 ... 1963-08-31T23:00:00\n", - " * Station (Station) object '023003' '031101' '040111'\n", - " * source (source) \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:    (time: 40896, latitude: 2, longitude: 2)\n",
-       "Coordinates:\n",
-       "  * latitude   (latitude) float64 46.5 46.75\n",
-       "  * longitude  (longitude) float64 -71.0 -70.75\n",
-       "  * time       (time) datetime64[ns] 1959-01-01 ... 1963-08-31T23:00:00\n",
-       "    Station    <U6 '023003'\n",
-       "    source     <U29 'era5_reanalysis_single_levels'\n",
-       "Data variables:\n",
-       "    t2m        (time, latitude, longitude) float32 254.7 254.8 ... 287.6 287.5\n",
-       "    tp         (time, latitude, longitude) float32 nan nan ... 8.583e-06\n",
-       "    weights    (latitude, longitude) float64 1.38e-05 0.0001535 0.9079 0.09194\n",
-       "Attributes:\n",
-       "    Conventions:               CF-1.6\n",
-       "    history:                   2022-11-10 02:03:41 GMT by grib_to_netcdf-2.25...\n",
-       "    pangeo-forge:inputs_hash:  9423abd3198a0f0de3aa8368c73629d26c8207570bf035...\n",
-       "    pangeo-forge:recipe_hash:  4b4eead2724b1cf53d6ddbf112d17dbbc49ecc83610af6...\n",
-       "    pangeo-forge:version:      0.9.4
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 40896, latitude: 2, longitude: 2)\n", - "Coordinates:\n", - " * latitude (latitude) float64 46.5 46.75\n", - " * longitude (longitude) float64 -71.0 -70.75\n", - " * time (time) datetime64[ns] 1959-01-01 ... 1963-08-31T23:00:00\n", - " Station \n", - "
\n", - "\n", - "" - ], - "text/plain": [ - ":Layout\n", - " .Overlay.I :Overlay\n", - " .Tiles.I :Tiles [x,y]\n", - " .Image.I :Image [longitude,latitude] (t2m)\n", - " .Polygons.I :Polygons [Longitude,Latitude] (Station,Superficie)\n", - " .Overlay.II :Overlay\n", - " .Tiles.I :Tiles [x,y]\n", - " .Image.I :Image [longitude,latitude] (weights)\n", - " .Polygons.I :Polygons [Longitude,Latitude] (Station,Superficie)" - ] - }, - "execution_count": 27, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "6818" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "(\n", - " (\n", - " ds_clipped.t2m.isel(time=0).hvplot(\n", - " title=\"The 2m temperature for pixels that intersect with the polygon on January 1, 1959\",\n", - " tiles=\"ESRI\",\n", - " geo=True,\n", - " alpha=0.6,\n", - " colormap=\"isolum\",\n", - " width=750,\n", - " height=450,\n", - " )\n", - " * gdf[gdf.Station == station].hvplot(\n", - " geo=True,\n", - " width=750,\n", - " height=450,\n", - " legend=\"top\",\n", - " hover_cols=[\"Station\", \"Superficie\"],\n", - " )\n", - " )\n", - " + ds_clipped.weights.hvplot(\n", - " title=\"The weights that should be assigned to each pixel when performing spatial averaging\",\n", - " tiles=\"ESRI\",\n", - " alpha=0.6,\n", - " colormap=\"isolum\",\n", - " geo=True,\n", - " width=750,\n", - " height=450,\n", - " )\n", - " * gdf[gdf.Station == station].hvplot(\n", - " geo=True,\n", - " width=750,\n", - " height=450,\n", - " legend=\"top\",\n", - " hover_cols=[\"Station\", \"Superficie\"],\n", - " )\n", - ").cols(1)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The two plots depicted above show the 2m temperature for each pixel that intersects with the polygon from Station `023003` and the corresponding weights to be applied to each pixel. In the lower plot, it is apparent that the majority of the polygon is situated in the upper-left pixel, which results in that pixel having a weight of approximately 91%. It is evident that the two lower pixels have very minimal intersection with the polygon, which results in their respective weights being nearly zero (hover on the plot to verify the weights).\n", - "\n", - "In various libraries, either all pixels that intersect with the geometries are kept, or only pixels with centers within the polygon are retained. However, as shown in the previous example, utilizing such methods can introduce significant biases in the final outcome. Indeed, keeping all four pixels intersecting with the polygon with equal weights when the temperature values in the lower pixels are roughly 2 degrees lower than that of the upper-left pixel would introduce significant biases. Therefore, utilizing weights is a more precise approach." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Clip on polygons with averaging in space\n", - "\n", - "The following query seeks the variables `t2m` and `tp` from the `era5_reanalysis_single_levels` and `era5_land_reanalysis` datasets, covering the period between January 1, 1950, to present, for the three polygons mentioned earlier. Note that when the `aggregation` parameter is set to `True`, spatial averaging takes place. In addition, the weighted mask (raster) described earlier will be applied to generate a time series for each polygon.\n", - "\n", - "Additional steps are carried out in the process, including converting the original hourly time step to a daily time step. 
During this conversion, various temporal aggregations will be applied to each variable and a conversion to the local time zone will take place.\n", - "\n", - ".. note::\n", - " If users prefer to pass multiple dictionaries instead of a single large one, the following format is also considered acceptable." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/slanglois/mambaforge/envs/xdatasets/lib/python3.9/site-packages/xdatasets/workflows.py:62: UserWarning: \"start_date\" not found within input date time range. Defaulting to minimum time step in xarray object.\n", - " ds = subset_time(ds, start_date=start_time, end_date=end_time)\n", - "Spatial operations: processing polygon 040111 with era5_reanalysis_single_levels: : 3it [00:00, 5.03it/s]\n", - "Temporal operations: processing tp with era5_reanalysis_single_levels: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:10<00:00, 5.13s/it]\n", - "Spatial operations: processing polygon 040111 with era5_land_reanalysis_dev: : 3it [00:00, 5.01it/s]\n", - "Temporal operations: processing tp with era5_land_reanalysis_dev: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:06<00:00, 3.31s/it]\n" - ] - } - ], - "source": [ - "datasets = {\n", - " \"era5_reanalysis_single_levels\": {'variables': [\"t2m\", \"tp\"]},\n", - " \"era5_land_reanalysis_dev\": {'variables': [\"t2m\", \"tp\"]}\n", - "}\n", - "space = {\n", - " \"clip\": \"polygon\", # bbox, point or polygon\n", - " \"averaging\": True,\n", - " \"geometry\": gdf, # 3 polygons\n", - " \"unique_id\": \"Station\"\n", - "}\n", - "time = {\n", - " \"timestep\": \"D\",\n", - " \"aggregation\": {\"tp\": [np.nansum], \n", - " \"t2m\": [np.nanmax, np.nanmin]},\n", - "\n", - " \"start\": '1950-01-01',\n", - " \"timezone\": 'America/Montreal',\n", - "}\n", - "\n", - "xds = xd.Query(datasets=datasets,\n", - " space=space,\n", - " time=time)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:      (time_agg: 3, spatial_agg: 1, timestep: 1, Station: 3,\n",
-       "                  time: 26778, source: 2)\n",
-       "Coordinates:\n",
-       "  * time_agg     (time_agg) object 'nanmax' 'nanmin' 'nansum'\n",
-       "  * spatial_agg  (spatial_agg) object 'polygon'\n",
-       "  * timestep     (timestep) object 'D'\n",
-       "  * Station      (Station) object '023003' '031101' '040111'\n",
-       "  * time         (time) datetime64[ns] 1950-01-01 1950-01-02 ... 2023-04-25\n",
-       "  * source       (source) <U29 'era5_land_reanalysis_dev' 'era5_reanalysis_si...\n",
-       "Data variables:\n",
-       "    t2m          (time_agg, spatial_agg, timestep, Station, time, source) float64 ...\n",
-       "    tp           (time_agg, spatial_agg, timestep, Station, time, source) float64 ...
" - ], - "text/plain": [ - "\n", - "Dimensions: (time_agg: 3, spatial_agg: 1, timestep: 1, Station: 3,\n", - " time: 26778, source: 2)\n", - "Coordinates:\n", - " * time_agg (time_agg) object 'nanmax' 'nanmin' 'nansum'\n", - " * spatial_agg (spatial_agg) object 'polygon'\n", - " * timestep (timestep) object 'D'\n", - " * Station (Station) object '023003' '031101' '040111'\n", - " * time (time) datetime64[ns] 1950-01-01 1950-01-02 ... 2023-04-25\n", - " * source (source) \n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "Column\n", - " [0] HoloViews(DynamicMap, height=400, sizing_mode='fixed', widget_location='bottom', width=750)\n", - " [1] Row\n", - " [0] HSpacer()\n", - " [1] WidgetBox\n", - " [0] Select(margin=(20, 20, 5, 20), name='source', options=['era5_land_reanalysis_dev...], value='era5_land_reanalysis_dev'..., width=250)\n", - " [1] Select(margin=(5, 20, 20, 20), name='Station', options=['023003', '031101', ...], value='023003', width=250)\n", - " [2] HSpacer()" - ] - }, - "execution_count": 31, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "7200" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "xds.data.squeeze().t2m.hvplot(x='time',\n", - " by='time_agg',\n", - " groupby=['source','Station'],\n", - " width=750,\n", - " height=400,\n", - " grid=True,\n", - " widget_location='bottom')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The resulting dataset can be explored in the data attribute :" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "Column\n", - " [0] HoloViews(DynamicMap, height=400, sizing_mode='fixed', widget_location='bottom', width=750)\n", - " [1] Row\n", - " [0] HSpacer()\n", - " [1] WidgetBox\n", - " [0] Select(margin=(20, 20, 5, 20), name='source', options=['era5_land_reanalysis_dev...], value='era5_land_reanalysis_dev'..., width=250)\n", - " [1] Select(margin=(5, 20, 20, 20), name='Station', options=['023003', '031101', ...], value='023003', width=250)\n", - " [2] HSpacer()" - ] - }, - "execution_count": 32, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "7612" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "xds.data.squeeze().tp \\\n", - ".sel(time_agg='nansum') \\\n", - ".hvplot(\n", - " x='time',\n", - " groupby=['source','Station'],\n", - " width=750,\n", - " height=400,\n", - " color='blue',\n", - " grid=True,\n", - " widget_location='bottom')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Bounding box (bbox) around polygons\n", - "\n", - "The following query seeks the variable `tp` from the `era5_land_reanalysis_dev` dataset, covering the period between January 1, 1959, and December 31, 1970, for the bounding box that delimits the three polygons mentioned earlier.\n", - "\n", - "Additional steps are carried out in the process, including converting to the local time zone." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "query = {\n", - " \"datasets\": {\"era5_land_reanalysis_dev\": {'variables': [\"tp\"]}},\n", - " \"space\": {\n", - " \"clip\": \"bbox\", # bbox, point or polygon\n", - " \"geometry\": gdf,\n", - " },\n", - " \"time\": {\n", - " \"start\": '1959-01-01',\n", - " \"end\": '1970-12-31',\n", - " \"timezone\": 'America/Montreal',\n", - " },\n", - "}\n", - "\n", - "\n", - "xds = xd.Query(**query)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:    (time: 105192, latitude: 30, longitude: 44, source: 1)\n",
-       "Coordinates:\n",
-       "  * latitude   (latitude) float64 47.8 47.7 47.6 47.5 ... 45.2 45.1 45.0 44.9\n",
-       "  * longitude  (longitude) float64 -74.9 -74.8 -74.7 -74.6 ... -70.8 -70.7 -70.6\n",
-       "  * time       (time) datetime64[ns] 1959-01-01 ... 1970-12-31T23:00:00\n",
-       "  * source     (source) <U24 'era5_land_reanalysis_dev'\n",
-       "Data variables:\n",
-       "    tp         (time, latitude, longitude, source) float32 0.0 0.0 ... 0.0 0.0\n",
-       "Attributes:\n",
-       "    pangeo-forge:inputs_hash:  b93e4b21fdf31524195d82f9bf70f0ac21b55a176b9e38...\n",
-       "    pangeo-forge:recipe_hash:  c52319824bbad18fd08f4b9c0963981d8d10d9f7ebf237...\n",
-       "    pangeo-forge:version:      0.9.4\n",
-       "    timezone:                  America/Montreal
" - ], - "text/plain": [ - "\n", - "Dimensions: (time: 105192, latitude: 30, longitude: 44, source: 1)\n", - "Coordinates:\n", - " * latitude (latitude) float64 47.8 47.7 47.6 47.5 ... 45.2 45.1 45.0 44.9\n", - " * longitude (longitude) float64 -74.9 -74.8 -74.7 -74.6 ... -70.8 -70.7 -70.6\n", - " * time (time) datetime64[ns] 1959-01-01 ... 1970-12-31T23:00:00\n", - " * source (source) \n", - "
\n", - "\n", - "" - ], - "text/plain": [ - "Column\n", - " [0] HoloViews(HoloMap, height=450, sizing_mode='fixed', widget_location='bottom', widget_type='scrubber', width=750)\n", - " [1] Row\n", - " [0] HSpacer()\n", - " [1] WidgetBox\n", - " [0] Player(end=23, width=550)\n", - " [2] HSpacer()" - ] - }, - "execution_count": 14, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "4918" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "da = xds.data.tp.isel(time=slice(indexer - 24, indexer))\n", - "#da = da.where(da>0.0001, drop=True)\n", - "\n", - "(da*1000)\\\n", - ".sel(source='era5_land_reanalysis_dev') \\\n", - ".squeeze() \\\n", - ".hvplot.quadmesh(\n", - " width=750,\n", - " height=450,\n", - " geo=True,\n", - " tiles='ESRI',\n", - " groupby=[\"time\"],\n", - " legend=\"top\",\n", - " cmap='gist_ncar',\n", - " widget_location=\"bottom\",\n", - " widget_type='scrubber',\n", - " dynamic=False,\n", - " clim=(0.01, 10))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query hydrological datasets\n", - "Hydrological queries are still being tested and output format is likely to change. Stay tuned!" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:                 (basin_id: 470, time: 41007, source: 1)\n",
-       "Coordinates: (12/16)\n",
-       "    _last_update_timestamp  (basin_id) datetime64[ns] dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "    aggregation             (basin_id) <U1 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "  * basin_id                (basin_id) <U6 '010101' '010801' ... '135201'\n",
-       "    data_type               (basin_id) <U1 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "    drainage_area           (basin_id) float32 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "    end_date                (basin_id) datetime64[ns] dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "    ...                      ...\n",
-       "    regulated               (basin_id) <U1 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "  * source                  (source) <U5 'melcc'\n",
-       "    start_date              (basin_id) datetime64[ns] dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "  * time                    (time) datetime64[ns] 1910-01-01 ... 2022-04-09\n",
-       "    timestep                (basin_id) <U1 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "    units                   (basin_id) <U1 dask.array<chunksize=(470,), meta=np.ndarray>\n",
-       "Data variables:\n",
-       "    flag                    (time, basin_id, source) <U1 dask.array<chunksize=(2563, 59, 1), meta=np.ndarray>\n",
-       "    value                   (time, basin_id, source) float32 dask.array<chunksize=(5126, 59, 1), meta=np.ndarray>
" - ], - "text/plain": [ - "\n", - "Dimensions: (basin_id: 470, time: 41007, source: 1)\n", - "Coordinates: (12/16)\n", - " _last_update_timestamp (basin_id) datetime64[ns] dask.array\n", - " aggregation (basin_id) \n", - " * basin_id (basin_id) \n", - " drainage_area (basin_id) float32 dask.array\n", - " end_date (basin_id) datetime64[ns] dask.array\n", - " ... ...\n", - " regulated (basin_id) \n", - " * source (source) \n", - " * time (time) datetime64[ns] 1910-01-01 ... 2022-04-09\n", - " timestep (basin_id) \n", - " units (basin_id) \n", - "Data variables:\n", - " flag (time, basin_id, source) \n", - " value (time, basin_id, source) float32 dask.array" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = {\n", - " \"datasets\": 'melcc'\n", - "}\n", - "xds = xd.Query(**query)\n", - "xds.data" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:        (data_type: 2, id: 7881, spatial_agg: 2, timestep: 1,\n",
-       "                    time_agg: 1, latitude: 2800, longitude: 4680, source: 1,\n",
-       "                    time: 59413)\n",
-       "Coordinates: (12/15)\n",
-       "  * data_type      (data_type) <U5 'flow' 'level'\n",
-       "    drainage_area  (id) float64 dask.array<chunksize=(10,), meta=np.ndarray>\n",
-       "    end_date       (id, data_type, spatial_agg, timestep, time_agg) object dask.array<chunksize=(7881, 2, 2, 1, 1), meta=np.ndarray>\n",
-       "  * id             (id) <U7 '01AA002' '01AD001' ... '11AF004' '11AF005'\n",
-       "  * latitude       (latitude) float64 85.0 84.97 84.95 ... 15.07 15.05 15.02\n",
-       "  * longitude      (longitude) float64 -167.0 -167.0 -166.9 ... -50.05 -50.02\n",
-       "    ...             ...\n",
-       "  * source         (source) <U5 'hydat'\n",
-       "  * spatial_agg    (spatial_agg) object 'point' 'watershed'\n",
-       "    start_date     (id, data_type, spatial_agg, timestep, time_agg) object dask.array<chunksize=(7881, 2, 2, 1, 1), meta=np.ndarray>\n",
-       "  * time           (time) datetime64[ns] 1860-01-01 1860-01-02 ... 2022-08-31\n",
-       "  * time_agg       (time_agg) <U4 'mean'\n",
-       "  * timestep       (timestep) <U3 'day'\n",
-       "Data variables:\n",
-       "    mask           (id, latitude, longitude, source) float64 dask.array<chunksize=(1, 500, 500, 1), meta=np.ndarray>\n",
-       "    value          (id, time, data_type, spatial_agg, timestep, time_agg, source) float64 dask.array<chunksize=(10, 59413, 1, 1, 1, 1, 1), meta=np.ndarray>
" - ], - "text/plain": [ - "\n", - "Dimensions: (data_type: 2, id: 7881, spatial_agg: 2, timestep: 1,\n", - " time_agg: 1, latitude: 2800, longitude: 4680, source: 1,\n", - " time: 59413)\n", - "Coordinates: (12/15)\n", - " * data_type (data_type) \n", - " end_date (id, data_type, spatial_agg, timestep, time_agg) object dask.array\n", - " * id (id) \n", - " * time (time) datetime64[ns] 1860-01-01 1860-01-02 ... 2022-08-31\n", - " * time_agg (time_agg) \n", - " value (id, time, data_type, spatial_agg, timestep, time_agg, source) float64 dask.array" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "query = {\n", - " \"datasets\": 'hydat'\n", - "}\n", - "xds = xd.Query(**query)\n", - "xds.data" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "query = {\n", - " \"datasets\": {'hydrometric': {'variables': ['streamflow',\n", - " 't2m_nanmax', \n", - " 't2m_nanmin',\n", - " 'tp_nansum'],\n", - " 'id': ['01010070', '01016500', '01017290','02*'],\n", - " }\n", - " },\n", - "}\n", - "xds = xd.Query(**query)\n", - "ds_hydro = xds.data" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset>\n",
-       "Dimensions:     (id: 504, time: 44990, source: 1)\n",
-       "Coordinates:\n",
-       "  * id          (id) object '01010070' '01016500' ... '02492343' '02492360'\n",
-       "  * time        (time) datetime64[ns] 1900-01-01 1900-01-02 ... 2023-03-06\n",
-       "  * source      (source) <U11 'hydrometric'\n",
-       "Data variables:\n",
-       "    streamflow  (id, time, source) float64 dask.array<chunksize=(1, 22495, 1), meta=np.ndarray>\n",
-       "    t2m_nanmax  (id, time, source) float32 dask.array<chunksize=(1, 44990, 1), meta=np.ndarray>\n",
-       "    t2m_nanmin  (id, time, source) float32 dask.array<chunksize=(1, 44990, 1), meta=np.ndarray>\n",
-       "    tp_nansum   (id, time, source) float32 dask.array<chunksize=(1, 44990, 1), meta=np.ndarray>
" - ], - "text/plain": [ - "\n", - "Dimensions: (id: 504, time: 44990, source: 1)\n", - "Coordinates:\n", - " * id (id) object '01010070' '01016500' ... '02492343' '02492360'\n", - " * time (time) datetime64[ns] 1900-01-01 1900-01-02 ... 2023-03-06\n", - " * source (source) \n", - " t2m_nanmax (id, time, source) float32 dask.array\n", - " t2m_nanmin (id, time, source) float32 dask.array\n", - " tp_nansum (id, time, source) float32 dask.array" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds_hydro" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": {}, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.holoviews_exec.v0+json": "", - "text/html": [ - "
\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "Row\n", - " [0] Column\n", - " [0] Markdown(str)\n", - " [1] Select(name='id', options=['01010070', '01016500', ...], value='01010070')\n", - " [2] Select(name='source', options=['hydrometric'], value='hydrometric')\n", - " [3] ParamFunction(function, _pane=Column)" - ] - }, - "execution_count": 38, - "metadata": { - "application/vnd.holoviews_exec.v0+json": { - "id": "9435" - } - }, - "output_type": "execute_result" - } - ], - "source": [ - "import panel as pn\n", - "\n", - "id1 = pn.widgets.Select(value=\"01010070\", options=list(xds.data.id.values), name=\"id\")\n", - "source = pn.widgets.Select(\n", - " value=\"hydrometric\", options=list(xds.data.source.values), name=\"source\"\n", - ")\n", - "\n", - "\n", - "@pn.depends(id1, source)\n", - "def plot_hydrograph_and_weather(id1, source):\n", - " da = ds_hydro.sel(id=id1, source=source)\n", - " dx = da[\"streamflow\"].dropna(\"time\", how=\"any\")\n", - "\n", - " trace1 = da[\"streamflow\"].hvplot(\n", - " grid=True,\n", - " widget_location=\"bottom\",\n", - " color=\"black\",\n", - " xlim=(dx.time[0].values, dx.time[-1].values),\n", - " title=f\"Daily streamflow at location {id1}\",\n", - " width=750,\n", - " height=300,\n", - " )\n", - " trace2 = da[[\"t2m_nanmax\", \"t2m_nanmin\"]].hvplot(\n", - " grid=True,\n", - " widget_location=\"bottom\",\n", - " color=[\"red\", \"blue\"],\n", - " xlim=(dx.time[0].values, dx.time[-1].values),\n", - " title=f\"Daily minimum and maximum temperature at location {id1}\",\n", - " width=750,\n", - " height=300,\n", - " )\n", - "\n", - " trace3 = da[[\"tp_nansum\"]].hvplot(\n", - " grid=True,\n", - " color=[\"turquoise\"],\n", - " xlim=(dx.time[0].values, dx.time[-1].values),\n", - " title=f\"Daily precipitation at location {id1}\",\n", - " width=750,\n", - " height=300,\n", - " )\n", - "\n", - " return pn.Column(trace1, trace2, trace3)\n", - "\n", - "\n", - "pn.Row(\n", - " pn.Column(\"## Hydrometric Data Explorer\", id1, source, plot_hydrograph_and_weather)\n", - ")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "xdatasets", - "language": "python", - "name": "xdatasets" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/doc/usage.rst b/doc/usage.rst deleted file mode 100644 index 496ae5b..0000000 --- a/doc/usage.rst +++ /dev/null @@ -1,16 +0,0 @@ -===== -User Guide -===== - -In this user guide, you will find detailed descriptions and examples that describe many common tasks that you can accomplish with xdatasets. - - - -.. toctree:: - :maxdepth: 1 - - notebooks/load_data - - - - diff --git a/doc/.nojekyll b/docs/.nojekyll similarity index 100% rename from doc/.nojekyll rename to docs/.nojekyll diff --git a/doc/Makefile b/docs/Makefile similarity index 100% rename from doc/Makefile rename to docs/Makefile diff --git a/doc/authors.rst b/docs/authors.rst similarity index 100% rename from doc/authors.rst rename to docs/authors.rst diff --git a/docs/changes.rst b/docs/changes.rst new file mode 100644 index 0000000..d9e113e --- /dev/null +++ b/docs/changes.rst @@ -0,0 +1 @@ +.. 
include:: ../CHANGES.rst diff --git a/doc/conf.py b/docs/conf.py similarity index 72% rename from doc/conf.py rename to docs/conf.py index cd500ef..6e65adb 100755 --- a/doc/conf.py +++ b/docs/conf.py @@ -19,10 +19,10 @@ # import os import sys -sys.path.insert(0, os.path.abspath('..')) -import xdatasets # noqa +sys.path.insert(0, os.path.abspath("..")) +import xdatasets # noqa # -- General configuration --------------------------------------------- @@ -32,24 +32,51 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['nbsphinx'] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.extlinks", + "sphinx.ext.viewcode", + "sphinx.ext.todo", + "nbsphinx", + "sphinx_codeautolink", + "sphinx_copybutton", + "sphinxcontrib.confluencebuilder", +] + +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 2 + +# To ensure that underlined fields (e.g. `_field`) are shown in the docs. +autodoc_default_options = { + "members": True, + "undoc-members": True, + "private-members": False, + "special-members": False, +} + +extlinks = { + "issue": ("https://github.com/hydrologie/xdatasets/issues/%s", "GH/%s"), + "pull": ("https://github.com/hydrlologie/xdatasets/pull/%s", "PR/%s"), + "user": ("https://github.com/%s", "@%s"), +} -nbsphinx_execute = 'always' +nbsphinx_execute = "always" # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = [".rst"] # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'Xdatasets' +project = "Xdatasets" copyright = "2023, Sebastien Langlois" author = "Sebastien Langlois" @@ -68,12 +95,12 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style @@ -86,7 +113,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'furo' +html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a # theme further. For a list of options available for each theme, see the @@ -97,13 +124,13 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = [] # "_static"] # -- Options for HTMLHelp output --------------------------------------- # Output file base name for HTML help builder. -htmlhelp_basename = 'xdatasetsdoc' +htmlhelp_basename = "xdatasetsdoc" # -- Options for LaTeX output ------------------------------------------ @@ -112,15 +139,12 @@ # The paper size ('letterpaper' or 'a4paper'). 
# # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -130,9 +154,13 @@ # (source start file, target name, title, author, documentclass # [howto, manual, or own class]). latex_documents = [ - (master_doc, 'xdatasets.tex', - 'Xdatasets Documentation', - 'Sebastien Langlois', 'manual'), + ( + master_doc, + "xdatasets.tex", + "Xdatasets Documentation", + "Sebastien Langlois", + "manual", + ), ] @@ -140,11 +168,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'xdatasets', - 'Xdatasets Documentation', - [author], 1) -] +man_pages = [(master_doc, "xdatasets", "Xdatasets Documentation", [author], 1)] # -- Options for Texinfo output ---------------------------------------- @@ -153,10 +177,13 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'xdatasets', - 'xdatasets Documentation', - author, - 'xdatasets', - 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "xdatasets", + "xdatasets Documentation", + author, + "xdatasets", + "One line description of project.", + "Miscellaneous", + ), ] diff --git a/doc/contributing.rst b/docs/contributing.rst similarity index 100% rename from doc/contributing.rst rename to docs/contributing.rst diff --git a/doc/index.rst b/docs/index.rst similarity index 94% rename from doc/index.rst rename to docs/index.rst index 85813c4..cc2b8d6 100644 --- a/doc/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ Xdatasets documentation -====================================== +======================= **Useful links**: Coming Soon! @@ -17,11 +17,12 @@ The genesis of this project can be traced back to a crucial requirement of effor readme installation + usage notebooks/getting_started - modules + apidoc/modules contributing authors - history + changes .. Indices and tables .. ================== diff --git a/doc/installation.rst b/docs/installation.rst similarity index 93% rename from doc/installation.rst rename to docs/installation.rst index f1c9dd3..fd78e7b 100644 --- a/doc/installation.rst +++ b/docs/installation.rst @@ -12,7 +12,7 @@ To install xdatasets, run this command in your terminal: .. code-block:: console - $ pip install xdatasets + $ python -m pip install xdatasets This is the preferred method to install xdatasets, as it will always install the most recent stable release. @@ -44,7 +44,7 @@ Once you have a copy of the source, you can install it with: .. code-block:: console - $ python setup.py install + $ $ python -m pip install . .. 
_Github repo: https://github.com/hydrologie/xdatasets diff --git a/doc/make.bat b/docs/make.bat similarity index 100% rename from doc/make.bat rename to docs/make.bat diff --git a/docs/notebooks/getting_started.ipynb b/docs/notebooks/getting_started.ipynb new file mode 100644 index 0000000..8114db8 --- /dev/null +++ b/docs/notebooks/getting_started.ipynb @@ -0,0 +1,1362 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `xdatasets` library enables users to effortlessly access a vast collection of earth observation datasets that are compatible with `xarray` formats.\n", + "\n", + "The library adopts an opinionated approach to data querying and caters to the specific needs of certain user groups, such as hydrologists, climate scientists, and engineers. One of the functionalities of `xdatasets` is the ability to extract data at a specific location or within a designated region, such as a watershed or municipality, while also enabling spatial and temporal operations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use `xdatasets`, users must employ a query. For instance, a straightforward query to extract the variables `t2m` (*2m temperature*) and `tp` (*Total precipitation*) from the `era5_reanalysis_single_levels` dataset at two geographical positions (Montreal and Toronto) could be as follows:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "query = {\n", + " \"datasets\": {\"era5_reanalysis_single_levels_dev\": {'variables': [\"t2m\", \"tp\"]}},\n", + " \"space\": {\n", + " \"clip\": \"point\", # bbox, point or polygon\n", + " \"geometry\": {'Montreal' : (45.508888, -73.561668),\n", + " 'Toronto' : (43.651070, -79.347015)\n", + " }\n", + " }\n", + "}\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An example of a more complex query would look like the one below. \n", + "\n", + "> **Note**\n", + "> Don't worry! Below, you'll find additional examples that will assist in understanding each parameter in the query, as well as the possible combinations.\n", + "\n", + "This query calls the same variables as above. However, instead of specifying geographical positions, a GeoPandas.DataFrame is used to provide features (such as shapefiles or geojson) for extracting data within each of them. Each polygon is identified using the unique identifier `Station`, and a spatial average is computed within each one `(aggregation: True)`. The dataset, initially at an hourly time step, is converted into a daily time step while applying one or more temporal aggregations for each variable as prescribed in the query. `xdatasets` ultimately returns the dataset for the specified date range and time zone." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "```python\n", + "query = {\n", + " \"datasets\": {\"era5_reanalysis_single_levels_dev\": {'variables': [\"t2m\", \"tp\"]}},\n", + " \"space\": {\n", + " \"clip\": \"polygon\", # bbox, point or polygon\n", + " \"aggregation\": True, # spatial average of the variables within each polygon\n", + " \"geometry\": gdf,\n", + " \"unique_id\": \"Station\" # unique column name in geodataframe\n", + " },\n", + " \"time\": {\n", + " \"timestep\": \"D\",\n", + " \"aggregation\": {\"tp\": np.nansum, \n", + " \"t2m\": [np.nanmax, np.nanmin]},\n", + " \n", + " \"start\": '2000-01-01',\n", + " \"end\": '2020-05-31',\n", + " \"timezone\": 'America/Montreal',\n", + " },\n", + "}\n", + "```\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query climate datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to use `xdatasets`, you must import at least `xdatasets`, `pandas`, `geopandas`, and `numpy`. Additionally, we import `pathlib` to interact with files." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-03T18:40:37.823425184Z", + "start_time": "2023-10-03T18:40:37.809132786Z" + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-03T18:40:54.073256475Z", + "start_time": "2023-10-03T18:40:49.561354059Z" + } + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + " var py_version = '3.2.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", + " var is_dev = py_version.indexOf(\"+\") !== -1 || py_version.indexOf(\"-\") !== -1;\n", + " var reloading = false;\n", + " var Bokeh = root.Bokeh;\n", + " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", + "\n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks;\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + " if (js_modules == null) js_modules = [];\n", + " if (js_exports == null) js_exports = {};\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + "\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " if (!reloading) {\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " }\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if 
(root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + " window._bokeh_on_load = on_load\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " var skip = [];\n", + " if (window.requirejs) {\n", + " window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n", + " require([\"jspanel\"], function(jsPanel) {\n", + "\twindow.jsPanel = jsPanel\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-modal\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-tooltip\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-hint\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-layout\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-contextmenu\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"jspanel-dock\"], function() {\n", + "\ton_load()\n", + " })\n", + " require([\"gridstack\"], function(GridStack) {\n", + "\twindow.GridStack = GridStack\n", + "\ton_load()\n", + " })\n", + " require([\"notyf\"], function() {\n", + "\ton_load()\n", + " })\n", + " root._bokeh_is_loading = css_urls.length + 9;\n", + " } else {\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n", + " }\n", + "\n", + " var existing_stylesheets = []\n", + " var links = document.getElementsByTagName('link')\n", + " for (var i = 0; i < links.length; i++) {\n", + " var link = links[i]\n", + " if (link.href != null) {\n", + "\texisting_stylesheets.push(link.href)\n", + " }\n", + " }\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " if (existing_stylesheets.indexOf(url) !== -1) {\n", + "\ton_load()\n", + "\tcontinue;\n", + " }\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n", + " var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 
'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n", + " var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n", + " var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n", + " for (var i = 0; i < urls.length; i++) {\n", + " skip.push(urls[i])\n", + " }\n", + " } var existing_scripts = []\n", + " var scripts = document.getElementsByTagName('script')\n", + " for (var i = 0; i < scripts.length; i++) {\n", + " var script = scripts[i]\n", + " if (script.src != null) {\n", + "\texisting_scripts.push(script.src)\n", + " }\n", + " }\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " for (var i = 0; i < js_modules.length; i++) {\n", + " var url = js_modules[i];\n", + " if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " for (const name in js_exports) {\n", + " var url = js_exports[name];\n", + " if (skip.indexOf(url) >= 0 || root[name] != null) {\n", + "\tif (!window.requirejs) {\n", + "\t on_load();\n", + "\t}\n", + "\tcontinue;\n", + " }\n", + " var element = document.createElement('script');\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.type = \"module\";\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " element.textContent = `\n", + " import ${name} from \"${url}\"\n", + " window.${name} = ${name}\n", + " window._bokeh_on_load()\n", + " `\n", + " document.head.appendChild(element);\n", + " }\n", + " if (!js_urls.length && !js_modules.length) {\n", + " 
on_load()\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.2.min.js\", \"https://cdn.holoviz.org/panel/1.2.1/dist/panel.min.js\"];\n", + " var js_modules = [];\n", + " var js_exports = {};\n", + " var css_urls = [];\n", + " var inline_js = [ function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + "function(Bokeh) {} // ensure no trailing comma for IE\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " if ((root.Bokeh !== undefined) || (force === true)) {\n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " // Cache old bokeh versions\n", + " if (Bokeh != undefined && !reloading) {\n", + "\tvar NewBokeh = root.Bokeh;\n", + "\tif (Bokeh.versions === undefined) {\n", + "\t Bokeh.versions = new Map();\n", + "\t}\n", + "\tif (NewBokeh.version !== Bokeh.version) {\n", + "\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n", + "\t}\n", + "\troot.Bokeh = Bokeh;\n", + " }} else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " }\n", + " root._bokeh_is_initializing = false\n", + " }\n", + "\n", + " function load_or_wait() {\n", + " // Implement a backoff loop that tries to ensure we do not load multiple\n", + " // versions of Bokeh and its dependencies at the same time.\n", + " // In recent versions we use the root._bokeh_is_initializing flag\n", + " // to determine whether there is an ongoing attempt to initialize\n", + " // bokeh, however for backward compatibility we also try to ensure\n", + " // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n", + " // before older versions are fully initialized.\n", + " if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n", + " root._bokeh_is_initializing = false;\n", + " root._bokeh_onload_callbacks = undefined;\n", + " console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n", + " load_or_wait();\n", + " } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n", + " setTimeout(load_or_wait, 100);\n", + " } else {\n", + " Bokeh = root.Bokeh;\n", + " bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", + " root._bokeh_is_initializing = true\n", + " root._bokeh_onload_callbacks = []\n", + " if (!reloading && (!bokeh_loaded || is_dev)) {\n", + "\troot.Bokeh = undefined;\n", + " }\n", + " load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n", + "\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + "\trun_inline_js();\n", + " });\n", + " }\n", + " }\n", + " // Give older versions of the autoload script a head-start to ensure\n", + " // they initialize before we start loading newer version.\n", + " setTimeout(load_or_wait, 
100)\n", + "}(window));" + ], + "application/vnd.holoviews_load.v0+json": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.2.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var is_dev = py_version.indexOf(\"+\") !== -1 || py_version.indexOf(\"-\") !== -1;\n var reloading = false;\n var Bokeh = root.Bokeh;\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'jspanel': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/jspanel', 'jspanel-modal': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal', 'jspanel-tooltip': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip', 'jspanel-hint': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint', 'jspanel-layout': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout', 'jspanel-contextmenu': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu', 'jspanel-dock': 'https://cdn.jsdelivr.net/npm/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock', 'gridstack': 'https://cdn.jsdelivr.net/npm/gridstack@7.2.3/dist/gridstack-all', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'jspanel': {'exports': 'jsPanel'}, 'gridstack': {'exports': 'GridStack'}}});\n require([\"jspanel\"], function(jsPanel) {\n\twindow.jsPanel = jsPanel\n\ton_load()\n })\n require([\"jspanel-modal\"], function() {\n\ton_load()\n })\n require([\"jspanel-tooltip\"], function() {\n\ton_load()\n })\n require([\"jspanel-hint\"], function() {\n\ton_load()\n })\n require([\"jspanel-layout\"], function() {\n\ton_load()\n })\n require([\"jspanel-contextmenu\"], function() {\n\ton_load()\n })\n require([\"jspanel-dock\"], function() {\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n 
root._bokeh_is_loading = css_urls.length + 9;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window['jsPanel'] !== undefined) && (!(window['jsPanel'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/jspanel.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/modal/jspanel.modal.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/tooltip/jspanel.tooltip.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/hint/jspanel.hint.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/layout/jspanel.layout.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/contextmenu/jspanel.contextmenu.js', 'https://cdn.holoviz.org/panel/1.2.1/dist/bundled/floatpanel/jspanel4@4.12.0/dist/extensions/dock/jspanel.dock.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/gridstack/gridstack@7.2.3/dist/gridstack-all.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.2.1/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n 
element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.bokeh.org/bokeh/release/bokeh-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.2.2.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.2.2.min.js\", \"https://cdn.holoviz.org/panel/1.2.1/dist/panel.min.js\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n Bokeh = root.Bokeh;\n bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n if (!reloading && (!bokeh_loaded || is_dev)) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, 
js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "if ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n", + " window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n", + "}\n", + "\n", + "\n", + " function JupyterCommManager() {\n", + " }\n", + "\n", + " JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n", + " if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", + " comm_manager.register_target(comm_id, function(comm) {\n", + " comm.on_msg(msg_handler);\n", + " });\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n", + " comm.onMsg = msg_handler;\n", + " });\n", + " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", + " google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n", + " var messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " console.log(message)\n", + " var content = {data: message.data, comm_id};\n", + " var buffers = []\n", + " for (var buffer of message.buffers || []) {\n", + " buffers.push(new DataView(buffer))\n", + " }\n", + " var metadata = message.metadata || {};\n", + " var msg = {content, buffers, metadata}\n", + " msg_handler(msg);\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " return messages.next().then(processIteratorResult);\n", + " })\n", + " }\n", + " }\n", + "\n", + " JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n", + " if (comm_id in window.PyViz.comms) {\n", + " return window.PyViz.comms[comm_id];\n", + " } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n", + " var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n", + " var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n", + " if (msg_handler) {\n", + " comm.on_msg(msg_handler);\n", + " }\n", + " } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n", + " var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n", + " comm.open();\n", + " if (msg_handler) {\n", + " comm.onMsg = msg_handler;\n", + " }\n", + " } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n", + " var comm_promise = google.colab.kernel.comms.open(comm_id)\n", + " comm_promise.then((comm) => {\n", + " window.PyViz.comms[comm_id] = comm;\n", + " if (msg_handler) {\n", + " var messages = comm.messages[Symbol.asyncIterator]();\n", + " function processIteratorResult(result) {\n", + " var message = result.value;\n", + " var content = {data: message.data};\n", + " var metadata = message.metadata || {comm_id};\n", + " var msg = {content, metadata}\n", + " msg_handler(msg);\n", + " return messages.next().then(processIteratorResult);\n", + " }\n", + " return 
messages.next().then(processIteratorResult);\n", + " }\n", + " }) \n", + " var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n", + " return comm_promise.then((comm) => {\n", + " comm.send(data, metadata, buffers, disposeOnDone);\n", + " });\n", + " };\n", + " var comm = {\n", + " send: sendClosure\n", + " };\n", + " }\n", + " window.PyViz.comms[comm_id] = comm;\n", + " return comm;\n", + " }\n", + " window.PyViz.comm_manager = new JupyterCommManager();\n", + " \n", + "\n", + "\n", + "var JS_MIME_TYPE = 'application/javascript';\n", + "var HTML_MIME_TYPE = 'text/html';\n", + "var EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\n", + "var CLASS_NAME = 'output';\n", + "\n", + "/**\n", + " * Render data to the DOM node\n", + " */\n", + "function render(props, node) {\n", + " var div = document.createElement(\"div\");\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(div);\n", + " node.appendChild(script);\n", + "}\n", + "\n", + "/**\n", + " * Handle when a new output is added\n", + " */\n", + "function handle_add_output(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + " if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + " var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + " if (id !== undefined) {\n", + " var nchildren = toinsert.length;\n", + " var html_node = toinsert[nchildren-1].children[0];\n", + " html_node.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var scripts = [];\n", + " var nodelist = html_node.querySelectorAll(\"script\");\n", + " for (var i in nodelist) {\n", + " if (nodelist.hasOwnProperty(i)) {\n", + " scripts.push(nodelist[i])\n", + " }\n", + " }\n", + "\n", + " scripts.forEach( function (oldScript) {\n", + " var newScript = document.createElement(\"script\");\n", + " var attrs = [];\n", + " var nodemap = oldScript.attributes;\n", + " for (var j in nodemap) {\n", + " if (nodemap.hasOwnProperty(j)) {\n", + " attrs.push(nodemap[j])\n", + " }\n", + " }\n", + " attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n", + " newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n", + " oldScript.parentNode.replaceChild(newScript, oldScript);\n", + " });\n", + " if (JS_MIME_TYPE in output.data) {\n", + " toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n", + " }\n", + " output_area._hv_plot_id = id;\n", + " if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n", + " window.PyViz.plot_index[id] = Bokeh.index[id];\n", + " } else {\n", + " window.PyViz.plot_index[id] = null;\n", + " }\n", + " } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + "function handle_clear_output(event, handle) {\n", + " var id = 
handle.cell.output_area._hv_plot_id;\n", + " var server_id = handle.cell.output_area._bokeh_server_id;\n", + " if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n", + " var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n", + " if (server_id !== null) {\n", + " comm.send({event_type: 'server_delete', 'id': server_id});\n", + " return;\n", + " } else if (comm !== null) {\n", + " comm.send({event_type: 'delete', 'id': id});\n", + " }\n", + " delete PyViz.plot_index[id];\n", + " if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n", + " var doc = window.Bokeh.index[id].model.document\n", + " doc.clear();\n", + " const i = window.Bokeh.documents.indexOf(doc);\n", + " if (i > -1) {\n", + " window.Bokeh.documents.splice(i, 1);\n", + " }\n", + " }\n", + "}\n", + "\n", + "/**\n", + " * Handle kernel restart event\n", + " */\n", + "function handle_kernel_cleanup(event, handle) {\n", + " delete PyViz.comms[\"hv-extension-comm\"];\n", + " window.PyViz.plot_index = {}\n", + "}\n", + "\n", + "/**\n", + " * Handle update_display_data messages\n", + " */\n", + "function handle_update_output(event, handle) {\n", + " handle_clear_output(event, {cell: {output_area: handle.output_area}})\n", + " handle_add_output(event, handle)\n", + "}\n", + "\n", + "function register_renderer(events, OutputArea) {\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[0]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " events.on('output_added.OutputArea', handle_add_output);\n", + " events.on('output_updated.OutputArea', handle_update_output);\n", + " events.on('clear_output.CodeCell', handle_clear_output);\n", + " events.on('delete.Cell', handle_clear_output);\n", + " events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n", + "\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " safe: true,\n", + " index: 0\n", + " });\n", + "}\n", + "\n", + "if (window.Jupyter !== undefined) {\n", + " try {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " } catch(err) {\n", + " }\n", + "}\n" + ], + "application/vnd.holoviews_load.v0+json": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = 
msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = 
document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if 
(OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import os\n", + "\n", + "os.environ[\"USE_PYGEOS\"] = \"0\"\n", + "from pathlib import Path\n", + "\n", + "import geopandas as gpd\n", + "\n", + "# Visualization\n", + "import hvplot.pandas # noqa\n", + "import hvplot.xarray # noqa-\n", + "import numpy as np\n", + "import pandas as pd\n", + "import panel as pn # noqa\n", + "\n", + "import xdatasets as xd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clip by points (sites)\n", + "\n", + "\n", + "To begin with, we need to create a dictionary of sites and their corresponding geographical coordinates." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sites = {\n", + " \"Montreal\": (45.508888, -73.561668),\n", + " \"New York\": (40.730610, -73.935242),\n", + " \"Miami\": (25.761681, -80.191788),\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will then extract the `tp` (*total precipitation*) and `t2m` (*2m temperature*) from the `era5_reanalysis_single_levels` dataset for the designated sites. Afterward, we will convert the time step to daily and adjust the timezone to Eastern Time. Finally, we will limit the temporal interval." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before proceeding with this first query, let's quickly outline the role of each parameter:\n", + "\n", + "- **datasets**: A dictionary where datasets serve as keys and desired variables as values.\n", + "- **space**: A dictionary that defines the necessary spatial operations to apply on user-supplied geographic features.\n", + "- **time**: A dictionary that defines the necessary temporal operations to apply on the datasets\n", + "\n", + "For more information on each parameter, consult the API documentation.\n", + "\n", + "This is what the requested query looks like :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = {\n", + " \"datasets\": \"era5_reanalysis_single_levels\",\n", + " \"space\": {\"clip\": \"point\", \"geometry\": sites}, # bbox, point or polygon\n", + " \"time\": {\n", + " \"timestep\": \"D\", # http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases\n", + " \"aggregation\": {\"tp\": np.nansum, \"t2m\": np.nanmean},\n", + " \"start\": \"1959-01-01\",\n", + " \"timezone\": \"America/Montreal\",\n", + " },\n", + "}\n", + "xds = xd.Query(**query)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By accessing the `data` attribute, you can view the data obtained from the query. It's worth noting that the variable name `tp` has been updated to `tp_nansum` to reflect the reduction operation (`np.nansum`) that was utilized to convert the time step from hourly to daily. Likewise, `t2m` was updated to `t2m_nanmean`. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "xds.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "title = f\"Comparison of total precipitation across three cities in North America from \\\n", + "{xds.data.time.dt.year.min().values} to {xds.data.time.dt.year.max().values}\"\n", + "\n", + "xds.data.sel(\n", + " timestep=\"D\",\n", + " spatial_agg=\"point\",\n", + " time_agg=\"nansum\",\n", + " source=\"era5_reanalysis_single_levels\",\n", + ").hvplot(\n", + " title=title,\n", + " x=\"time\",\n", + " y=\"tp\",\n", + " grid=True,\n", + " width=750,\n", + " height=450,\n", + " by=\"site\",\n", + " legend=\"top\",\n", + " widget_location=\"bottom\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "title = f\"Comparison of 2m temperature across three cities in North America from \\\n", + "{xds.data.time.dt.year.min().values} to {xds.data.time.dt.year.max().values}\"\n", + "\n", + "xds.data.sel(\n", + " timestep=\"D\",\n", + " spatial_agg=\"point\",\n", + " time_agg=\"nanmean\",\n", + " source=\"era5_reanalysis_single_levels\",\n", + ").hvplot(\n", + " title=title,\n", + " x=\"time\",\n", + " y=\"t2m\",\n", + " grid=True,\n", + " width=750,\n", + " height=450,\n", + " by=\"site\",\n", + " legend=\"top\",\n", + " widget_location=\"bottom\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clip on polygons with no averaging in space\n", + "\n", + "Let's first access certain polygon features, which can be in the form of shapefiles, geojson, or any other format compatible with `geopandas`. In this example, we are using `JSON` (geojson) files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bucket = Path(\"https://s3.us-east-2.wasabisys.com/watersheds-polygons/MELCC/json\")\n", + "\n", + "paths = [\n", + " bucket.joinpath(\"023003/023003.json\"),\n", + " bucket.joinpath(\"031101/031101.json\"),\n", + " bucket.joinpath(\"040111/040111.json\"),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Subsequently, all of the files can be opened and consolidated into a single `geopandas.GeoDataFrame` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = pd.concat([gpd.read_file(path) for path in paths]).reset_index(drop=True)\n", + "gdf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's examine the geographic locations of the polygon features." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gdf.hvplot(\n", + " geo=True,\n", + " tiles=\"ESRI\",\n", + " color=\"Station\",\n", + " alpha=0.8,\n", + " width=750,\n", + " height=450,\n", + " legend=\"top\",\n", + " hover_cols=[\"Station\", \"Superficie\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following query seeks the variables `t2m` and `tp` from the `era5_reanalysis_single_levels` dataset, covering the period between January 1, 1959, and September 30, 1961, for the three polygons mentioned earlier. It is important to note that as `aggregation` is set to `False`, no spatial averaging will be conducted, and a mask (raster) will be returned for each polygon." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: New dimensions ('lat', 'bnds') must be a superset of existing dimensions ('lat', 'bnd')\n", + "\n", + "# query = {\n", + "# \"datasets\": {\"era5_reanalysis_single_levels\": {\"variables\": [\"t2m\", \"tp\"]}},\n", + "# \"space\": {\n", + "# \"clip\": \"polygon\", # bbox, point or polygon\n", + "# \"averaging\": False, # spatial average of the variables within each polygon\n", + "# \"geometry\": gdf,\n", + "# \"unique_id\": \"Station\", # unique column name in geodataframe\n", + "# },\n", + "# \"time\": {\n", + "# \"start\": \"1959-01-01\",\n", + "# \"end\": \"1963-08-31\",\n", + "# },\n", + "# }\n", + "#\n", + "# xds = xd.Query(**query)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By accessing the `data` attribute, you can view the data obtained from the query. For each variable, the dimensions of `time`, `latitude`, `longitude`, and `Station` (the unique ID) are included. In addition, there is another variable called `weights` that is returned. This variable specifies the weight that should be assigned to each pixel if spatial averaging is conducted over a mask (polygon)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# xds.data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Weights are much easier to comprehend visually, so let's examine the weights returned for the station *023003*. Notice that when selecting a single feature (Station *023003* in this case), the shape of our spatial dimensions is reduced to a 2x2 pixel area (longitude x latitude) that encompasses the entire feature." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# station = \"023003\"\n", + "#\n", + "# ds_station = xds.data.sel(Station=station)\n", + "# ds_clipped = xds.bbox_clip(ds_station).squeeze()\n", + "# ds_clipped" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: This cell raises `Exception: An axis may only be assigned one projection type`\n", + "\n", + "# (\n", + "# (\n", + "# ds_clipped.t2m.isel(time=0).hvplot(\n", + "# title=\"The 2m temperature for pixels that intersect with the polygon on January 1, 1959\",\n", + "# tiles=\"ESRI\",\n", + "# geo=True,\n", + "# alpha=0.6,\n", + "# colormap=\"isolum\",\n", + "# width=750,\n", + "# height=450,\n", + "# )\n", + "# * gdf[gdf.Station == station].hvplot(\n", + "# geo=True,\n", + "# width=750,\n", + "# height=450,\n", + "# legend=\"top\",\n", + "# hover_cols=[\"Station\", \"Superficie\"],\n", + "# )\n", + "# )\n", + "# + ds_clipped.weights.hvplot(\n", + "# title=\"The weights that should be assigned to each pixel when performing spatial averaging\",\n", + "# tiles=\"ESRI\",\n", + "# alpha=0.6,\n", + "# colormap=\"isolum\",\n", + "# geo=True,\n", + "# width=750,\n", + "# height=450,\n", + "# )\n", + "# * gdf[gdf.Station == station].hvplot(\n", + "# geo=True,\n", + "# width=750,\n", + "# height=450,\n", + "# legend=\"top\",\n", + "# hover_cols=[\"Station\", \"Superficie\"],\n", + "# )\n", + "# ).cols(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The two plots depicted above show the 2m temperature for each pixel that intersects with the polygon from Station `023003` and the corresponding weights to be applied to each pixel. 
In the lower plot, it is apparent that the majority of the polygon is situated in the upper-left pixel, which results in that pixel having a weight of approximately 91%. It is evident that the two lower pixels have very minimal intersection with the polygon, which results in their respective weights being nearly zero (hover on the plot to verify the weights).\n", + "\n", + "In various libraries, either all pixels that intersect with the geometries are kept, or only pixels with centers within the polygon are retained. However, as shown in the previous example, utilizing such methods can introduce significant biases in the final outcome. Indeed, keeping all four pixels intersecting with the polygon with equal weights when the temperature values in the lower pixels are roughly 2 degrees lower than that of the upper-left pixel would introduce significant biases. Therefore, utilizing weights is a more precise approach." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clip on polygons with averaging in space\n", + "\n", + "The following query seeks the variables `t2m` and `tp` from the `era5_reanalysis_single_levels` and `era5_land_reanalysis` datasets, covering the period between January 1, 1950, to present, for the three polygons mentioned earlier. Note that when the `aggregation` parameter is set to `True`, spatial averaging takes place. In addition, the weighted mask (raster) described earlier will be applied to generate a time series for each polygon.\n", + "\n", + "Additional steps are carried out in the process, including converting the original hourly time step to a daily time step. During this conversion, various temporal aggregations will be applied to each variable and a conversion to the local time zone will take place.\n", + "\n", + "> **Note**\n", + "> If users prefer to pass multiple dictionaries instead of a single large one, the following format is also considered acceptable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: new dimensions ('lat', 'bnds') must be a superset of existing dimensions ('lat', 'bnd')\n", + "\n", + "# datasets = {\n", + "# \"era5_reanalysis_single_levels\": {\"variables\": [\"t2m\", \"tp\"]},\n", + "# \"era5_land_reanalysis_dev\": {\"variables\": [\"t2m\", \"tp\"]},\n", + "# }\n", + "# space = {\n", + "# \"clip\": \"polygon\", # bbox, point or polygon\n", + "# \"averaging\": True,\n", + "# \"geometry\": gdf, # 3 polygons\n", + "# \"unique_id\": \"Station\",\n", + "# }\n", + "# time = {\n", + "# \"timestep\": \"D\",\n", + "# \"aggregation\": {\"tp\": [np.nansum], \"t2m\": [np.nanmax, np.nanmin]},\n", + "# \"start\": \"1950-01-01\",\n", + "# \"timezone\": \"America/Montreal\",\n", + "# }\n", + "#\n", + "# xds = xd.Query(datasets=datasets, space=space, time=time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# xds.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: squeeze() does not work on xds.data -> list.\n", + "\n", + "# xds.data.squeeze().t2m.hvplot(\n", + "# x=\"time\",\n", + "# by=\"time_agg\",\n", + "# groupby=[\"source\", \"Station\"],\n", + "# width=750,\n", + "# height=400,\n", + "# grid=True,\n", + "# widget_location=\"bottom\",\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting dataset can be explored in the data attribute :" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: squeeze() does not work on xds.data -> list.\n", + "\n", + "# xds.data.squeeze().tp.sel(time_agg=\"nansum\").hvplot(\n", + "# x=\"time\",\n", + "# groupby=[\"source\", \"Station\"],\n", + "# width=750,\n", + "# height=400,\n", + "# color=\"blue\",\n", + "# grid=True,\n", + "# widget_location=\"bottom\",\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bounding box (bbox) around polygons\n", + "\n", + "The following query seeks the variable `tp` from the `era5_land_reanalysis_dev` dataset, covering the period between January 1, 1959, and December 31, 1970, for the bounding box that delimits the three polygons mentioned earlier.\n", + "\n", + "Additional steps are carried out in the process, including converting to the local time zone." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = {\n", + " \"datasets\": {\"era5_land_reanalysis_dev\": {\"variables\": [\"tp\"]}},\n", + " \"space\": {\n", + " \"clip\": \"bbox\", # bbox, point or polygon\n", + " \"geometry\": gdf,\n", + " },\n", + " \"time\": {\n", + " \"start\": \"1959-01-01\",\n", + " \"end\": \"1970-12-31\",\n", + " \"timezone\": \"America/Montreal\",\n", + " },\n", + "}\n", + "\n", + "\n", + "xds = xd.Query(**query)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: This query does not return TP - instead returns d2m, sf, sp, ssrd, strd, u10, v10\n", + "\n", + "xds.data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's find out which day (24-hour period) was the rainiest in the entire region for the data retrieved in previous cell." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# indexer = (\n", + "# xds.data.sel(source=\"era5_land_reanalysis_dev\")\n", + "# .tp.sum([\"latitude\", \"longitude\"])\n", + "# .rolling(time=24)\n", + "# .sum()\n", + "# .argmax(\"time\")\n", + "# .values\n", + "# )\n", + "\n", + "# xds.data.isel(time=indexer).time.dt.date.values.tolist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's visualise the evolution of the hourly precipitation during that day. Note that each image (raster) delimits exactly the bounding box required to cover all polygons in the query. Please note that for full interactivity, running the code in a Jupyter Notebook is necessary.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# da = xds.data.tp.isel(time=slice(indexer - 24, indexer))\n", + "# # da = da.where(da>0.0001, drop=True)\n", + "\n", + "# (da * 1000).sel(source=\"era5_land_reanalysis_dev\").squeeze().hvplot.quadmesh(\n", + "# width=750,\n", + "# height=450,\n", + "# geo=True,\n", + "# tiles=\"ESRI\",\n", + "# groupby=[\"time\"],\n", + "# legend=\"top\",\n", + "# cmap=\"gist_ncar\",\n", + "# widget_location=\"bottom\",\n", + "# widget_type=\"scrubber\",\n", + "# dynamic=False,\n", + "# clim=(0.01, 10),\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query hydrological datasets\n", + "Hydrological queries are still being tested and output format is likely to change. Stay tuned!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# FIXME: MELCC not yet supported.\n", + "\n", + "# query = {\"datasets\": \"melcc\"}\n", + "# xds = xd.Query(**query)\n", + "# xds.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query = {\"datasets\": \"hydat\"}\n", + "xds = xd.Query(**query)\n", + "xds.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# FIXME: No such keys found for query\n", + "\n", + "# query = {\n", + "# \"datasets\": {\n", + "# \"hydrometric\": {\n", + "# \"variables\": [\"streamflow\", \"t2m_nanmax\", \"t2m_nanmin\", \"tp_nansum\"],\n", + "# \"id\": [\"01010070\", \"01016500\", \"01017290\", \"02*\"],\n", + "# }\n", + "# },\n", + "# }\n", + "# xds = xd.Query(**query)\n", + "# ds_hydro = xds.data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ds_hydro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import panel as pn\n", + "\n", + "# id1 = pn.widgets.Select(value=\"01010070\", options=list(xds.data.id.values), name=\"id\")\n", + "# source = pn.widgets.Select(\n", + "# value=\"hydrometric\", options=list(xds.data.source.values), name=\"source\"\n", + "# )\n", + "\n", + "\n", + "# @pn.depends(id1, source)\n", + "# def plot_hydrograph_and_weather(id1, source):\n", + "# da = ds_hydro.sel(id=id1, source=source)\n", + "# dx = da[\"streamflow\"].dropna(\"time\", how=\"any\")\n", + "\n", + "# trace1 = da[\"streamflow\"].hvplot(\n", + "# grid=True,\n", + "# widget_location=\"bottom\",\n", + "# color=\"black\",\n", + "# xlim=(dx.time[0].values, dx.time[-1].values),\n", + "# title=f\"Daily streamflow at location 
{id1}\",\n", + "# width=750,\n", + "# height=300,\n", + "# )\n", + "# trace2 = da[[\"t2m_nanmax\", \"t2m_nanmin\"]].hvplot(\n", + "# grid=True,\n", + "# widget_location=\"bottom\",\n", + "# color=[\"red\", \"blue\"],\n", + "# xlim=(dx.time[0].values, dx.time[-1].values),\n", + "# title=f\"Daily minimum and maximum temperature at location {id1}\",\n", + "# width=750,\n", + "# height=300,\n", + "# )\n", + "\n", + "# trace3 = da[[\"tp_nansum\"]].hvplot(\n", + "# grid=True,\n", + "# color=[\"turquoise\"],\n", + "# xlim=(dx.time[0].values, dx.time[-1].values),\n", + "# title=f\"Daily precipitation at location {id1}\",\n", + "# width=750,\n", + "# height=300,\n", + "# )\n", + "\n", + "# return pn.Column(trace1, trace2, trace3)\n", + "\n", + "\n", + "# pn.Row(\n", + "# pn.Column(\"## Hydrometric Data Explorer\", id1, source, plot_hydrograph_and_weather)\n", + "# )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/readme.rst b/docs/readme.rst similarity index 100% rename from doc/readme.rst rename to docs/readme.rst diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..fd155b8 --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,9 @@ +========== +User Guide +========== + +In this user guide, you will find detailed descriptions and examples that describe many common tasks that you can accomplish with xdatasets. + +.. note:: + + This user guide is a work in progress. If you have any questions or suggestions, please feel free to open an issue on the `xdatasets GitHub Issues page `_. 
diff --git a/environment-dev.yml b/environment-dev.yml new file mode 100644 index 0000000..4755795 --- /dev/null +++ b/environment-dev.yml @@ -0,0 +1,38 @@ +name: xdatasets +channels: + - conda-forge +dependencies: + - python >=3.9,<3.12 + - bottleneck + - clisops + - dask + - geopandas + - intake + - intake-geopandas + - intake-xarray >=0.6.1 + - ipykernel + - ipython + - jsonpickle + - matplotlib-base + - pint >=0.10 + - tqdm + - xagg + - xarray >=2023.1 + - zarr + # Dev tools and testing + - black >=23.11.0 + - blackdoc >=0.3.9 + - bump-my-version >=0.12.0 + - coverage >=6.2.0,<7.0.0 + - coveralls >=3.3.1 + - flake8 >=6.1.0 + - flake8-rst-docstrings >=0.3.0 + - flit + - isort >=5.12.0 + - pip >=23.1.2 + - pre-commit >=3.3.2 + - pytest >=7.3.1 + - pytest-cov>=4.0.0 + - tokenize-rt>=3.2.0 + - tox >=4.5.1 + - watchdog >=3.0.0 diff --git a/environment-docs.yml b/environment-docs.yml new file mode 100644 index 0000000..2403b0f --- /dev/null +++ b/environment-docs.yml @@ -0,0 +1,32 @@ +name: xdatasets +channels: +- conda-forge +- defaults +dependencies: + - python >=3.9,<3.10 + - cartopy + - distributed >=2.0 + - furo + - geoviews + - hvplot + - intake + - intake-geopandas + - intake-xarray >=0.6.1 + - ipykernel + - ipython + - jupyter_client + - matplotlib + - nbsphinx + - nc-time-axis + - netCDF4 + - pandoc + - panel + - pooch + - s3fs + - sphinx + - sphinx >=7.0.0 + - sphinx-click + - sphinx-codeautolink + - sphinx-copybutton + - sphinxcontrib-confluencebuilder + - xagg diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 459e7a4..0000000 --- a/environment.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: xdatasets -channels: - - conda-forge -dependencies: - - python=3.9 - - IPyKernel - - xarray - - dask - - netCDF4 - - bottleneck - - s3fs - - zarr - - cartopy - - geoviews - - rioxarray - - matplotlib - - tqdm - - intake - - geopandas - - hvplot - - intake-xarray=0.6.1 - - intake-geopandas - - nbsphinx - - sphinx - - pip: - - sphinx-pangeo-theme - - furo \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..af27f70 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,279 @@ +[build-system] +requires = ["flit_core >=3.8,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "xdatasets" +authors = [ + {name = "Sebastien Langlois", email = "sebastien.langlois62@gmail.com"} +] +maintainers = [ + {name = "Trevor James Smith", email = "smith.trevorj@ouranos.ca"} +] +readme = {file = "README.rst", content-type = "text/x-rst"} +requires-python = ">=3.8.0" +keywords = [ + "xdatasets", + "hydrology", + "meteorology", + "climate", + "climatology", + "netcdf", + "gridded analysis" +] +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Hydrology", + "Programming Language :: Python :: Implementation :: CPython" +] +dynamic = ["description", "version"] +dependencies = [ + "bottleneck>=1.3.1", + # This is differently named in conda-forge + "cf-xarray>=0.6.1", + "cftime>=1.4.1", + "clisops>=0.9.2", + "dask[array]>=2.6", + "geopandas", + "intake", + 
"intake-xarray>=0.6.1", + "ipython", + "jsonpickle", + "numba", + "numpy>=1.16", + "pandas>=0.23", + "pint>=0.10", + "scipy>=1.2", + "tqdm", + # FIXME: This needs to be revisited. `xagg` should be an optional dependency, reliant on Anaconda. + # "xagg-no-xesmf-deps", + # "xagg", + "xarray>=2023.1", + "zarr>=2.11.1" +] + +[project.optional-dependencies] +dev = [ + # Dev tools and testing + "black[jupyter]>=23.11.0", + "blackdoc>=0.3.9", + "bump-my-version>=0.12.0", + "coverage", + "coverage>=6.2.2,<7.0.0", + "coveralls>=3.3.1", + "flake8-alphabetize>=0.0.21", + "flake8-rst-docstrings>=0.3.0", + "flake8>=6.1.0", + "flit", + "ipython<8.0", # this needed when testing see #1005 + "isort>=5.12.0", + "mypy", + "nbval", + "pip>=23.1.2", + "pre-commit>=3.3.2", + "pylint", + "pytest-cov>=4.0.0", + "pytest>=7.3.1", + "tokenize-rt", + "tox>=4.5.1", + "watchdog>=3.0.0", + "xdoctest" +] +docs = [ + # Documentation and examples + "cartopy", + "distributed>=2.0", + "furo", + "geoviews", + "hvplot", + "ipykernel", + "ipython", + "jupyter_client", + "matplotlib", + "nbsphinx", + "nbsphinx", + "nc-time-axis", + "netCDF4", + "pandoc", + "panel", + "pooch", + "s3fs", + "sphinx-codeautolink", + "sphinx-copybutton", + "sphinx-rtd-theme>=1.0", + "sphinx>=7.0", + "sphinxcontrib-confluencebuilder" +] + +[project.urls] +"Homepage" = "https://hydrologie.github.io/xdatasets/" +"Changelog" = "https://xdatasets.readthedocs.io/en/stable/Changes.html" +"Source" = "https://github.com/hydrologie/xdatasets" +"Issue tracker" = "https://github.com/hydrologie/xdatasets/issues" + +[tool] + +[tool.black] +target-version = [ + "py38", + "py39", + "py310", + "py311" +] + +[tool.bumpversion] +current_version = "0.3.0" +commit = true +tag = false +tag_name = "{new_version}" +allow_dirty = false +serialize = ["{major}.{minor}.{patch}"] +parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)" + +[[tool.bumpversion.files]] +filename = "xdatasets/__init__.py" +search = "__version__ = \"{current_version}\"" +replace = "__version__ = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "tests/test_xdatasets.py" +search = "__version__ = \"{current_version}\"" +replace = "__version__ = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = ".cruft.json" +search = "\"version\": \"{current_version}\"" +replace = "\"version\": \"{new_version}\"" + +[tool.coverage.run] +relative_files = true +include = ["xdatasets/*"] +omit = ["tests/*.py"] + +[tool.flit.sdist] +include = [ + ".zenodo.json", + "AUTHORS.rst", + "CHANGES.rst", + "CONTRIBUTING.rst", + "LICENSE", + "Makefile", + "README.rst", + "docs/notebooks/*.ipynb", + "environment-dev.yml", + "environment-docs.yml", + "docs/_static/_images/*.gif", + "docs/_static/_images/*.jpg", + "docs/_static/_images/*.png", + "docs/_static/_images/*.rst", + "docs/Makefile", + "docs/conf.py", + "docs/make.bat", + "tests/*.py", + "tox.ini", + "xdatasets" +] +exclude = [ + "*.py[co]", + "__pycache__", + ".coveralls.yml", + ".cruft.json", + ".editorconfig", + ".flake8", + ".gitignore", + ".nojekyll", + ".pre-commit-config.yaml", + ".readthedocs.yml", + ".yamllint.yaml", + "docs/_*", + "docs/apidoc/modules.rst", + "docs/apidoc/xdatasets*.rst" +] + +[tool.isort] +profile = "black" +py_version = 38 + +[tool.mypy] +python_version = 3.8 +show_error_codes = true +warn_return_any = true +warn_unused_configs = true + +[[tool.mypy.overrides]] +module = [] +ignore_missing_imports = true + +[tool.pytest.ini_options] +addopts = [ + "--verbose", + "--color=yes" +] +filterwarnings = ["ignore::UserWarning"] +testpaths = "tests" 
+ +[tool.ruff] +src = [""] +line-length = 150 +target-version = "py38" +exclude = [ + ".eggs", + ".git", + "build", + "docs", + "tests" +] +ignore = [ + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D104", # Missing docstring in public package + "D105", # Missing docstring in magic method + "D400", # First line should end with a period + "D401", # First line should be in imperative mood + "E722" # do not use bare except, specify exception instead +] +select = [ + "C9", + "D", + "E", + "F", + "W" +] + +[tool.ruff.flake8-bandit] +check-typed-exception = true + +[tool.ruff.format] +line-ending = "auto" + +[tool.ruff.isort] +known-first-party = ["xdatasets"] +case-sensitive = true +detect-same-package = false +lines-after-imports = 1 +no-lines-before = ["future", "standard-library"] + +[tool.ruff.mccabe] +max-complexity = 15 + +[tool.ruff.per-file-ignores] +"xdatasets/**/__init__.py" = ["F401", "F403"] + +[tool.ruff.pycodestyle] +max-doc-length = 180 + +[tool.ruff.pydocstyle] +convention = "numpy" diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 9715bc7..0000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,32 +0,0 @@ -# dev tools and testing -black -bump2version -coverage -flake8 -flake8-rst-docstrings -ipython<8.0 # this needed when testing see #1005 -mypy -nbval -pre-commit>=2.9 -pydocstyle>=5.1.1 -pylint -pytest -pytest-cov -pytest-runner -tokenize-rt -tox -wheel -xdoctest -yamale -yamllint -# Documentation and examples -distributed>=2.0 -ipykernel -matplotlib -nbsphinx -nc-time-axis -netCDF4 -pooch -sphinx -sphinx-rtd-theme -sphinxcontrib-confluencebuilder diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 797ab1e..0000000 --- a/setup.cfg +++ /dev/null @@ -1,21 +0,0 @@ -[bumpversion] -current_version = 0.2.10 -commit = True -tag = True - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:xdatasets/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - -[bdist_wheel] -universal = 1 - -[flake8] -exclude = docs - -[tool:pytest] -collect_ignore = ['setup.py'] diff --git a/setup.py b/setup.py deleted file mode 100644 index 4bba0c6..0000000 --- a/setup.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -"""The setup script.""" -import re - -from setuptools import find_packages, setup - -NAME = "xdatasets" -DESCRIPTION = "Easy acess to earth observation datasets with xarray." 
-URL = "https://github.com/hydrologie/xdatasets'" -AUTHOR = "Sebastien Langlois" -AUTHOR_EMAIL = "sebastien.langlois62@gmail.com" -REQUIRES_PYTHON = ">=3.8.0" -LICENSE = "MIT license" - -with open('README.rst') as readme_file: - readme = readme_file.read() - -with open('HISTORY.rst') as history_file: - history = history_file.read() - -requirements = [ - "bottleneck>=1.3.1", - "cf-xarray>=0.6.1", - "cftime>=1.4.1", - "clisops>=0.9.2", - "dask[array]>=2.6", - "intake-xarray>=0.6.1", - "jsonpickle", - "numba", - "numpy>=1.16", - "pandas>=0.23", - "pint>=0.10", - "pyyaml", - "s3fs>=2022.7.0", - "geopandas", - "tqdm", - "scipy>=1.2", - "xarray>=0.17", - "zarr>=2.11.1", - "xagg-no-xesmf-deps" -] - - -dev_requirements = [] -with open("requirements_dev.txt") as dev: - for dependency in dev.readlines(): - dev_requirements.append(dependency) - -KEYWORDS = "xdatasets hydrology meteorology climate climatology netcdf gridded analysis" - -setup( - author=AUTHOR, - author_email=AUTHOR_EMAIL, - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Topic :: Scientific/Engineering :: Hydrology", - ], - description=DESCRIPTION, - python_requires=REQUIRES_PYTHON, - install_requires=requirements, - license=LICENSE, - long_description="Xdatasets", - long_description_content_type="text/x-rst", - include_package_data=True, - keywords=KEYWORDS, - name=NAME, - packages=find_packages(), - extras_require={"dev": dev_requirements}, - url=URL, - version='0.2.10', - zip_safe=False, -) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..87ca129 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Unit test package for xdatasets.""" diff --git a/tests/test_xdatasets.py b/tests/test_xdatasets.py new file mode 100644 index 0000000..c512b85 --- /dev/null +++ b/tests/test_xdatasets.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +"""Tests for `xdatasets` package.""" + +import pathlib +import pkgutil + +import pytest + +# import xdatasets + + +@pytest.fixture +def response(): + """Sample pytest fixture. 
+ + See more at: https://doc.pytest.org/en/latest/explanation/fixtures.html + """ + # import requests + # return requests.get('https://github.com/audreyr/cookiecutter-pypackage') + + +def test_content(response): + """Sample pytest test function with the pytest fixture as an argument.""" + # from bs4 import BeautifulSoup + # assert 'GitHub' in BeautifulSoup(response.content).title.string + + +def test_package_metadata(): + """Test the package metadata.""" + project = pkgutil.get_loader("xdatasets").get_filename() + + metadata = pathlib.Path(project).resolve().parent.joinpath("__init__.py") + + with open(metadata) as f: + contents = f.read() + assert """Sebastien Langlois""" in contents + assert '__email__ = "sebastien.langlois62@gmail.com"' in contents + assert '__version__ = "0.3.0"' in contents diff --git a/tox.ini b/tox.ini index 6b37fb6..b963aae 100644 --- a/tox.ini +++ b/tox.ini @@ -1,26 +1,57 @@ [tox] -envlist = py36, py37, py38, flake8 +min_version = 4.0 +envlist = + lint + py{38,39,310,311} + docs + coveralls +requires = + flit + pip >= 23.3.0 +opts = + --verbose -[travis] -python = - 3.8: py38 - 3.7: py37 - 3.6: py36 +[testenv:lint] +skip_install = True +deps = + black[jupyter] + blackdoc + isort + flake8 + ruff +commands = + make lint +allowlist_externals = + make -[testenv:flake8] -basepython = python -deps = flake8 -commands = flake8 xdatasets tests +[testenv:docs] +extras = + docs +commands = + make --directory=docs clean html +allowlist_externals = + make [testenv] setenv = + PYTEST_ADDOPTS = "--color=yes" PYTHONPATH = {toxinidir} +passenv = + CI + COVERALLS_* + GITHUB_* +extras = + dev +download = True +install_command = python -m pip install --no-user {opts} {packages} deps = - -r{toxinidir}/requirements_dev.txt ; If you want to make tox run the tests with the same versions, create a ; requirements.txt with the pinned versions and uncomment the following line: ; -r{toxinidir}/requirements.txt +commands_pre = + pip list + pip check commands = - pip install -U pip - pytest --basetemp={envtmpdir} - + pytest --cov xdatasets + # Coveralls requires access to a repo token set in .coveralls.yml in order to report stats + coveralls: - coveralls diff --git a/xdatasets/__init__.py b/xdatasets/__init__.py index 3d59df1..c0ffa9c 100644 --- a/xdatasets/__init__.py +++ b/xdatasets/__init__.py @@ -1,6 +1,8 @@ -from .tutorial import load_dataset, list_available_datasets +"""Easy access to Earth observation datasets with xarray.""" + from .core import Query +from .tutorial import list_available_datasets, load_dataset __author__ = """Sebastien Langlois""" __email__ = "sebastien.langlois62@gmail.com" -__version__ = '0.2.10' \ No newline at end of file +__version__ = "0.3.0" diff --git a/xdatasets/core.py b/xdatasets/core.py index 9597bcc..716cc11 100644 --- a/xdatasets/core.py +++ b/xdatasets/core.py @@ -1,87 +1,82 @@ -from typing import Sequence, Tuple, Union, Dict, List, Optional, Callable, Any -import warnings import logging +import logging.config +import warnings +from typing import Any, Callable, Dict, List, Optional, Union -import intake import geopandas as gpd +import intake import xarray as xr -import hvplot.xarray -import hvplot.pandas -from .validations import _validate_space_params -from .workflows import climate_request, hydrometric_request, user_provided_dataset from .scripting import LOGGING_CONFIG from .utils import cache_catalog +from .validations import _validate_space_params +from .workflows import climate_request, hydrometric_request, user_provided_dataset 
logging.config.dictConfig(LOGGING_CONFIG) logger = logging.getLogger(__name__) -url_path = 'https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml' +url_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml" __all__ = ["Query"] class Query: + """The Query class. + + The Query interface facilitates access to analysis-ready earth observation datasets and allows for + spatiotemporal operations to be performed based on user queries. - """"The Query interface facilitates access to analysis-ready - earth observation datasets and allows for spatiotemporal - operations to be performed based on user queries. - - Parameters + Attributes ---------- datasets : str, list, dict-like - - If str, a dataset name, i.e.: era5_land_reanalysis - - If list, a list of dataset names, i.e.: [era5_single_levels_reanalysis, era5_land_reanalysis] - - If dictionary, it should map dataset names to their corresponding requested - content such as some desired variables. This allows more flexibility in the request. - i.e.: {era5_land_reanalysis: {'variables': ['t2m', 'tp]}, - era5_single_levels_reanalysis: {'variables': 't2m'} - } - Currently, accepted key, value pairs for a mapping argument include the following: - =========== ============== - Key Variables - =========== ============== - variables str, List[str] - =========== ============== - - The list of available datasets in this library can be accessed here: - # Coming soon! + If a str, a dataset name, i.e.: era5_land_reanalysis. + If a list, a list of dataset names, i.e.: [era5_single_levels_reanalysis, era5_land_reanalysis]. + If a dictionary, it should map dataset names to their corresponding requested + content such as some desired variables. See the notes below for more details. The list of available datasets in this library is coming soon! space : dict-like A dictionary that maps spatial parameters with their corresponding value. - More information on accepted key/value pairs : :py:meth:`~xdatasets.Query._resolve_space_params` + More information on accepted key/value pairs : :py:meth:`~xdatasets.Query._resolve_space_params` time : dict-like A dictionary that maps temporal parameters with their corresponding value. - More information on accepted key/value pairs : :py:meth:`~xdatasets.Query._resolve_time_params` - catalog_path: str - URL for the intake catalog which provides access to the datasets. While - this library provides its own intake catalog, users have the option to - provide their own catalog, which can be particularly beneficial for + More information on accepted key/value pairs : :py:meth:`~xdatasets.Query._resolve_time_params` + catalog_path : str + URL for the intake catalog which provides access to the datasets. While this library provides its own + intake catalog, users have the option to provide their own catalog, which can be particularly beneficial for private datasets or if different configurations are needed. + Notes + ----- + The dictionary approach allows more flexibility in the request. i.e.: + + >>> query = { + ... era5_land_reanalysis: {"variables": ["t2m", "tp"]}, + ... era5_single_levels_reanalysis: {"variables": "t2m"}, + ... } + + Currently, accepted key, value pairs for a mapping argument include the following: + + >>> {"variables": Union[str, List[str]]} + Examples -------- Create data: >>> sites = { - ... 'Montreal' : (45.508888, -73.561668), - ... 'New York': (40.730610, -73.935242), - ... 'Miami': (25.761681, -80.191788) + ... 
"Montreal": (45.508888, -73.561668), + ... "New York": (40.730610, -73.935242), + ... "Miami": (25.761681, -80.191788), ... } >>> query = { - ... "datasets": 'era5_land_reanalysis_dev', - ... "space": { - ... "clip": "point", - ... "geometry": sites - ... }, - ... "time": { + ... "datasets": "era5_land_reanalysis_dev", + ... "space": {"clip": "point", "geometry": sites}, + ... "time": { ... "timestep": "D", - ... "aggregation": {"tp": np.nansum, - ... "t2m": np.nanmean}, - ... "start": '1950-01-01', - ... "end": '1955-12-31', - ... "timezone": 'America/Montreal', + ... "aggregation": {"tp": np.nansum, "t2m": np.nanmean}, + ... "start": "1950-01-01", + ... "end": "1955-12-31", + ... "timezone": "America/Montreal", ... }, ... } >>> xds = xd.Query(**query) @@ -101,16 +96,16 @@ class Query: pangeo-forge:inputs_hash: 1622c0abe9326bfa4d6ee6cdf817fccb1ef1661046f30f... pangeo-forge:recipe_hash: f2b6c75f28693bbae820161d5b71ebdb9d740dcdde0666... pangeo-forge:version: 0.9.4 - - """ - def __init__(self, - datasets: Union[str, List[str], Dict[str, Union[str, List[str]]]], - space: Dict[str, Union[str, List[str]]] = dict(), - time: Dict[str, Union[str, List[str]]] = dict(), - catalog_path: str = url_path - )-> None: + """ + def __init__( + self, + datasets: Union[str, List[str], Dict[str, Union[str, List[str]]]], + space: Dict[str, Union[str, List[str]]] = dict(), + time: Dict[str, Union[str, List[str]]] = dict(), + catalog_path: str = url_path, + ) -> None: # We cache the catalog's yaml files for easier access behind corporate firewalls catalog_path = cache_catalog(catalog_path) @@ -118,22 +113,17 @@ def __init__(self, self.datasets = datasets self.space = self._resolve_space_params(**space) self.time = self._resolve_time_params(**time) - - self.load_query(datasets=self.datasets, - space=self.space, - time=self.time) - - - def _resolve_space_params(self, - clip: str = None, - geometry: Union[Dict[str, tuple], gpd.GeoDataFrame] = None, - averaging: Optional[bool] = False, - unique_id: Optional[str] = None - )-> Dict: - - - """ - Resolves and validates user-provided space params + + self.load_query(datasets=self.datasets, space=self.space, time=self.time) + + def _resolve_space_params( + self, + clip: str = None, + geometry: Union[Dict[str, tuple], gpd.GeoDataFrame] = None, + averaging: Optional[bool] = False, + unique_id: Optional[str] = None, + ) -> Dict: + """Resolves and validates user-provided space params. Parameters ---------- @@ -141,53 +131,54 @@ def _resolve_space_params(self, Which kind of clip operation to perform on geometry. Possible values are one of "polygon", "point" or "bbox". 
geometry : gdf.DataFrame, Dict[str, Tuple] - Geometry/geometries on which to perform spatial operations + Geometry/geometries on which to perform spatial operations averaging : bool, optional Whether to spatially average the arrays within a geometry or not unique_id : str, optional a column name, if gdf.DataFrame is provided, to identify each unique geometry """ - space = locals() - space.pop('self') + space.pop("self") assert _validate_space_params(**space) - - if isinstance(geometry, gpd.GeoDataFrame ): + + if isinstance(geometry, gpd.GeoDataFrame): geometry = geometry.reset_index(drop=True) # We created a new dict based on user-provided parameters # TODO : adapt all parameters before requesting any operations on datasets - args = {'clip': clip, - 'geometry': geometry, - 'averaging': averaging, - 'unique_id': unique_id} + args = { + "clip": clip, + "geometry": geometry, + "averaging": averaging, + "unique_id": unique_id, + } return args - def _resolve_time_params(self, - timestep: Optional[str] = None, - aggregation: Optional[Dict[str, Union[Callable[..., Any], List[Callable[..., Any]]]]] = None, - start: Optional[bool] = None, - end: Optional[str] = None, - timezone: Optional[str] = None, - minimum_duration: Optional[str] = None, - ) -> Dict: - - - """ - Resolves and validates user-provided time params + def _resolve_time_params( + self, + timestep: Optional[str] = None, + aggregation: Optional[ + Dict[str, Union[Callable[..., Any], List[Callable[..., Any]]]] + ] = None, + start: Optional[bool] = None, + end: Optional[str] = None, + timezone: Optional[str] = None, + minimum_duration: Optional[str] = None, + ) -> Dict: + """Resolves and validates user-provided time params. Parameters ---------- timestep : str, optional In which time step should the data be returned - Possible values : http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases + Possible values: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases aggregation : Dict[str, callable], optional Mapping that associates a variable name with the aggregation function to be applied to it. Function which can be called in the form `f(x, axis=axis, **kwargs)` to return the result of reducing an - np.ndarray over an integer valued axis. This parameter is required + np.ndarray over an integer valued axis. This parameter is required should the `timestep` argument be passed. start : str, optional Start date of the selected time period. 
@@ -202,31 +193,33 @@ def _resolve_time_params(self, Possible values are listed here: https://gist.github.com/heyalexej/8bf688fd67d7199be4a1682b3eec7568 minimum_duration : str, optional - Minimum duration of a time series (id) in order to be kept - Possible values : http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases + Minimum duration of a time series (id) in order to be kept + Possible values: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases """ - space = locals() - space.pop('self') + space.pop("self") + + # assert _validate_time_params(**space) - #assert _validate_time_params(**space) - # We created a new dict based on user-provided parameters # TODO : adapt all parameters before requesting any operations on datasets - args = {'timestep': timestep, - 'aggregation': aggregation, - 'start': start, - 'end': end, - 'timezone': timezone, - 'minimum_duration': minimum_duration} + args = { + "timestep": timestep, + "aggregation": aggregation, + "start": start, + "end": end, + "timezone": timezone, + "minimum_duration": minimum_duration, + } return args - - def load_query(self, - datasets: Union[str, Dict[str, Union[str, List[str]]]], - space: Dict[str, Union[str, List[str]]], - time): - + + def load_query( + self, + datasets: Union[str, Dict[str, Union[str, List[str]]]], + space: Dict[str, Union[str, List[str]]], + time, + ): # Get all datasets in query if isinstance(datasets, str): datasets_name = [datasets] @@ -237,34 +230,41 @@ def load_query(self, # Load data for each dataset dsets = [] for dataset_name in datasets_name: - data = None + # data = None # FIXME: This is never used. kwargs = {} try: - variables_name = self.datasets[dataset_name]['variables'] + variables_name = self.datasets[dataset_name]["variables"] if isinstance(variables_name, str): variables_name = [variables_name] except: variables_name = None pass try: - kwargs = {k:v for k,v in self.datasets[dataset_name].items() if k not in ['variables']} + kwargs = { + k: v + for k, v in self.datasets[dataset_name].items() + if k not in ["variables"] + } except: pass - ds_one = self._process_one_dataset(dataset_name=dataset_name, - variables=variables_name, - space=space, - time=time, - **kwargs - ) + ds_one = self._process_one_dataset( + dataset_name=dataset_name, + variables=variables_name, + space=space, + time=time, + **kwargs, + ) dsets.append(ds_one) - + try: # Try naively merging datasets into single dataset ds = xr.merge(dsets) ds = ds except: - logging.warn("Couldn't merge datasets so we pass a dictionary of datasets. ") + logging.warn( + "Couldn't merge datasets so we pass a dictionary of datasets. 
" + ) # Look into passing a DataTree instead ds = dsets pass @@ -273,64 +273,40 @@ def load_query(self, return self - def _process_one_dataset(self, - dataset_name, - variables, - space, - time, - **kwargs): - + def _process_one_dataset(self, dataset_name, variables, space, time, **kwargs): data = None - if 'data' in kwargs: - data = kwargs['data'] + if "data" in kwargs: + data = kwargs["data"] - - if data != None and isinstance(data, xr.Dataset): - dataset_category = 'user-provided' + if data is not None and isinstance(data, xr.Dataset): + dataset_category = "user-provided" elif isinstance(dataset_name, str): - dataset_category = [category for category in self.catalog._entries.keys() - for name in self.catalog[category]._entries.keys() - if name == dataset_name][0] - - - if dataset_category in ['atmosphere']: + dataset_category = [ + category + for category in self.catalog._entries.keys() + for name in self.catalog[category]._entries.keys() + if name == dataset_name + ][0] + + if dataset_category in ["atmosphere"]: with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - ds = climate_request(dataset_name, - variables, - space, - time, - self.catalog) - - elif dataset_category in ['hydrology']: + ds = climate_request(dataset_name, variables, space, time, self.catalog) + + elif dataset_category in ["hydrology"]: with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - ds = hydrometric_request(dataset_name, - variables, - space, - time, - self.catalog, - **kwargs) - - elif dataset_category in ['user-provided']: + ds = hydrometric_request( + dataset_name, variables, space, time, self.catalog, **kwargs + ) + + elif dataset_category in ["user-provided"]: with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) - ds = user_provided_dataset(dataset_name, - variables, - space, - time, - data) - - + ds = user_provided_dataset(dataset_name, variables, space, time, data) + return ds - + def bbox_clip(self, ds): - """ - """ return ds.where(~ds.isnull(), drop=True) - - - - - diff --git a/xdatasets/scripting.py b/xdatasets/scripting.py index f77381c..3302fef 100644 --- a/xdatasets/scripting.py +++ b/xdatasets/scripting.py @@ -1,7 +1,3 @@ -import pathlib -import sys -from datetime import datetime as dt - MiB = int(pow(2, 20)) _CONSOLE_FORMAT = "%(message)s" @@ -9,37 +5,31 @@ __all__ = ["LOGGING_CONFIG"] -LOGGING_CONFIG = { - 'version': 1, - 'disable_existing_loggers': False, - 'formatters': { - 'standard': { - 'format': _CONSOLE_FORMAT - }, +LOGGING_CONFIG = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": {"format": _CONSOLE_FORMAT}, }, - 'handlers': { - 'default': { - 'level': 'INFO', - 'formatter': 'standard', - 'class': 'logging.StreamHandler', - 'stream': 'ext://sys.stdout', # Default is stderr + "handlers": { + "default": { + "level": "INFO", + "formatter": "standard", + "class": "logging.StreamHandler", + "stream": "ext://sys.stdout", # Default is stderr }, }, - 'loggers': { - '': { # root logger - 'handlers': ['default'], - 'level': 'WARNING', - 'propagate': False + "loggers": { + "": { # root logger + "handlers": ["default"], + "level": "WARNING", + "propagate": False, }, - 'xdatasets': { - 'handlers': ['default'], - 'level': 'INFO', - 'propagate': False + "xdatasets": {"handlers": ["default"], "level": "INFO", "propagate": False}, + "__main__": { # if __name__ == '__main__' + "handlers": ["default"], + "level": "DEBUG", + "propagate": False, }, - '__main__': 
{ # if __name__ == '__main__' - 'handlers': ['default'], - 'level': 'DEBUG', - 'propagate': False - }, - } -} \ No newline at end of file + }, +} diff --git a/xdatasets/spatial.py b/xdatasets/spatial.py index 6ae5df6..dc90f96 100644 --- a/xdatasets/spatial.py +++ b/xdatasets/spatial.py @@ -1,78 +1,89 @@ -from typing import Sequence, Tuple, Union, Dict, List, Optional +import logging +import warnings -from clisops.core.subset import subset_shape, subset_time, create_mask, shape_bbox_indexer, subset_gridpoint -from clisops.core.average import average_shape +import pandas as pd import xarray as xr +from clisops.core.subset import shape_bbox_indexer, subset_gridpoint from tqdm import tqdm -import logging -import xagg_no_xesmf_deps as xa -import pandas as pd from .utils import HiddenPrints +try: + import xagg as xa +except ImportError: + warnings.warn("xagg is not installed. Please install it with `pip install xagg`") + xa = None + def bbox_ds(ds_copy, geom): indexer = shape_bbox_indexer(ds_copy, geom) da = ds_copy.isel(indexer) - da = da.chunk({'latitude':-1, 'longitude':-1}) + da = da.chunk({"latitude": -1, "longitude": -1}) return da -def clip_by_bbox(ds, - space, - dataset_name - ): +def clip_by_bbox(ds, space, dataset_name): logging.info(f"Spatial operations: processing bbox with {dataset_name}") - indexer = shape_bbox_indexer(ds, space['geometry']) + indexer = shape_bbox_indexer(ds, space["geometry"]) ds_copy = ds.isel(indexer).copy() return ds_copy - + + def create_weights_mask(da, poly): - - weightmap = xa.pixel_overlaps(da, poly, subset_bbox=True) + if xa is None: + raise ImportError( + "xagg is not installed. Please install it with `pip install xagg`" + ) - pixels = pd.DataFrame(index=weightmap.agg['pix_idxs'][0], - data=list(map(list, weightmap.agg['coords'][0])), - columns=['latitude','longitude'] - ) + weightmap = xa.pixel_overlaps(da, poly, subset_bbox=True) - weights = pd.DataFrame(index=weightmap.agg['pix_idxs'][0], - data=weightmap.agg['rel_area'][0][0].tolist(), - columns=['weights']) + pixels = pd.DataFrame( + index=weightmap.agg["pix_idxs"][0], + data=list(map(list, weightmap.agg["coords"][0])), + columns=["latitude", "longitude"], + ) + weights = pd.DataFrame( + index=weightmap.agg["pix_idxs"][0], + data=weightmap.agg["rel_area"][0][0].tolist(), + columns=["weights"], + ) df = pd.merge(pixels, weights, left_index=True, right_index=True) - return df.set_index(['latitude', 'longitude']).to_xarray() + return df.set_index(["latitude", "longitude"]).to_xarray() + def aggregate(ds_in, ds_weights): - return (ds_in*ds_weights.weights).sum(['latitude','longitude'], min_count=1) + return (ds_in * ds_weights.weights).sum(["latitude", "longitude"], min_count=1) -def clip_by_polygon(ds, - space, - dataset_name - ): +def clip_by_polygon(ds, space, dataset_name): # We are not using clisops for weighted averages because it is too unstable for now and requires conda environment. 
# We use a modified version of the xagg package from which we have removed the xesmf/esmpy dependency - - indexer = shape_bbox_indexer(ds, space['geometry']) + indexer = shape_bbox_indexer(ds, space["geometry"]) ds_copy = ds.isel(indexer).copy() - + arrays = [] - pbar = tqdm(space['geometry'].iterrows()) + pbar = tqdm(space["geometry"].iterrows()) for idx, row in pbar: - item = row[space['unique_id']] if space['unique_id'] != None and space['unique_id'] in row else idx - pbar.set_description(f"Spatial operations: processing polygon {item} with {dataset_name}") - - geom = space['geometry'].iloc[[idx]] + item = ( + row[space["unique_id"]] + if space["unique_id"] is not None and space["unique_id"] in row + else idx + ) + pbar.set_description( + f"Spatial operations: processing polygon {item} with {dataset_name}" + ) + + geom = space["geometry"].iloc[[idx]] da = bbox_ds(ds_copy, geom) # Average data array over shape - #da = average_shape(da, shape=geom) + # da = average_shape(da, shape=geom) with HiddenPrints(): - ds_weights = create_weights_mask(da.isel(time=0), geom) - if space['averaging'] is True: + ds_weights = create_weights_mask(da.isel(time=0), geom) + if space["averaging"] is True: da = aggregate(da, ds_weights) else: da = xr.merge([da, ds_weights]) @@ -80,31 +91,37 @@ def clip_by_polygon(ds, da = da.expand_dims({"geom": geom.index.values}) arrays.append(da) - data = xr.concat(arrays, dim='geom') + data = xr.concat(arrays, dim="geom") - if 'unique_id' in space: + if "unique_id" in space: try: data = data.swap_dims({"geom": space["unique_id"]}) - data = data.drop('geom') - - if space['unique_id'] not in data.coords: - data = data.assign_coords({space['unique_id']: (space['unique_id'], - space['geometry'][space['unique_id']])}) - except: + data = data.drop_vars("geom") + + if space["unique_id"] not in data.coords: + data = data.assign_coords( + { + space["unique_id"]: ( + space["unique_id"], + space["geometry"][space["unique_id"]], + ) + } + ) + except KeyError: pass return data def clip_by_point(ds, space, dataset_name): + # TODO : adapt logic for coordinate names - #TODO : adapt logic for coordinate names - - logger = logging.getLogger() logging.info(f"Spatial operations: processing points with {dataset_name}") - lat,lon = zip(*space['geometry'].values()) - data = subset_gridpoint(ds.rename({'latitude':'lat', 'longitude':'lon'}), lon=list(lon), lat=list(lat)) - data = data.rename({'lat':'latitude', 'lon':'longitude'}) + lat, lon = zip(*space["geometry"].values()) + data = subset_gridpoint( + ds.rename({"latitude": "lat", "longitude": "lon"}), lon=list(lon), lat=list(lat) + ) + data = data.rename({"lat": "latitude", "lon": "longitude"}) - data = data.assign_coords({'site': ('site', list(space['geometry'].keys()))}) - return data \ No newline at end of file + data = data.assign_coords({"site": ("site", list(space["geometry"].keys()))}) + return data diff --git a/xdatasets/temporal.py b/xdatasets/temporal.py index a6f0740..1c2e08e 100644 --- a/xdatasets/temporal.py +++ b/xdatasets/temporal.py @@ -1,115 +1,111 @@ -import xarray as xr -from tqdm import tqdm import numpy as np import pandas as pd +import xarray as xr +from tqdm import tqdm -def change_timezone(ds, - input_timezone, - output_timezone = None - ): - - if output_timezone == None: +def change_timezone(ds, input_timezone, output_timezone=None): + if output_timezone is None: output_timezone = input_timezone - time = ds['time'].to_index() + time = ds["time"].to_index() time_input_tz = time.tz_localize(input_timezone) time_output_tz 
= time_input_tz.tz_convert(output_timezone).tz_localize(None) - ds = ds.update({'time': time_output_tz}) - ds.attrs['timezone'] = output_timezone + ds = ds.update({"time": time_output_tz}) + ds.attrs["timezone"] = output_timezone return ds - -def temporal_aggregation(ds, - time, - dataset_name, - spatial_agg): - + + +def temporal_aggregation(ds, time, dataset_name, spatial_agg): ds_new = xr.Dataset(attrs=ds.attrs) ds_list = [] pbar = tqdm(ds.keys()) for var in pbar: - pbar.set_description(f"Temporal operations: processing {var} with {dataset_name}") + pbar.set_description( + f"Temporal operations: processing {var} with {dataset_name}" + ) # Verify if requested timestep is higher or lower or equal to dataset's native timestep - # if requested timestep is higher - if 'aggregation' in time and var in time['aggregation'].keys(): - operation = time['aggregation'][var] if var in time['aggregation'].keys() else None - operation = operation if isinstance(operation, list) else [operation] + # if requested timestep is higher + if "aggregation" in time and var in time["aggregation"].keys(): + operation = ( + time["aggregation"][var] if var in time["aggregation"].keys() else None + ) + operation = operation if isinstance(operation, list) else [operation] oper_list = [] for oper in operation: - var_name = f"{var}_{oper.__name__}" - da = ds[var].resample(time=time['timestep']).reduce(oper, dim='time').expand_dims({'time_agg':[oper.__name__], - 'spatial_agg': [spatial_agg], - 'timestep': [time['timestep']]}) - #da = da.transpose('id','time', 'timestep','time_agg','spatial_agg') + # var_name = f"{var}_{oper.__name__}" + da = ( + ds[var] + .resample(time=time["timestep"]) + .reduce(oper, dim="time") + .expand_dims( + { + "time_agg": [oper.__name__], + "spatial_agg": [spatial_agg], + "timestep": [time["timestep"]], + } + ) + ) + # da = da.transpose('id','time', 'timestep','time_agg','spatial_agg') oper_list.append(da) # ds_new = ds_new.merge(xr.concat(oper_list, dim='time_agg')) - ds_list.append(xr.concat(oper_list, dim='time_agg')) + ds_list.append(xr.concat(oper_list, dim="time_agg")) else: try: ds_new = ds_new.merge(ds[var]) except: pass - # TODO: return error if cannot merge for inconstitant query - + # TODO: return error if cannot merge for inconstant query + if ds_list: ds_new = xr.merge(ds_list) - - # if requested timestep is lower # bfill the timestep and add a warning # if requested timestep is equal : do nothing - # print(ds_new.tp) + # print(ds_new.tp) return ds_new -def ajust_dates(ds, - time): - """ - - """ +def ajust_dates(ds, time): + start = time["start"] + end = time["end"] - start = time['start'] - end = time['end'] - - - if start != None: + if start is not None: ds["start_date"] = xr.where( - ds.start_date < pd.Timestamp(start), - np.datetime64(start), - ds.start_date, - ) - - if end != None: + ds.start_date < pd.Timestamp(start), + np.datetime64(start), + ds.start_date, + ) + + if end is not None: ds["end_date"] = xr.where( - ds.end_date > pd.Timestamp(end), - np.datetime64(end), - ds.end_date, - ) - + ds.end_date > pd.Timestamp(end), + np.datetime64(end), + ds.end_date, + ) + return ds -# Only keep ids where at least 15 years of data is available +# Only keep ids where at least 15 years of data is available -def minimum_duration(ds, - time): +def minimum_duration(ds, time): + minimum_duration_value, unit = time["minimum_duration"] - minimum_duration_value, unit = time['minimum_duration'] + indexer = (ds.end_date - ds.start_date) > pd.to_timedelta( + minimum_duration_value, unit=unit + ) 
- indexer = (ds.end_date - ds.start_date) > pd.to_timedelta(minimum_duration_value, unit=unit) - if indexer.chunks is not None: indexer = indexer.compute() - return ds.where(indexer, - drop=True - ) + return ds.where(indexer, drop=True) diff --git a/xdatasets/tutorial.py b/xdatasets/tutorial.py index 0664332..9aca7e9 100644 --- a/xdatasets/tutorial.py +++ b/xdatasets/tutorial.py @@ -1,30 +1,31 @@ -import itertools +import uuid from functools import reduce -from xarray.core.formatting_html import _mapping_section, _load_static_files, format_dims, _obj_repr, _icon from html import escape -import uuid -from IPython.core.display import HTML +from IPython.core.display import HTML +from xarray.core.formatting_html import _icon, _mapping_section, _obj_repr -catalog_path = 'https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml' +catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml" def open_dataset( - name, + name: str, **kws, ): - """ - Open a dataset from the online public repository (requires internet). + r"""Open a dataset from the online public repository (requires internet). + Available datasets: * ``"era5_reanalysis_single_levels"``: ERA5 reanalysis subset (t2m and tp) - * ``"cehq"``: CEHQ flow and water levels observations + * ``"cehq"``: CEHQ flow and water levels observations + Parameters ---------- name : str Name of the file containing the dataset. e.g. 'era5_reanalysis_single_levels' - **kws : dict, optional + \*\*kws : dict, optional Passed to xarray.open_dataset + See Also -------- xarray.open_dataset @@ -38,17 +39,23 @@ def open_dataset( ) from e cat = intake.open_catalog(catalog_path) - dataset_info = [(category, dataset_name) for category in cat._entries.keys() - for dataset_name in cat[category]._entries.keys() if dataset_name == name] - - data = reduce(lambda array, index : array[index], dataset_info, cat) - - if data.describe()['driver'][0] == 'geopandasfile': - data = data.read() - elif data.describe()['driver'][0] == 'zarr': - data = data.to_dask() + dataset_info = [ + (category, dataset_name) + for category in cat._entries.keys() + for dataset_name in cat[category]._entries.keys() + if dataset_name == name + ] + + data = reduce(lambda array, index: array[index], dataset_info, cat) + + if data.describe()["driver"][0] == "geopandasfile": + data = data.read() + elif data.describe()["driver"][0] == "zarr": + data = data.to_dask() else: - raise NotImplementedError(f'Dataset {name} is not available. Please request further datasets to our github issues pages') + raise NotImplementedError( + f"Dataset {name} is not available. Please request further datasets to our github issues pages" + ) return data @@ -64,7 +71,6 @@ def summarize_coords(variables): def summarize_variable(name, is_index=False, dtype=None): - cssclass_idx = " class='xr-has-index'" if is_index else "" name = escape(str(name)) @@ -85,10 +91,10 @@ def summarize_variable(name, is_index=False, dtype=None): f"{data_icon}" ) + def list_available_datasets(): - """ - Open, load lazily, and close a dataset from the public online repository - (requires internet). + """Open, load lazily, and close a dataset from the public online repository (requires internet). 
+ See Also -------- open_dataset @@ -106,29 +112,37 @@ def list_available_datasets(): # This will need refactor if the catalog has more than 2 levels # list(itertools.chain.from_iterable([list(cat[name].keys()) for name in cat._entries.keys()])) - datasets_catalog = \ - {field: list(sorted(cat[field]._entries.keys())) for field in sorted(cat._entries.keys())} + datasets_catalog = { + field: list(sorted(cat[field]._entries.keys())) + for field in sorted(cat._entries.keys()) + } def add_section(datasets_catalog): - return [_mapping_section(datasets, name=field.capitalize(), - details_func=summarize_coords, - max_items_collapse=25, - expand_option_name="display_expand_coords") for field, datasets in datasets_catalog.items()] - - a = _obj_repr('', [f"
{escape('xdatasets.Catalog')}
"], - add_section(datasets_catalog)) + return [ + _mapping_section( + datasets, + name=field.capitalize(), + details_func=summarize_coords, + max_items_collapse=25, + expand_option_name="display_expand_coords", + ) + for field, datasets in datasets_catalog.items() + ] + + a = _obj_repr( + "", + [f"
{escape('xdatasets.Catalog')}
"], + add_section(datasets_catalog), + ) return HTML(a) def load_dataset(*args, **kwargs): - """ - Open, load lazily, and close a dataset from the online repository - (requires internet). + """Open, load lazily, and close a dataset from the online repository (requires internet). + See Also -------- open_dataset """ - return open_dataset(*args, **kwargs) - diff --git a/xdatasets/utils.py b/xdatasets/utils.py index 73f09cf..b7b0a89 100644 --- a/xdatasets/utils.py +++ b/xdatasets/utils.py @@ -1,37 +1,42 @@ -import datetime -import time +import os +import sys +import tempfile +import urllib.request from functools import reduce -import os, sys -import urllib from pathlib import Path -import tempfile + import intake -catalog_path = 'https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml' +catalog_path = "https://raw.githubusercontent.com/hydrocloudservices/catalogs/main/catalogs/main.yaml" + def open_dataset( - name, + name: str, catalog, **kws, ): - """ - Open a dataset from the online public repository (requires internet). + r"""Open a dataset from the online public repository (requires internet). + + Notes + ----- Available datasets: - * ``"era5_reanalysis_single_levels"``: ERA5 reanalysis subset (t2m and tp) - * ``"cehq"``: CEHQ flow and water levels observations + `"era5_reanalysis_single_levels"`: ERA5 reanalysis subset (t2m and tp) + `"cehq"`: CEHQ flow and water levels observations + Parameters ---------- name : str Name of the file containing the dataset. e.g. 'era5_reanalysis_single_levels' - **kws : dict, optional + \*\*kws : dict, optional Passed to xarray.open_dataset + See Also -------- xarray.open_dataset """ try: - import intake + import intake # noqa: F401 except ImportError as e: raise ImportError( "tutorial.open_dataset depends on intake and intake-xarray to download and manage datasets." @@ -39,29 +44,35 @@ def open_dataset( ) from e cat = catalog - dataset_info = [(category, dataset_name) for category in cat._entries.keys() - for dataset_name in cat[category]._entries.keys() if dataset_name == name] - - data = reduce(lambda array, index : array[index], dataset_info, cat) + dataset_info = [ + (category, dataset_name) + for category in cat._entries.keys() + for dataset_name in cat[category]._entries.keys() + if dataset_name == name + ] + + data = reduce(lambda array, index: array[index], dataset_info, cat) # add proxy infos - proxies=urllib.request.getproxies() + proxies = urllib.request.getproxies() storage_options = data.storage_options - storage_options['config_kwargs']['proxies'] = proxies + storage_options["config_kwargs"]["proxies"] = proxies - if data.describe()['driver'][0] == 'geopandasfile': - data = data(storage_options=storage_options).read() - elif data.describe()['driver'][0] == 'zarr': + if data.describe()["driver"][0] == "geopandasfile": + data = data(storage_options=storage_options).read() + elif data.describe()["driver"][0] == "zarr": data = data(storage_options=storage_options).to_dask() else: - raise NotImplementedError(f'Dataset {name} is not available. Please request further datasets to our github issues pages') - return data + raise NotImplementedError( + f"Dataset {name} is not available. 
Please request further datasets to our github issues pages" + ) + return data class HiddenPrints: def __enter__(self): self._original_stdout = sys.stdout - sys.stdout = open(os.devnull, 'w') + sys.stdout = open(os.devnull, "w") def __exit__(self, exc_type, exc_val, exc_tb): sys.stdout.close() @@ -69,27 +80,28 @@ def __exit__(self, exc_type, exc_val, exc_tb): def cache_catalog(url): - """"Cache the catalog in the system's temporary folder for easier access. - This is espacially useful when working behind firewalls or if the remote server - containing the yaml files is down. Looks for http_proxy/https_proxy environment variable + """Cache the catalog in the system's temporary folder for easier access. + + This is especially useful when working behind firewalls or if the remote server + containing the yaml files is down. Looks for http_proxy/https_proxy environment variable if the request goes through a proxy. - + Parameters ---------- - url: str + url : str URL for the intake catalog which provides access to the datasets. While - this library provides its own intake catalog, users have the option to - provide their own catalog, which can be particularly beneficial for + this library provides its own intake catalog, users have the option to + provide their own catalog, which can be particularly beneficial for private datasets or if different configurations are needed. """ - proxies=urllib.request.getproxies() + proxies = urllib.request.getproxies() proxy = urllib.request.ProxyHandler(proxies) opener = urllib.request.build_opener(proxy) urllib.request.install_opener(opener) - - tmp_dir = os.path.join(tempfile.gettempdir(), 'catalogs') + + tmp_dir = os.path.join(tempfile.gettempdir(), "catalogs") Path(tmp_dir).mkdir(parents=True, exist_ok=True) - main_catalog_path = os.path.join(tmp_dir,os.path.basename(url)) + main_catalog_path = os.path.join(tmp_dir, os.path.basename(url)) try: urllib.request.urlretrieve(url, main_catalog_path) @@ -102,13 +114,12 @@ def cache_catalog(url): "proxy = 'http://:'" "os.environ['http_proxy'] = proxy" "os.environ['https_proxy'] = proxy" - ) from e + ) from e - for _, value in intake.open_catalog(os.path.join(tmp_dir, os.path.basename(url)))._entries.items(): + for _, value in intake.open_catalog( + os.path.join(tmp_dir, os.path.basename(url)) + )._entries.items(): path = f"{os.path.dirname(url)}/{os.path.basename(value.describe()['args']['path'])}" urllib.request.urlretrieve(path, os.path.join(tmp_dir, os.path.basename(path))) return main_catalog_path - - - diff --git a/xdatasets/validations.py b/xdatasets/validations.py index 8f02321..cb109f5 100644 --- a/xdatasets/validations.py +++ b/xdatasets/validations.py @@ -1,52 +1,56 @@ -from typing import Sequence, Tuple, Union, Dict, List, Optional +import logging +from typing import Dict, Optional, Union import geopandas as gpd -import logging -def _validate_space_params(clip: str, - geometry: Union[Dict[str, tuple], gpd.GeoDataFrame], - averaging: bool = False, - unique_id: Optional[str] = None): +def _validate_space_params( + clip: str, + geometry: Union[Dict[str, tuple], gpd.GeoDataFrame], + averaging: bool = False, + unique_id: Optional[str] = None, +): + _clip_available_methods = ["bbox", "point", "polygon", None] - _clip_available_methods = ['bbox', 'point', 'polygon', None] - if clip not in _clip_available_methods: raise ValueError(f"clip value '{clip}' is not one of {_clip_available_methods}") - + if not isinstance(averaging, bool): raise ValueError(f"averaging value '{averaging}' should be a boolean") - + if not 
(isinstance(unique_id, type(None)) or isinstance(unique_id, str)): raise ValueError(f"unique_id value '{unique_id}' should be a string") - - if unique_id != None: + + if unique_id is not None: if isinstance(geometry, gpd.GeoDataFrame) and unique_id not in geometry.columns: - message = (f"\nunique_id value '{unique_id}' was not found in gpd.GeoDataFrame \n" - f"so a random index will be used instead." - ) + message = ( + f"\nunique_id value '{unique_id}' was not found in gpd.GeoDataFrame \n" + f"so a random index will be used instead." + ) logging.warning(message) if averaging is True and not isinstance(geometry, gpd.GeoDataFrame): - message = (f"\naveraging value '{averaging}' is not necessary \n" - f"because geometry is not a GeoPandas GeoDataFrame.\n" - f"averaging value will be ignored." - ) + message = ( + f"\naveraging value '{averaging}' is not necessary \n" + f"because geometry is not a GeoPandas GeoDataFrame.\n" + f"averaging value will be ignored." + ) + logging.warning(message) + + if averaging is True and clip in ["point"]: + message = ( + f"\naveraging value '{averaging}' is not necessary \n" + f"because clip operation requested is on a {clip}.\n" + f"averaging value will be ignored." + ) logging.warning(message) - - if averaging is True and clip in ['point'] : - message = (f"\naveraging value '{averaging}' is not necessary \n" - f"because clip operation requested is on a {clip}.\n" - f"averaging value will be ignored." - ) - logging.warning(message) - + if unique_id is not None and not isinstance(geometry, gpd.GeoDataFrame): - message = (f"\nunique_id value '{unique_id}' is not necessary \n" - f"because geometry is not a GeoPandas GeoDataFrame.\n" - f"unique_id value will be ignored." - ) + message = ( + f"\nunique_id value '{unique_id}' is not necessary \n" + f"because geometry is not a GeoPandas GeoDataFrame.\n" + f"unique_id value will be ignored." 
+ ) logging.warning(message) - - return True \ No newline at end of file + return True diff --git a/xdatasets/workflows.py b/xdatasets/workflows.py index 0e9e639..3bfb0c9 100644 --- a/xdatasets/workflows.py +++ b/xdatasets/workflows.py @@ -1,42 +1,30 @@ -from typing import Sequence, Tuple, Union, Dict, List, Optional - -from clisops.core.subset import subset_time, shape_bbox_indexer, subset_gridpoint -from clisops.core.average import average_shape -import intake -import geopandas as gpd -import xarray as xr -import numpy as np -import warnings -from tqdm import tqdm -import pandas as pd -from functools import reduce -from dask.distributed import Client import fnmatch -import logging import itertools -from .spatial import clip_by_polygon, clip_by_point, clip_by_bbox -from .temporal import change_timezone, temporal_aggregation, ajust_dates, minimum_duration -from .validations import _validate_space_params +import pandas as pd +import xarray as xr +from clisops.core.subset import subset_time + +from .spatial import clip_by_bbox, clip_by_point, clip_by_polygon +from .temporal import ( + ajust_dates, + change_timezone, + minimum_duration, + temporal_aggregation, +) from .utils import open_dataset -def climate_request(dataset_name, - variables, - space, - time, - catalog): - +def climate_request(dataset_name, variables, space, time, catalog): ds = open_dataset(dataset_name, catalog) rename_dict = { # dim labels (order represents the priority when checking for the dim labels) - "longitude": ["lon", "long", "x"], "latitude": ["lat", "y"], } - dims = list(ds.dims) + dims = list(ds.dims) for key, values in rename_dict.items(): for value in values: if value in dims: @@ -48,54 +36,45 @@ def climate_request(dataset_name, pass # Ajust timezone and then slice time dimension before moving on with spatiotemporal operations - if time["timezone"] != None: + if time["timezone"] is not None: try: # Assume UTC for now, will change when metadata database in up and running - ds = change_timezone(ds, 'UTC', time['timezone']) + ds = change_timezone(ds, "UTC", time["timezone"]) except: - pass # replace by error + pass # replace by error - if time["start"] != None or time["end"] != None: + if time["start"] is not None or time["end"] is not None: try: - start_time = time['start'] if 'start' in time else None - end_time = time['end'] if 'end' in time else None + start_time = time["start"] if "start" in time else None + end_time = time["end"] if "end" in time else None ds = subset_time(ds, start_date=start_time, end_date=end_time) except: - pass # replace by error + pass # replace by error # Spatial operations - if space['clip'] == 'polygon': - spatial_agg = 'polygon' + if space["clip"] == "polygon": + spatial_agg = "polygon" ds = clip_by_polygon(ds, space, dataset_name).load() - elif space['clip'] == 'point': - spatial_agg = 'point' + elif space["clip"] == "point": + spatial_agg = "point" ds = clip_by_point(ds, space, dataset_name).load() - elif space['clip'] == 'bbox': - spatial_agg = 'polygon' + elif space["clip"] == "bbox": + spatial_agg = "polygon" ds = clip_by_bbox(ds, space, dataset_name).load() - - if time["timestep"] != None and time['aggregation'] != None: - ds = temporal_aggregation(ds, - time, - dataset_name, - spatial_agg) + + if time["timestep"] is not None and time["aggregation"] is not None: + ds = temporal_aggregation(ds, time, dataset_name, spatial_agg) # Add source name to dataset - #np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) + # np.warnings.filterwarnings('ignore', 
category=np.VisibleDeprecationWarning) ds = ds.assign_coords(source=("source", [dataset_name])) for var in ds.keys(): ds[var] = ds[var].expand_dims("source", axis=-1) return ds -def hydrometric_request(dataset_name, - variables, - space, - time, - catalog, - **kwargs): - +def hydrometric_request(dataset_name, variables, space, time, catalog, **kwargs): ds = open_dataset(dataset_name, catalog) try: @@ -108,18 +87,21 @@ def hydrometric_request(dataset_name, try: if isinstance(value, str): value = [value] - + # If user provided a wildcard to match a pattern for a specific dimension - if any('*' in pattern or '?' in pattern for pattern in value): - value = list(itertools.chain.from_iterable([fnmatch.filter(ds[key].data, val) for val in value])) - + if any("*" in pattern or "?" in pattern for pattern in value): + value = list( + itertools.chain.from_iterable( + [fnmatch.filter(ds[key].data, val) for val in value] + ) + ) + ds = ds.where(ds[key].isin(value), drop=True) except: # Add warning pass - - # TODO: to implement this feature, We will need the timezone as a coords for each id + # TODO: to implement this feature, We will need the timezone as a coords for each id # # Ajust timezone and then slice time dimension before moving on with spatiotemporal operations # if time["timezone"] != None: @@ -129,13 +111,13 @@ def hydrometric_request(dataset_name, # except: # pass # replace by error - if time["start"] != None or time["end"] != None: + if time["start"] is not None or time["end"] is not None: try: - start_time = time['start'] if 'start' in time else None - end_time = time['end'] if 'end' in time else None + start_time = time["start"] if "start" in time else None + end_time = time["end"] if "end" in time else None ds = subset_time(ds, start_date=start_time, end_date=end_time) except: - pass # replace by error + pass # replace by error # # Spatial operations # TODO : Find all stations within a gdf's mask @@ -150,21 +132,19 @@ def hydrometric_request(dataset_name, # elif space['clip'] == 'bbox': # ds = clip_by_bbox(ds, space, dataset_name).load() - if time["start"] != None or time["end"] != None: - ds = ajust_dates(ds, - time) - - if time["minimum_duration"] != None: - ds = minimum_duration(ds, - time) - - if time["timestep"] != None and time['aggregation'] != None: - if pd.Timedelta(1, unit=time["timestep"]) > pd.Timedelta(1, unit=xr.infer_freq(ds.time)): - ds = temporal_aggregation(ds, - time, - dataset_name) - - # Remove all dimension values that are not required anymore after previous filetring + if time["start"] is not None or time["end"] is not None: + ds = ajust_dates(ds, time) + + if time["minimum_duration"] is not None: + ds = minimum_duration(ds, time) + + if time["timestep"] is not None and time["aggregation"] is not None: + if pd.Timedelta(1, unit=time["timestep"]) > pd.Timedelta( + 1, unit=xr.infer_freq(ds.time) + ): + ds = temporal_aggregation(ds, time, dataset_name) + + # Remove all dimension values that are not required anymore after previous filetring # This returns a cleaner dataset at the cost of a compute # _to_stack = [] # for dim in ds.dims: @@ -183,19 +163,13 @@ def hydrometric_request(dataset_name, return ds -def user_provided_dataset(dataset_name, - variables, - space, - time, - ds): - - +def user_provided_dataset(dataset_name, variables, space, time, ds): try: ds = ds[variables] except: pass - # TODO: to implement this feature, We will need the timezone as a coords for each id + # TODO: to implement this feature, We will need the timezone as a coords for each id # # 
Ajust timezone and then slice time dimension before moving on with spatiotemporal operations # if time["timezone"] != None: @@ -205,33 +179,33 @@ def user_provided_dataset(dataset_name, # except: # pass # replace by error - if time["start"] != None or time["end"] != None: + if time["start"] is not None or time["end"] is not None: try: - start_time = time['start'] if 'start' in time else None - end_time = time['end'] if 'end' in time else None + start_time = time["start"] if "start" in time else None + end_time = time["end"] if "end" in time else None ds = subset_time(ds, start_date=start_time, end_date=end_time) except: - pass # replace by error + pass # replace by error # Spatial operations - if space['clip'] == 'polygon': + if space["clip"] == "polygon": ds = clip_by_polygon(ds, space, dataset_name).load() - elif space['clip'] == 'point': + elif space["clip"] == "point": ds = clip_by_point(ds, space, dataset_name).load() - elif space['clip'] == 'bbox': + elif space["clip"] == "bbox": ds = clip_by_bbox(ds, space, dataset_name).load() - - if time["timestep"] != None and time['aggregation'] != None: - if pd.Timedelta(1, unit=time["timestep"]) > pd.Timedelta(1, unit=xr.infer_freq(ds.time)): - ds = temporal_aggregation(ds, - time, - dataset_name) + + if time["timestep"] is not None and time["aggregation"] is not None: + if pd.Timedelta(1, unit=time["timestep"]) > pd.Timedelta( + 1, unit=xr.infer_freq(ds.time) + ): + ds = temporal_aggregation(ds, time, dataset_name) # Add source name to dataset - #np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) + # np.warnings.filterwarnings('ignore', category=np.VisibleDeprecationWarning) ds = ds.assign_coords(source=("source", [dataset_name])) for var in ds.keys(): ds[var] = ds[var].expand_dims("source", axis=-1) - return ds \ No newline at end of file + return ds
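
Note for reviewers: the refactored ``Query`` docstring in ``xdatasets/core.py`` above doubles as a usage example. A minimal, runnable sketch of that same query is reproduced below; it assumes ``xdatasets`` is installed and importable as ``xd``, that the public intake catalog is reachable, and that the ``era5_land_reanalysis_dev`` dataset it references is actually published in that catalog.

```python
# Hypothetical usage sketch, restating the Query docstring example from
# xdatasets/core.py in this diff. Requires network access to the public
# intake catalog; "era5_land_reanalysis_dev" is assumed to exist there.
import numpy as np

import xdatasets as xd

# Points of interest, given as (latitude, longitude) pairs.
sites = {
    "Montreal": (45.508888, -73.561668),
    "New York": (40.730610, -73.935242),
    "Miami": (25.761681, -80.191788),
}

query = {
    "datasets": "era5_land_reanalysis_dev",
    # Clip the gridded dataset to the requested points.
    "space": {"clip": "point", "geometry": sites},
    # Resample to daily values, summing precipitation and averaging
    # 2 m temperature over 1950-1955 in the America/Montreal timezone.
    "time": {
        "timestep": "D",
        "aggregation": {"tp": np.nansum, "t2m": np.nanmean},
        "start": "1950-01-01",
        "end": "1955-12-31",
        "timezone": "America/Montreal",
    },
}

# Building the Query runs the spatial clipping and temporal aggregation
# workflows (clip_by_point, temporal_aggregation) shown in this diff.
xds = xd.Query(**query)
```

The keys accepted under ``space`` and ``time`` are the ones validated by ``_resolve_space_params`` and ``_resolve_time_params`` in the patch above; anything beyond that (for example, extra per-dataset keyword arguments) is passed through to the dataset-specific request functions in ``xdatasets/workflows.py``.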