From af893993f40d65bbd65e770f4ab5713dbaba5b51 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Tue, 24 Oct 2023 19:42:40 +0200 Subject: [PATCH 1/8] Start work on mapped matrix list --- matrix_utils/mapped_matrix.py | 60 ++++++++++++++++++------------ matrix_utils/mapped_matrix_list.py | 56 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 24 deletions(-) create mode 100644 matrix_utils/mapped_matrix_list.py diff --git a/matrix_utils/mapped_matrix.py b/matrix_utils/mapped_matrix.py index 7d915bf..6d396c9 100644 --- a/matrix_utils/mapped_matrix.py +++ b/matrix_utils/mapped_matrix.py @@ -12,30 +12,6 @@ class MappedMatrix: - """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed. - - `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer. - - `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array. - - Args: - - * packages: A list of Ddatapackage objects. - * matrix: The string identifying the matrix to be built. - * use_vectors: Flag to use vector data from datapackages - * use_arrays: Flag to use array data from datapackages - * use_distributions: Flag to use `stats_arrays` distribution data from datapackages - * row_mapper: Optional instance of `ArrayMapper`. Used when matrices must align. - * col_mapper: Optional instance of `ArrayMapper`. Used when matrices must align. - * seed_override: Optional integer. Overrides the RNG seed given in the datapackage, if any. - * indexer_override: Parameter for custom indexers. See above. - * diagonal: If True, only use the `row` indices to build a diagonal matrix. - * transpose: Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions. - * custom_filter: Callable for function to filter data based on `indices` values. See above. - * empty_ok: If False, raise `AllArraysEmpty` if the matrix would be empty - - """ - def __init__( self, *, @@ -53,6 +29,42 @@ def __init__( custom_filter: Optional[Callable] = None, empty_ok: bool = False, ): + """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed. + + `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer. + + `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array. + + Parameters + ---------- + packages : list[Datapackage] + A list of Datapackage objects. + matrix : str + The string identifying the matrix to be built. + use_vectors : bool + Flag to use vector data from datapackages + use_arrays : bool + Flag to use array data from datapackages + use_distributions : bool + Flag to use `stats_arrays` distribution data from datapackages + row_mapper : ArrayMapper + Used when matrices must align to an existing mapping. + col_mapper : + Used when matrices must align to an existing mapping. + seed_override : int + Overrides the RNG seed given in the datapackage, if any. + indexer_override : Any + Parameter for custom indexers. See above. + diagonal : bool + If `True`, only use the `row` indices to build a diagonal matrix. + transpose : bool + Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions. + custom_filter : Callable + Callable for function to filter data based on `indices` values. See above. + empty_ok : bool + If False, raise `AllArraysEmpty` if the matrix would be empty + + """ self.seed_override = seed_override self.diagonal = diagonal self.matrix_label = matrix diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_list.py new file mode 100644 index 0000000..3c376dd --- /dev/null +++ b/matrix_utils/mapped_matrix_list.py @@ -0,0 +1,56 @@ +from .mapped_matrix import MappedMatrix + + +class MappedMatrixList: + """Class which handles a list of mapped matrices. + + The matrices have the same dimensions, the same lookup dictionaries, and the same indexer.""" + def __init__( + self, + *, + packages: dict[str, Sequence[Datapackage]], + matrix: str, + row_mapper: ArrayMapper, + col_mapper: ArrayMapper, + use_vectors: bool = True, + use_arrays: bool = True, + use_distributions: bool = False, + seed_override: Optional[int] = None, + indexer_override: Any = None, + diagonal: bool = False, + transpose: bool = False, + custom_filter: Optional[Callable] = None, + empty_ok: bool = False, + ): + """A thin wrapper around a list of `MappedMatrix` objects. See its docstring for details on `custom_filter` and `indexer_override`. + + Parameters + ---------- + packages : list[Datapackage] + A list of Datapackage objects. + matrix : str + The string identifying the matrix to be built. + use_vectors : bool + Flag to use vector data from datapackages + use_arrays : bool + Flag to use array data from datapackages + use_distributions : bool + Flag to use `stats_arrays` distribution data from datapackages + row_mapper : ArrayMapper + Used when matrices must align to an existing mapping. + col_mapper : + Used when matrices must align to an existing mapping. + seed_override : int + Overrides the RNG seed given in the datapackage, if any. + indexer_override : Any + Parameter for custom indexers. See above. + diagonal : bool + If `True`, only use the `row` indices to build a diagonal matrix. + transpose : bool + Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions. + custom_filter : Callable + Callable for function to filter data based on `indices` values. See above. + empty_ok : bool + If False, raise `AllArraysEmpty` if the matrix would be empty + + """ From 0d54fda8c8a7d0a48a1fb78a54f85b2b62878091 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Tue, 24 Oct 2023 22:29:00 +0200 Subject: [PATCH 2/8] Change packaging to pyproject --- .github/workflows/python-package-deploy.yml | 40 +++ .github/workflows/python-test.yml | 62 +++++ .pre-commit-config.yaml | 69 +++-- .readthedocs.yml | 21 ++ azure-pipelines.yml | 271 -------------------- ci/conda_upload.sh | 11 - ci/meta.yaml | 33 --- conftest.py | 0 matrix_utils/__init__.py | 3 +- matrix_utils/version.py | 1 - pyproject.toml | 109 ++++++++ pytest.ini | 4 - requirements.txt | 5 - setup.py | 44 ---- 14 files changed, 287 insertions(+), 386 deletions(-) create mode 100644 .github/workflows/python-package-deploy.yml create mode 100644 .github/workflows/python-test.yml create mode 100644 .readthedocs.yml delete mode 100644 azure-pipelines.yml delete mode 100644 ci/conda_upload.sh delete mode 100644 ci/meta.yaml delete mode 100644 conftest.py delete mode 100644 matrix_utils/version.py create mode 100644 pyproject.toml delete mode 100644 pytest.ini delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/.github/workflows/python-package-deploy.yml b/.github/workflows/python-package-deploy.yml new file mode 100644 index 0000000..dbb979a --- /dev/null +++ b/.github/workflows/python-package-deploy.yml @@ -0,0 +1,40 @@ +--- +# Inspired from: +# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI + +on: + push: + branches: [main, develop] +jobs: + build-n-publish: + name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI + runs-on: ubuntu-latest + permissions: + id-token: write + steps: + - uses: actions/checkout@master + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install pypa/build + run: >- + python -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: >- + python -m + build + --outdir dist/ + . + - name: Publish distribution 📦 to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository_url: https://test.pypi.org/legacy/ + skip_existing: true + - name: Publish distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 0000000..981a41d --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,62 @@ +--- +# This workflow will install Python dependencies and run tests +# across operating systems, select versions of Python, and user + dev environments +# For more info see: +# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python 🐍 CI/CD tests + +on: + push: + branches: [main, develop] + paths-ignore: # prevents workflow execution when only these types of files are modified + - "**.md" # wildcards prevent file in any repo dir from trigering workflow + - "**.bib" + - "**.ya?ml" # captures both .yml and .yaml + - "LICENSE" + - ".gitignore" + pull_request: + branches: [main, develop] + types: [opened, reopened] # excludes syncronize to avoid redundant trigger from commits on PRs + paths-ignore: + - "**.md" + - "**.bib" + - "**.ya?ml" + - "LICENSE" + - ".gitignore" + workflow_dispatch: # also allow manual trigger, for testing purposes + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + py-version: ["3.9", "3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v2 + + # general Python setup + - name: Set up Python ${{ matrix.py-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.py-version }} + + - name: Update pip & install testing pkgs + run: | + python -VV + python -m pip install --upgrade pip setuptools wheel + + # install testing + - name: Install package and test deps + run: | + pip install .[testing] # install the package and the testing deps + + - name: Test with pytest + run: | + pytest + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1ebf75e..84518b8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,54 @@ +exclude: '^docs/conf.py' + repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 - hooks: - - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace -- repo: https://github.com/psf/black - rev: 19.3b0 - hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.9.3 - hooks: - - id: isort - name: isort (python) - args: ["--profile", "black", "--filter-files"] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: check-merge-conflict + - id: check-xml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: mixed-line-ending + args: ['--fix=auto'] # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows + +## If you want to avoid flake8 errors due to unused vars or imports: +# - repo: https://github.com/myint/autoflake +# rev: v1.4 +# hooks: +# - id: autoflake +# args: [ +# --in-place, +# --remove-all-unused-imports, +# --remove-unused-variables, +# ] + +- repo: https://github.com/pycqa/isort + rev: 5.11.5 + hooks: + - id: isort + args: [--settings-path=pyproject.toml] + +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + args: [--config=pyproject.toml] + +## If like to embrace black styles even in the docs: +# - repo: https://github.com/asottile/blacken-docs +# rev: v1.12.0 +# hooks: +# - id: blacken-docs +# additional_dependencies: [black] + +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + additional_dependencies: [Flake8-pyproject] diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..4769f1f --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,21 @@ +--- +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +build: + os: ubuntu-20.04 + tools: + python: "3.10" +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py +formats: all +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 9729a6f..0000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,271 +0,0 @@ -pr: - autoCancel: true - branches: - include: - - master - -jobs: -- job: - displayName: ubuntu-20.04-conda - pool: - vmImage: 'ubuntu-20.04' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - - timeoutInMinutes: 8 - steps: - - bash: echo "##vso[task.prependpath]$CONDA/bin" - displayName: Add conda to PATH - - - bash: conda create --yes --quiet --name bw - displayName: Create Anaconda environment - - - bash: | - source activate bw - conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0" - displayName: Install Anaconda packages - - - bash: | - source activate bw - pytest --color=yes -v - displayName: pytest - -- job: - displayName: ubuntu-20.04-pip - pool: - vmImage: 'ubuntu-20.04' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - Python36: - python.version: '3.6' - Python37: - python.version: '3.7' - Python38: - python.version: '3.8' - - timeoutInMinutes: 8 - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - bash: | - python --version - pip --version - pip install numpy pandas pytest pytest-cov pytest-azurepipelines - echo "which pytest" - which pytest - echo "pytest version" - pytest --version - displayName: Install packages - - - bash: | - pytest --color=yes -v --cov-report html --cov=bw_processing - displayName: pytest - -- job: - displayName: macOS-1015-38-conda - pool: - vmImage: 'macOS-1015' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - - timeoutInMinutes: 8 - steps: - - bash: echo "##vso[task.prependpath]$CONDA/bin" - displayName: Add conda to PATH - - - bash: sudo chown -R $USER $CONDA - displayName: Take ownership of conda installation - - - bash: conda create --yes --quiet --name bw - displayName: Create Anaconda environment - - - bash: | - source activate bw - conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0" - displayName: Install Anaconda packages - - - bash: | - source activate bw - pytest --color=yes -v - displayName: pytest - -- job: - displayName: macOS-1015-pip - pool: - vmImage: 'macOS-1015' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - Python37: - python.version: '3.7' - Python36: - python.version: '3.6' - - timeoutInMinutes: 8 - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - bash: | - python --version - pip3 install numpy pandas pytest pytest-cov pytest-azurepipelines - displayName: Install packages - - - bash: | - pytest --color=yes -v --cov-report xml --cov=bw_processing - displayName: pytest - -- job: - displayName: macOS-11-38-conda - pool: - vmImage: 'macOS-11' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - - timeoutInMinutes: 8 - steps: - - bash: echo "##vso[task.prependpath]$CONDA/bin" - displayName: Add conda to PATH - - - bash: sudo chown -R $USER $CONDA - displayName: Take ownership of conda installation - - - bash: conda create --yes --quiet --name bw - displayName: Create Anaconda environment - - - bash: | - source activate bw - conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0" - displayName: Install Anaconda packages - - - bash: | - source activate bw - pytest --color=yes -v - displayName: pytest - -- job: - displayName: macOS-11-pip - pool: - vmImage: 'macOS-11' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - Python37: - python.version: '3.7' - Python36: - python.version: '3.6' - - timeoutInMinutes: 8 - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - bash: | - python --version - pip3 install numpy pandas pytest pytest-cov pytest-azurepipelines - displayName: Install packages - - - bash: | - pytest --color=yes -v --cov-report xml --cov=bw_processing - displayName: pytest - -- job: - displayName: vs2017-win2016-conda - pool: - vmImage: 'vs2017-win2016' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - - timeoutInMinutes: 8 - steps: - - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Add conda to PATH - - - script: conda create --yes --quiet --name bw - displayName: Create Anaconda environment - - - script: | - call activate bw - conda install --yes --quiet -c defaults -c conda-forge --name bw python=%PYTHON_VERSION% numpy pandas pytest pytest-azurepipelines">=1.0" - displayName: Install Anaconda packages - - - script: | - call activate bw - pytest --color=yes -v - displayName: pytest - -- job: - displayName: vs2017-win2016-pip - pool: - vmImage: 'vs2017-win2016' - strategy: - matrix: - Python310: - python.version: '3.10' - Python39: - python.version: '3.9' - Python38: - python.version: '3.8' - Python37: - python.version: '3.7' - Python36: - python.version: '3.6' - - timeoutInMinutes: 8 - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '$(python.version)' - - - script: | - python --version - pip install numpy pandas pytest pytest-cov pytest-azurepipelines - displayName: Install packages - - - script: | - pytest --color=yes -v --cov-report xml --cov=bw_processing - displayName: pytest diff --git a/ci/conda_upload.sh b/ci/conda_upload.sh deleted file mode 100644 index f554ad5..0000000 --- a/ci/conda_upload.sh +++ /dev/null @@ -1,11 +0,0 @@ -# Only need to change these two variables -PKG_NAME=bw_processing-dev -USER=cmutel - -mkdir ~/conda-bld -conda config --set anaconda_upload no -export CONDA_BLD_PATH=~/conda-bld -export VERSION=`date +%Y.%m.%d` -conda build . --old-build-string -ls $CONDA_BLD_PATH/noarch/ -anaconda -t $CONDA_UPLOAD_TOKEN upload -u $USER -l nightly $CONDA_BLD_PATH/noarch/$PKG_NAME-$VERSION-py_0.tar.bz2 --force diff --git a/ci/meta.yaml b/ci/meta.yaml deleted file mode 100644 index 4133433..0000000 --- a/ci/meta.yaml +++ /dev/null @@ -1,33 +0,0 @@ -package: - name: bw_processing-dev - version: {{ environ['VERSION'] }} - -source: - path: .. - -build: - noarch: python - number: 0 - script: python setup.py install --single-version-externally-managed --record record.txt - script_env: - - VERSION - - CONDA_BLD_PATH - -requirements: - build: - - python - - setuptools - run: - - python - - pandas - - numpy - -test: - imports: - - bw_processing - -about: - home: "https://github.com/brightway-lca/bw_processing" - license: BSD License - summary: 'Tools to create structured arrays in a common format' -license_family: BSD diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e69de29..0000000 diff --git a/matrix_utils/__init__.py b/matrix_utils/__init__.py index 1a65bd8..67a0355 100644 --- a/matrix_utils/__init__.py +++ b/matrix_utils/__init__.py @@ -9,8 +9,9 @@ "Proxy", ) +__version__ = "0.3" + from .array_mapper import ArrayMapper from .indexers import CombinatorialIndexer, Proxy, RandomIndexer, SequentialIndexer from .mapped_matrix import MappedMatrix from .resource_group import ResourceGroup -from .version import version as __version__ diff --git a/matrix_utils/version.py b/matrix_utils/version.py deleted file mode 100644 index 4d8a2a8..0000000 --- a/matrix_utils/version.py +++ /dev/null @@ -1 +0,0 @@ -version = (0, 2, 5) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..950c845 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,109 @@ +[build-system] +requires = ["setuptools>=68.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "matrix_utils" +authors = [ + { name="Chris Mutel", email="cmutel@gmail.com" } +] +maintainers = [ + { name="Chris Mutel", email="cmutel@gmail.com" } +] +description = "Tools to create matrices from data packages" +readme = "README.md" +dynamic = ["version"] +# Add here all kinds of additional classifiers as defined under +# https://pypi.org/classifiers/ +classifiers = [ + "Intended Audience :: End Users/Desktop", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Mathematics", +] +requires-python = ">=3.9" +dependencies = [ + # dependencies as strings with quotes, e.g. "foo" + # You can add version requirements like "foo>2.0" + "numpy", + "scipy", + "pandas", + "bw_processing", + "stats_arrays", +] + +[project.urls] +source = "https://github.com/brightway-lca/matrix_utils" +homepage = "https://github.com/brightway-lca/matrix_utils" +tracker = "https://github.com/brightway-lca/matrix_utils/issues" + +[project.optional-dependencies] +# Getting recursive dependencies to work is a pain, this +# seems to work, at least for now +testing = [ + "matrix_utils", + "pytest", + "pytest-cov", + "python-coveralls" +] +dev = [ + "build", + "pre-commit", + "pylint", + "pytest", + "pytest-cov", + "setuptools", +] + +[tool.setuptools] +license-files = ["LICENSE"] +include-package-data = true +packages = ["matrix_utils"] + +[tool.setuptools.dynamic] +version = {attr = "matrix_utils.__version__"} + +[tool.pytest.ini_options] +addopts = "--cov matrix_utils --cov-report term-missing --verbose" +norecursedirs = [ + "dist", + "build", + ".tox" +] +testpaths = ["tests/*.py"] + +[tool.flake8] +# Some sane defaults for the code style checker flake8 +max_line_length = 88 +extend_ignore = ["E203", "W503"] +# ^ Black-compatible +# E203 and W503 have edge cases handled by black +exclude = [ + ".tox", + "build", + "dist", + ".eggs", + "docs/conf.py", +] + +[tool.black] +line-length = 88 + +[tool.isort] +line_length = 88 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index e4cf065..0000000 --- a/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -[pytest] -testpaths = tests -python_files = tests/*.py -norecursedirs = venv, manual, fixtures diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9088690..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -numpy -scipy -pandas -bw_processing -stats_arrays diff --git a/setup.py b/setup.py deleted file mode 100644 index 8435e1e..0000000 --- a/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -from setuptools import setup - -v_temp = {} -with open("matrix_utils/version.py") as fp: - exec(fp.read(), v_temp) -version = ".".join((str(x) for x in v_temp["version"])) - - -setup( - name="matrix_utils", - version=version, - packages=["matrix_utils"], - author="Chris Mutel", - author_email="cmutel@gmail.com", - license="BSD 3-clause", - install_requires=[ - "numpy", - "scipy", - "pandas", - "bw_processing", - "stats_arrays", - ], - url="https://github.com/brightway-lca/matrix_utils", - long_description_content_type="text/markdown", - long_description=open("README.md").read(), - description="Tools to create matrices from data packages", - classifiers=[ - "Intended Audience :: End Users/Desktop", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: BSD License", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: Scientific/Engineering :: Mathematics", - ], -) From 11573b49642f624a0fad2e40d2d46b1eb3fa45a6 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Tue, 24 Oct 2023 22:41:42 +0200 Subject: [PATCH 3/8] Switch from list to dict --- .pre-commit-config.yaml | 7 ++- matrix_utils/mapped_matrix.py | 79 ++++++++++++++++++++---------- matrix_utils/mapped_matrix_list.py | 57 ++++++++++++++++++--- 3 files changed, 107 insertions(+), 36 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84518b8..58e01d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -35,14 +35,13 @@ repos: args: [--settings-path=pyproject.toml] - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.10.0 hooks: - id: black args: [--config=pyproject.toml] -## If like to embrace black styles even in the docs: -# - repo: https://github.com/asottile/blacken-docs -# rev: v1.12.0 +# - repo: https://github.com/adamchainz/blacken-docs +# rev: 1.16.0 # hooks: # - id: blacken-docs # additional_dependencies: [black] diff --git a/matrix_utils/mapped_matrix.py b/matrix_utils/mapped_matrix.py index 6d396c9..e086efe 100644 --- a/matrix_utils/mapped_matrix.py +++ b/matrix_utils/mapped_matrix.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Optional, Sequence, List +from typing import Any, Callable, List, Optional, Sequence import numpy as np from bw_processing import INDICES_DTYPE, UNCERTAINTY_DTYPE, Datapackage @@ -29,11 +29,19 @@ def __init__( custom_filter: Optional[Callable] = None, empty_ok: bool = False, ): - """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed. + """A scipy sparse matrix handler which takes in ``bw_processing`` data + packages. Row and column ids are mapped to matrix indices, and a matrix is + constructed. - `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer. + `indexer_override` allows for custom indexer behaviour. Indexers should follow + a simple API: they must support `.__next__()`, and have the attribute `.index`, + which returns an integer. - `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array. + `custom_filter` allows you to remove some data based on their indices. It is + applied to all resource groups. If you need more fine-grained control, process + the matrix after construction/iteration. `custom_filter` should take the + indices array as an input, and return a Numpy boolean array with the same + length as the indices array. Parameters ---------- @@ -58,7 +66,8 @@ def __init__( diagonal : bool If `True`, only use the `row` indices to build a diagonal matrix. transpose : bool - Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions. + Transpose row and column indices. Happens before any processing, so filters + and mappers should refer to the transposed dimensions. custom_filter : Callable Callable for function to filter data based on `indices` values. See above. empty_ok : bool @@ -91,9 +100,10 @@ def __init__( for package in self.packages: if package.dehydrated_interfaces(): raise EmptyInterface( - "Dehydrated interfaces {} in package {} need to be rehydrated to be used in matrix calculations".format( - package.dehydrated_interfaces(), package - ) + ( + "Dehydrated interfaces {} in package {} need to be rehydrated " + + "to be used in matrix calculations" + ).format(package.dehydrated_interfaces(), package) ) self.groups = tuple([obj for lst in self.packages.values() for obj in lst]) @@ -158,20 +168,20 @@ def map_indices(self): for group in self.groups: group.map_indices(diagonal=self.diagonal) - def iterate_indexers(self): + def iterate_indexers(self) -> None: for obj in self.packages: # Avoid ``StopIteration`` errors if packaged is filtered to emptiness if hasattr(obj, "indexer") and self.packages[obj]: next(obj.indexer) - def reset_indexers(self, rebuild=False): + def reset_indexers(self, rebuild=False) -> None: for obj in self.packages: if hasattr(obj, "indexer"): obj.indexer.reset() if rebuild: self.rebuild_matrix() - def rebuild_matrix(self): + def rebuild_matrix(self) -> None: self.matrix.data *= 0 for group in self.groups: row, col, data = group.calculate() @@ -180,11 +190,11 @@ def rebuild_matrix(self): else: self.matrix[row, col] = data - def __next__(self): + def __next__(self) -> None: self.iterate_indexers() self.rebuild_matrix() - def add_indexers(self, indexer_override: Any, seed_override: Optional[int]): + def add_indexers(self, indexer_override: Any, seed_override: Optional[int]) -> None: """Add indexers""" for package, resources in self.packages.items(): if hasattr(package, "indexer"): @@ -229,16 +239,23 @@ def input_row_col_indices(self) -> np.ndarray: return array def input_provenance(self) -> List[tuple]: - """Describe where the data in the other ``input_X`` comes from. Returns a list of ``(datapackage, group_label, (start_index, end_index))`` tuples. + """Describe where the data in the other ``input_X`` comes from. Returns a list + of ``(datapackage, group_label, (start_index, end_index))`` tuples. - Note that the ``end_index`` is exclusive, following the Python slicing convention, i.e. ``(7, 9)`` means start from the 8th element (indices start from 0), and go up to but don't include the 10th element (i.e. (7, 9) has two elements).""" + Note that the ``end_index`` is exclusive, following the Python slicing + convention, i.e. ``(7, 9)`` means start from the 8th element (indices start + from 0), and go up to but don't include the 10th element (i.e. (7, 9) has two + elements). + """ position, result = 0, [] for package, groups in self.packages.items(): for group in groups: num_elements = len(group.data_current) # Minus one because we include the first element as element 0 - result.append((package, group.label, (position, position + num_elements))) + result.append( + (package, group.label, (position, position + num_elements)) + ) # Plus one because start at the next value position += num_elements return result @@ -271,15 +288,27 @@ def _construct_distributions_array(self, given, uncertainty_type=0) -> np.ndarra def input_uncertainties(self, number_samples: Optional[int] = None) -> np.ndarray: """Return the stacked uncertainty arrays of all resources groups. - Note that this data is masked with both the custom filter (if present) and the mapping mask! + Note that this data is masked with both the custom filter (if present) and the + mapping mask! - If the resource group has a distributions array, then this is returned. Otherwise, if the data is static, a distributions array with uncertainty type 0 (undefined uncertainty) is constructed. If the data is an array, an estimate of the mean and standard deviation are given in the ``loc`` and ``scale`` columns. This estimate uses ``number_samples`` columns, or all columns if ``number_samples`` is ``None``. + If the resource group has a distributions array, then this is returned. + Otherwise, if the data is static, a distributions array with uncertainty type + 0 (undefined uncertainty) is constructed. If the data is an array, an estimate + of the mean and standard deviation are given in the ``loc`` and ``scale`` + columns. This estimate uses ``number_samples`` columns, or all columns if + ``number_samples`` is ``None``. - If the data comes from an interface, a distributions array with uncertainty type 0 will be created. Regardless if whether it is a vector or an array interface, the current data vector is used, and no estimate of uncertainty is made. Therefore, this data will never consume new data from an interface. + If the data comes from an interface, a distributions array with uncertainty + type 0 will be created. Regardless if whether it is a vector or an array + interface, the current data vector is used, and no estimate of uncertainty is + made. Therefore, this data will never consume new data from an interface. - Raises a ``TypeError`` if distributions arrays are present but don't follow the dtype of ``bw_processing.UNCERTAINTY_DTYPE``. + Raises a ``TypeError`` if distributions arrays are present but don't follow the + dtype of ``bw_processing.UNCERTAINTY_DTYPE``. - As both population samples (arrays) and interfaces don't fit into the traditional ``stat_arrays`` framework, we mark these with custom ``uncertainty_types``: + As both population samples (arrays) and interfaces don't fit into the + traditional ``stat_arrays`` framework, we mark these with custom + ``uncertainty_types``: * ``98`` for arrays * ``99`` for interfaces @@ -290,11 +319,11 @@ def input_uncertainties(self, number_samples: Optional[int] = None) -> np.ndarra for group in self.groups: if group.has_distributions and self.use_distributions: if group.data_original.dtype != UNCERTAINTY_DTYPE: - raise TypeError( - "Distributions datatype should be `bw_processing.UNCERTAINTY_DTYPE`, but was {}".format( - group.data_original.dtype - ) + message = ( + "Distributions datatype should be " + "`bw_processing.UNCERTAINTY_DTYPE`, but was {}" ) + raise TypeError(message.format(group.data_original.dtype)) arrays.append(group.apply_masks(group.data_original)) elif group.is_array() and not group.is_interface(): data = group.data_original diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_list.py index 3c376dd..5b9f63c 100644 --- a/matrix_utils/mapped_matrix_list.py +++ b/matrix_utils/mapped_matrix_list.py @@ -1,14 +1,23 @@ +from collections.abc import Mapping +from typing import Any, Callable, Optional, Sequence, Union + +from bw_processing import Datapackage + +from .array_mapper import ArrayMapper from .mapped_matrix import MappedMatrix -class MappedMatrixList: +class MappedMatrixDict(Mapping): """Class which handles a list of mapped matrices. - The matrices have the same dimensions, the same lookup dictionaries, and the same indexer.""" + The matrices have the same dimensions, the same lookup dictionaries, and the same + indexer. + """ + def __init__( self, *, - packages: dict[str, Sequence[Datapackage]], + packages: dict[Union[tuple, str], Sequence[Datapackage]], matrix: str, row_mapper: ArrayMapper, col_mapper: ArrayMapper, @@ -22,12 +31,14 @@ def __init__( custom_filter: Optional[Callable] = None, empty_ok: bool = False, ): - """A thin wrapper around a list of `MappedMatrix` objects. See its docstring for details on `custom_filter` and `indexer_override`. + """A thin wrapper around a list of `MappedMatrix` objects. See its docstring + for details on `custom_filter` and `indexer_override`. Parameters ---------- - packages : list[Datapackage] - A list of Datapackage objects. + packages : dict[Union[tuple, str], Sequence[Datapackage]] + A dictionary with identifiers as keys and a list of `bw_processing` + datapackages as values. matrix : str The string identifying the matrix to be built. use_vectors : bool @@ -47,10 +58,42 @@ def __init__( diagonal : bool If `True`, only use the `row` indices to build a diagonal matrix. transpose : bool - Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions. + Transpose row and column indices. Happens before any processing, so filters + and mappers should refer to the transposed dimensions. custom_filter : Callable Callable for function to filter data based on `indices` values. See above. empty_ok : bool If False, raise `AllArraysEmpty` if the matrix would be empty """ + self.matrices = { + tpl: MappedMatrix( + packages=packages, + matrix=matrix, + use_vectors=use_vectors, + use_arrays=use_arrays, + use_distributions=use_distributions, + row_mapper=row_mapper, + col_mapper=col_mapper, + seed_override=seed_override, + indexer_override=None, + diagonal=diagonal, + transpose=transpose, + custom_filter=custom_filter, + empty_ok=empty_ok, + ) + for tpl, packages in packages.items() + } + + def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix: + return self.matrices[key] + + def __iter__(self): + for tpl in self.matrices: + return self.matrices[tpl] + + def __len__(self) -> int: + return len(self.matrices) + + def __next__(self) -> None: + pass From de400d47139386a631db378fd12eb7783e1d5b8f Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Tue, 24 Oct 2023 22:47:43 +0200 Subject: [PATCH 4/8] Fix filename for mapped dict instead of list --- matrix_utils/__init__.py | 2 ++ matrix_utils/{mapped_matrix_list.py => mapped_matrix_dict.py} | 0 2 files changed, 2 insertions(+) rename matrix_utils/{mapped_matrix_list.py => mapped_matrix_dict.py} (100%) diff --git a/matrix_utils/__init__.py b/matrix_utils/__init__.py index 67a0355..9047cf2 100644 --- a/matrix_utils/__init__.py +++ b/matrix_utils/__init__.py @@ -3,6 +3,7 @@ "ResourceGroup", "ArrayMapper", "MappedMatrix", + "MappedMatrixDict", "RandomIndexer", "SequentialIndexer", "CombinatorialIndexer", @@ -14,4 +15,5 @@ from .array_mapper import ArrayMapper from .indexers import CombinatorialIndexer, Proxy, RandomIndexer, SequentialIndexer from .mapped_matrix import MappedMatrix +from .mapped_matrix_dict import MappedMatrixDict from .resource_group import ResourceGroup diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_dict.py similarity index 100% rename from matrix_utils/mapped_matrix_list.py rename to matrix_utils/mapped_matrix_dict.py From 76b7feb1281c57e57b514c8e54dcb5f76dddbbb1 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Wed, 25 Oct 2023 07:52:40 +0200 Subject: [PATCH 5/8] Add global indexing to MMD --- matrix_utils/mapped_matrix_dict.py | 58 ++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py index 5b9f63c..3b1927b 100644 --- a/matrix_utils/mapped_matrix_dict.py +++ b/matrix_utils/mapped_matrix_dict.py @@ -5,15 +5,10 @@ from .array_mapper import ArrayMapper from .mapped_matrix import MappedMatrix +from .indexers import RandomIndexer, SequentialIndexer, Indexer class MappedMatrixDict(Mapping): - """Class which handles a list of mapped matrices. - - The matrices have the same dimensions, the same lookup dictionaries, and the same - indexer. - """ - def __init__( self, *, @@ -30,10 +25,23 @@ def __init__( transpose: bool = False, custom_filter: Optional[Callable] = None, empty_ok: bool = False, + sequential: bool = False, ): - """A thin wrapper around a list of `MappedMatrix` objects. See its docstring + """A thin wrapper around a dict of `MappedMatrix` objects. See its docstring for details on `custom_filter` and `indexer_override`. + The matrices have the same dimensions, the same lookup dictionaries, and the + same indexer. + + The number of possible configurations of resource groups and indexers is far + higher than any generic class can handle. This class supports either + sequential or random indexing, and the indexing is applied to **all resource + groups and datapackages**. If you need finer-grained control, you can access + set and access the individual resource group `indexer` attributes. + + Because the same indexer is used for all datapackages, individual `seed` values + are ignored. Use `seed_override` to set a global RNG seed. + Parameters ---------- packages : dict[Union[tuple, str], Sequence[Datapackage]] @@ -64,10 +72,22 @@ def __init__( Callable for function to filter data based on `indices` values. See above. empty_ok : bool If False, raise `AllArraysEmpty` if the matrix would be empty - + sequential : bool + Use the **same sequential indexer** across all resource groups in all datapackages """ + self.matrix = matrix + self.row_mapper = row_mapper + self.col_mapper = col_mapper + self.use_vectors = use_vectors + self.use_arrays = use_arrays + self.seed_override = seed_override + self.diagonal = diagonal + self.transpose = transpose + self.custom_filter = custom_filter + self.empty_ok = empty_ok + self.global_indexer = self.get_global_indexer(indexer_override=indexer_override, sequential=sequential, seed_override=seed_override) self.matrices = { - tpl: MappedMatrix( + obj: MappedMatrix( packages=packages, matrix=matrix, use_vectors=use_vectors, @@ -76,24 +96,34 @@ def __init__( row_mapper=row_mapper, col_mapper=col_mapper, seed_override=seed_override, - indexer_override=None, + indexer_override=self.global_indexer, diagonal=diagonal, transpose=transpose, custom_filter=custom_filter, empty_ok=empty_ok, ) - for tpl, packages in packages.items() + for obj, packages in packages.items() } def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix: return self.matrices[key] def __iter__(self): - for tpl in self.matrices: - return self.matrices[tpl] + for obj in self.matrices: + return self.matrices[obj] def __len__(self) -> int: return len(self.matrices) def __next__(self) -> None: - pass + next(self.global_indexer) + for mm in self.matrices.values(): + mm.rebuild_matrix() + + def get_global_indexer(self, indexer_override: Any, sequential: bool, seed_override: Optional[int]) -> Indexer: + if indexer_override is not None: + return indexer_override + elif sequential: + return SequentialIndexer() + else: + return RandomIndexer(seed=seed_override) From 7e40123499d04553b7994d6f0c1a5b4cc2ad25c8 Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Wed, 25 Oct 2023 07:56:48 +0200 Subject: [PATCH 6/8] Formatting --- matrix_utils/mapped_matrix_dict.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py index 3b1927b..5a752ff 100644 --- a/matrix_utils/mapped_matrix_dict.py +++ b/matrix_utils/mapped_matrix_dict.py @@ -85,7 +85,11 @@ def __init__( self.transpose = transpose self.custom_filter = custom_filter self.empty_ok = empty_ok - self.global_indexer = self.get_global_indexer(indexer_override=indexer_override, sequential=sequential, seed_override=seed_override) + self.global_indexer = self.get_global_indexer( + indexer_override=indexer_override, + sequential=sequential, + seed_override=seed_override, + ) self.matrices = { obj: MappedMatrix( packages=packages, @@ -120,7 +124,9 @@ def __next__(self) -> None: for mm in self.matrices.values(): mm.rebuild_matrix() - def get_global_indexer(self, indexer_override: Any, sequential: bool, seed_override: Optional[int]) -> Indexer: + def get_global_indexer( + self, indexer_override: Any, sequential: bool, seed_override: Optional[int] + ) -> Indexer: if indexer_override is not None: return indexer_override elif sequential: From 493a40f6f9fe020b60e1327e4fd43653681fdbbc Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Wed, 25 Oct 2023 09:23:16 +0200 Subject: [PATCH 7/8] Finish `MappedMatrixDict` --- matrix_utils/mapped_matrix_dict.py | 22 ++- tests/mmd.py | 250 +++++++++++++++++++++++++++++ 2 files changed, 265 insertions(+), 7 deletions(-) create mode 100644 tests/mmd.py diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py index 5a752ff..1f16d97 100644 --- a/matrix_utils/mapped_matrix_dict.py +++ b/matrix_utils/mapped_matrix_dict.py @@ -4,8 +4,8 @@ from bw_processing import Datapackage from .array_mapper import ArrayMapper +from .indexers import Indexer, RandomIndexer, SequentialIndexer from .mapped_matrix import MappedMatrix -from .indexers import RandomIndexer, SequentialIndexer, Indexer class MappedMatrixDict(Mapping): @@ -42,6 +42,10 @@ def __init__( Because the same indexer is used for all datapackages, individual `seed` values are ignored. Use `seed_override` to set a global RNG seed. + The `empty_ok` flag applies to **all matrices** - if any of the matrices have + a valid data value no error will be raised. In practice this flag should have + no effect for `MappedMatrixDict` unless the input data is very broken. + Parameters ---------- packages : dict[Union[tuple, str], Sequence[Datapackage]] @@ -73,7 +77,8 @@ def __init__( empty_ok : bool If False, raise `AllArraysEmpty` if the matrix would be empty sequential : bool - Use the **same sequential indexer** across all resource groups in all datapackages + Use the **same sequential indexer** across all resource groups in all + datapackages """ self.matrix = matrix self.row_mapper = row_mapper @@ -90,8 +95,12 @@ def __init__( sequential=sequential, seed_override=seed_override, ) + + if not isinstance(packages, Mapping): + raise ValueError("`packages` must be a dictionary") + self.matrices = { - obj: MappedMatrix( + key: MappedMatrix( packages=packages, matrix=matrix, use_vectors=use_vectors, @@ -106,15 +115,14 @@ def __init__( custom_filter=custom_filter, empty_ok=empty_ok, ) - for obj, packages in packages.items() + for key, packages in packages.items() } - def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix: + def __getitem__(self, key: Any) -> MappedMatrix: return self.matrices[key] def __iter__(self): - for obj in self.matrices: - return self.matrices[obj] + yield from self.matrices def __len__(self) -> int: return len(self.matrices) diff --git a/tests/mmd.py b/tests/mmd.py new file mode 100644 index 0000000..37cb5b4 --- /dev/null +++ b/tests/mmd.py @@ -0,0 +1,250 @@ +import bw_processing as bwp +import numpy as np +import pytest + +from matrix_utils import ArrayMapper, MappedMatrix, MappedMatrixDict +from matrix_utils.errors import AllArraysEmpty +from matrix_utils.indexers import RandomIndexer, SequentialIndexer + + +@pytest.fixture +def mmd_fixture(): + first = bwp.create_datapackage() + first.add_persistent_vector( + matrix="foo", + name="vector", + indices_array=np.array([(0, 0), (2, 1)], dtype=bwp.INDICES_DTYPE), + data_array=np.array([1, 2.3]), + ) + second = bwp.create_datapackage() + second.add_persistent_vector( + matrix="foo", + name="vector", + indices_array=np.array([(10, 10), (12, 11)], dtype=bwp.INDICES_DTYPE), + data_array=np.array([11, 12]), + ) + third = bwp.create_datapackage() + third.add_persistent_vector( + matrix="foo", + name="vector", + indices_array=np.array([(0, 10), (2, 11)], dtype=bwp.INDICES_DTYPE), + data_array=np.array([11, 12]), + ) + fourth = bwp.create_datapackage() + fourth.add_persistent_array( + matrix="foo", + name="array", + indices_array=np.array( + [(1, 0), (2, 1), (5, 1), (8, 1)], dtype=bwp.INDICES_DTYPE + ), + data_array=np.arange(8).reshape((4, 2)) + 10, + ) + fifth = bwp.create_datapackage() + fifth.add_persistent_array( + matrix="foo", + name="array", + indices_array=np.array( + [(1, 0), (12, 11), (5, 1), (18, 11)], dtype=bwp.INDICES_DTYPE + ), + data_array=np.arange(20).reshape((4, 5)), + ) + row_mapper = ArrayMapper( + array=np.array([0, 2, 10, 12, 0, 2, 1, 2, 5, 8, 1, 12, 5, 18]) + ) + col_mapper = ArrayMapper( + array=np.array([0, 1, 10, 11, 10, 11, 0, 1, 1, 1, 0, 11, 1, 11]) + ) + return first, second, third, fourth, fifth, row_mapper, col_mapper + + +def test_basic_mmd_construction(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + ) + + assert len(mmd) == 3 + assert len(mmd.matrices) == 3 + for key in mmd: + mm = mmd[key] + assert isinstance(mm, MappedMatrix) + assert mm.matrix.shape == (8, 4) + + +def test_basic_mmd_as_dict(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + ) + + assert "a" in mmd + assert "g" not in mmd + assert len(mmd) == 3 + assert mmd.keys() + assert list(mmd.keys()) == ["a", "b", "c"] + assert mmd.values() + with pytest.raises(TypeError): + del mmd["a"] + + +def test_mmd_shared_indexer(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + ) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer is mmd.global_indexer + + +def test_mmd_iterate_indexer_changes_matrix_values(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + sequential=True, + ) + + for mm, value in zip( + mmd.values(), [1 + 2.3 + 11 + 12, 11 + 12 + 10 + 12 + 14 + 16, 0 + 5 + 10 + 15] + ): + assert mm.matrix.sum() == value + + next(mmd) + + for mm, value in zip( + mmd.values(), [1 + 2.3 + 11 + 12, 11 + 12 + 11 + 13 + 15 + 17, 1 + 6 + 11 + 16] + ): + assert mm.matrix.sum() == value + + +def test_mmd_empty_datapackages(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + ) + + with pytest.raises(AllArraysEmpty): + MappedMatrixDict( + packages={ + "a": [], + "b": [], + }, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + ) + + MappedMatrixDict( + packages={ + "a": [], + "b": [], + }, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + empty_ok=True, + ) + + +def test_mmd_random(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + indexer = RandomIndexer(seed=42) + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + indexer_override=indexer, + ) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer is indexer + assert group.indexer.index == 191664963 + + next(mmd) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer is indexer + assert group.indexer.index == 1662057957 + + +def test_mmd_custom_indexer(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + indexer = SequentialIndexer() + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + indexer_override=indexer, + ) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer is indexer + assert group.indexer.index == 0 + + next(indexer) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer is indexer + assert group.indexer.index == 1 + + +def test_mmd_sequential(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + mmd = MappedMatrixDict( + packages={"a": [first, second], "b": [third, fourth], "c": [fifth]}, + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + sequential=True, + ) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer.index == 0 + + next(mmd) + + for mm in mmd.values(): + for group in mm.groups: + assert group.indexer.index == 1 + + +def test_mmd_invalid_packages(mmd_fixture): + first, second, third, fourth, fifth, rows, cols = mmd_fixture + with pytest.raises(ValueError): + MappedMatrixDict( + packages=[("a", [first, second]), ("b", [third, fourth]), ("c", [fifth])], + matrix="foo", + row_mapper=rows, + col_mapper=cols, + use_arrays=True, + ) From 1e7ece3841fe97357467b6ac5b4f4c290ce213ba Mon Sep 17 00:00:00 2001 From: Chris Mutel Date: Wed, 25 Oct 2023 09:23:23 +0200 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d3acfc..1e80c4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [0.3.DEV] + +* Add `MappedMatrixDict` class for `MultiLCA` + ### [0.2.5] - 2022-05-21 * Add functions to reset indexers directly and on `MappedMatrix`