From af893993f40d65bbd65e770f4ab5713dbaba5b51 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Tue, 24 Oct 2023 19:42:40 +0200
Subject: [PATCH 1/8] Start work on mapped matrix list

---
 matrix_utils/mapped_matrix.py      | 60 ++++++++++++++++++------------
 matrix_utils/mapped_matrix_list.py | 56 ++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 24 deletions(-)
 create mode 100644 matrix_utils/mapped_matrix_list.py

diff --git a/matrix_utils/mapped_matrix.py b/matrix_utils/mapped_matrix.py
index 7d915bf..6d396c9 100644
--- a/matrix_utils/mapped_matrix.py
+++ b/matrix_utils/mapped_matrix.py
@@ -12,30 +12,6 @@
 
 
 class MappedMatrix:
-    """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed.
-
-    `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer.
-
-    `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array.
-
-    Args:
-
-        * packages: A list of Ddatapackage objects.
-        * matrix: The string identifying the matrix to be built.
-        * use_vectors: Flag to use vector data from datapackages
-        * use_arrays: Flag to use array data from datapackages
-        * use_distributions: Flag to use `stats_arrays` distribution data from datapackages
-        * row_mapper: Optional instance of `ArrayMapper`. Used when matrices must align.
-        * col_mapper: Optional instance of `ArrayMapper`. Used when matrices must align.
-        * seed_override: Optional integer. Overrides the RNG seed given in the datapackage, if any.
-        * indexer_override: Parameter for custom indexers. See above.
-        * diagonal: If True, only use the `row` indices to build a diagonal matrix.
-        * transpose: Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions.
-        * custom_filter: Callable for function to filter data based on `indices` values. See above.
-        * empty_ok: If False, raise `AllArraysEmpty` if the matrix would be empty
-
-    """
-
     def __init__(
         self,
         *,
@@ -53,6 +29,42 @@ def __init__(
         custom_filter: Optional[Callable] = None,
         empty_ok: bool = False,
     ):
+        """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed.
+
+        `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer.
+
+        `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array.
+
+        Parameters
+        ----------
+        packages : list[Datapackage]
+            A list of Datapackage objects.
+        matrix : str
+            The string identifying the matrix to be built.
+        use_vectors : bool
+            Flag to use vector data from datapackages
+        use_arrays : bool
+            Flag to use array data from datapackages
+        use_distributions : bool
+            Flag to use `stats_arrays` distribution data from datapackages
+        row_mapper : ArrayMapper
+            Used when matrices must align to an existing mapping.
+        col_mapper : ArrayMapper
+            Used when matrices must align to an existing mapping.
+        seed_override : int
+            Overrides the RNG seed given in the datapackage, if any.
+        indexer_override : Any
+            Parameter for custom indexers. See above.
+        diagonal : bool
+            If `True`, only use the `row` indices to build a diagonal matrix.
+        transpose : bool
+            Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions.
+        custom_filter : Callable
+            Callable for function to filter data based on `indices` values. See above.
+        empty_ok : bool
+            If False, raise `AllArraysEmpty` if the matrix would be empty
+
+        """
         self.seed_override = seed_override
         self.diagonal = diagonal
         self.matrix_label = matrix
diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_list.py
new file mode 100644
index 0000000..3c376dd
--- /dev/null
+++ b/matrix_utils/mapped_matrix_list.py
@@ -0,0 +1,56 @@
+from .mapped_matrix import MappedMatrix
+
+
+class MappedMatrixList:
+    """Class which handles a list of mapped matrices.
+
+    The matrices have the same dimensions, the same lookup dictionaries, and the same indexer."""
+    def __init__(
+        self,
+        *,
+        packages: dict[str, Sequence[Datapackage]],
+        matrix: str,
+        row_mapper: ArrayMapper,
+        col_mapper: ArrayMapper,
+        use_vectors: bool = True,
+        use_arrays: bool = True,
+        use_distributions: bool = False,
+        seed_override: Optional[int] = None,
+        indexer_override: Any = None,
+        diagonal: bool = False,
+        transpose: bool = False,
+        custom_filter: Optional[Callable] = None,
+        empty_ok: bool = False,
+    ):
+        """A thin wrapper around a list of `MappedMatrix` objects. See its docstring for details on `custom_filter` and `indexer_override`.
+
+        Parameters
+        ----------
+        packages : list[Datapackage]
+            A list of Datapackage objects.
+        matrix : str
+            The string identifying the matrix to be built.
+        use_vectors : bool
+            Flag to use vector data from datapackages
+        use_arrays : bool
+            Flag to use array data from datapackages
+        use_distributions : bool
+            Flag to use `stats_arrays` distribution data from datapackages
+        row_mapper : ArrayMapper
+            Used when matrices must align to an existing mapping.
+        col_mapper :
+            Used when matrices must align to an existing mapping.
+        seed_override : int
+            Overrides the RNG seed given in the datapackage, if any.
+        indexer_override : Any
+            Parameter for custom indexers. See above.
+        diagonal : bool
+            If `True`, only use the `row` indices to build a diagonal matrix.
+        transpose : bool
+            Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions.
+        custom_filter : Callable
+            Callable for function to filter data based on `indices` values. See above.
+        empty_ok : bool
+            If False, raise `AllArraysEmpty` if the matrix would be empty
+
+        """

From 0d54fda8c8a7d0a48a1fb78a54f85b2b62878091 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Tue, 24 Oct 2023 22:29:00 +0200
Subject: [PATCH 2/8] Change packaging to pyproject

---
 .github/workflows/python-package-deploy.yml |  40 +++
 .github/workflows/python-test.yml           |  62 +++++
 .pre-commit-config.yaml                     |  69 +++--
 .readthedocs.yml                            |  21 ++
 azure-pipelines.yml                         | 271 --------------------
 ci/conda_upload.sh                          |  11 -
 ci/meta.yaml                                |  33 ---
 conftest.py                                 |   0
 matrix_utils/__init__.py                    |   3 +-
 matrix_utils/version.py                     |   1 -
 pyproject.toml                              | 109 ++++++++
 pytest.ini                                  |   4 -
 requirements.txt                            |   5 -
 setup.py                                    |  44 ----
 14 files changed, 287 insertions(+), 386 deletions(-)
 create mode 100644 .github/workflows/python-package-deploy.yml
 create mode 100644 .github/workflows/python-test.yml
 create mode 100644 .readthedocs.yml
 delete mode 100644 azure-pipelines.yml
 delete mode 100644 ci/conda_upload.sh
 delete mode 100644 ci/meta.yaml
 delete mode 100644 conftest.py
 delete mode 100644 matrix_utils/version.py
 create mode 100644 pyproject.toml
 delete mode 100644 pytest.ini
 delete mode 100644 requirements.txt
 delete mode 100644 setup.py

diff --git a/.github/workflows/python-package-deploy.yml b/.github/workflows/python-package-deploy.yml
new file mode 100644
index 0000000..dbb979a
--- /dev/null
+++ b/.github/workflows/python-package-deploy.yml
@@ -0,0 +1,40 @@
+---
+# Inspired by:
+# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
+name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+
+on:
+  push:
+    branches: [main, develop]
+jobs:
+  build-n-publish:
+    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+    steps:
+    - uses: actions/checkout@master
+    - name: Set up Python 3.11
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.11"
+    - name: Install pypa/build
+      run: >-
+        python -m
+        pip install
+        build
+        --user
+    - name: Build a binary wheel and a source tarball
+      run: >-
+        python -m
+        build
+        --outdir dist/
+        .
+    - name: Publish distribution 📦 to Test PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1
+      with:
+        repository_url: https://test.pypi.org/legacy/
+        skip_existing: true
+    - name: Publish distribution 📦 to PyPI
+      if: startsWith(github.ref, 'refs/tags')
+      uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
new file mode 100644
index 0000000..981a41d
--- /dev/null
+++ b/.github/workflows/python-test.yml
@@ -0,0 +1,62 @@
+---
+# This workflow will install Python dependencies and run tests
+# across operating systems, select versions of Python, and user + dev environments
+# For more info see:
+# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Python 🐍 CI/CD tests
+
+on:
+  push:
+    branches: [main, develop]
+    paths-ignore: # prevents workflow execution when only these types of files are modified
+      - "**.md" # wildcards keep matching files in any repo dir from triggering the workflow
+      - "**.bib"
+      - "**.ya?ml" # captures both .yml and .yaml
+      - "LICENSE"
+      - ".gitignore"
+  pull_request:
+    branches: [main, develop]
+    types: [opened, reopened] # excludes synchronize to avoid redundant triggers from commits on PRs
+    paths-ignore:
+      - "**.md"
+      - "**.bib"
+      - "**.ya?ml"
+      - "LICENSE"
+      - ".gitignore"
+  workflow_dispatch: # also allow manual trigger, for testing purposes
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        py-version: ["3.9", "3.10", "3.11", "3.12"]
+
+    steps:
+      - uses: actions/checkout@v2
+
+      # general Python setup
+      - name: Set up Python ${{ matrix.py-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.py-version }}
+
+      - name: Update pip & install testing pkgs
+        run: |
+          python -VV
+          python -m pip install --upgrade pip setuptools wheel
+
+      # install testing
+      - name: Install package and test deps
+        run: |
+          pip install .[testing] # install the package and the testing deps
+
+      - name: Test with pytest
+        run: |
+          pytest
+
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v3
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1ebf75e..84518b8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,17 +1,54 @@
+exclude: '^docs/conf.py'
+
 repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.3.0
-    hooks:
-    -   id: check-yaml
-    -   id: end-of-file-fixer
-    -   id: trailing-whitespace
--   repo: https://github.com/psf/black
-    rev: 19.3b0
-    hooks:
-    -   id: black
--   repo: https://github.com/pycqa/isort
-    rev: 5.9.3
-    hooks:
-    -   id: isort
-        name: isort (python)
-        args: ["--profile", "black", "--filter-files"]
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+  - id: trailing-whitespace
+  - id: check-added-large-files
+  - id: check-ast
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-xml
+  - id: check-yaml
+  - id: debug-statements
+  - id: end-of-file-fixer
+  - id: requirements-txt-fixer
+  - id: mixed-line-ending
+    args: ['--fix=auto']  # replace 'auto' with 'lf' to enforce Linux/Mac line endings or 'crlf' for Windows
+
+## If you want to avoid flake8 errors due to unused vars or imports:
+# - repo: https://github.com/myint/autoflake
+#   rev: v1.4
+#   hooks:
+#   - id: autoflake
+#     args: [
+#       --in-place,
+#       --remove-all-unused-imports,
+#       --remove-unused-variables,
+#     ]
+
+- repo: https://github.com/pycqa/isort
+  rev: 5.11.5
+  hooks:
+  - id: isort
+    args: [--settings-path=pyproject.toml]
+
+- repo: https://github.com/psf/black
+  rev: 22.12.0
+  hooks:
+  - id: black
+    args: [--config=pyproject.toml]
+
+## If like to embrace black styles even in the docs:
+# - repo: https://github.com/asottile/blacken-docs
+#   rev: v1.12.0
+#   hooks:
+#   - id: blacken-docs
+#     additional_dependencies: [black]
+
+- repo: https://github.com/PyCQA/flake8
+  rev: 6.0.0
+  hooks:
+  - id: flake8
+    additional_dependencies: [Flake8-pyproject]
diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 0000000..4769f1f
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,21 @@
+---
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+build:
+  os: ubuntu-20.04
+  tools:
+    python: "3.10"
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+formats: all
+python:
+  install:
+    - method: pip
+      path: .
+      extra_requirements:
+        - docs
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 9729a6f..0000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,271 +0,0 @@
-pr:
-  autoCancel: true
-  branches:
-    include:
-    - master
-
-jobs:
-- job:
-  displayName: ubuntu-20.04-conda
-  pool:
-    vmImage: 'ubuntu-20.04'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-
-  timeoutInMinutes: 8
-  steps:
-  - bash: echo "##vso[task.prependpath]$CONDA/bin"
-    displayName: Add conda to PATH
-
-  - bash: conda create --yes --quiet --name bw
-    displayName: Create Anaconda environment
-
-  - bash: |
-      source activate bw
-      conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0"
-    displayName: Install Anaconda packages
-
-  - bash: |
-      source activate bw
-      pytest --color=yes -v
-    displayName: pytest
-
-- job:
-  displayName: ubuntu-20.04-pip
-  pool:
-    vmImage: 'ubuntu-20.04'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-      Python36:
-        python.version: '3.6'
-      Python37:
-        python.version: '3.7'
-      Python38:
-        python.version: '3.8'
-
-  timeoutInMinutes: 8
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-
-  - bash: |
-      python --version
-      pip --version
-      pip install numpy pandas pytest pytest-cov pytest-azurepipelines
-      echo "which pytest"
-      which pytest
-      echo "pytest version"
-      pytest --version
-    displayName: Install packages
-
-  - bash: |
-      pytest --color=yes -v --cov-report html --cov=bw_processing
-    displayName: pytest
-
-- job:
-  displayName: macOS-1015-38-conda
-  pool:
-    vmImage: 'macOS-1015'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-
-  timeoutInMinutes: 8
-  steps:
-  - bash: echo "##vso[task.prependpath]$CONDA/bin"
-    displayName: Add conda to PATH
-
-  - bash: sudo chown -R $USER $CONDA
-    displayName: Take ownership of conda installation
-
-  - bash: conda create --yes --quiet --name bw
-    displayName: Create Anaconda environment
-
-  - bash: |
-      source activate bw
-      conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0"
-    displayName: Install Anaconda packages
-
-  - bash: |
-      source activate bw
-      pytest --color=yes -v
-    displayName: pytest
-
-- job:
-  displayName: macOS-1015-pip
-  pool:
-    vmImage: 'macOS-1015'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-      Python37:
-        python.version: '3.7'
-      Python36:
-        python.version: '3.6'
-
-  timeoutInMinutes: 8
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-
-  - bash: |
-      python --version
-      pip3 install numpy pandas pytest pytest-cov pytest-azurepipelines
-    displayName: Install packages
-
-  - bash: |
-      pytest --color=yes -v --cov-report xml --cov=bw_processing
-    displayName: pytest
-
-- job:
-  displayName: macOS-11-38-conda
-  pool:
-    vmImage: 'macOS-11'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-
-  timeoutInMinutes: 8
-  steps:
-  - bash: echo "##vso[task.prependpath]$CONDA/bin"
-    displayName: Add conda to PATH
-
-  - bash: sudo chown -R $USER $CONDA
-    displayName: Take ownership of conda installation
-
-  - bash: conda create --yes --quiet --name bw
-    displayName: Create Anaconda environment
-
-  - bash: |
-      source activate bw
-      conda install --yes --quiet -c defaults -c conda-forge --name bw python=$PYTHON_VERSION numpy pandas pytest pytest-azurepipelines">=1.0"
-    displayName: Install Anaconda packages
-
-  - bash: |
-      source activate bw
-      pytest --color=yes -v
-    displayName: pytest
-
-- job:
-  displayName: macOS-11-pip
-  pool:
-    vmImage: 'macOS-11'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-      Python37:
-        python.version: '3.7'
-      Python36:
-        python.version: '3.6'
-
-  timeoutInMinutes: 8
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-
-  - bash: |
-      python --version
-      pip3 install numpy pandas pytest pytest-cov pytest-azurepipelines
-    displayName: Install packages
-
-  - bash: |
-      pytest --color=yes -v --cov-report xml --cov=bw_processing
-    displayName: pytest
-
-- job:
-  displayName: vs2017-win2016-conda
-  pool:
-    vmImage: 'vs2017-win2016'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-
-  timeoutInMinutes: 8
-  steps:
-  - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
-    displayName: Add conda to PATH
-
-  - script: conda create --yes --quiet --name bw
-    displayName: Create Anaconda environment
-
-  - script: |
-      call activate bw
-      conda install --yes --quiet -c defaults -c conda-forge --name bw python=%PYTHON_VERSION% numpy pandas pytest pytest-azurepipelines">=1.0"
-    displayName: Install Anaconda packages
-
-  - script: |
-      call activate bw
-      pytest --color=yes -v
-    displayName: pytest
-
-- job:
-  displayName: vs2017-win2016-pip
-  pool:
-    vmImage: 'vs2017-win2016'
-  strategy:
-    matrix:
-      Python310:
-        python.version: '3.10'
-      Python39:
-        python.version: '3.9'
-      Python38:
-        python.version: '3.8'
-      Python37:
-        python.version: '3.7'
-      Python36:
-        python.version: '3.6'
-
-  timeoutInMinutes: 8
-  steps:
-  - task: UsePythonVersion@0
-    inputs:
-      versionSpec: '$(python.version)'
-
-  - script: |
-      python --version
-      pip install numpy pandas pytest pytest-cov pytest-azurepipelines
-    displayName: Install packages
-
-  - script: |
-      pytest --color=yes -v --cov-report xml --cov=bw_processing
-    displayName: pytest
diff --git a/ci/conda_upload.sh b/ci/conda_upload.sh
deleted file mode 100644
index f554ad5..0000000
--- a/ci/conda_upload.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-# Only need to change these two variables
-PKG_NAME=bw_processing-dev
-USER=cmutel
-
-mkdir ~/conda-bld
-conda config --set anaconda_upload no
-export CONDA_BLD_PATH=~/conda-bld
-export VERSION=`date +%Y.%m.%d`
-conda build . --old-build-string
-ls $CONDA_BLD_PATH/noarch/
-anaconda -t $CONDA_UPLOAD_TOKEN upload -u $USER -l nightly $CONDA_BLD_PATH/noarch/$PKG_NAME-$VERSION-py_0.tar.bz2 --force
diff --git a/ci/meta.yaml b/ci/meta.yaml
deleted file mode 100644
index 4133433..0000000
--- a/ci/meta.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-package:
-  name: bw_processing-dev
-  version: {{ environ['VERSION'] }}
-
-source:
-  path: ..
-
-build:
-  noarch: python
-  number: 0
-  script: python setup.py install --single-version-externally-managed --record record.txt
-  script_env:
-   - VERSION
-   - CONDA_BLD_PATH
-
-requirements:
-  build:
-    - python
-    - setuptools
-  run:
-    - python
-    - pandas
-    - numpy
-
-test:
-  imports:
-    - bw_processing
-
-about:
-  home: "https://github.com/brightway-lca/bw_processing"
-  license: BSD License
-  summary: 'Tools to create structured arrays in a common format'
-license_family: BSD
diff --git a/conftest.py b/conftest.py
deleted file mode 100644
index e69de29..0000000
diff --git a/matrix_utils/__init__.py b/matrix_utils/__init__.py
index 1a65bd8..67a0355 100644
--- a/matrix_utils/__init__.py
+++ b/matrix_utils/__init__.py
@@ -9,8 +9,9 @@
     "Proxy",
 )
 
+__version__ = "0.3"
+
 from .array_mapper import ArrayMapper
 from .indexers import CombinatorialIndexer, Proxy, RandomIndexer, SequentialIndexer
 from .mapped_matrix import MappedMatrix
 from .resource_group import ResourceGroup
-from .version import version as __version__
diff --git a/matrix_utils/version.py b/matrix_utils/version.py
deleted file mode 100644
index 4d8a2a8..0000000
--- a/matrix_utils/version.py
+++ /dev/null
@@ -1 +0,0 @@
-version = (0, 2, 5)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..950c845
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,109 @@
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "matrix_utils"
+authors = [
+  { name="Chris Mutel", email="cmutel@gmail.com" }
+]
+maintainers = [
+  { name="Chris Mutel", email="cmutel@gmail.com" }
+]
+description = "Tools to create matrices from data packages"
+readme = "README.md"
+dynamic = ["version"]
+# Add here all kinds of additional classifiers as defined under
+# https://pypi.org/classifiers/
+classifiers = [
+    "Intended Audience :: End Users/Desktop",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: MacOS :: MacOS X",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Topic :: Scientific/Engineering :: Mathematics",
+]
+requires-python = ">=3.9"
+dependencies = [
+    # dependencies as strings with quotes, e.g. "foo"
+    # You can add version requirements like "foo>2.0"
+    "numpy",
+    "scipy",
+    "pandas",
+    "bw_processing",
+    "stats_arrays",
+]
+
+[project.urls]
+source = "https://github.com/brightway-lca/matrix_utils"
+homepage = "https://github.com/brightway-lca/matrix_utils"
+tracker = "https://github.com/brightway-lca/matrix_utils/issues"
+
+[project.optional-dependencies]
+# Getting recursive dependencies to work is a pain, this
+# seems to work, at least for now
+testing = [
+    "matrix_utils",
+    "pytest",
+    "pytest-cov",
+    "python-coveralls"
+]
+dev = [
+    "build",
+    "pre-commit",
+    "pylint",
+    "pytest",
+    "pytest-cov",
+    "setuptools",
+]
+
+[tool.setuptools]
+license-files = ["LICENSE"]
+include-package-data = true
+packages = ["matrix_utils"]
+
+[tool.setuptools.dynamic]
+version = {attr = "matrix_utils.__version__"}
+
+[tool.pytest.ini_options]
+addopts = "--cov matrix_utils --cov-report term-missing --verbose"
+norecursedirs = [
+    "dist",
+    "build",
+    ".tox"
+]
+testpaths = ["tests/*.py"]
+
+[tool.flake8]
+# Some sane defaults for the code style checker flake8
+max_line_length = 88
+extend_ignore = ["E203", "W503"]
+# ^  Black-compatible
+#    E203 and W503 have edge cases handled by black
+exclude = [
+    ".tox",
+    "build",
+    "dist",
+    ".eggs",
+    "docs/conf.py",
+]
+
+[tool.black]
+line-length = 88
+
+[tool.isort]
+line_length = 88
+multi_line_output = 3
+include_trailing_comma = true
+force_grid_wrap = 0
+use_parentheses = true
+ensure_newline_before_comments = true
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index e4cf065..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-testpaths = tests
-python_files = tests/*.py
-norecursedirs = venv, manual, fixtures
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 9088690..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-numpy
-scipy
-pandas
-bw_processing
-stats_arrays
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 8435e1e..0000000
--- a/setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from setuptools import setup
-
-v_temp = {}
-with open("matrix_utils/version.py") as fp:
-    exec(fp.read(), v_temp)
-version = ".".join((str(x) for x in v_temp["version"]))
-
-
-setup(
-    name="matrix_utils",
-    version=version,
-    packages=["matrix_utils"],
-    author="Chris Mutel",
-    author_email="cmutel@gmail.com",
-    license="BSD 3-clause",
-    install_requires=[
-        "numpy",
-        "scipy",
-        "pandas",
-        "bw_processing",
-        "stats_arrays",
-    ],
-    url="https://github.com/brightway-lca/matrix_utils",
-    long_description_content_type="text/markdown",
-    long_description=open("README.md").read(),
-    description="Tools to create matrices from data packages",
-    classifiers=[
-        "Intended Audience :: End Users/Desktop",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Science/Research",
-        "License :: OSI Approved :: BSD License",
-        "Operating System :: MacOS :: MacOS X",
-        "Operating System :: Microsoft :: Windows",
-        "Operating System :: POSIX",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Topic :: Scientific/Engineering :: Information Analysis",
-        "Topic :: Scientific/Engineering :: Mathematics",
-    ],
-)

From 11573b49642f624a0fad2e40d2d46b1eb3fa45a6 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Tue, 24 Oct 2023 22:41:42 +0200
Subject: [PATCH 3/8] Switch from list to dict

---
 .pre-commit-config.yaml            |  7 ++-
 matrix_utils/mapped_matrix.py      | 79 ++++++++++++++++++++----------
 matrix_utils/mapped_matrix_list.py | 57 ++++++++++++++++++---
 3 files changed, 107 insertions(+), 36 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 84518b8..58e01d1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,14 +35,13 @@ repos:
     args: [--settings-path=pyproject.toml]
 
 - repo: https://github.com/psf/black
-  rev: 22.12.0
+  rev: 23.10.0
   hooks:
   - id: black
     args: [--config=pyproject.toml]
 
-## If like to embrace black styles even in the docs:
-# - repo: https://github.com/asottile/blacken-docs
-#   rev: v1.12.0
+# - repo: https://github.com/adamchainz/blacken-docs
+#   rev: 1.16.0
 #   hooks:
 #   - id: blacken-docs
 #     additional_dependencies: [black]
diff --git a/matrix_utils/mapped_matrix.py b/matrix_utils/mapped_matrix.py
index 6d396c9..e086efe 100644
--- a/matrix_utils/mapped_matrix.py
+++ b/matrix_utils/mapped_matrix.py
@@ -1,4 +1,4 @@
-from typing import Any, Callable, Optional, Sequence, List
+from typing import Any, Callable, List, Optional, Sequence
 
 import numpy as np
 from bw_processing import INDICES_DTYPE, UNCERTAINTY_DTYPE, Datapackage
@@ -29,11 +29,19 @@ def __init__(
         custom_filter: Optional[Callable] = None,
         empty_ok: bool = False,
     ):
-        """A scipy sparse matrix handler which takes in ``bw_processing`` data packages. Row and column ids are mapped to matrix indices, and a matrix is constructed.
+        """A scipy sparse matrix handler which takes in ``bw_processing`` data
+        packages. Row and column ids are mapped to matrix indices, and a matrix is
+        constructed.
 
-        `indexer_override` allows for custom indexer behaviour. Indexers should follow a simple API: they must support `.__next__()`, and have the attribute `.index`, which returns an integer.
+        `indexer_override` allows for custom indexer behaviour. Indexers should follow
+        a simple API: they must support `.__next__()`, and have the attribute `.index`,
+        which returns an integer.
 
-        `custom_filter` allows you to remove some data based on their indices. It is applied to all resource groups. If you need more fine-grained control, process the matrix after construction/iteration. `custom_filter` should take the indices array as an input, and return a Numpy boolean array with the same length as the indices array.
+        `custom_filter` allows you to remove some data based on their indices. It is
+        applied to all resource groups. If you need more fine-grained control, process
+        the matrix after construction/iteration. `custom_filter` should take the
+        indices array as an input, and return a Numpy boolean array with the same
+        length as the indices array.
 
         Parameters
         ----------
@@ -58,7 +66,8 @@ def __init__(
         diagonal : bool
             If `True`, only use the `row` indices to build a diagonal matrix.
         transpose : bool
-            Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions.
+            Transpose row and column indices. Happens before any processing, so filters
+            and mappers should refer to the transposed dimensions.
         custom_filter : Callable
             Callable for function to filter data based on `indices` values. See above.
         empty_ok : bool
@@ -91,9 +100,10 @@ def __init__(
         for package in self.packages:
             if package.dehydrated_interfaces():
                 raise EmptyInterface(
-                    "Dehydrated interfaces {} in package {} need to be rehydrated to be used in matrix calculations".format(
-                        package.dehydrated_interfaces(), package
-                    )
+                    (
+                        "Dehydrated interfaces {} in package {} need to be rehydrated "
+                        + "to be used in matrix calculations"
+                    ).format(package.dehydrated_interfaces(), package)
                 )
 
         self.groups = tuple([obj for lst in self.packages.values() for obj in lst])
@@ -158,20 +168,20 @@ def map_indices(self):
         for group in self.groups:
             group.map_indices(diagonal=self.diagonal)
 
-    def iterate_indexers(self):
+    def iterate_indexers(self) -> None:
         for obj in self.packages:
             # Avoid ``StopIteration`` errors if packaged is filtered to emptiness
             if hasattr(obj, "indexer") and self.packages[obj]:
                 next(obj.indexer)
 
-    def reset_indexers(self, rebuild=False):
+    def reset_indexers(self, rebuild=False) -> None:
         for obj in self.packages:
             if hasattr(obj, "indexer"):
                 obj.indexer.reset()
         if rebuild:
             self.rebuild_matrix()
 
-    def rebuild_matrix(self):
+    def rebuild_matrix(self) -> None:
         self.matrix.data *= 0
         for group in self.groups:
             row, col, data = group.calculate()
@@ -180,11 +190,11 @@ def rebuild_matrix(self):
             else:
                 self.matrix[row, col] = data
 
-    def __next__(self):
+    def __next__(self) -> None:
         self.iterate_indexers()
         self.rebuild_matrix()
 
-    def add_indexers(self, indexer_override: Any, seed_override: Optional[int]):
+    def add_indexers(self, indexer_override: Any, seed_override: Optional[int]) -> None:
         """Add indexers"""
         for package, resources in self.packages.items():
             if hasattr(package, "indexer"):
@@ -229,16 +239,23 @@ def input_row_col_indices(self) -> np.ndarray:
         return array
 
     def input_provenance(self) -> List[tuple]:
-        """Describe where the data in the other ``input_X`` comes from. Returns a list of ``(datapackage, group_label, (start_index, end_index))`` tuples.
+        """Describe where the data in the other ``input_X`` comes from. Returns a list
+        of ``(datapackage, group_label, (start_index, end_index))`` tuples.
 
-        Note that the ``end_index`` is exclusive, following the Python slicing convention, i.e. ``(7, 9)`` means start from the 8th element (indices start from 0), and go up to but don't include the 10th element (i.e. (7, 9) has two elements)."""
+        Note that the ``end_index`` is exclusive, following the Python slicing
+        convention, i.e. ``(7, 9)`` means start from the 8th element (indices start
+        from 0), and go up to but don't include the 10th element (i.e. (7, 9) has two
+        elements).
+        """
         position, result = 0, []
 
         for package, groups in self.packages.items():
             for group in groups:
                 num_elements = len(group.data_current)
                 # Minus one because we include the first element as element 0
-                result.append((package, group.label, (position, position + num_elements)))
+                result.append(
+                    (package, group.label, (position, position + num_elements))
+                )
                 # Plus one because start at the next value
                 position += num_elements
         return result
@@ -271,15 +288,27 @@ def _construct_distributions_array(self, given, uncertainty_type=0) -> np.ndarra
     def input_uncertainties(self, number_samples: Optional[int] = None) -> np.ndarray:
         """Return the stacked uncertainty arrays of all resources groups.
 
-        Note that this data is masked with both the custom filter (if present) and the mapping mask!
+        Note that this data is masked with both the custom filter (if present) and the
+        mapping mask!
 
-        If the resource group has a distributions array, then this is returned. Otherwise, if the data is static, a distributions array with uncertainty type 0 (undefined uncertainty) is constructed. If the data is an array, an estimate of the mean and standard deviation are given in the ``loc`` and ``scale`` columns. This estimate uses ``number_samples`` columns, or all columns if ``number_samples`` is ``None``.
+        If the resource group has a distributions array, then this is returned.
+        Otherwise, if the data is static, a distributions array with uncertainty type
+        0 (undefined uncertainty) is constructed. If the data is an array, an estimate
+        of the mean and standard deviation are given in the ``loc`` and ``scale``
+        columns. This estimate uses ``number_samples`` columns, or all columns if
+        ``number_samples`` is ``None``.
 
-        If the data comes from an interface, a distributions array with uncertainty type 0 will be created. Regardless if whether it is a vector or an array interface, the current data vector is used, and no estimate of uncertainty is made. Therefore, this data will never consume new data from an interface.
+        If the data comes from an interface, a distributions array with uncertainty
+        type 0 will be created. Regardless of whether it is a vector or an array
+        interface, the current data vector is used, and no estimate of uncertainty is
+        made. Therefore, this data will never consume new data from an interface.
 
-        Raises a ``TypeError`` if distributions arrays are present but don't follow the dtype of ``bw_processing.UNCERTAINTY_DTYPE``.
+        Raises a ``TypeError`` if distributions arrays are present but don't follow the
+        dtype of ``bw_processing.UNCERTAINTY_DTYPE``.
 
-        As both population samples (arrays) and interfaces don't fit into the traditional ``stat_arrays`` framework, we mark these with custom ``uncertainty_types``:
+        As both population samples (arrays) and interfaces don't fit into the
+        traditional ``stat_arrays`` framework, we mark these with custom
+        ``uncertainty_types``:
 
         * ``98`` for arrays
         * ``99`` for interfaces
@@ -290,11 +319,11 @@ def input_uncertainties(self, number_samples: Optional[int] = None) -> np.ndarra
         for group in self.groups:
             if group.has_distributions and self.use_distributions:
                 if group.data_original.dtype != UNCERTAINTY_DTYPE:
-                    raise TypeError(
-                        "Distributions datatype should be `bw_processing.UNCERTAINTY_DTYPE`, but was {}".format(
-                            group.data_original.dtype
-                        )
+                    message = (
+                        "Distributions datatype should be "
+                        "`bw_processing.UNCERTAINTY_DTYPE`, but was {}"
                     )
+                    raise TypeError(message.format(group.data_original.dtype))
                 arrays.append(group.apply_masks(group.data_original))
             elif group.is_array() and not group.is_interface():
                 data = group.data_original
diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_list.py
index 3c376dd..5b9f63c 100644
--- a/matrix_utils/mapped_matrix_list.py
+++ b/matrix_utils/mapped_matrix_list.py
@@ -1,14 +1,23 @@
+from collections.abc import Mapping
+from typing import Any, Callable, Optional, Sequence, Union
+
+from bw_processing import Datapackage
+
+from .array_mapper import ArrayMapper
 from .mapped_matrix import MappedMatrix
 
 
-class MappedMatrixList:
+class MappedMatrixDict(Mapping):
     """Class which handles a list of mapped matrices.
 
-    The matrices have the same dimensions, the same lookup dictionaries, and the same indexer."""
+    The matrices have the same dimensions, the same lookup dictionaries, and the same
+    indexer.
+    """
+
     def __init__(
         self,
         *,
-        packages: dict[str, Sequence[Datapackage]],
+        packages: dict[Union[tuple, str], Sequence[Datapackage]],
         matrix: str,
         row_mapper: ArrayMapper,
         col_mapper: ArrayMapper,
@@ -22,12 +31,14 @@ def __init__(
         custom_filter: Optional[Callable] = None,
         empty_ok: bool = False,
     ):
-        """A thin wrapper around a list of `MappedMatrix` objects. See its docstring for details on `custom_filter` and `indexer_override`.
+        """A thin wrapper around a list of `MappedMatrix` objects. See its docstring
+        for details on `custom_filter` and `indexer_override`.
 
         Parameters
         ----------
-        packages : list[Datapackage]
-            A list of Datapackage objects.
+        packages : dict[Union[tuple, str], Sequence[Datapackage]]
+            A dictionary with identifiers as keys and a list of `bw_processing`
+            datapackages as values.
         matrix : str
             The string identifying the matrix to be built.
         use_vectors : bool
@@ -47,10 +58,42 @@ def __init__(
         diagonal : bool
             If `True`, only use the `row` indices to build a diagonal matrix.
         transpose : bool
-            Transpose row and column indices. Happens before any processing, so filters and mappers should refer to the transposed dimensions.
+            Transpose row and column indices. Happens before any processing, so filters
+            and mappers should refer to the transposed dimensions.
         custom_filter : Callable
             Callable for function to filter data based on `indices` values. See above.
         empty_ok : bool
             If False, raise `AllArraysEmpty` if the matrix would be empty
 
         """
+        self.matrices = {
+            tpl: MappedMatrix(
+                packages=packages,
+                matrix=matrix,
+                use_vectors=use_vectors,
+                use_arrays=use_arrays,
+                use_distributions=use_distributions,
+                row_mapper=row_mapper,
+                col_mapper=col_mapper,
+                seed_override=seed_override,
+                indexer_override=None,
+                diagonal=diagonal,
+                transpose=transpose,
+                custom_filter=custom_filter,
+                empty_ok=empty_ok,
+            )
+            for tpl, packages in packages.items()
+        }
+
+    def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix:
+        return self.matrices[key]
+
+    def __iter__(self):
+        for tpl in self.matrices:
+            return self.matrices[tpl]
+
+    def __len__(self) -> int:
+        return len(self.matrices)
+
+    def __next__(self) -> None:
+        pass

From de400d47139386a631db378fd12eb7783e1d5b8f Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Tue, 24 Oct 2023 22:47:43 +0200
Subject: [PATCH 4/8] Fix filename for mapped dict instead of list

---
 matrix_utils/__init__.py                                      | 2 ++
 matrix_utils/{mapped_matrix_list.py => mapped_matrix_dict.py} | 0
 2 files changed, 2 insertions(+)
 rename matrix_utils/{mapped_matrix_list.py => mapped_matrix_dict.py} (100%)

diff --git a/matrix_utils/__init__.py b/matrix_utils/__init__.py
index 67a0355..9047cf2 100644
--- a/matrix_utils/__init__.py
+++ b/matrix_utils/__init__.py
@@ -3,6 +3,7 @@
     "ResourceGroup",
     "ArrayMapper",
     "MappedMatrix",
+    "MappedMatrixDict",
     "RandomIndexer",
     "SequentialIndexer",
     "CombinatorialIndexer",
@@ -14,4 +15,5 @@
 from .array_mapper import ArrayMapper
 from .indexers import CombinatorialIndexer, Proxy, RandomIndexer, SequentialIndexer
 from .mapped_matrix import MappedMatrix
+from .mapped_matrix_dict import MappedMatrixDict
 from .resource_group import ResourceGroup
diff --git a/matrix_utils/mapped_matrix_list.py b/matrix_utils/mapped_matrix_dict.py
similarity index 100%
rename from matrix_utils/mapped_matrix_list.py
rename to matrix_utils/mapped_matrix_dict.py

From 76b7feb1281c57e57b514c8e54dcb5f76dddbbb1 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Wed, 25 Oct 2023 07:52:40 +0200
Subject: [PATCH 5/8] Add global indexing to MMD

---
 matrix_utils/mapped_matrix_dict.py | 58 ++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py
index 5b9f63c..3b1927b 100644
--- a/matrix_utils/mapped_matrix_dict.py
+++ b/matrix_utils/mapped_matrix_dict.py
@@ -5,15 +5,10 @@
 
 from .array_mapper import ArrayMapper
 from .mapped_matrix import MappedMatrix
+from .indexers import RandomIndexer, SequentialIndexer, Indexer
 
 
 class MappedMatrixDict(Mapping):
-    """Class which handles a list of mapped matrices.
-
-    The matrices have the same dimensions, the same lookup dictionaries, and the same
-    indexer.
-    """
-
     def __init__(
         self,
         *,
@@ -30,10 +25,23 @@ def __init__(
         transpose: bool = False,
         custom_filter: Optional[Callable] = None,
         empty_ok: bool = False,
+        sequential: bool = False,
     ):
-        """A thin wrapper around a list of `MappedMatrix` objects. See its docstring
+        """A thin wrapper around a dict of `MappedMatrix` objects. See its docstring
         for details on `custom_filter` and `indexer_override`.
 
+        The matrices have the same dimensions, the same lookup dictionaries, and the
+        same indexer.
+
+        The number of possible configurations of resource groups and indexers is far
+        higher than any generic class can handle. This class supports either
+        sequential or random indexing, and the indexing is applied to **all resource
+        groups and datapackages**. If you need finer-grained control, you can access
+        and set the individual resource group `indexer` attributes.
+
+        Because the same indexer is used for all datapackages, individual `seed` values
+        are ignored. Use `seed_override` to set a global RNG seed.
+
         Parameters
         ----------
         packages : dict[Union[tuple, str], Sequence[Datapackage]]
@@ -64,10 +72,22 @@ def __init__(
             Callable for function to filter data based on `indices` values. See above.
         empty_ok : bool
             If False, raise `AllArraysEmpty` if the matrix would be empty
-
+        sequential : bool
+            Use the **same sequential indexer** across all resource groups in all datapackages
         """
+        self.matrix = matrix
+        self.row_mapper = row_mapper
+        self.col_mapper = col_mapper
+        self.use_vectors = use_vectors
+        self.use_arrays = use_arrays
+        self.seed_override = seed_override
+        self.diagonal = diagonal
+        self.transpose = transpose
+        self.custom_filter = custom_filter
+        self.empty_ok = empty_ok
+        self.global_indexer = self.get_global_indexer(indexer_override=indexer_override, sequential=sequential, seed_override=seed_override)
         self.matrices = {
-            tpl: MappedMatrix(
+            obj: MappedMatrix(
                 packages=packages,
                 matrix=matrix,
                 use_vectors=use_vectors,
@@ -76,24 +96,34 @@ def __init__(
                 row_mapper=row_mapper,
                 col_mapper=col_mapper,
                 seed_override=seed_override,
-                indexer_override=None,
+                indexer_override=self.global_indexer,
                 diagonal=diagonal,
                 transpose=transpose,
                 custom_filter=custom_filter,
                 empty_ok=empty_ok,
             )
-            for tpl, packages in packages.items()
+            for obj, packages in packages.items()
         }
 
     def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix:
         return self.matrices[key]
 
     def __iter__(self):
-        for tpl in self.matrices:
-            return self.matrices[tpl]
+        for obj in self.matrices:
+            return self.matrices[obj]
 
     def __len__(self) -> int:
         return len(self.matrices)
 
     def __next__(self) -> None:
-        pass
+        next(self.global_indexer)
+        for mm in self.matrices.values():
+            mm.rebuild_matrix()
+
+    def get_global_indexer(self, indexer_override: Any, sequential: bool, seed_override: Optional[int]) -> Indexer:
+        if indexer_override is not None:
+            return indexer_override
+        elif sequential:
+            return SequentialIndexer()
+        else:
+            return RandomIndexer(seed=seed_override)

From 7e40123499d04553b7994d6f0c1a5b4cc2ad25c8 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Wed, 25 Oct 2023 07:56:48 +0200
Subject: [PATCH 6/8] Formatting

---
 matrix_utils/mapped_matrix_dict.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py
index 3b1927b..5a752ff 100644
--- a/matrix_utils/mapped_matrix_dict.py
+++ b/matrix_utils/mapped_matrix_dict.py
@@ -85,7 +85,11 @@ def __init__(
         self.transpose = transpose
         self.custom_filter = custom_filter
         self.empty_ok = empty_ok
-        self.global_indexer = self.get_global_indexer(indexer_override=indexer_override, sequential=sequential, seed_override=seed_override)
+        self.global_indexer = self.get_global_indexer(
+            indexer_override=indexer_override,
+            sequential=sequential,
+            seed_override=seed_override,
+        )
         self.matrices = {
             obj: MappedMatrix(
                 packages=packages,
@@ -120,7 +124,9 @@ def __next__(self) -> None:
         for mm in self.matrices.values():
             mm.rebuild_matrix()
 
-    def get_global_indexer(self, indexer_override: Any, sequential: bool, seed_override: Optional[int]) -> Indexer:
+    def get_global_indexer(
+        self, indexer_override: Any, sequential: bool, seed_override: Optional[int]
+    ) -> Indexer:
         if indexer_override is not None:
             return indexer_override
         elif sequential:

From 493a40f6f9fe020b60e1327e4fd43653681fdbbc Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Wed, 25 Oct 2023 09:23:16 +0200
Subject: [PATCH 7/8] Finish `MappedMatrixDict`

---
 matrix_utils/mapped_matrix_dict.py |  22 ++-
 tests/mmd.py                       | 250 +++++++++++++++++++++++++++++
 2 files changed, 265 insertions(+), 7 deletions(-)
 create mode 100644 tests/mmd.py

diff --git a/matrix_utils/mapped_matrix_dict.py b/matrix_utils/mapped_matrix_dict.py
index 5a752ff..1f16d97 100644
--- a/matrix_utils/mapped_matrix_dict.py
+++ b/matrix_utils/mapped_matrix_dict.py
@@ -4,8 +4,8 @@
 from bw_processing import Datapackage
 
 from .array_mapper import ArrayMapper
+from .indexers import Indexer, RandomIndexer, SequentialIndexer
 from .mapped_matrix import MappedMatrix
-from .indexers import RandomIndexer, SequentialIndexer, Indexer
 
 
 class MappedMatrixDict(Mapping):
@@ -42,6 +42,10 @@ def __init__(
         Because the same indexer is used for all datapackages, individual `seed` values
         are ignored. Use `seed_override` to set a global RNG seed.
 
+        The `empty_ok` flag applies to **all matrices** - if any of the matrices have
+        a valid data value no error will be raised. In practice this flag should have
+        no effect for `MappedMatrixDict` unless the input data is very broken.
+
         Parameters
         ----------
         packages : dict[Union[tuple, str], Sequence[Datapackage]]
@@ -73,7 +77,8 @@ def __init__(
         empty_ok : bool
             If False, raise `AllArraysEmpty` if the matrix would be empty
         sequential : bool
-            Use the **same sequential indexer** across all resource groups in all datapackages
+            Use the **same sequential indexer** across all resource groups in all
+            datapackages
         """
         self.matrix = matrix
         self.row_mapper = row_mapper
@@ -90,8 +95,12 @@ def __init__(
             sequential=sequential,
             seed_override=seed_override,
         )
+
+        if not isinstance(packages, Mapping):
+            raise ValueError("`packages` must be a dictionary")
+
         self.matrices = {
-            obj: MappedMatrix(
+            key: MappedMatrix(
                 packages=packages,
                 matrix=matrix,
                 use_vectors=use_vectors,
@@ -106,15 +115,14 @@ def __init__(
                 custom_filter=custom_filter,
                 empty_ok=empty_ok,
             )
-            for obj, packages in packages.items()
+            for key, packages in packages.items()
         }
 
-    def __getitem__(self, key: Union[tuple, str]) -> MappedMatrix:
+    def __getitem__(self, key: Any) -> MappedMatrix:
         return self.matrices[key]
 
     def __iter__(self):
-        for obj in self.matrices:
-            return self.matrices[obj]
+        yield from self.matrices
 
     def __len__(self) -> int:
         return len(self.matrices)
diff --git a/tests/mmd.py b/tests/mmd.py
new file mode 100644
index 0000000..37cb5b4
--- /dev/null
+++ b/tests/mmd.py
@@ -0,0 +1,250 @@
+import bw_processing as bwp
+import numpy as np
+import pytest
+
+from matrix_utils import ArrayMapper, MappedMatrix, MappedMatrixDict
+from matrix_utils.errors import AllArraysEmpty
+from matrix_utils.indexers import RandomIndexer, SequentialIndexer
+
+
+@pytest.fixture
+def mmd_fixture():
+    first = bwp.create_datapackage()
+    first.add_persistent_vector(
+        matrix="foo",
+        name="vector",
+        indices_array=np.array([(0, 0), (2, 1)], dtype=bwp.INDICES_DTYPE),
+        data_array=np.array([1, 2.3]),
+    )
+    second = bwp.create_datapackage()
+    second.add_persistent_vector(
+        matrix="foo",
+        name="vector",
+        indices_array=np.array([(10, 10), (12, 11)], dtype=bwp.INDICES_DTYPE),
+        data_array=np.array([11, 12]),
+    )
+    third = bwp.create_datapackage()
+    third.add_persistent_vector(
+        matrix="foo",
+        name="vector",
+        indices_array=np.array([(0, 10), (2, 11)], dtype=bwp.INDICES_DTYPE),
+        data_array=np.array([11, 12]),
+    )
+    fourth = bwp.create_datapackage()
+    fourth.add_persistent_array(
+        matrix="foo",
+        name="array",
+        indices_array=np.array(
+            [(1, 0), (2, 1), (5, 1), (8, 1)], dtype=bwp.INDICES_DTYPE
+        ),
+        data_array=np.arange(8).reshape((4, 2)) + 10,
+    )
+    fifth = bwp.create_datapackage()
+    fifth.add_persistent_array(
+        matrix="foo",
+        name="array",
+        indices_array=np.array(
+            [(1, 0), (12, 11), (5, 1), (18, 11)], dtype=bwp.INDICES_DTYPE
+        ),
+        data_array=np.arange(20).reshape((4, 5)),
+    )
+    row_mapper = ArrayMapper(
+        array=np.array([0, 2, 10, 12, 0, 2, 1, 2, 5, 8, 1, 12, 5, 18])
+    )
+    col_mapper = ArrayMapper(
+        array=np.array([0, 1, 10, 11, 10, 11, 0, 1, 1, 1, 0, 11, 1, 11])
+    )
+    return first, second, third, fourth, fifth, row_mapper, col_mapper
+
+
+def test_basic_mmd_construction(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+    )
+
+    assert len(mmd) == 3
+    assert len(mmd.matrices) == 3
+    for key in mmd:
+        mm = mmd[key]
+        assert isinstance(mm, MappedMatrix)
+        assert mm.matrix.shape == (8, 4)
+
+
+def test_basic_mmd_as_dict(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+    )
+
+    assert "a" in mmd
+    assert "g" not in mmd
+    assert len(mmd) == 3
+    assert mmd.keys()
+    assert list(mmd.keys()) == ["a", "b", "c"]
+    assert mmd.values()
+    with pytest.raises(TypeError):
+        del mmd["a"]
+
+
+def test_mmd_shared_indexer(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+    )
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer is mmd.global_indexer
+
+
+def test_mmd_iterate_indexer_changes_matrix_values(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+        sequential=True,
+    )
+
+    for mm, value in zip(
+        mmd.values(), [1 + 2.3 + 11 + 12, 11 + 12 + 10 + 12 + 14 + 16, 0 + 5 + 10 + 15]
+    ):
+        assert mm.matrix.sum() == value
+
+    next(mmd)
+
+    for mm, value in zip(
+        mmd.values(), [1 + 2.3 + 11 + 12, 11 + 12 + 11 + 13 + 15 + 17, 1 + 6 + 11 + 16]
+    ):
+        assert mm.matrix.sum() == value
+
+
+def test_mmd_empty_datapackages(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+    )
+
+    with pytest.raises(AllArraysEmpty):
+        MappedMatrixDict(
+            packages={
+                "a": [],
+                "b": [],
+            },
+            matrix="foo",
+            row_mapper=rows,
+            col_mapper=cols,
+        )
+
+    MappedMatrixDict(
+        packages={
+            "a": [],
+            "b": [],
+        },
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        empty_ok=True,
+    )
+
+
+def test_mmd_random(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    indexer = RandomIndexer(seed=42)
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+        indexer_override=indexer,
+    )
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer is indexer
+            assert group.indexer.index == 191664963
+
+    next(mmd)
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer is indexer
+            assert group.indexer.index == 1662057957
+
+
+def test_mmd_custom_indexer(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    indexer = SequentialIndexer()
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+        indexer_override=indexer,
+    )
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer is indexer
+            assert group.indexer.index == 0
+
+    next(indexer)
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer is indexer
+            assert group.indexer.index == 1
+
+
+def test_mmd_sequential(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    mmd = MappedMatrixDict(
+        packages={"a": [first, second], "b": [third, fourth], "c": [fifth]},
+        matrix="foo",
+        row_mapper=rows,
+        col_mapper=cols,
+        use_arrays=True,
+        sequential=True,
+    )
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer.index == 0
+
+    next(mmd)
+
+    for mm in mmd.values():
+        for group in mm.groups:
+            assert group.indexer.index == 1
+
+
+def test_mmd_invalid_packages(mmd_fixture):
+    first, second, third, fourth, fifth, rows, cols = mmd_fixture
+    with pytest.raises(ValueError):
+        MappedMatrixDict(
+            packages=[("a", [first, second]), ("b", [third, fourth]), ("c", [fifth])],
+            matrix="foo",
+            row_mapper=rows,
+            col_mapper=cols,
+            use_arrays=True,
+        )

From 1e7ece3841fe97357467b6ac5b4f4c290ce213ba Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Wed, 25 Oct 2023 09:23:23 +0200
Subject: [PATCH 8/8] Update CHANGELOG.md

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d3acfc..1e80c4d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## [0.3.DEV]
+
+* Add `MappedMatrixDict` class for `MultiLCA`
+
 ### [0.2.5] - 2022-05-21
 
 * Add functions to reset indexers directly and on `MappedMatrix`