diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 9c7e095..0000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/.gitignore b/.gitignore
index d568cd1..8da5368 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,3 +138,5 @@ dmypy.json
# Sphinx
_build
/docs/api/
+
+.DS_Store
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
index b4809dd..990abce 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,3 @@
-include bw_processing/examples/simple.zip
+include bw_processing/examples/*.zip
+include bw_processing/examples/datapackage_1/*.parquet
recursive-exclude dev *
diff --git a/README.md b/README.md
index 36e79cb..644c803 100644
--- a/README.md
+++ b/README.md
@@ -107,10 +107,10 @@ class ExampleArrayInterface:
Serialized datapackages cannot contain executable code, both because of our chosen data formats, and for security reasons. Therefore, when loading a datapackage with an interface, that interface object needs to be reconstituted as Python code - we call this cycle dehydration and rehydration. Dehydration happens automatically when a datapackage is finalized with `finalize_serialization()`, but rehydration needs to be done manually using `rehydrate_interface()`. For example:
```python
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
from bw_processing import load_datapackage
-my_dp = load_datapackage(ZipFS("some-path.zip"))
+my_dp = load_datapackage(ZipFileSystem("some-path.zip"))
my_dp.rehydrate_interface("some-resource-name", ExampleVectorInterface())
```
@@ -119,7 +119,7 @@ You can list the dehydrated interfaces present with `.dehydrated_interfaces()`.
You can store useful information for the interface object initialization under the resource key `config`. This can be used in instantiating an interface if you pass `initialize_with_config`:
```python
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
from bw_processing import load_datapackage
import requests
import numpy as np
@@ -133,7 +133,7 @@ class MyInterface:
return np.array(requests.get(self.url).json())
-my_dp = load_datapackage(ZipFS("some-path.zip"))
+my_dp = load_datapackage(ZipFileSystem("some-path.zip"))
data_obj, resource_metadata = my_dp.get_resource("some-interface")
print(resource_metadata['config'])
>>> {"url": "example.com"}
diff --git a/bw_processing/__init__.py b/bw_processing/__init__.py
index d2b9a87..47a5ad4 100644
--- a/bw_processing/__init__.py
+++ b/bw_processing/__init__.py
@@ -30,12 +30,7 @@
from .array_creation import create_array, create_structured_array
-from .constants import (
- DEFAULT_LICENSES,
- INDICES_DTYPE,
- UNCERTAINTY_DTYPE,
- MatrixSerializeFormat,
-)
+from .constants import DEFAULT_LICENSES, INDICES_DTYPE, UNCERTAINTY_DTYPE, MatrixSerializeFormat
from .datapackage import (
Datapackage,
DatapackageBase,
diff --git a/bw_processing/datapackage.py b/bw_processing/datapackage.py
index 67028aa..471a419 100644
--- a/bw_processing/datapackage.py
+++ b/bw_processing/datapackage.py
@@ -6,9 +6,10 @@
import numpy as np
import pandas as pd
-from fs.base import FS
-from fs.errors import ResourceNotFound
-from fs.memoryfs import MemoryFS
+from fsspec import AbstractFileSystem
+
+# Use morefs' DictFS instead of fsspec's MemoryFileSystem, which is a singleton and would share state across datapackages
+from morefs.dict import DictFS
from .constants import (
DEFAULT_LICENSES,
@@ -109,15 +110,13 @@ def _get_index(self, name_or_index: Union[str, int]) -> int:
def del_resource(self, name_or_index: Union[str, int]) -> None:
"""Remove a resource, and delete its data file, if any."""
if self._modified:
- raise PotentialInconsistency(
- "Datapackage is modified; save modifications or reload"
- )
+ raise PotentialInconsistency("Datapackage is modified; save modifications or reload")
index = self._get_index(name_or_index)
try:
- self.fs.remove(self.resources[index]["path"])
- except (KeyError, ResourceNotFound):
+ self.fs.rm(self.resources[index]["path"])
+ except (KeyError, FileNotFoundError):
# Interface has no path
pass
@@ -130,26 +129,18 @@ def del_resource_group(self, name: str) -> None:
Use ``exclude_resource_group`` if you want to keep the underlying resource in the filesystem.
"""
if self._modified:
- raise PotentialInconsistency(
- "Datapackage is modified; save modifications or reload"
- )
+ raise PotentialInconsistency("Datapackage is modified; save modifications or reload")
- indices = [
- i
- for i, resource in enumerate(self.resources)
- if resource.get("group") == name
- ]
+ indices = [i for i, resource in enumerate(self.resources) if resource.get("group") == name]
for obj in (obj for i, obj in enumerate(self.resources) if i in indices):
try:
- self.fs.remove(obj["path"])
- except (KeyError, ResourceNotFound):
+ self.fs.rm(obj["path"])
+ except (KeyError, FileNotFoundError):
# Interface has no path
pass
- self.resources = [
- obj for i, obj in enumerate(self.resources) if i not in indices
- ]
+ self.resources = [obj for i, obj in enumerate(self.resources) if i not in indices]
self.data = [obj for i, obj in enumerate(self.data) if i not in indices]
def get_resource(self, name_or_index: Union[str, int]) -> (Any, dict):
@@ -188,9 +179,7 @@ def filter_by_attribute(self, key: str, value: Any) -> "FilteredDatapackage":
fdp.fs = self.fs
fdp.metadata = {k: v for k, v in self.metadata.items() if k != "resources"}
fdp.metadata["resources"] = []
- to_include = [
- i for i, resource in enumerate(self.resources) if resource.get(key) == value
- ]
+ to_include = [i for i, resource in enumerate(self.resources) if resource.get(key) == value]
fdp.data = [o for i, o in enumerate(self.data) if i in to_include]
fdp.resources = [o for i, o in enumerate(self.resources) if i in to_include]
if hasattr(self, "indexer"):
@@ -225,17 +214,13 @@ def exclude(self, filters: Dict[str, str]) -> "FilteredDatapackage":
if any(resource.get(key) != value for key, value in filters.items())
]
fdp.data = [o for i, o in enumerate(self.data) if i in indices_to_include]
- fdp.resources = [
- o for i, o in enumerate(self.resources) if i in indices_to_include
- ]
+ fdp.resources = [o for i, o in enumerate(self.resources) if i in indices_to_include]
return fdp
def _dehydrate_interfaces(self) -> None:
"""Substitute an interface resource with ``UndefinedInterface``, in preparation for finalizing data on disk."""
interface_indices = [
- index
- for index, obj in enumerate(self.resources)
- if obj["profile"] == "interface"
+ index for index, obj in enumerate(self.resources) if obj["profile"] == "interface"
]
for index in interface_indices:
@@ -301,7 +286,7 @@ def __hash__(self):
return hash((self.fs, self.metadata))
def __eq__(self, other):
- return (self.fs, self.metadata) == (other.fs, other.metadata)
+ return (id(self.fs), self.metadata) == (id(other.fs), other.metadata)
def _check_length_consistency(self) -> None:
if len(self.resources) != len(self.data):
@@ -312,7 +297,7 @@ def _check_length_consistency(self) -> None:
)
def _load(
- self, fs: FS, mmap_mode: Optional[str] = None, proxy: bool = False
+ self, fs: AbstractFileSystem, mmap_mode: Optional[str] = None, proxy: bool = False
) -> None:
self.fs = fs
self.metadata = file_reader(
@@ -338,7 +323,7 @@ def _load_all(self, mmap_mode: Optional[str] = None, proxy: bool = False) -> Non
def _create(
self,
- fs: Optional[FS],
+ fs: Optional[AbstractFileSystem],
name: Optional[str],
id_: Optional[str],
metadata: Optional[dict],
@@ -358,7 +343,7 @@ def _create(
name = clean_datapackage_name(name or uuid.uuid4().hex)
check_name(name)
- self.fs = fs or MemoryFS()
+ self.fs = fs or DictFS()
if not isinstance(matrix_serialize_format_type, MatrixSerializeFormat):
raise TypeError(
f"Matrix serialize format type ({matrix_serialize_format_type}) not recognized!"
@@ -388,7 +373,7 @@ def _create(
def finalize_serialization(self) -> None:
if self._finalized:
raise Closed("Datapackage already finalized")
- elif isinstance(self.fs, MemoryFS):
+ elif isinstance(self.fs, DictFS):
raise ValueError("In-memory file systems can't be serialized")
self._dehydrate_interfaces()
@@ -400,7 +385,8 @@ def finalize_serialization(self) -> None:
resource="datapackage.json",
mimetype="application/json",
)
- self.fs.close()
+ if hasattr(self.fs, "close"):
+ self.fs.close()
self._finalized = True
def _prepare_modifications(self) -> None:
@@ -471,9 +457,7 @@ def add_persistent_vector(
# Check lengths
- kwargs.update(
- {"matrix": matrix, "category": "vector", "nrows": len(indices_array)}
- )
+ kwargs.update({"matrix": matrix, "category": "vector", "nrows": len(indices_array)})
name = self._prepare_name(name)
indices_array = load_bytes(indices_array)
@@ -516,9 +500,7 @@ def add_persistent_vector(
if distributions_array is not None:
distributions_array = load_bytes(distributions_array)
# If no uncertainty, don't need to store it
- if (distributions_array["uncertainty_type"] < 2).sum() < len(
- distributions_array
- ):
+ if (distributions_array["uncertainty_type"] < 2).sum() < len(distributions_array):
if distributions_array.shape != indices_array.shape:
raise ShapeMismatch(
"`distributions_array` shape ({}) doesn't match `indices_array` ({}).".format(
@@ -542,9 +524,7 @@ def add_persistent_vector(
if flip_array.sum():
if flip_array.dtype != bool:
raise WrongDatatype(
- "`flip_array` dtype is {}, but must be `bool`".format(
- flip_array.dtype
- )
+ "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype)
)
elif flip_array.shape != indices_array.shape:
raise ShapeMismatch(
@@ -579,9 +559,7 @@ def add_persistent_array(
""" """
self._prepare_modifications()
- kwargs.update(
- {"matrix": matrix, "category": "array", "nrows": len(indices_array)}
- )
+ kwargs.update({"matrix": matrix, "category": "array", "nrows": len(indices_array)})
name = self._prepare_name(name)
indices_array = load_bytes(indices_array)
@@ -626,9 +604,7 @@ def add_persistent_array(
if flip_array.sum():
if flip_array.dtype != bool:
raise WrongDatatype(
- "`flip_array` dtype is {}, but must be `bool`".format(
- flip_array.dtype
- )
+ "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype)
)
elif flip_array.shape != indices_array.shape:
raise ShapeMismatch(
@@ -716,9 +692,7 @@ def _add_numpy_array_resource(
meta_type: Optional[str] = None,
**kwargs,
) -> None:
- assert (
- array.ndim <= 2
- ), f"Numpy array should be of dim 2 or less instead of {array.ndim}!"
+ assert array.ndim <= 2, f"Numpy array should be of dim 2 or less instead of {array.ndim}!"
if matrix_serialize_format_type is None:
# use instance default serialization format
@@ -741,13 +715,13 @@ def _add_numpy_array_resource(
f"Matrix serialize format type {matrix_serialize_format_type} is not recognized!"
)
- if not isinstance(self.fs, MemoryFS):
+ if not isinstance(self.fs, DictFS):
file_writer(
data=array,
fs=self.fs,
resource=filename,
mimetype="application/octet-stream",
- matrix_serialize_format_type=matrix_serialize_format_type, # NIKO
+ matrix_serialize_format_type=matrix_serialize_format_type,
meta_object=meta_object,
meta_type=meta_type,
)
@@ -793,9 +767,7 @@ def add_dynamic_vector(
) -> None:
self._prepare_modifications()
- kwargs.update(
- {"matrix": matrix, "category": "vector", "nrows": len(indices_array)}
- )
+ kwargs.update({"matrix": matrix, "category": "vector", "nrows": len(indices_array)})
name = self._prepare_name(name)
indices_array = load_bytes(indices_array)
@@ -815,9 +787,7 @@ def add_dynamic_vector(
if flip_array.sum():
if flip_array.dtype != bool:
raise WrongDatatype(
- "`flip_array` dtype is {}, but must be `bool`".format(
- flip_array.dtype
- )
+ "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype)
)
elif flip_array.shape != indices_array.shape:
raise ShapeMismatch(
@@ -865,9 +835,7 @@ def add_dynamic_array(
if isinstance(flip_array, np.ndarray) and not flip_array.sum():
flip_array = None
- kwargs.update(
- {"matrix": matrix, "category": "array", "nrows": len(indices_array)}
- )
+ kwargs.update({"matrix": matrix, "category": "array", "nrows": len(indices_array)})
name = self._prepare_name(name)
indices_array = load_bytes(indices_array)
@@ -887,9 +855,7 @@ def add_dynamic_array(
if flip_array.sum():
if flip_array.dtype != bool:
raise WrongDatatype(
- "`flip_array` dtype is {}, but must be `bool`".format(
- flip_array.dtype
- )
+ "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype)
)
elif flip_array.shape != indices_array.shape:
raise ShapeMismatch(
@@ -977,9 +943,7 @@ def add_csv_metadata(
)
self.resources.append(kwargs)
- def add_json_metadata(
- self, *, data: Any, valid_for: str, name: str = None, **kwargs
- ) -> None:
+ def add_json_metadata(self, *, data: Any, valid_for: str, name: str = None, **kwargs) -> None:
"""Add an iterable metadata object to be stored as a JSON file.
The purpose of storing metadata is to enable data exchange; therefore, this method assumes that data is written to disk.
@@ -1015,9 +979,7 @@ def add_json_metadata(
filename = check_suffix(name, ".json")
- file_writer(
- data=data, fs=self.fs, resource=filename, mimetype="application/json"
- )
+ file_writer(data=data, fs=self.fs, resource=filename, mimetype="application/json")
self.data.append(data)
kwargs.update(
@@ -1035,7 +997,7 @@ def add_json_metadata(
def create_datapackage(
- fs: Optional[FS] = None,
+ fs: Optional[AbstractFileSystem] = None,
name: Optional[str] = None,
id_: Optional[str] = None,
metadata: Optional[dict] = None,
@@ -1048,14 +1010,14 @@ def create_datapackage(
) -> Datapackage:
"""Create a new data package.
- All arguments are optional; if a `PyFilesystem2 `__ filesystem is not provided, a `MemoryFS `__ will be used.
+    All arguments are optional; if an `fsspec `__ filesystem is not provided, an in-memory `DictFS `__ will be used.
All metadata elements should follow the `datapackage specification `__.
Licenses are specified as a list in ``metadata``. The default license is the `Open Data Commons Public Domain Dedication and License v1.0 `__.
Args:
- * fs: A ``Filesystem``, optional. A new ``MemoryFS`` is used if not provided.
+    * fs: An fsspec ``AbstractFileSystem``, optional. A new in-memory ``DictFS`` is used if not provided.
* name: ``str``, optional. A new uuid is used if not provided.
* `id_`: ``str``, optional. A new uuid is used if not provided.
* metadata: ``dict``, optional. Metadata dictionary following datapackage specification; see above.
@@ -1088,7 +1050,7 @@ def create_datapackage(
def load_datapackage(
- fs_or_obj: Union[DatapackageBase, FS],
+ fs_or_obj: Union[DatapackageBase, AbstractFileSystem],
mmap_mode: Optional[str] = None,
proxy: bool = False,
) -> Datapackage:
@@ -1117,7 +1079,7 @@ def load_datapackage(
return obj
-def simple_graph(data: dict, fs: Optional[FS] = None, **metadata) -> Datapackage:
+def simple_graph(data: dict, fs: Optional[AbstractFileSystem] = None, **metadata) -> Datapackage:
"""Easy creation of simple datapackages with only persistent vectors.
Args:
@@ -1140,13 +1102,11 @@ def simple_graph(data: dict, fs: Optional[FS] = None, **metadata) -> Datapackage
the datapackage.
"""
- dp = create_datapackage(fs=fs, **metadata)
+ dp = create_datapackage(fs=fs or DictFS(), **metadata)
for key, value in data.items():
indices_array = np.array([row[:2] for row in value], dtype=INDICES_DTYPE)
data_array = np.array([row[2] for row in value])
- flip_array = np.array(
- [row[3] if len(row) > 3 else False for row in value], dtype=bool
- )
+ flip_array = np.array([row[3] if len(row) > 3 else False for row in value], dtype=bool)
dp.add_persistent_vector(
matrix=key,
data_array=data_array,
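
The `simple_graph` helper shown at the end of this file unpacks plain tuples into indices, data, and flip arrays. A hedged usage sketch under the new fsspec defaults (matrix names and values are illustrative; with no `fs` argument the data stays in an in-memory `DictFS`):

```python
from bw_processing import simple_graph

# Each row is (row_id, col_id, amount) with an optional fourth flip flag,
# matching what the loop above slices into indices_array, data_array, flip_array.
dp = simple_graph(
    {
        "technosphere_matrix": [
            (0, 0, 1.0),
            (1, 0, 0.23, True),  # flip the sign when the matrix is built
        ],
        "biosphere_matrix": [
            (100, 0, 5.5),
        ],
    },
    name="simple-graph-example",  # forwarded to create_datapackage via **metadata
)
```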
diff --git a/bw_processing/examples/parquet_files.py b/bw_processing/examples/parquet_files.py
index 45c8ea0..edad78b 100644
--- a/bw_processing/examples/parquet_files.py
+++ b/bw_processing/examples/parquet_files.py
@@ -5,10 +5,10 @@
from pathlib import Path
import numpy as np
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
import bw_processing as bwp
+from bw_processing.io_helpers import generic_directory_filesystem
if __name__ == "__main__":
print("This is a basic example on how to use parquet files.")
@@ -30,13 +30,13 @@
-    # VERSION OSFS
-    # Directory must exist for OSFS otherwise use OSFS(dirpath, create=True)!
-    # Every created object will be saved in that same directory
-    dp_dir = OSFS(str(dirpath / "datapackage_1"), create=True)
+    # VERSION directory filesystem
+    # generic_directory_filesystem creates the directory itself, as long as its parent exists
+    # Every created object will be saved in that same directory
+    dp_dir = generic_directory_filesystem(dirpath=dirpath / "datapackage_1")
dp = bwp.create_datapackage(
fs=dp_dir, matrix_serialize_format_type=bwp.MatrixSerializeFormat.NUMPY
)
else:
# VERSION ZIP
- dp_zip_file = ZipFS(str(dirpath / "datapackage_2.zip"), write=True)
+ dp_zip_file = ZipFileSystem(str(dirpath / "datapackage_2.zip"), mode="w")
dp = bwp.create_datapackage(
fs=dp_zip_file, matrix_serialize_format_type=bwp.MatrixSerializeFormat.NUMPY
) # bwp.create_datapackage(fs=dp_zip_file, serialize_type=SerializeENum.parquet)
@@ -98,10 +98,10 @@
# OSFS must be open! (and it was closed with finalize_serialization())
if USE_OSFS:
- dp_dir = OSFS(str(dirpath / "datapackage_1"))
+ dp_dir = generic_directory_filesystem(dirpath=dirpath / "datapackage_1")
dp2 = bwp.load_datapackage(fs_or_obj=dp_dir)
else:
- dp_zip_file = ZipFS(str(dirpath / "datapackage_2.zip"))
+ dp_zip_file = ZipFileSystem(dirpath / "datapackage_2.zip")
dp2 = bwp.load_datapackage(fs_or_obj=dp_zip_file)
print("Done!")
diff --git a/bw_processing/filesystem.py b/bw_processing/filesystem.py
index 28baf12..a8bdb4a 100644
--- a/bw_processing/filesystem.py
+++ b/bw_processing/filesystem.py
@@ -16,9 +16,7 @@ def clean_datapackage_name(name: str) -> str:
return re.sub(MULTI_RE, "_", re.sub(SUBSTITUTION_RE, "_", name).strip("_")).strip()
-def safe_filename(
- string: Union[str, bytes], add_hash: bool = True, full: bool = False
-) -> str:
+def safe_filename(string: Union[str, bytes], add_hash: bool = True, full: bool = False) -> str:
"""Convert arbitrary strings to make them safe for filenames. Substitutes strange characters, and uses unicode normalization.
if `add_hash`, appends hash of `string` to avoid name collisions.
diff --git a/bw_processing/indexing.py b/bw_processing/indexing.py
index 6e3f343..6cd19d1 100644
--- a/bw_processing/indexing.py
+++ b/bw_processing/indexing.py
@@ -3,14 +3,14 @@
import numpy as np
import pandas as pd
-from fs.base import FS
+from fsspec import AbstractFileSystem
from .datapackage import Datapackage, load_datapackage
from .errors import NonUnique
def _get_csv_data(
- datapackage: Union[Datapackage, FS], metadata_name: str
+ datapackage: Union[Datapackage, AbstractFileSystem], metadata_name: str
) -> (Datapackage, pd.DataFrame, dict, List[np.ndarray], List[int]):
"""Utility function to get CSV data from datapackage.
@@ -40,14 +40,15 @@ def _get_csv_data(
if not isinstance(df, pd.DataFrame):
raise ValueError("Given metadata is not a CSV file")
resources = [
- dp.get_resource(key + ".indices")[0][label]
- for key, label in metadata["valid_for"]
+ dp.get_resource(key + ".indices")[0][label] for key, label in metadata["valid_for"]
]
indices = [dp._get_index(key + ".indices") for key, _ in metadata["valid_for"]]
return dp, df, metadata, resources, indices
-def reset_index(datapackage: Union[Datapackage, FS], metadata_name: str) -> Datapackage:
+def reset_index(
+ datapackage: Union[Datapackage, AbstractFileSystem], metadata_name: str
+) -> Datapackage:
"""Reset the numerical indices in ``datapackage`` to sequential integers starting from zero.
Updates the datapackage in place.
@@ -79,7 +80,7 @@ def reset_index(datapackage: Union[Datapackage, FS], metadata_name: str) -> Data
def reindex(
- datapackage: Union[Datapackage, FS],
+ datapackage: Union[Datapackage, AbstractFileSystem],
metadata_name: str,
data_iterable: Iterable,
fields: List[str] = None,
diff --git a/bw_processing/io_helpers.py b/bw_processing/io_helpers.py
index 1e7b5f7..bdf5f7c 100644
--- a/bw_processing/io_helpers.py
+++ b/bw_processing/io_helpers.py
@@ -5,9 +5,11 @@
import numpy as np
import pandas as pd
-from fs.base import FS
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec import AbstractFileSystem
+from fsspec.implementations.dirfs import DirFileSystem
+from fsspec.implementations.local import LocalFileSystem
+from fsspec.implementations.zip import ZipFileSystem
+from morefs.dict import DictFS
from .constants import MatrixSerializeFormat
from .errors import InvalidMimetype
@@ -23,29 +25,27 @@
PARQUET = False
-def generic_directory_filesystem(*, dirpath: Path) -> OSFS:
+def generic_directory_filesystem(*, dirpath: Path) -> DirFileSystem:
assert isinstance(dirpath, Path), "`dirpath` must be a `pathlib.Path` instance"
if not dirpath.is_dir():
if not dirpath.parent.is_dir():
- raise ValueError(
- "Parent directory `{}` doesn't exist".format(dirpath.parent)
- )
+ raise ValueError("Parent directory `{}` doesn't exist".format(dirpath.parent))
dirpath.mkdir()
- return OSFS(dirpath)
+ return DirFileSystem(path=dirpath, fs=LocalFileSystem())
def generic_zipfile_filesystem(
*, dirpath: Path, filename: str, write: bool = True
-) -> ZipFS:
+) -> ZipFileSystem:
assert isinstance(dirpath, Path), "`dirpath` must be a `pathlib.Path` instance"
if not dirpath.is_dir():
raise ValueError("Destination directory `{}` doesn't exist".format(dirpath))
- return ZipFS(dirpath / filename, write=write)
+ return ZipFileSystem(dirpath / filename, mode="w" if write else "r")
def file_reader(
*,
- fs: FS,
+ fs: AbstractFileSystem,
resource: str,
mimetype: str,
proxy: bool = False,
@@ -116,7 +116,7 @@ def file_reader(
def file_writer(
*,
data: Any,
- fs: FS,
+ fs: AbstractFileSystem,
resource: str,
mimetype: str,
matrix_serialize_format_type: MatrixSerializeFormat = MatrixSerializeFormat.NUMPY, # NIKO
@@ -129,7 +129,8 @@ def file_writer(
if mimetype == "application/octet-stream":
if matrix_serialize_format_type == MatrixSerializeFormat.NUMPY:
- return np.save(fs.open(resource, mode="wb"), data, allow_pickle=False)
+ with fs.open(resource, mode="wb") as fo:
+ return np.save(fo, data, allow_pickle=False)
elif matrix_serialize_format_type == MatrixSerializeFormat.PARQUET:
if not PARQUET:
raise ImportError("`pyarrow` library not installed")
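
A short sketch of the two rewritten helpers; the directory name is illustrative. `generic_directory_filesystem` now returns a `DirFileSystem` rooted at `dirpath` (creating it when only the parent exists), and `generic_zipfile_filesystem` returns a `ZipFileSystem` opened in write or read mode.

```python
from pathlib import Path

from bw_processing.io_helpers import (
    generic_directory_filesystem,
    generic_zipfile_filesystem,
)

base = Path("my_data")
base.mkdir(exist_ok=True)

# DirFileSystem rooted at my_data/dp-dir; the directory is created because its parent exists
dir_fs = generic_directory_filesystem(dirpath=base / "dp-dir")

# ZipFileSystem opened for writing at my_data/dp.zip
zip_fs = generic_zipfile_filesystem(dirpath=base, filename="dp.zip", write=True)
```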
diff --git a/bw_processing/io_parquet_helpers.py b/bw_processing/io_parquet_helpers.py
index 8f7638a..365c2a7 100644
--- a/bw_processing/io_parquet_helpers.py
+++ b/bw_processing/io_parquet_helpers.py
@@ -7,15 +7,14 @@
import os
# for annotation
-from io import BufferedWriter, RawIOBase
+from io import BufferedWriter, IOBase, RawIOBase
import numpy
import numpy as np
import pyarrow.parquet as pq
-from fs.iotools import RawWrapper
-from bw_processing.errors import WrongDatatype
-from bw_processing.io_pyarrow_helpers import (
+from .errors import WrongDatatype
+from .io_pyarrow_helpers import (
numpy_distributions_vector_to_pyarrow_distributions_vector_table,
numpy_generic_matrix_to_pyarrow_generic_matrix_table,
numpy_generic_vector_to_pyarrow_generic_vector_table,
@@ -49,9 +48,7 @@ def write_ndarray_to_parquet_file(
elif meta_type == "generic":
table = numpy_generic_vector_to_pyarrow_generic_vector_table(arr=arr)
elif meta_type == "distributions":
- table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(
- arr=arr
- )
+ table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(arr=arr)
else:
raise NotImplementedError(f"Vector of type {meta_type} is not recognized!")
else:
@@ -61,12 +58,12 @@ def write_ndarray_to_parquet_file(
pq.write_table(table, file)
-def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray:
+def read_parquet_file_to_ndarray(file: RawIOBase) -> numpy.ndarray:
"""
Read an `ndarray` from a `parquet` file.
Args:
- file (fs.iotools.RawWrapper): File to read from.
+ file (io.RawIOBase or fsspec file object): File to read from.
Raises:
`WrongDatatype` if the correct metadata is not found in the `parquet` file.
@@ -81,9 +78,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray:
binary_meta_object = table.schema.metadata[b"object"]
binary_meta_type = table.schema.metadata[b"type"]
except KeyError:
- raise WrongDatatype(
- f"Parquet file {file} does not contain the right metadata format!"
- )
+ raise WrongDatatype(f"Parquet file {file} does not contain the right metadata format!")
arr = None
if binary_meta_object == b"matrix":
@@ -94,9 +89,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray:
elif binary_meta_type == b"generic":
arr = pyarrow_generic_vector_table_to_numpy_generic_vector(table=table)
elif binary_meta_type == b"distributions":
- arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector(
- table=table
- )
+ arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector(table=table)
else:
raise NotImplementedError("Vector type not recognized")
else:
@@ -105,9 +98,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray:
return arr
-def save_arr_to_parquet(
- file: RawIOBase, arr: np.ndarray, meta_object: str, meta_type: str
-) -> None:
+def save_arr_to_parquet(file: RawIOBase, arr: np.ndarray, meta_object: str, meta_type: str) -> None:
"""
Serialize a `numpy` `ndarray` to a `parquet` `file`.
@@ -127,17 +118,15 @@ def save_arr_to_parquet(
with file_ctx as fid:
arr = np.asanyarray(arr)
- write_ndarray_to_parquet_file(
- fid, arr, meta_object=meta_object, meta_type=meta_type
- )
+ write_ndarray_to_parquet_file(fid, arr, meta_object=meta_object, meta_type=meta_type)
-def load_ndarray_from_parquet(file: RawWrapper) -> np.ndarray:
+def load_ndarray_from_parquet(file: RawIOBase) -> np.ndarray:
"""
Deserialize a `numpy` `ndarray` from a `parquet` `file`.
Parameters
- file (fs.iotools.RawWrapper): File to read from.
+ file (io.RawIOBase or fsspec file object): File to read from.
Returns
The corresponding `numpy` `ndarray`.
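
A round-trip sketch mirroring the updated tests later in this patch: a plain `Path` is passed straight through instead of the old `with file as fp` pattern. Requires `pyarrow`; the array values are illustrative.

```python
from pathlib import Path

import numpy as np

from bw_processing.io_parquet_helpers import (
    load_ndarray_from_parquet,
    save_arr_to_parquet,
)

arr = np.array([1.0, 2.5, 4.2])
target = Path("data_vector.parquet")

# "vector"/"generic" are the metadata tags written into the parquet schema
save_arr_to_parquet(file=target, arr=arr, meta_object="vector", meta_type="generic")
loaded = load_ndarray_from_parquet(target)
assert np.array_equal(arr, loaded)
```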
diff --git a/bw_processing/io_pyarrow_helpers.py b/bw_processing/io_pyarrow_helpers.py
index 4684dd2..76d7b4b 100644
--- a/bw_processing/io_pyarrow_helpers.py
+++ b/bw_processing/io_pyarrow_helpers.py
@@ -160,9 +160,7 @@ def pyarrow_indices_vector_table_to_numpy_indices_vector(table: pa.Table) -> np.
PA_UNCERTAINTY_FIELDS, metadata={"object": "vector", "type": "distributions"}
)
-UNCERTAINTY_FIELDS_NAMES = [
- UNCERTAINTY_DTYPE[i][0] for i in range(NBR_UNCERTAINTY_FIELDS)
-]
+UNCERTAINTY_FIELDS_NAMES = [UNCERTAINTY_DTYPE[i][0] for i in range(NBR_UNCERTAINTY_FIELDS)]
def numpy_distributions_vector_to_pyarrow_distributions_vector_table(
@@ -224,9 +222,7 @@ def pyarrow_distributions_vector_table_to_numpy_distributions_vector(
distributions_array = []
for el in zip(*distributions_arrays_list):
- distributions_array.append(
- tuple(el[i].as_py() for i in range(NBR_UNCERTAINTY_FIELDS))
- )
+ distributions_array.append(tuple(el[i].as_py() for i in range(NBR_UNCERTAINTY_FIELDS)))
arr = np.array(distributions_array, dtype=UNCERTAINTY_DTYPE)
return arr
@@ -254,10 +250,7 @@ def numpy_generic_matrix_to_pyarrow_generic_matrix_table(arr: np.ndarray) -> pa.
arr_dtype = arr.dtype
metadata = {"object": "matrix", "type": "generic"}
nbr_rows, nbr_cols = arr.shape
- arrays = [
- pa.array(arr[:, j], type=pa.from_numpy_dtype(arr_dtype))
- for j in range(nbr_cols)
- ]
+ arrays = [pa.array(arr[:, j], type=pa.from_numpy_dtype(arr_dtype)) for j in range(nbr_cols)]
table = pa.Table.from_arrays(
arrays=arrays,
names=[str(j) for j in range(nbr_cols)], # give names to each column
diff --git a/bw_processing/merging.py b/bw_processing/merging.py
index aac7110..137921d 100644
--- a/bw_processing/merging.py
+++ b/bw_processing/merging.py
@@ -4,8 +4,8 @@
import numpy as np
import pandas as pd
-from fs.base import FS
-from fs.memoryfs import MemoryFS
+from fsspec import AbstractFileSystem
+from morefs.dict import DictFS
from .datapackage import DatapackageBase, create_datapackage
from .errors import LengthMismatch
@@ -63,8 +63,8 @@ def add_resource_suffix(metadata: dict, suffix: str) -> dict:
return metadata
-def write_data_to_fs(resource: dict, data: Any, fs: FS) -> None:
- if isinstance(fs, MemoryFS):
+def write_data_to_fs(resource: dict, data: Any, fs: AbstractFileSystem) -> None:
+ if isinstance(fs, DictFS):
return
file_writer(
data=data,
@@ -80,7 +80,7 @@ def merge_datapackages_with_mask(
second_dp: DatapackageBase,
second_resource_group_label: str,
mask_array: np.ndarray,
- output_fs: Optional[FS] = None,
+ output_fs: Optional[AbstractFileSystem] = None,
metadata: Optional[dict] = None,
) -> DatapackageBase:
"""Merge two resources using a Numpy boolean mask. Returns elements from ``first_dp`` where the mask is ``True``, otherwise ``second_dp``.
@@ -108,35 +108,27 @@ def merge_datapackages_with_mask(
"""
if first_resource_group_label == second_resource_group_label:
add_suffix = True
- warnings.warn(
- "Adding suffixes '_true' and '_false' as resource group labels are identical"
- )
+ warnings.warn("Adding suffixes '_true' and '_false' as resource group labels are identical")
else:
add_suffix = False
try:
first_dp = first_dp.groups[first_resource_group_label]
except KeyError:
- raise ValueError(
- f"Resource group not {first_resource_group_label} not in ``first_dp``"
- )
+ raise ValueError(f"Resource group not {first_resource_group_label} not in ``first_dp``")
try:
second_dp = second_dp.groups[second_resource_group_label]
except KeyError:
- raise ValueError(
- f"Resource group not {second_resource_group_label} not in ``second_dp``"
- )
+ raise ValueError(f"Resource group not {second_resource_group_label} not in ``second_dp``")
DIMENSION_ERROR = """Dimension mismatch. All array lengths must be the same, but got:\n\tFirst DP: {}\n\tSecond DP: {}\n\t Mask array: {}"""
-    if not (len(first_dp.data[0]) == len(first_dp.data[0]) == len(mask_array)):
+    if not (len(first_dp.data[0]) == len(second_dp.data[0]) == len(mask_array)):
         raise LengthMismatch(
-            DIMENSION_ERROR.format(
-                len(first_dp.data[0]), len(first_dp.data[0]), len(mask_array)
-            )
+            DIMENSION_ERROR.format(len(first_dp.data[0]), len(second_dp.data[0]), len(mask_array))
)
if output_fs is None:
- output_fs = MemoryFS()
+ output_fs = DictFS()
if any(resource["profile"] == "interface" for resource in first_dp.resources):
raise ValueError("Unsupported interface found in ``first_dp``")
diff --git a/pyproject.toml b/pyproject.toml
index c1247af..a2ce894 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,8 @@ requires-python = ">=3.9"
dependencies = [
# dependencies as strings with quotes, e.g. "foo"
# You can add version requirements
- "fs",
+ "fsspec",
+ "morefs",
"numpy",
"pandas",
@@ -64,7 +65,7 @@ dev = [
[tool.setuptools]
license-files = ["LICENSE"]
include-package-data = true
-packages = ["bw_processing"]
+packages = ["bw_processing", "bw_processing.examples"]
[tool.setuptools.dynamic]
version = {attr = "bw_processing.__version__"}
diff --git a/tests/calculation_package.py b/tests/calculation_package.py
deleted file mode 100644
index a35f119..0000000
--- a/tests/calculation_package.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# from bw_processing import (
-# as_unique_attributes,
-# chunked,
-# COMMON_DTYPE,
-# create_package,
-# create_datapackage_metadata,
-# create_structured_array,
-# create_processed_datapackage,
-# format_calculation_resource,
-# greedy_set_cover,
-# NAME_RE,
-# )
-# from copy import deepcopy
-# import pytest
-# import tempfile
-
-
-# def test_format_calculation_resource():
-# given = {
-# "path": "basic_array",
-# "name": "test-name",
-# "matrix": "technosphere",
-# "description": "some words",
-# "foo": "bar",
-# }
-# expected = {
-# "format": "npy",
-# "mediatype": "application/octet-stream",
-# "path": "basic_array.npy",
-# "name": "test-name",
-# "profile": "data-resource",
-# "matrix": "technosphere",
-# "description": "some words",
-# "foo": "bar",
-# }
-# assert format_calculation_resource(given) == expected
-
-
-# def test_calculation_package():
-# resources = [
-# {
-# "name": "first-resource",
-# "path": "some-array.npy",
-# "matrix": "technosphere",
-# "data": [
-# tuple(list(range(11)) + [False, False]),
-# tuple(list(range(12, 23)) + [True, True]),
-# ],
-# }
-# ]
-# with tempfile.TemporaryDirectory() as td:
-# fp = create_package(
-# name="test-package", resources=resources, path=td, replace=False
-# )
-# # Test data in fp
-
-
-# def test_calculation_package_directory():
-# resources = [
-# {
-# "name": "first-resource",
-# "path": "some-array.npy",
-# "matrix": "technosphere",
-# "data": [
-# tuple(list(range(11)) + [False, False]),
-# tuple(list(range(12, 23)) + [True, True]),
-# ],
-# }
-# ]
-# with tempfile.TemporaryDirectory() as td:
-# fp = create_package(
-# name="test-package", resources=resources, path=td, compress=False
-# )
-# # Test data in fp
-
-
-# def test_calculation_package_in_memory():
-# resources = [
-# {
-# "name": "first-resource",
-# "path": "some-array.npy",
-# "matrix": "technosphere",
-# "data": [
-# tuple(list(range(11)) + [False, False]),
-# tuple(list(range(12, 23)) + [True, True]),
-# ],
-# }
-# ]
-# fp = create_package(name="test-package", resources=resources)
-# # Test data in fp
-
-
-# def test_calculation_package_replace():
-# resources = [
-# {
-# "name": "first-resource",
-# "path": "some-array.npy",
-# "matrix": "technosphere",
-# "data": [
-# tuple(list(range(11)) + [False, False]),
-# tuple(list(range(12, 23)) + [True, True]),
-# ],
-# }
-# ]
-# with tempfile.TemporaryDirectory() as td:
-# create_package(
-# name="test-package", resources=deepcopy(resources), path=td
-# )
-# create_package(
-# name="test-package", resources=deepcopy(resources), path=td, replace=True
-# )
-
-
-# def test_calculation_package_replace_error():
-# resources = [
-# {
-# "name": "first-resource",
-# "path": "some-array.npy",
-# "matrix": "technosphere",
-# "data": [
-# tuple(list(range(11)) + [False, False]),
-# tuple(list(range(12, 23)) + [True, True]),
-# ],
-# }
-# ]
-# with tempfile.TemporaryDirectory() as td:
-# create_package(
-# name="test-package", resources=deepcopy(resources), path=td
-# )
-# with pytest.raises(ValueError):
-# create_package(
-# name="test-package",
-# resources=deepcopy(resources),
-# path=td,
-# replace=False,
-# )
-
-
-# def test_calculation_package_name_conflict():
-# pass
-
-
-# def test_calculation_package_specify_id():
-# pass
-
-
-# def test_calculation_package_metadata():
-# pass
diff --git a/tests/conftest.py b/tests/conftest.py
index 1d4b6e5..c27004f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,17 +2,11 @@
from pathlib import Path
import pytest
-from fixtures.basic_arrays import ( # noqa: F401
- distributions_vector,
- flip_vector,
- indices_vector,
-)
+from fixtures.basic_arrays import distributions_vector, flip_vector, indices_vector # noqa: F401
def pytest_addoption(parser):
- parser.addoption(
- "--runslow", action="store_true", default=False, help="run slow tests"
- )
+ parser.addoption("--runslow", action="store_true", default=False, help="run slow tests")
def pytest_configure(config):
diff --git a/tests/datapackage.py b/tests/datapackage.py
index 68e60e7..89deace 100644
--- a/tests/datapackage.py
+++ b/tests/datapackage.py
@@ -4,18 +4,13 @@
import numpy as np
import pandas as pd
import pytest
-from fs.memoryfs import MemoryFS
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
+from morefs.dict import DictFS
from bw_processing import create_datapackage, load_datapackage, simple_graph
from bw_processing.constants import INDICES_DTYPE, UNCERTAINTY_DTYPE
-from bw_processing.errors import (
- NonUnique,
- PotentialInconsistency,
- ShapeMismatch,
- WrongDatatype,
-)
+from bw_processing.errors import NonUnique, PotentialInconsistency, ShapeMismatch, WrongDatatype
+from bw_processing.io_helpers import generic_directory_filesystem
dirpath = Path(__file__).parent.resolve() / "fixtures"
@@ -84,7 +79,7 @@ def copy_fixture(fixture_name, dest):
def test_group_ordering_consistent():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
assert list(dp.groups) == [
"sa-data-vector-from-dict",
"sa-data-vector",
@@ -111,7 +106,7 @@ def test_add_resource_with_same_name():
def test_save_modifications(tmp_path):
copy_fixture("tfd", tmp_path)
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
assert dp.resources[1]["name"] == "sa-data-vector-from-dict.data"
assert np.allclose(dp.data[1], [3.3, 8.3])
@@ -123,13 +118,13 @@ def test_save_modifications(tmp_path):
assert np.allclose(dp.data[1], 42)
assert not dp._modified
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
assert np.allclose(dp.data[1], 42)
def test_del_resource_filesystem(tmp_path):
copy_fixture("tfd", tmp_path)
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
reference_length = len(dp)
assert "sa-vector-interface.indices.npy" in [o.name for o in tmp_path.iterdir()]
dp.del_resource("sa-vector-interface.indices")
@@ -143,7 +138,7 @@ def test_del_resource_filesystem(tmp_path):
def test_del_resource_in_memory():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
reference_length = len(dp)
assert "sa-vector-interface.indices" in [o["name"] for o in dp.resources]
@@ -157,7 +152,7 @@ def test_del_resource_in_memory():
def test_del_resource_error_modifications(tmp_path):
copy_fixture("tfd", tmp_path)
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
dp._modified = [1]
with pytest.raises(PotentialInconsistency):
dp.del_resource(1)
@@ -165,7 +160,7 @@ def test_del_resource_error_modifications(tmp_path):
def test_del_resource_group_filesystem(tmp_path):
copy_fixture("tfd", tmp_path)
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
reference_length = len(dp)
assert "sa-data-vector.indices.npy" in [o.name for o in tmp_path.iterdir()]
@@ -180,7 +175,7 @@ def test_del_resource_group_filesystem(tmp_path):
def test_del_resource_group_in_memory():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
reference_length = len(dp)
assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
@@ -194,7 +189,7 @@ def test_del_resource_group_in_memory():
def test_del_resource_group_error_modifications(tmp_path):
copy_fixture("tfd", tmp_path)
- dp = load_datapackage(OSFS(str(tmp_path)))
+ dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path))
dp._modified = [1]
with pytest.raises(PotentialInconsistency):
dp.del_resource_group("sa-vector-interface")
@@ -203,7 +198,7 @@ def test_del_resource_group_error_modifications(tmp_path):
def test_exclude_basic():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
reference_length = len(dp)
assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
@@ -220,7 +215,7 @@ def test_exclude_basic():
def test_exclude_no_match():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
reference_length = len(dp)
assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
@@ -234,7 +229,7 @@ def test_exclude_no_match():
def test_exclude_multiple_filters():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
reference_length = len(dp)
assert "sa-array-interface.indices" in [o["name"] for o in dp.resources]
@@ -251,7 +246,7 @@ def test_exclude_multiple_filters():
def test_exclude_multiple_matrix():
dp = create_datapackage()
add_data(dp)
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
assert "sa-data-vector.indices" in [o["name"] for o in dp.resources]
ndp = dp.exclude({"matrix": "sa_matrix"})
diff --git a/tests/filesystem.py b/tests/filesystem.py
index 84f0f31..ef282bc 100644
--- a/tests/filesystem.py
+++ b/tests/filesystem.py
@@ -24,9 +24,7 @@ def test_safe_filename():
safe_filename("Wave your hand yeah 🙋!", full=True)
== "Wave-your-hand-yeah.f7952a3d4b0534cdac0e0cbbf66aac73"
)
- assert (
- safe_filename("Wave your hand yeah 🙋!", add_hash=False) == "Wave-your-hand-yeah"
- )
+ assert safe_filename("Wave your hand yeah 🙋!", add_hash=False) == "Wave-your-hand-yeah"
def test_clean_datapackage_name():
diff --git a/tests/filtered_datapackage.py b/tests/filtered_datapackage.py
index adc584e..0970552 100644
--- a/tests/filtered_datapackage.py
+++ b/tests/filtered_datapackage.py
@@ -2,21 +2,16 @@
import numpy as np
import pytest
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
-from bw_processing import (
- INDICES_DTYPE,
- UNCERTAINTY_DTYPE,
- create_datapackage,
- load_datapackage,
-)
+from bw_processing import INDICES_DTYPE, UNCERTAINTY_DTYPE, create_datapackage, load_datapackage
+from bw_processing.io_helpers import generic_directory_filesystem
dirpath = Path(__file__).parent.resolve() / "fixtures"
def test_metadata_is_the_same_object():
- dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"))
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
for k, v in fdp.metadata.items():
@@ -28,21 +23,21 @@ def test_metadata_is_the_same_object():
def test_fs_is_the_same_object():
- dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"))
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
assert fdp.fs is dp.fs
def test_indexer_is_the_same_object():
- dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"))
dp.indexer = lambda x: False
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
assert fdp.indexer is dp.indexer
def test_data_is_the_same_object_when_not_proxy():
- dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")))
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"))
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
arr1, _ = dp.get_resource("sa-data-array.data")
@@ -54,9 +49,7 @@ def test_data_is_the_same_object_when_not_proxy():
def test_data_is_readable_multiple_times_when_proxy_zipfs():
- dp = load_datapackage(
- fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")), proxy=True
- )
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"), proxy=True)
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
arr1, _ = dp.get_resource("sa-data-array.data")
@@ -69,7 +62,9 @@ def test_data_is_readable_multiple_times_when_proxy_zipfs():
def test_data_is_readable_multiple_times_when_proxy_directory():
- dp = load_datapackage(fs_or_obj=OSFS(str(dirpath / "tfd")), proxy=True)
+ dp = load_datapackage(
+ fs_or_obj=generic_directory_filesystem(dirpath=dirpath / "tfd"), proxy=True
+ )
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
arr1, _ = dp.get_resource("sa-data-array.data")
@@ -82,9 +77,7 @@ def test_data_is_readable_multiple_times_when_proxy_directory():
def test_fdp_can_load_proxy_first():
- dp = load_datapackage(
- fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")), proxy=True
- )
+ dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"), proxy=True)
fdp = dp.filter_by_attribute("matrix", "sa_matrix")
arr2, _ = fdp.get_resource("sa-data-array.data")
arr1, _ = dp.get_resource("sa-data-array.data")
@@ -97,9 +90,7 @@ def test_fdp_can_load_proxy_first():
@pytest.fixture
def erg():
- dp = create_datapackage(
- fs=None, name="frg-fixture", id_="something something danger zone"
- )
+ dp = create_datapackage(fs=None, name="frg-fixture", id_="something something danger zone")
data_array = np.arange(3)
indices_array = np.array([(0, 1), (2, 3), (4, 5)], dtype=INDICES_DTYPE)
diff --git a/tests/fixtures/test-fixture.zip b/tests/fixtures/test-fixture.zip
index 30e5651..7d4c1aa 100644
Binary files a/tests/fixtures/test-fixture.zip and b/tests/fixtures/test-fixture.zip differ
diff --git a/tests/fixtures/tfd/datapackage.json b/tests/fixtures/tfd/datapackage.json
index 660ff54..9c04974 100644
--- a/tests/fixtures/tfd/datapackage.json
+++ b/tests/fixtures/tfd/datapackage.json
@@ -203,10 +203,11 @@
"valid_for": "sa-data-array"
}
],
- "created": "2021-10-29T21:46:22.583269Z",
+ "created": "2024-05-22T12:20:16.587994+00:00Z",
"combinatorial": false,
"sequential": false,
"seed": null,
"sum_intra_duplicates": true,
- "sum_inter_duplicates": false
-}
+ "sum_inter_duplicates": false,
+ "matrix_serialize_format_type": "numpy"
+}
\ No newline at end of file
diff --git a/tests/fixtures/tfd/sa-data-array-json-metadata.json b/tests/fixtures/tfd/sa-data-array-json-metadata.json
index 836c4a6..51a78ad 100644
--- a/tests/fixtures/tfd/sa-data-array-json-metadata.json
+++ b/tests/fixtures/tfd/sa-data-array-json-metadata.json
@@ -4,4 +4,4 @@
},
1,
true
-]
+]
\ No newline at end of file
diff --git a/tests/fixtures/tfd/sa-data-array-json-parameters.json b/tests/fixtures/tfd/sa-data-array-json-parameters.json
index e8e7bfe..a8d36fc 100644
--- a/tests/fixtures/tfd/sa-data-array-json-parameters.json
+++ b/tests/fixtures/tfd/sa-data-array-json-parameters.json
@@ -1,4 +1,4 @@
[
"a",
"foo"
-]
+]
\ No newline at end of file
diff --git a/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv b/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv
index a4796ad..9fad506 100644
--- a/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv
+++ b/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv
@@ -1,4 +1,4 @@
-id,a,c,d
-1,1,3,11
-2,2,4,11
-3,1,4,11
+a,c,d
+1,3,11
+2,4,11
+1,4,11
diff --git a/tests/indexing.py b/tests/indexing.py
index 57995dd..37c0197 100644
--- a/tests/indexing.py
+++ b/tests/indexing.py
@@ -3,12 +3,12 @@
import numpy as np
import pandas as pd
import pytest
-from fs.osfs import OSFS
from bw_processing import create_datapackage, load_datapackage
from bw_processing.constants import INDICES_DTYPE
from bw_processing.errors import NonUnique
from bw_processing.indexing import reindex, reset_index
+from bw_processing.io_helpers import generic_directory_filesystem
### Fixture
@@ -44,20 +44,14 @@ def add_data(dp, id_field="id"):
name="array",
flip_array=flip_array,
)
- dp.add_csv_metadata(
- dataframe=df, valid_for=[("vector", "row")], name="vector-csv-rows"
- )
- dp.add_csv_metadata(
- dataframe=df, valid_for=[("vector", "col")], name="vector-csv-cols"
- )
+ dp.add_csv_metadata(dataframe=df, valid_for=[("vector", "row")], name="vector-csv-rows")
+ dp.add_csv_metadata(dataframe=df, valid_for=[("vector", "col")], name="vector-csv-cols")
dp.add_csv_metadata(
dataframe=df,
valid_for=[("vector", "row"), ("vector", "col")],
name="vector-csv-both",
)
- dp.add_csv_metadata(
- dataframe=df, valid_for=[("array", "row")], name="array-csv-rows"
- )
+ dp.add_csv_metadata(dataframe=df, valid_for=[("array", "row")], name="array-csv-rows")
dp.add_csv_metadata(
dataframe=df,
valid_for=[("array", "row"), ("array", "col")],
@@ -73,7 +67,9 @@ def add_data(dp, id_field="id"):
@pytest.fixture
def fixture():
dp = load_datapackage(
- OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing"))
+ generic_directory_filesystem(
+ dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing"
+ )
)
dp_unchanged(dp)
return dp
@@ -82,7 +78,9 @@ def fixture():
def dp_unchanged(dp=None):
if dp is None:
dp = load_datapackage(
- OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing"))
+ generic_directory_filesystem(
+ dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing"
+ )
)
array, _ = dp.get_resource("vector.indices")
@@ -135,7 +133,9 @@ def test_reset_index_modified(fixture):
assert fixture._modified == set([fixture._get_index("vector.indices")])
fixture = load_datapackage(
- OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing"))
+ generic_directory_filesystem(
+ dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing"
+ )
)
assert not fixture._modified
@@ -376,7 +376,9 @@ def test_reindex_data_iterable_wrong_type(fixture):
(dirpath / "indexing").mkdir(exist_ok=True)
dp = create_datapackage(
- fs=OSFS(str(dirpath / "indexing")), name="indexing-fixture", id_="fixture-i"
+ fs=generic_directory_filesystem(dirpath=dirpath / "indexing"),
+ name="indexing-fixture",
+ id_="fixture-i",
)
add_data(dp)
dp.finalize_serialization()
diff --git a/tests/integration.py b/tests/integration.py
index c9c048a..548388d 100644
--- a/tests/integration.py
+++ b/tests/integration.py
@@ -6,12 +6,12 @@
import numpy as np
import pandas as pd
import pytest
-from fs import open_fs
-from fs.memoryfs import MemoryFS
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.dirfs import DirFileSystem
+from fsspec.implementations.ftp import FTPFileSystem
+from fsspec.implementations.zip import ZipFileSystem
+from morefs.dict import DictFS
from bw_processing import INDICES_DTYPE, create_datapackage, load_datapackage
+from bw_processing.io_helpers import generic_directory_filesystem
_windows = platform.system() == "Windows"
@@ -339,7 +339,7 @@ def check_metadata(dp, as_tuples=True):
def test_integration_test_in_memory():
dp = create_datapackage(fs=None, name="test-fixture", id_="fixture-42")
- assert isinstance(dp.fs, MemoryFS)
+ assert isinstance(dp.fs, DictFS)
add_data(dp)
check_metadata(dp)
@@ -348,7 +348,9 @@ def test_integration_test_in_memory():
def test_integration_test_directory():
dp = load_datapackage(
- fs_or_obj=open_fs(str(Path(__file__).parent.resolve() / "fixtures" / "tfd"))
+ fs_or_obj=generic_directory_filesystem(
+ dirpath=Path(__file__).parent.resolve() / "fixtures" / "tfd"
+ )
)
check_metadata(dp, False)
@@ -357,53 +359,54 @@ def test_integration_test_directory():
@pytest.mark.slow
def test_integration_test_ftp():
- dp = load_datapackage(fs_or_obj=open_fs("ftp://brightway.dev/tfd/"))
+    dp = load_datapackage(
+        fs_or_obj=DirFileSystem(path="/tfd", fs=FTPFileSystem(host="brightway.dev"))
+    )
check_metadata(dp, False)
check_data(dp)
-@pytest.mark.slow
-def test_integration_test_s3():
- try:
- import fs_s3fs
- from botocore.exceptions import NoCredentialsError
- except ImportError:
- raise ImportError(
- "https://github.com/PyFilesystem/s3fs must be installed for this test."
- )
- try:
- dp = load_datapackage(fs_or_obj=open_fs("s3://bwprocessing"))
- check_metadata(dp, False)
- check_data(dp)
- except NoCredentialsError:
- raise NoCredentialsError(
- "Supply AWS credentials (https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)"
- )
+# TBD: https://s3fs.readthedocs.io/en/latest/
+# @pytest.mark.slow
+# def test_integration_test_s3():
+# try:
+# import fs_s3fs
+# from botocore.exceptions import NoCredentialsError
+# except ImportError:
+# raise ImportError(
+# "https://github.com/PyFilesystem/s3fs must be installed for this test."
+# )
+# try:
+# dp = load_datapackage(fs_or_obj=open_fs("s3://bwprocessing"))
+# check_metadata(dp, False)
+# check_data(dp)
+# except NoCredentialsError:
+# raise NoCredentialsError(
+# "Supply AWS credentials (https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)"
+# )
@pytest.mark.skipif(_windows, reason="Permission errors on Windows CI")
def test_integration_test_fs_temp_directory():
with tempfile.TemporaryDirectory() as td:
- dp = create_datapackage(fs=OSFS(td), name="test-fixture", id_="fixture-42")
+ dp = create_datapackage(
+ fs=generic_directory_filesystem(dirpath=Path(td)), name="test-fixture", id_="fixture-42"
+ )
add_data(dp)
dp.finalize_serialization()
check_metadata(dp)
check_data(dp)
- loaded = load_datapackage(OSFS(td))
+ loaded = load_datapackage(generic_directory_filesystem(dirpath=Path(td)))
check_metadata(loaded, False)
check_data(loaded)
- loaded.fs.close()
-
@pytest.mark.skipif(_windows, reason="Permission errors on Windows CI")
def test_integration_test_new_zipfile():
with tempfile.TemporaryDirectory() as td:
dp = create_datapackage(
- fs=ZipFS(str(Path(td) / "foo.zip"), write=True),
+ fs=ZipFileSystem(Path(td) / "foo.zip", mode="w"),
name="test-fixture",
id_="fixture-42",
)
@@ -413,7 +416,7 @@ def test_integration_test_new_zipfile():
check_metadata(dp)
check_data(dp)
- loaded = load_datapackage(ZipFS(str(Path(td) / "foo.zip"), write=False))
+ loaded = load_datapackage(ZipFileSystem(Path(td) / "foo.zip", mode="r"))
check_metadata(loaded, False)
check_data(loaded)
@@ -421,9 +424,9 @@ def test_integration_test_new_zipfile():
def test_integration_test_fixture_zipfile():
loaded = load_datapackage(
- ZipFS(
- str(Path(__file__).parent.resolve() / "fixtures" / "test-fixture.zip"),
- write=False,
+ ZipFileSystem(
+ Path(__file__).parent.resolve() / "fixtures" / "test-fixture.zip",
+ mode="r",
)
)
@@ -439,13 +442,15 @@ def test_integration_test_fixture_zipfile():
(dirpath / "tfd").mkdir(exist_ok=True)
dp = create_datapackage(
- fs=OSFS(str(dirpath / "tfd")), name="test-fixture", id_="fixture-42"
+ fs=generic_directory_filesystem(dirpath=dirpath / "tfd"),
+ name="test-fixture",
+ id_="fixture-42",
)
add_data(dp)
dp.finalize_serialization()
dp = create_datapackage(
- fs=ZipFS(str(dirpath / "test-fixture.zip"), write=True),
+ fs=ZipFileSystem(dirpath / "test-fixture.zip", mode="w"),
name="test-fixture",
id_="fixture-42",
)
diff --git a/tests/interfaces.py b/tests/interfaces.py
index e0e53fb..53fce33 100644
--- a/tests/interfaces.py
+++ b/tests/interfaces.py
@@ -1,7 +1,7 @@
from pathlib import Path
import pytest
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
from bw_processing import load_datapackage
@@ -25,7 +25,7 @@ def __getitem__(self, args):
def test_list_dehydrated_interfaces():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
assert dp.dehydrated_interfaces() == ["sa-vector-interface", "sa-array-interface"]
dp.rehydrate_interface("sa-vector-interface.data", Vector())
@@ -33,7 +33,7 @@ def test_list_dehydrated_interfaces():
def test_rehydrate_vector_interface():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
dp.rehydrate_interface("sa-vector-interface.data", Vector())
data, resource = dp.get_resource("sa-vector-interface.data")
assert next(data) == 1
@@ -51,14 +51,14 @@ def test_rehydrate_vector_interface():
def test_rehydrate_vector_interface_fix_name():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
dp.rehydrate_interface("sa-vector-interface", Vector())
data, resource = dp.get_resource("sa-vector-interface.data")
assert next(data) == 1
def test_rehydrate_vector_interface_config():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
data, resource = dp.get_resource("sa-vector-interface.data")
resource["config"] = {"foo": "bar"}
@@ -69,7 +69,7 @@ def test_rehydrate_vector_interface_config():
def test_rehydrate_vector_interface_config_keyerror():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
data, resource = dp.get_resource("sa-vector-interface.data")
with pytest.raises(KeyError):
@@ -77,7 +77,7 @@ def test_rehydrate_vector_interface_config_keyerror():
def test_rehydrate_array_interface():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
dp.rehydrate_interface("sa-array-interface.data", Array())
data, resource = dp.get_resource("sa-array-interface.data")
assert data[7] == 7
@@ -95,7 +95,7 @@ def test_rehydrate_array_interface():
def test_rehydrate_array_interface_config():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
data, resource = dp.get_resource("sa-array-interface.data")
resource["config"] = {"foo": "bar"}
@@ -106,7 +106,7 @@ def test_rehydrate_array_interface_config():
def test_rehydrate_array_interface_config_keyerror():
- dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip"))
+ dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip"))
data, resource = dp.get_resource("sa-array-interface.data")
with pytest.raises(KeyError):
diff --git a/tests/io_parquet_helpers.py b/tests/io_parquet_helpers.py
index 5a712b2..c4e5a19 100644
--- a/tests/io_parquet_helpers.py
+++ b/tests/io_parquet_helpers.py
@@ -14,10 +14,7 @@
)
from bw_processing.errors import WrongDatatype
-from bw_processing.io_parquet_helpers import (
- load_ndarray_from_parquet,
- save_arr_to_parquet,
-)
+from bw_processing.io_parquet_helpers import load_ndarray_from_parquet, save_arr_to_parquet
ARR_LIST = [
("indices_vector", "vector", "indices"),
@@ -34,13 +31,9 @@ def test_save_load_parquet_file(
arr = request.getfixturevalue(arr_fixture_name) # get fixture from name
file = tmp_path_factory.mktemp("data") / (arr_fixture_name + ".parquet")
- with file as fp:
- save_arr_to_parquet(
- file=fp, arr=arr, meta_object=meta_object, meta_type=meta_type
- )
+ save_arr_to_parquet(file=file, arr=arr, meta_object=meta_object, meta_type=meta_type)
- with file as fp:
- loaded_arr = load_ndarray_from_parquet(fp)
+ loaded_arr = load_ndarray_from_parquet(file)
assert arr.dtype == loaded_arr.dtype and np.array_equal(arr, loaded_arr)
@@ -52,11 +45,9 @@ def test_save_load_parquet_file_data_vector(dtype, tmp_path_factory):
arr = data_vector(dtype=dtype)
file = tmp_path_factory.mktemp("data") / "data_vector.parquet"
- with file as fp:
- save_arr_to_parquet(file=fp, arr=arr, meta_object="vector", meta_type="generic")
+ save_arr_to_parquet(file=file, arr=arr, meta_object="vector", meta_type="generic")
- with file as fp:
- loaded_arr = load_ndarray_from_parquet(fp)
+ loaded_arr = load_ndarray_from_parquet(file)
assert arr.dtype == loaded_arr.dtype and np.array_equal(arr, loaded_arr)
@@ -78,17 +69,13 @@ def test_save_load_parquet_file_data_matrix(dtype, tmp_path_factory):
@pytest.mark.skipif(sys.version_info[:2] == (3, 8), reason="Doesn't work in CI filesystem")
-def test_save_load_parquet_file_distribution_vector(
- distributions_vector, tmp_path_factory
-):
+def test_save_load_parquet_file_distribution_vector(distributions_vector, tmp_path_factory):
arr = distributions_vector
file = tmp_path_factory.mktemp("data") / "distributions_vector.parquet"
-    with file as fp:
-        save_arr_to_parquet(
-            file=fp, arr=arr, meta_object="vector", meta_type="distributions"
-        )
+    save_arr_to_parquet(file=file, arr=arr, meta_object="vector", meta_type="distributions")
-    with file as fp:
-        loaded_arr = load_ndarray_from_parquet(fp)
+    loaded_arr = load_ndarray_from_parquet(file)
diff --git a/tests/io_pyarrow_helpers.py b/tests/io_pyarrow_helpers.py
index cae9dc9..6dbaba1 100644
--- a/tests/io_pyarrow_helpers.py
+++ b/tests/io_pyarrow_helpers.py
@@ -49,15 +49,11 @@ def test_double_conversion_flip_vector(flip_vector):
def test_double_conversion_distribution_vector(distributions_vector):
- table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(
- distributions_vector
- )
+ table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(distributions_vector)
arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector(table)
assert arr.dtype == distributions_vector.dtype
- assert vector_equal_with_uncertainty_dtype(
- arr, distributions_vector, equal_nan=True
- )
+ assert vector_equal_with_uncertainty_dtype(arr, distributions_vector, equal_nan=True)
@pytest.mark.parametrize("dtype", [np.int8, np.int32, np.float64])
diff --git a/tests/merging.py b/tests/merging.py
index 70def0a..c5cbaa5 100644
--- a/tests/merging.py
+++ b/tests/merging.py
@@ -3,9 +3,8 @@
import numpy as np
import pytest
-from fs.memoryfs import MemoryFS
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
+from morefs.dict import DictFS
from bw_processing import (
INDICES_DTYPE,
@@ -15,15 +14,14 @@
merge_datapackages_with_mask,
)
from bw_processing.errors import LengthMismatch
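+# generic_directory_filesystem replaces the PyFilesystem OSFS objects previously used to read and write merged datapackages on disk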
+from bw_processing.io_helpers import generic_directory_filesystem
fixture_dir = Path(__file__).parent.resolve() / "fixtures"
def test_basic_merging_functionality():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
result = merge_datapackages_with_mask(
first_dp=first,
first_resource_group_label="sa-data-vector",
@@ -32,7 +30,7 @@ def test_basic_merging_functionality():
mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
)
assert isinstance(result, DatapackageBase)
- assert isinstance(result.fs, MemoryFS)
+ assert isinstance(result.fs, DictFS)
assert len(result.resources) == 5
d, r = result.get_resource("sa-data-vector.data")
@@ -56,12 +54,10 @@ def test_basic_merging_functionality():
def test_write_new_datapackage():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
with tempfile.TemporaryDirectory() as td:
- temp_fs = OSFS(td)
+ temp_fs = generic_directory_filesystem(dirpath=Path(td))
result = merge_datapackages_with_mask(
first_dp=first,
first_resource_group_label="sa-data-vector",
@@ -70,10 +66,10 @@ def test_write_new_datapackage():
mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
output_fs=temp_fs,
)
- result = load_datapackage(OSFS(td))
+ result = load_datapackage(generic_directory_filesystem(dirpath=Path(td)))
assert isinstance(result, DatapackageBase)
- assert not isinstance(result.fs, MemoryFS)
+ assert not isinstance(result.fs, DictFS)
assert len(result.resources) == 5
for suffix in {"indices", "data", "distributions", "flip"}:
@@ -106,10 +102,8 @@ def test_write_new_datapackage():
def test_add_suffix():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip"))
- )
+ first = load_datapackage(ZipFileSystem(str(fixture_dir / "merging" / "merging_same_1.zip")))
+ second = load_datapackage(ZipFileSystem(str(fixture_dir / "merging" / "merging_same_2.zip")))
with pytest.warns(UserWarning):
result = merge_datapackages_with_mask(
first_dp=first,
@@ -152,10 +146,8 @@ def test_add_suffix():
def test_wrong_resource_group_name():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
with pytest.raises(ValueError):
merge_datapackages_with_mask(
first_dp=first,
@@ -189,9 +181,7 @@ def test_shape_mismatch_data():
dp2 = create_datapackage()
data_array = np.arange(5)
- indices_array = np.array(
- [(x, y) for x, y in zip(range(5), range(10, 15))], dtype=INDICES_DTYPE
- )
+ indices_array = np.array([(x, y) for x, y in zip(range(5), range(10, 15))], dtype=INDICES_DTYPE)
dp2.add_persistent_vector(
matrix="sa_matrix",
data_array=data_array,
@@ -209,10 +199,8 @@ def test_shape_mismatch_data():
def test_shape_mismatch_mask():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
with pytest.raises(LengthMismatch):
merge_datapackages_with_mask(
first_dp=first,
@@ -224,10 +212,8 @@ def test_shape_mismatch_mask():
def test_new_metadata():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
result = merge_datapackages_with_mask(
first_dp=first,
first_resource_group_label="sa-data-vector",
@@ -253,10 +239,8 @@ def test_new_metadata():
def test_default_metadata():
- first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
- second = load_datapackage(
- ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
- )
+ first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+ second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
result = merge_datapackages_with_mask(
first_dp=first,
first_resource_group_label="sa-data-vector",
@@ -313,7 +297,7 @@ def create_fixtures():
(fixture_dir / "merging").mkdir(exist_ok=True)
dp = create_datapackage(
- fs=ZipFS(str(fixture_dir / "merging" / "merging_first.zip"), write=True),
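+        # fsspec's ZipFileSystem opens an archive for writing with mode="w" (PyFilesystem's ZipFS used write=True)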
+ fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_first.zip"), mode="w"),
name="merging-fixture",
id_="fixture-42",
)
@@ -332,7 +316,7 @@ def create_fixtures():
dp.finalize_serialization()
dp = create_datapackage(
- fs=ZipFS(str(fixture_dir / "merging" / "merging_second.zip"), write=True),
+ fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_second.zip"), mode="w"),
name="merging-fixture",
id_="fixture-42",
)
@@ -347,7 +331,7 @@ def create_fixtures():
dp.finalize_serialization()
dp = create_datapackage(
- fs=ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip"), write=True),
+ fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_same_1.zip"), mode="w"),
name="merging-fixture",
id_="fixture-42",
)
@@ -366,7 +350,7 @@ def create_fixtures():
dp.finalize_serialization()
dp = create_datapackage(
- fs=ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip"), write=True),
+ fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_same_2.zip"), mode="w"),
name="merging-fixture",
id_="fixture-42",
)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2695baf..99841a1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,12 +7,7 @@
from bw_processing import __version__
from bw_processing.errors import InvalidName
-from bw_processing.utils import (
- check_name,
- check_suffix,
- dictionary_formatter,
- load_bytes,
-)
+from bw_processing.utils import check_name, check_suffix, dictionary_formatter, load_bytes
def test_version():