diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 9c7e095..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.gitignore b/.gitignore index d568cd1..8da5368 100644 --- a/.gitignore +++ b/.gitignore @@ -138,3 +138,5 @@ dmypy.json # Sphinx _build /docs/api/ + +.DS_Store \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index b4809dd..990abce 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ -include bw_processing/examples/simple.zip +include bw_processing/examples/*.zip +include bw_processing/examples/datapackage_1/*.parquet recursive-exclude dev * diff --git a/README.md b/README.md index 36e79cb..644c803 100644 --- a/README.md +++ b/README.md @@ -107,10 +107,10 @@ class ExampleArrayInterface: Serialized datapackages cannot contain executable code, both because of our chosen data formats, and for security reasons. Therefore, when loading a datapackage with an interface, that interface object needs to be reconstituted as Python code - we call this cycle dehydration and rehydration. Dehydration happens automatically when a datapackage is finalized with `finalize_serialization()`, but rehydration needs to be done manually using `rehydrate_interface()`. For example: ```python -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem from bw_processing import load_datapackage -my_dp = load_datapackage(ZipFS("some-path.zip")) +my_dp = load_datapackage(ZipFileSystem("some-path.zip")) my_dp.rehydrate_interface("some-resource-name", ExampleVectorInterface()) ``` @@ -119,7 +119,7 @@ You can list the dehydrated interfaces present with `.dehydrated_interfaces()`. You can store useful information for the interface object initialization under the resource key `config`. This can be used in instantiating an interface if you pass `initialize_with_config`: ```python -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem from bw_processing import load_datapackage import requests import numpy as np @@ -133,7 +133,7 @@ class MyInterface: return np.array(requests.get(self.url).json()) -my_dp = load_datapackage(ZipFS("some-path.zip")) +my_dp = load_datapackage(ZipFileSystem("some-path.zip")) data_obj, resource_metadata = my_dp.get_resource("some-interface") print(resource_metadata['config']) >>> {"url": "example.com"} diff --git a/bw_processing/__init__.py b/bw_processing/__init__.py index d2b9a87..47a5ad4 100644 --- a/bw_processing/__init__.py +++ b/bw_processing/__init__.py @@ -30,12 +30,7 @@ from .array_creation import create_array, create_structured_array -from .constants import ( - DEFAULT_LICENSES, - INDICES_DTYPE, - UNCERTAINTY_DTYPE, - MatrixSerializeFormat, -) +from .constants import DEFAULT_LICENSES, INDICES_DTYPE, UNCERTAINTY_DTYPE, MatrixSerializeFormat from .datapackage import ( Datapackage, DatapackageBase, diff --git a/bw_processing/datapackage.py b/bw_processing/datapackage.py index 67028aa..471a419 100644 --- a/bw_processing/datapackage.py +++ b/bw_processing/datapackage.py @@ -6,9 +6,10 @@ import numpy as np import pandas as pd -from fs.base import FS -from fs.errors import ResourceNotFound -from fs.memoryfs import MemoryFS +from fsspec import AbstractFileSystem + +# Use this instead of fsspec MemoryFileSystem because that is a singleton!? 
+from morefs.dict import DictFS from .constants import ( DEFAULT_LICENSES, @@ -109,15 +110,13 @@ def _get_index(self, name_or_index: Union[str, int]) -> int: def del_resource(self, name_or_index: Union[str, int]) -> None: """Remove a resource, and delete its data file, if any.""" if self._modified: - raise PotentialInconsistency( - "Datapackage is modified; save modifications or reload" - ) + raise PotentialInconsistency("Datapackage is modified; save modifications or reload") index = self._get_index(name_or_index) try: - self.fs.remove(self.resources[index]["path"]) - except (KeyError, ResourceNotFound): + self.fs.rm(self.resources[index]["path"]) + except (KeyError, FileNotFoundError): # Interface has no path pass @@ -130,26 +129,18 @@ def del_resource_group(self, name: str) -> None: Use ``exclude_resource_group`` if you want to keep the underlying resource in the filesystem. """ if self._modified: - raise PotentialInconsistency( - "Datapackage is modified; save modifications or reload" - ) + raise PotentialInconsistency("Datapackage is modified; save modifications or reload") - indices = [ - i - for i, resource in enumerate(self.resources) - if resource.get("group") == name - ] + indices = [i for i, resource in enumerate(self.resources) if resource.get("group") == name] for obj in (obj for i, obj in enumerate(self.resources) if i in indices): try: - self.fs.remove(obj["path"]) - except (KeyError, ResourceNotFound): + self.fs.rm(obj["path"]) + except (KeyError, FileNotFoundError): # Interface has no path pass - self.resources = [ - obj for i, obj in enumerate(self.resources) if i not in indices - ] + self.resources = [obj for i, obj in enumerate(self.resources) if i not in indices] self.data = [obj for i, obj in enumerate(self.data) if i not in indices] def get_resource(self, name_or_index: Union[str, int]) -> (Any, dict): @@ -188,9 +179,7 @@ def filter_by_attribute(self, key: str, value: Any) -> "FilteredDatapackage": fdp.fs = self.fs fdp.metadata = {k: v for k, v in self.metadata.items() if k != "resources"} fdp.metadata["resources"] = [] - to_include = [ - i for i, resource in enumerate(self.resources) if resource.get(key) == value - ] + to_include = [i for i, resource in enumerate(self.resources) if resource.get(key) == value] fdp.data = [o for i, o in enumerate(self.data) if i in to_include] fdp.resources = [o for i, o in enumerate(self.resources) if i in to_include] if hasattr(self, "indexer"): @@ -225,17 +214,13 @@ def exclude(self, filters: Dict[str, str]) -> "FilteredDatapackage": if any(resource.get(key) != value for key, value in filters.items()) ] fdp.data = [o for i, o in enumerate(self.data) if i in indices_to_include] - fdp.resources = [ - o for i, o in enumerate(self.resources) if i in indices_to_include - ] + fdp.resources = [o for i, o in enumerate(self.resources) if i in indices_to_include] return fdp def _dehydrate_interfaces(self) -> None: """Substitute an interface resource with ``UndefinedInterface``, in preparation for finalizing data on disk.""" interface_indices = [ - index - for index, obj in enumerate(self.resources) - if obj["profile"] == "interface" + index for index, obj in enumerate(self.resources) if obj["profile"] == "interface" ] for index in interface_indices: @@ -301,7 +286,7 @@ def __hash__(self): return hash((self.fs, self.metadata)) def __eq__(self, other): - return (self.fs, self.metadata) == (other.fs, other.metadata) + return (id(self.fs), self.metadata) == (id(other.fs), other.metadata) def _check_length_consistency(self) -> None: if 
len(self.resources) != len(self.data): @@ -312,7 +297,7 @@ def _check_length_consistency(self) -> None: ) def _load( - self, fs: FS, mmap_mode: Optional[str] = None, proxy: bool = False + self, fs: AbstractFileSystem, mmap_mode: Optional[str] = None, proxy: bool = False ) -> None: self.fs = fs self.metadata = file_reader( @@ -338,7 +323,7 @@ def _load_all(self, mmap_mode: Optional[str] = None, proxy: bool = False) -> Non def _create( self, - fs: Optional[FS], + fs: Optional[AbstractFileSystem], name: Optional[str], id_: Optional[str], metadata: Optional[dict], @@ -358,7 +343,7 @@ def _create( name = clean_datapackage_name(name or uuid.uuid4().hex) check_name(name) - self.fs = fs or MemoryFS() + self.fs = fs or DictFS() if not isinstance(matrix_serialize_format_type, MatrixSerializeFormat): raise TypeError( f"Matrix serialize format type ({matrix_serialize_format_type}) not recognized!" @@ -388,7 +373,7 @@ def _create( def finalize_serialization(self) -> None: if self._finalized: raise Closed("Datapackage already finalized") - elif isinstance(self.fs, MemoryFS): + elif isinstance(self.fs, DictFS): raise ValueError("In-memory file systems can't be serialized") self._dehydrate_interfaces() @@ -400,7 +385,8 @@ def finalize_serialization(self) -> None: resource="datapackage.json", mimetype="application/json", ) - self.fs.close() + if hasattr(self.fs, "close"): + self.fs.close() self._finalized = True def _prepare_modifications(self) -> None: @@ -471,9 +457,7 @@ def add_persistent_vector( # Check lengths - kwargs.update( - {"matrix": matrix, "category": "vector", "nrows": len(indices_array)} - ) + kwargs.update({"matrix": matrix, "category": "vector", "nrows": len(indices_array)}) name = self._prepare_name(name) indices_array = load_bytes(indices_array) @@ -516,9 +500,7 @@ def add_persistent_vector( if distributions_array is not None: distributions_array = load_bytes(distributions_array) # If no uncertainty, don't need to store it - if (distributions_array["uncertainty_type"] < 2).sum() < len( - distributions_array - ): + if (distributions_array["uncertainty_type"] < 2).sum() < len(distributions_array): if distributions_array.shape != indices_array.shape: raise ShapeMismatch( "`distributions_array` shape ({}) doesn't match `indices_array` ({}).".format( @@ -542,9 +524,7 @@ def add_persistent_vector( if flip_array.sum(): if flip_array.dtype != bool: raise WrongDatatype( - "`flip_array` dtype is {}, but must be `bool`".format( - flip_array.dtype - ) + "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype) ) elif flip_array.shape != indices_array.shape: raise ShapeMismatch( @@ -579,9 +559,7 @@ def add_persistent_array( """ """ self._prepare_modifications() - kwargs.update( - {"matrix": matrix, "category": "array", "nrows": len(indices_array)} - ) + kwargs.update({"matrix": matrix, "category": "array", "nrows": len(indices_array)}) name = self._prepare_name(name) indices_array = load_bytes(indices_array) @@ -626,9 +604,7 @@ def add_persistent_array( if flip_array.sum(): if flip_array.dtype != bool: raise WrongDatatype( - "`flip_array` dtype is {}, but must be `bool`".format( - flip_array.dtype - ) + "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype) ) elif flip_array.shape != indices_array.shape: raise ShapeMismatch( @@ -716,9 +692,7 @@ def _add_numpy_array_resource( meta_type: Optional[str] = None, **kwargs, ) -> None: - assert ( - array.ndim <= 2 - ), f"Numpy array should be of dim 2 or less instead of {array.ndim}!" 
+ assert array.ndim <= 2, f"Numpy array should be of dim 2 or less instead of {array.ndim}!" if matrix_serialize_format_type is None: # use instance default serialization format @@ -741,13 +715,13 @@ def _add_numpy_array_resource( f"Matrix serialize format type {matrix_serialize_format_type} is not recognized!" ) - if not isinstance(self.fs, MemoryFS): + if not isinstance(self.fs, DictFS): file_writer( data=array, fs=self.fs, resource=filename, mimetype="application/octet-stream", - matrix_serialize_format_type=matrix_serialize_format_type, # NIKO + matrix_serialize_format_type=matrix_serialize_format_type, meta_object=meta_object, meta_type=meta_type, ) @@ -793,9 +767,7 @@ def add_dynamic_vector( ) -> None: self._prepare_modifications() - kwargs.update( - {"matrix": matrix, "category": "vector", "nrows": len(indices_array)} - ) + kwargs.update({"matrix": matrix, "category": "vector", "nrows": len(indices_array)}) name = self._prepare_name(name) indices_array = load_bytes(indices_array) @@ -815,9 +787,7 @@ def add_dynamic_vector( if flip_array.sum(): if flip_array.dtype != bool: raise WrongDatatype( - "`flip_array` dtype is {}, but must be `bool`".format( - flip_array.dtype - ) + "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype) ) elif flip_array.shape != indices_array.shape: raise ShapeMismatch( @@ -865,9 +835,7 @@ def add_dynamic_array( if isinstance(flip_array, np.ndarray) and not flip_array.sum(): flip_array = None - kwargs.update( - {"matrix": matrix, "category": "array", "nrows": len(indices_array)} - ) + kwargs.update({"matrix": matrix, "category": "array", "nrows": len(indices_array)}) name = self._prepare_name(name) indices_array = load_bytes(indices_array) @@ -887,9 +855,7 @@ def add_dynamic_array( if flip_array.sum(): if flip_array.dtype != bool: raise WrongDatatype( - "`flip_array` dtype is {}, but must be `bool`".format( - flip_array.dtype - ) + "`flip_array` dtype is {}, but must be `bool`".format(flip_array.dtype) ) elif flip_array.shape != indices_array.shape: raise ShapeMismatch( @@ -977,9 +943,7 @@ def add_csv_metadata( ) self.resources.append(kwargs) - def add_json_metadata( - self, *, data: Any, valid_for: str, name: str = None, **kwargs - ) -> None: + def add_json_metadata(self, *, data: Any, valid_for: str, name: str = None, **kwargs) -> None: """Add an iterable metadata object to be stored as a JSON file. The purpose of storing metadata is to enable data exchange; therefore, this method assumes that data is written to disk. @@ -1015,9 +979,7 @@ def add_json_metadata( filename = check_suffix(name, ".json") - file_writer( - data=data, fs=self.fs, resource=filename, mimetype="application/json" - ) + file_writer(data=data, fs=self.fs, resource=filename, mimetype="application/json") self.data.append(data) kwargs.update( @@ -1035,7 +997,7 @@ def add_json_metadata( def create_datapackage( - fs: Optional[FS] = None, + fs: Optional[AbstractFileSystem] = None, name: Optional[str] = None, id_: Optional[str] = None, metadata: Optional[dict] = None, @@ -1048,14 +1010,14 @@ def create_datapackage( ) -> Datapackage: """Create a new data package. - All arguments are optional; if a `PyFilesystem2 `__ filesystem is not provided, a `MemoryFS `__ will be used. + All arguments are optional; if a `fsspec `__ filesystem is not provided, an in-memory `DictFS `__ will be used. All metadata elements should follow the `datapackage specification `__. Licenses are specified as a list in ``metadata``. 
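A hedged sketch of the filesystem handling described in this docstring, mirroring the fsspec-based patterns used elsewhere in this changeset; the archive name `"new-datapackage.zip"` and the datapackage names are illustrative assumptions, not taken from the code above.

```python
from fsspec.implementations.zip import ZipFileSystem

from bw_processing import create_datapackage

# No filesystem supplied: resources live in an in-memory DictFS and
# cannot be serialized to disk.
scratch_dp = create_datapackage(name="scratch-example")

# Explicit fsspec filesystem: resources are written into the zip archive, and
# finalize_serialization() writes datapackage.json and closes the archive.
zip_dp = create_datapackage(fs=ZipFileSystem("new-datapackage.zip", mode="w"), name="zip-example")
zip_dp.finalize_serialization()
```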
The default license is the `Open Data Commons Public Domain Dedication and License v1.0 `__. Args: - * fs: A ``Filesystem``, optional. A new ``MemoryFS`` is used if not provided. + * fs: A ``Filesystem``, optional. A new ``DictFS`` is used if not provided. * name: ``str``, optional. A new uuid is used if not provided. * `id_`: ``str``, optional. A new uuid is used if not provided. * metadata: ``dict``, optional. Metadata dictionary following datapackage specification; see above. @@ -1088,7 +1050,7 @@ def create_datapackage( def load_datapackage( - fs_or_obj: Union[DatapackageBase, FS], + fs_or_obj: Union[DatapackageBase, AbstractFileSystem], mmap_mode: Optional[str] = None, proxy: bool = False, ) -> Datapackage: @@ -1117,7 +1079,7 @@ def load_datapackage( return obj -def simple_graph(data: dict, fs: Optional[FS] = None, **metadata) -> Datapackage: +def simple_graph(data: dict, fs: Optional[AbstractFileSystem] = None, **metadata) -> Datapackage: """Easy creation of simple datapackages with only persistent vectors. Args: @@ -1140,13 +1102,11 @@ def simple_graph(data: dict, fs: Optional[FS] = None, **metadata) -> Datapackage the datapackage. """ - dp = create_datapackage(fs=fs, **metadata) + dp = create_datapackage(fs=fs or DictFS(), **metadata) for key, value in data.items(): indices_array = np.array([row[:2] for row in value], dtype=INDICES_DTYPE) data_array = np.array([row[2] for row in value]) - flip_array = np.array( - [row[3] if len(row) > 3 else False for row in value], dtype=bool - ) + flip_array = np.array([row[3] if len(row) > 3 else False for row in value], dtype=bool) dp.add_persistent_vector( matrix=key, data_array=data_array, diff --git a/bw_processing/examples/parquet_files.py b/bw_processing/examples/parquet_files.py index 45c8ea0..edad78b 100644 --- a/bw_processing/examples/parquet_files.py +++ b/bw_processing/examples/parquet_files.py @@ -5,10 +5,10 @@ from pathlib import Path import numpy as np -from fs.osfs import OSFS -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem import bw_processing as bwp +from bw_processing.io_helpers import generic_directory_filesystem if __name__ == "__main__": print("This is a basic example on how to use parquet files.") @@ -30,13 +30,13 @@ # VERSION OSFS # Directory must exist for OSFS otherwise use OSFS(dirpath, create=True)! # Every created object will be saved in that same directory - dp_dir = OSFS(str(dirpath / "datapackage_1"), create=True) + dp_dir = generic_directory_filesystem(dirpath=dirpath / "datapackage_1") dp = bwp.create_datapackage( fs=dp_dir, matrix_serialize_format_type=bwp.MatrixSerializeFormat.NUMPY ) else: # VERSION ZIP - dp_zip_file = ZipFS(str(dirpath / "datapackage_2.zip"), write=True) + dp_zip_file = ZipFileSystem(str(dirpath / "datapackage_2.zip"), mode="w") dp = bwp.create_datapackage( fs=dp_zip_file, matrix_serialize_format_type=bwp.MatrixSerializeFormat.NUMPY ) # bwp.create_datapackage(fs=dp_zip_file, serialize_type=SerializeENum.parquet) @@ -98,10 +98,10 @@ # OSFS must be open!
(and it was closed with finalize_serialization()) if USE_OSFS: - dp_dir = OSFS(str(dirpath / "datapackage_1")) + dp_dir = generic_directory_filesystem(dirpath=dirpath / "datapackage_1") dp2 = bwp.load_datapackage(fs_or_obj=dp_dir) else: - dp_zip_file = ZipFS(str(dirpath / "datapackage_2.zip")) + dp_zip_file = ZipFileSystem(dirpath / "datapackage_2.zip") dp2 = bwp.load_datapackage(fs_or_obj=dp_zip_file) print("Done!") diff --git a/bw_processing/filesystem.py b/bw_processing/filesystem.py index 28baf12..a8bdb4a 100644 --- a/bw_processing/filesystem.py +++ b/bw_processing/filesystem.py @@ -16,9 +16,7 @@ def clean_datapackage_name(name: str) -> str: return re.sub(MULTI_RE, "_", re.sub(SUBSTITUTION_RE, "_", name).strip("_")).strip() -def safe_filename( - string: Union[str, bytes], add_hash: bool = True, full: bool = False -) -> str: +def safe_filename(string: Union[str, bytes], add_hash: bool = True, full: bool = False) -> str: """Convert arbitrary strings to make them safe for filenames. Substitutes strange characters, and uses unicode normalization. if `add_hash`, appends hash of `string` to avoid name collisions. diff --git a/bw_processing/indexing.py b/bw_processing/indexing.py index 6e3f343..6cd19d1 100644 --- a/bw_processing/indexing.py +++ b/bw_processing/indexing.py @@ -3,14 +3,14 @@ import numpy as np import pandas as pd -from fs.base import FS +from fsspec import AbstractFileSystem from .datapackage import Datapackage, load_datapackage from .errors import NonUnique def _get_csv_data( - datapackage: Union[Datapackage, FS], metadata_name: str + datapackage: Union[Datapackage, AbstractFileSystem], metadata_name: str ) -> (Datapackage, pd.DataFrame, dict, List[np.ndarray], List[int]): """Utility function to get CSV data from datapackage. @@ -40,14 +40,15 @@ def _get_csv_data( if not isinstance(df, pd.DataFrame): raise ValueError("Given metadata is not a CSV file") resources = [ - dp.get_resource(key + ".indices")[0][label] - for key, label in metadata["valid_for"] + dp.get_resource(key + ".indices")[0][label] for key, label in metadata["valid_for"] ] indices = [dp._get_index(key + ".indices") for key, _ in metadata["valid_for"]] return dp, df, metadata, resources, indices -def reset_index(datapackage: Union[Datapackage, FS], metadata_name: str) -> Datapackage: +def reset_index( + datapackage: Union[Datapackage, AbstractFileSystem], metadata_name: str +) -> Datapackage: """Reset the numerical indices in ``datapackage`` to sequential integers starting from zero. Updates the datapackage in place. 
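A minimal usage sketch of the fsspec-based `reset_index` signature shown above, assuming a local datapackage directory and a CSV metadata resource named `"vector-csv-rows"` (both names are illustrative, not taken from this change).

```python
from pathlib import Path

from bw_processing import load_datapackage
from bw_processing.indexing import reset_index
from bw_processing.io_helpers import generic_directory_filesystem

# Load a datapackage stored in a local directory via the fsspec-backed helper.
dp = load_datapackage(generic_directory_filesystem(dirpath=Path("some-datapackage-dir")))

# Rewrite the ids referenced by the "vector-csv-rows" CSV metadata resource to
# sequential integers starting from zero; the datapackage is updated in place.
reset_index(dp, "vector-csv-rows")
```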
@@ -79,7 +80,7 @@ def reset_index(datapackage: Union[Datapackage, FS], metadata_name: str) -> Data def reindex( - datapackage: Union[Datapackage, FS], + datapackage: Union[Datapackage, AbstractFileSystem], metadata_name: str, data_iterable: Iterable, fields: List[str] = None, diff --git a/bw_processing/io_helpers.py b/bw_processing/io_helpers.py index 1e7b5f7..bdf5f7c 100644 --- a/bw_processing/io_helpers.py +++ b/bw_processing/io_helpers.py @@ -5,9 +5,11 @@ import numpy as np import pandas as pd -from fs.base import FS -from fs.osfs import OSFS -from fs.zipfs import ZipFS +from fsspec import AbstractFileSystem +from fsspec.implementations.dirfs import DirFileSystem +from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.zip import ZipFileSystem +from morefs.dict import DictFS from .constants import MatrixSerializeFormat from .errors import InvalidMimetype @@ -23,29 +25,27 @@ PARQUET = False -def generic_directory_filesystem(*, dirpath: Path) -> OSFS: +def generic_directory_filesystem(*, dirpath: Path) -> DirFileSystem: assert isinstance(dirpath, Path), "`dirpath` must be a `pathlib.Path` instance" if not dirpath.is_dir(): if not dirpath.parent.is_dir(): - raise ValueError( - "Parent directory `{}` doesn't exist".format(dirpath.parent) - ) + raise ValueError("Parent directory `{}` doesn't exist".format(dirpath.parent)) dirpath.mkdir() - return OSFS(dirpath) + return DirFileSystem(path=dirpath, fs=LocalFileSystem()) def generic_zipfile_filesystem( *, dirpath: Path, filename: str, write: bool = True -) -> ZipFS: +) -> ZipFileSystem: assert isinstance(dirpath, Path), "`dirpath` must be a `pathlib.Path` instance" if not dirpath.is_dir(): raise ValueError("Destination directory `{}` doesn't exist".format(dirpath)) - return ZipFS(dirpath / filename, write=write) + return ZipFileSystem(dirpath / filename, mode="w" if write else "r") def file_reader( *, - fs: FS, + fs: AbstractFileSystem, resource: str, mimetype: str, proxy: bool = False, @@ -116,7 +116,7 @@ def file_reader( def file_writer( *, data: Any, - fs: FS, + fs: AbstractFileSystem, resource: str, mimetype: str, matrix_serialize_format_type: MatrixSerializeFormat = MatrixSerializeFormat.NUMPY, # NIKO @@ -129,7 +129,8 @@ def file_writer( if mimetype == "application/octet-stream": if matrix_serialize_format_type == MatrixSerializeFormat.NUMPY: - return np.save(fs.open(resource, mode="wb"), data, allow_pickle=False) + with fs.open(resource, mode="wb") as fo: + return np.save(fo, data, allow_pickle=False) elif matrix_serialize_format_type == MatrixSerializeFormat.PARQUET: if not PARQUET: raise ImportError("`pyarrow` library not installed") diff --git a/bw_processing/io_parquet_helpers.py b/bw_processing/io_parquet_helpers.py index 8f7638a..365c2a7 100644 --- a/bw_processing/io_parquet_helpers.py +++ b/bw_processing/io_parquet_helpers.py @@ -7,15 +7,14 @@ import os # for annotation -from io import BufferedWriter, RawIOBase +from io import BufferedWriter, IOBase, RawIOBase import numpy import numpy as np import pyarrow.parquet as pq -from fs.iotools import RawWrapper -from bw_processing.errors import WrongDatatype -from bw_processing.io_pyarrow_helpers import ( +from .errors import WrongDatatype +from .io_pyarrow_helpers import ( numpy_distributions_vector_to_pyarrow_distributions_vector_table, numpy_generic_matrix_to_pyarrow_generic_matrix_table, numpy_generic_vector_to_pyarrow_generic_vector_table, @@ -49,9 +48,7 @@ def write_ndarray_to_parquet_file( elif meta_type == "generic": table = 
numpy_generic_vector_to_pyarrow_generic_vector_table(arr=arr) elif meta_type == "distributions": - table = numpy_distributions_vector_to_pyarrow_distributions_vector_table( - arr=arr - ) + table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(arr=arr) else: raise NotImplementedError(f"Vector of type {meta_type} is not recognized!") else: @@ -61,12 +58,12 @@ def write_ndarray_to_parquet_file( pq.write_table(table, file) -def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray: +def read_parquet_file_to_ndarray(file: RawIOBase) -> numpy.ndarray: """ Read an `ndarray` from a `parquet` file. Args: - file (fs.iotools.RawWrapper): File to read from. + file (io.RawIOBase or fsspec file object): File to read from. Raises: `WrongDatatype` if the correct metadata is not found in the `parquet` file. @@ -81,9 +78,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray: binary_meta_object = table.schema.metadata[b"object"] binary_meta_type = table.schema.metadata[b"type"] except KeyError: - raise WrongDatatype( - f"Parquet file {file} does not contain the right metadata format!" - ) + raise WrongDatatype(f"Parquet file {file} does not contain the right metadata format!") arr = None if binary_meta_object == b"matrix": @@ -94,9 +89,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray: elif binary_meta_type == b"generic": arr = pyarrow_generic_vector_table_to_numpy_generic_vector(table=table) elif binary_meta_type == b"distributions": - arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector( - table=table - ) + arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector(table=table) else: raise NotImplementedError("Vector type not recognized") else: @@ -105,9 +98,7 @@ def read_parquet_file_to_ndarray(file: RawWrapper) -> numpy.ndarray: return arr -def save_arr_to_parquet( - file: RawIOBase, arr: np.ndarray, meta_object: str, meta_type: str -) -> None: +def save_arr_to_parquet(file: RawIOBase, arr: np.ndarray, meta_object: str, meta_type: str) -> None: """ Serialize a `numpy` `ndarray` to a `parquet` `file`. @@ -127,17 +118,15 @@ def save_arr_to_parquet( with file_ctx as fid: arr = np.asanyarray(arr) - write_ndarray_to_parquet_file( - fid, arr, meta_object=meta_object, meta_type=meta_type - ) + write_ndarray_to_parquet_file(fid, arr, meta_object=meta_object, meta_type=meta_type) -def load_ndarray_from_parquet(file: RawWrapper) -> np.ndarray: +def load_ndarray_from_parquet(file: RawIOBase) -> np.ndarray: """ Deserialize a `numpy` `ndarray` from a `parquet` `file`. Parameters - file (fs.iotools.RawWrapper): File to read from. + file (io.RawIOBase or fsspec file object): File to read from. Returns The corresponding `numpy` `ndarray`. diff --git a/bw_processing/io_pyarrow_helpers.py b/bw_processing/io_pyarrow_helpers.py index 4684dd2..76d7b4b 100644 --- a/bw_processing/io_pyarrow_helpers.py +++ b/bw_processing/io_pyarrow_helpers.py @@ -160,9 +160,7 @@ def pyarrow_indices_vector_table_to_numpy_indices_vector(table: pa.Table) -> np. 
PA_UNCERTAINTY_FIELDS, metadata={"object": "vector", "type": "distributions"} ) -UNCERTAINTY_FIELDS_NAMES = [ - UNCERTAINTY_DTYPE[i][0] for i in range(NBR_UNCERTAINTY_FIELDS) -] +UNCERTAINTY_FIELDS_NAMES = [UNCERTAINTY_DTYPE[i][0] for i in range(NBR_UNCERTAINTY_FIELDS)] def numpy_distributions_vector_to_pyarrow_distributions_vector_table( @@ -224,9 +222,7 @@ def pyarrow_distributions_vector_table_to_numpy_distributions_vector( distributions_array = [] for el in zip(*distributions_arrays_list): - distributions_array.append( - tuple(el[i].as_py() for i in range(NBR_UNCERTAINTY_FIELDS)) - ) + distributions_array.append(tuple(el[i].as_py() for i in range(NBR_UNCERTAINTY_FIELDS))) arr = np.array(distributions_array, dtype=UNCERTAINTY_DTYPE) return arr @@ -254,10 +250,7 @@ def numpy_generic_matrix_to_pyarrow_generic_matrix_table(arr: np.ndarray) -> pa. arr_dtype = arr.dtype metadata = {"object": "matrix", "type": "generic"} nbr_rows, nbr_cols = arr.shape - arrays = [ - pa.array(arr[:, j], type=pa.from_numpy_dtype(arr_dtype)) - for j in range(nbr_cols) - ] + arrays = [pa.array(arr[:, j], type=pa.from_numpy_dtype(arr_dtype)) for j in range(nbr_cols)] table = pa.Table.from_arrays( arrays=arrays, names=[str(j) for j in range(nbr_cols)], # give names to each column diff --git a/bw_processing/merging.py b/bw_processing/merging.py index aac7110..137921d 100644 --- a/bw_processing/merging.py +++ b/bw_processing/merging.py @@ -4,8 +4,8 @@ import numpy as np import pandas as pd -from fs.base import FS -from fs.memoryfs import MemoryFS +from fsspec import AbstractFileSystem +from morefs.dict import DictFS from .datapackage import DatapackageBase, create_datapackage from .errors import LengthMismatch @@ -63,8 +63,8 @@ def add_resource_suffix(metadata: dict, suffix: str) -> dict: return metadata -def write_data_to_fs(resource: dict, data: Any, fs: FS) -> None: - if isinstance(fs, MemoryFS): +def write_data_to_fs(resource: dict, data: Any, fs: AbstractFileSystem) -> None: + if isinstance(fs, DictFS): return file_writer( data=data, @@ -80,7 +80,7 @@ def merge_datapackages_with_mask( second_dp: DatapackageBase, second_resource_group_label: str, mask_array: np.ndarray, - output_fs: Optional[FS] = None, + output_fs: Optional[AbstractFileSystem] = None, metadata: Optional[dict] = None, ) -> DatapackageBase: """Merge two resources using a Numpy boolean mask. Returns elements from ``first_dp`` where the mask is ``True``, otherwise ``second_dp``. @@ -108,35 +108,27 @@ """ if first_resource_group_label == second_resource_group_label: add_suffix = True - warnings.warn( - "Adding suffixes '_true' and '_false' as resource group labels are identical" - ) + warnings.warn("Adding suffixes '_true' and '_false' as resource group labels are identical") else: add_suffix = False try: first_dp = first_dp.groups[first_resource_group_label] except KeyError: - raise ValueError( - f"Resource group not {first_resource_group_label} not in ``first_dp``" - ) + raise ValueError(f"Resource group {first_resource_group_label} not in ``first_dp``") try: second_dp = second_dp.groups[second_resource_group_label] except KeyError: - raise ValueError( - f"Resource group not {second_resource_group_label} not in ``second_dp``" - ) + raise ValueError(f"Resource group {second_resource_group_label} not in ``second_dp``") DIMENSION_ERROR = """Dimension mismatch.
All array lengths must be the same, but got:\n\tFirst DP: {}\n\tSecond DP: {}\n\t Mask array: {}""" if not (len(first_dp.data[0]) == len(first_dp.data[0]) == len(mask_array)): raise LengthMismatch( - DIMENSION_ERROR.format( - len(first_dp.data[0]), len(first_dp.data[0]), len(mask_array) - ) + DIMENSION_ERROR.format(len(first_dp.data[0]), len(first_dp.data[0]), len(mask_array)) ) if output_fs is None: - output_fs = MemoryFS() + output_fs = DictFS() if any(resource["profile"] == "interface" for resource in first_dp.resources): raise ValueError("Unsupported interface found in ``first_dp``") diff --git a/pyproject.toml b/pyproject.toml index c1247af..a2ce894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ requires-python = ">=3.9" dependencies = [ # dependencies as strings with quotes, e.g. "foo" # You can add version requirements - "fs", + "fsspec", + "morefs", "numpy", "pandas", @@ -64,7 +65,7 @@ dev = [ [tool.setuptools] license-files = ["LICENSE"] include-package-data = true -packages = ["bw_processing"] +packages = ["bw_processing", "bw_processing.examples"] [tool.setuptools.dynamic] version = {attr = "bw_processing.__version__"} diff --git a/tests/calculation_package.py b/tests/calculation_package.py deleted file mode 100644 index a35f119..0000000 --- a/tests/calculation_package.py +++ /dev/null @@ -1,148 +0,0 @@ -# from bw_processing import ( -# as_unique_attributes, -# chunked, -# COMMON_DTYPE, -# create_package, -# create_datapackage_metadata, -# create_structured_array, -# create_processed_datapackage, -# format_calculation_resource, -# greedy_set_cover, -# NAME_RE, -# ) -# from copy import deepcopy -# import pytest -# import tempfile - - -# def test_format_calculation_resource(): -# given = { -# "path": "basic_array", -# "name": "test-name", -# "matrix": "technosphere", -# "description": "some words", -# "foo": "bar", -# } -# expected = { -# "format": "npy", -# "mediatype": "application/octet-stream", -# "path": "basic_array.npy", -# "name": "test-name", -# "profile": "data-resource", -# "matrix": "technosphere", -# "description": "some words", -# "foo": "bar", -# } -# assert format_calculation_resource(given) == expected - - -# def test_calculation_package(): -# resources = [ -# { -# "name": "first-resource", -# "path": "some-array.npy", -# "matrix": "technosphere", -# "data": [ -# tuple(list(range(11)) + [False, False]), -# tuple(list(range(12, 23)) + [True, True]), -# ], -# } -# ] -# with tempfile.TemporaryDirectory() as td: -# fp = create_package( -# name="test-package", resources=resources, path=td, replace=False -# ) -# # Test data in fp - - -# def test_calculation_package_directory(): -# resources = [ -# { -# "name": "first-resource", -# "path": "some-array.npy", -# "matrix": "technosphere", -# "data": [ -# tuple(list(range(11)) + [False, False]), -# tuple(list(range(12, 23)) + [True, True]), -# ], -# } -# ] -# with tempfile.TemporaryDirectory() as td: -# fp = create_package( -# name="test-package", resources=resources, path=td, compress=False -# ) -# # Test data in fp - - -# def test_calculation_package_in_memory(): -# resources = [ -# { -# "name": "first-resource", -# "path": "some-array.npy", -# "matrix": "technosphere", -# "data": [ -# tuple(list(range(11)) + [False, False]), -# tuple(list(range(12, 23)) + [True, True]), -# ], -# } -# ] -# fp = create_package(name="test-package", resources=resources) -# # Test data in fp - - -# def test_calculation_package_replace(): -# resources = [ -# { -# "name": "first-resource", -# "path": "some-array.npy", -# 
"matrix": "technosphere", -# "data": [ -# tuple(list(range(11)) + [False, False]), -# tuple(list(range(12, 23)) + [True, True]), -# ], -# } -# ] -# with tempfile.TemporaryDirectory() as td: -# create_package( -# name="test-package", resources=deepcopy(resources), path=td -# ) -# create_package( -# name="test-package", resources=deepcopy(resources), path=td, replace=True -# ) - - -# def test_calculation_package_replace_error(): -# resources = [ -# { -# "name": "first-resource", -# "path": "some-array.npy", -# "matrix": "technosphere", -# "data": [ -# tuple(list(range(11)) + [False, False]), -# tuple(list(range(12, 23)) + [True, True]), -# ], -# } -# ] -# with tempfile.TemporaryDirectory() as td: -# create_package( -# name="test-package", resources=deepcopy(resources), path=td -# ) -# with pytest.raises(ValueError): -# create_package( -# name="test-package", -# resources=deepcopy(resources), -# path=td, -# replace=False, -# ) - - -# def test_calculation_package_name_conflict(): -# pass - - -# def test_calculation_package_specify_id(): -# pass - - -# def test_calculation_package_metadata(): -# pass diff --git a/tests/conftest.py b/tests/conftest.py index 1d4b6e5..c27004f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,17 +2,11 @@ from pathlib import Path import pytest -from fixtures.basic_arrays import ( # noqa: F401 - distributions_vector, - flip_vector, - indices_vector, -) +from fixtures.basic_arrays import distributions_vector, flip_vector, indices_vector # noqa: F401 def pytest_addoption(parser): - parser.addoption( - "--runslow", action="store_true", default=False, help="run slow tests" - ) + parser.addoption("--runslow", action="store_true", default=False, help="run slow tests") def pytest_configure(config): diff --git a/tests/datapackage.py b/tests/datapackage.py index 68e60e7..89deace 100644 --- a/tests/datapackage.py +++ b/tests/datapackage.py @@ -4,18 +4,13 @@ import numpy as np import pandas as pd import pytest -from fs.memoryfs import MemoryFS -from fs.osfs import OSFS -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem +from morefs.dict import DictFS from bw_processing import create_datapackage, load_datapackage, simple_graph from bw_processing.constants import INDICES_DTYPE, UNCERTAINTY_DTYPE -from bw_processing.errors import ( - NonUnique, - PotentialInconsistency, - ShapeMismatch, - WrongDatatype, -) +from bw_processing.errors import NonUnique, PotentialInconsistency, ShapeMismatch, WrongDatatype +from bw_processing.io_helpers import generic_directory_filesystem dirpath = Path(__file__).parent.resolve() / "fixtures" @@ -84,7 +79,7 @@ def copy_fixture(fixture_name, dest): def test_group_ordering_consistent(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) assert list(dp.groups) == [ "sa-data-vector-from-dict", "sa-data-vector", @@ -111,7 +106,7 @@ def test_add_resource_with_same_name(): def test_save_modifications(tmp_path): copy_fixture("tfd", tmp_path) - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) assert dp.resources[1]["name"] == "sa-data-vector-from-dict.data" assert np.allclose(dp.data[1], [3.3, 8.3]) @@ -123,13 +118,13 @@ def test_save_modifications(tmp_path): assert np.allclose(dp.data[1], 42) assert not dp._modified - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) assert np.allclose(dp.data[1], 42) def 
test_del_resource_filesystem(tmp_path): copy_fixture("tfd", tmp_path) - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) reference_length = len(dp) assert "sa-vector-interface.indices.npy" in [o.name for o in tmp_path.iterdir()] dp.del_resource("sa-vector-interface.indices") @@ -143,7 +138,7 @@ def test_del_resource_filesystem(tmp_path): def test_del_resource_in_memory(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) reference_length = len(dp) assert "sa-vector-interface.indices" in [o["name"] for o in dp.resources] @@ -157,7 +152,7 @@ def test_del_resource_in_memory(): def test_del_resource_error_modifications(tmp_path): copy_fixture("tfd", tmp_path) - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) dp._modified = [1] with pytest.raises(PotentialInconsistency): dp.del_resource(1) @@ -165,7 +160,7 @@ def test_del_resource_error_modifications(tmp_path): def test_del_resource_group_filesystem(tmp_path): copy_fixture("tfd", tmp_path) - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) reference_length = len(dp) assert "sa-data-vector.indices.npy" in [o.name for o in tmp_path.iterdir()] @@ -180,7 +175,7 @@ def test_del_resource_group_filesystem(tmp_path): def test_del_resource_group_in_memory(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) reference_length = len(dp) assert "sa-data-vector.indices" in [o["name"] for o in dp.resources] @@ -194,7 +189,7 @@ def test_del_resource_group_in_memory(): def test_del_resource_group_error_modifications(tmp_path): copy_fixture("tfd", tmp_path) - dp = load_datapackage(OSFS(str(tmp_path))) + dp = load_datapackage(generic_directory_filesystem(dirpath=tmp_path)) dp._modified = [1] with pytest.raises(PotentialInconsistency): dp.del_resource_group("sa-vector-interface") @@ -203,7 +198,7 @@ def test_del_resource_group_error_modifications(tmp_path): def test_exclude_basic(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) reference_length = len(dp) assert "sa-data-vector.indices" in [o["name"] for o in dp.resources] @@ -220,7 +215,7 @@ def test_exclude_basic(): def test_exclude_no_match(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) reference_length = len(dp) assert "sa-data-vector.indices" in [o["name"] for o in dp.resources] @@ -234,7 +229,7 @@ def test_exclude_no_match(): def test_exclude_multiple_filters(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) reference_length = len(dp) assert "sa-array-interface.indices" in [o["name"] for o in dp.resources] @@ -251,7 +246,7 @@ def test_exclude_multiple_filters(): def test_exclude_multiple_matrix(): dp = create_datapackage() add_data(dp) - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) assert "sa-data-vector.indices" in [o["name"] for o in dp.resources] ndp = dp.exclude({"matrix": "sa_matrix"}) diff --git a/tests/filesystem.py b/tests/filesystem.py index 84f0f31..ef282bc 100644 --- a/tests/filesystem.py +++ b/tests/filesystem.py @@ -24,9 +24,7 @@ def test_safe_filename(): safe_filename("Wave your hand yeah 🙋!", full=True) == "Wave-your-hand-yeah.f7952a3d4b0534cdac0e0cbbf66aac73" ) 
- assert ( - safe_filename("Wave your hand yeah 🙋!", add_hash=False) == "Wave-your-hand-yeah" - ) + assert safe_filename("Wave your hand yeah 🙋!", add_hash=False) == "Wave-your-hand-yeah" def test_clean_datapackage_name(): diff --git a/tests/filtered_datapackage.py b/tests/filtered_datapackage.py index adc584e..0970552 100644 --- a/tests/filtered_datapackage.py +++ b/tests/filtered_datapackage.py @@ -2,21 +2,16 @@ import numpy as np import pytest -from fs.osfs import OSFS -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem -from bw_processing import ( - INDICES_DTYPE, - UNCERTAINTY_DTYPE, - create_datapackage, - load_datapackage, -) +from bw_processing import INDICES_DTYPE, UNCERTAINTY_DTYPE, create_datapackage, load_datapackage +from bw_processing.io_helpers import generic_directory_filesystem dirpath = Path(__file__).parent.resolve() / "fixtures" def test_metadata_is_the_same_object(): - dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip"))) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip")) fdp = dp.filter_by_attribute("matrix", "sa_matrix") for k, v in fdp.metadata.items(): @@ -28,21 +23,21 @@ def test_metadata_is_the_same_object(): def test_fs_is_the_same_object(): - dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip"))) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip")) fdp = dp.filter_by_attribute("matrix", "sa_matrix") assert fdp.fs is dp.fs def test_indexer_is_the_same_object(): - dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip"))) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip")) dp.indexer = lambda x: False fdp = dp.filter_by_attribute("matrix", "sa_matrix") assert fdp.indexer is dp.indexer def test_data_is_the_same_object_when_not_proxy(): - dp = load_datapackage(fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip"))) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip")) fdp = dp.filter_by_attribute("matrix", "sa_matrix") arr1, _ = dp.get_resource("sa-data-array.data") @@ -54,9 +49,7 @@ def test_data_is_the_same_object_when_not_proxy(): def test_data_is_readable_multiple_times_when_proxy_zipfs(): - dp = load_datapackage( - fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")), proxy=True - ) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"), proxy=True) fdp = dp.filter_by_attribute("matrix", "sa_matrix") arr1, _ = dp.get_resource("sa-data-array.data") @@ -69,7 +62,9 @@ def test_data_is_readable_multiple_times_when_proxy_zipfs(): def test_data_is_readable_multiple_times_when_proxy_directory(): - dp = load_datapackage(fs_or_obj=OSFS(str(dirpath / "tfd")), proxy=True) + dp = load_datapackage( + fs_or_obj=generic_directory_filesystem(dirpath=dirpath / "tfd"), proxy=True + ) fdp = dp.filter_by_attribute("matrix", "sa_matrix") arr1, _ = dp.get_resource("sa-data-array.data") @@ -82,9 +77,7 @@ def test_data_is_readable_multiple_times_when_proxy_directory(): def test_fdp_can_load_proxy_first(): - dp = load_datapackage( - fs_or_obj=ZipFS(str(dirpath / "test-fixture.zip")), proxy=True - ) + dp = load_datapackage(fs_or_obj=ZipFileSystem(dirpath / "test-fixture.zip"), proxy=True) fdp = dp.filter_by_attribute("matrix", "sa_matrix") arr2, _ = fdp.get_resource("sa-data-array.data") arr1, _ = dp.get_resource("sa-data-array.data") @@ -97,9 +90,7 @@ def test_fdp_can_load_proxy_first(): @pytest.fixture def erg(): - dp = create_datapackage( - fs=None, name="frg-fixture", 
id_="something something danger zone" - ) + dp = create_datapackage(fs=None, name="frg-fixture", id_="something something danger zone") data_array = np.arange(3) indices_array = np.array([(0, 1), (2, 3), (4, 5)], dtype=INDICES_DTYPE) diff --git a/tests/fixtures/test-fixture.zip b/tests/fixtures/test-fixture.zip index 30e5651..7d4c1aa 100644 Binary files a/tests/fixtures/test-fixture.zip and b/tests/fixtures/test-fixture.zip differ diff --git a/tests/fixtures/tfd/datapackage.json b/tests/fixtures/tfd/datapackage.json index 660ff54..9c04974 100644 --- a/tests/fixtures/tfd/datapackage.json +++ b/tests/fixtures/tfd/datapackage.json @@ -203,10 +203,11 @@ "valid_for": "sa-data-array" } ], - "created": "2021-10-29T21:46:22.583269Z", + "created": "2024-05-22T12:20:16.587994+00:00Z", "combinatorial": false, "sequential": false, "seed": null, "sum_intra_duplicates": true, - "sum_inter_duplicates": false -} + "sum_inter_duplicates": false, + "matrix_serialize_format_type": "numpy" +} \ No newline at end of file diff --git a/tests/fixtures/tfd/sa-data-array-json-metadata.json b/tests/fixtures/tfd/sa-data-array-json-metadata.json index 836c4a6..51a78ad 100644 --- a/tests/fixtures/tfd/sa-data-array-json-metadata.json +++ b/tests/fixtures/tfd/sa-data-array-json-metadata.json @@ -4,4 +4,4 @@ }, 1, true -] +] \ No newline at end of file diff --git a/tests/fixtures/tfd/sa-data-array-json-parameters.json b/tests/fixtures/tfd/sa-data-array-json-parameters.json index e8e7bfe..a8d36fc 100644 --- a/tests/fixtures/tfd/sa-data-array-json-parameters.json +++ b/tests/fixtures/tfd/sa-data-array-json-parameters.json @@ -1,4 +1,4 @@ [ "a", "foo" -] +] \ No newline at end of file diff --git a/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv b/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv index a4796ad..9fad506 100644 --- a/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv +++ b/tests/fixtures/tfd/sa-data-vector-csv-metadata.csv @@ -1,4 +1,4 @@ -id,a,c,d -1,1,3,11 -2,2,4,11 -3,1,4,11 +a,c,d +1,3,11 +2,4,11 +1,4,11 diff --git a/tests/indexing.py b/tests/indexing.py index 57995dd..37c0197 100644 --- a/tests/indexing.py +++ b/tests/indexing.py @@ -3,12 +3,12 @@ import numpy as np import pandas as pd import pytest -from fs.osfs import OSFS from bw_processing import create_datapackage, load_datapackage from bw_processing.constants import INDICES_DTYPE from bw_processing.errors import NonUnique from bw_processing.indexing import reindex, reset_index +from bw_processing.io_helpers import generic_directory_filesystem ### Fixture @@ -44,20 +44,14 @@ def add_data(dp, id_field="id"): name="array", flip_array=flip_array, ) - dp.add_csv_metadata( - dataframe=df, valid_for=[("vector", "row")], name="vector-csv-rows" - ) - dp.add_csv_metadata( - dataframe=df, valid_for=[("vector", "col")], name="vector-csv-cols" - ) + dp.add_csv_metadata(dataframe=df, valid_for=[("vector", "row")], name="vector-csv-rows") + dp.add_csv_metadata(dataframe=df, valid_for=[("vector", "col")], name="vector-csv-cols") dp.add_csv_metadata( dataframe=df, valid_for=[("vector", "row"), ("vector", "col")], name="vector-csv-both", ) - dp.add_csv_metadata( - dataframe=df, valid_for=[("array", "row")], name="array-csv-rows" - ) + dp.add_csv_metadata(dataframe=df, valid_for=[("array", "row")], name="array-csv-rows") dp.add_csv_metadata( dataframe=df, valid_for=[("array", "row"), ("array", "col")], @@ -73,7 +67,9 @@ def add_data(dp, id_field="id"): @pytest.fixture def fixture(): dp = load_datapackage( - OSFS(str(Path(__file__).parent.resolve() / "fixtures" / 
"indexing")) + generic_directory_filesystem( + dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing" + ) ) dp_unchanged(dp) return dp @@ -82,7 +78,9 @@ def fixture(): def dp_unchanged(dp=None): if dp is None: dp = load_datapackage( - OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing")) + generic_directory_filesystem( + dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing" + ) ) array, _ = dp.get_resource("vector.indices") @@ -135,7 +133,9 @@ def test_reset_index_modified(fixture): assert fixture._modified == set([fixture._get_index("vector.indices")]) fixture = load_datapackage( - OSFS(str(Path(__file__).parent.resolve() / "fixtures" / "indexing")) + generic_directory_filesystem( + dirpath=Path(__file__).parent.resolve() / "fixtures" / "indexing" + ) ) assert not fixture._modified @@ -376,7 +376,9 @@ def test_reindex_data_iterable_wrong_type(fixture): (dirpath / "indexing").mkdir(exist_ok=True) dp = create_datapackage( - fs=OSFS(str(dirpath / "indexing")), name="indexing-fixture", id_="fixture-i" + fs=generic_directory_filesystem(dirpath=dirpath / "indexing"), + name="indexing-fixture", + id_="fixture-i", ) add_data(dp) dp.finalize_serialization() diff --git a/tests/integration.py b/tests/integration.py index c9c048a..548388d 100644 --- a/tests/integration.py +++ b/tests/integration.py @@ -6,12 +6,12 @@ import numpy as np import pandas as pd import pytest -from fs import open_fs -from fs.memoryfs import MemoryFS -from fs.osfs import OSFS -from fs.zipfs import ZipFS +from fsspec.implementations.ftp import FTPFileSystem +from fsspec.implementations.zip import ZipFileSystem +from morefs.dict import DictFS from bw_processing import INDICES_DTYPE, create_datapackage, load_datapackage +from bw_processing.io_helpers import generic_directory_filesystem _windows = platform.system() == "Windows" @@ -339,7 +339,7 @@ def check_metadata(dp, as_tuples=True): def test_integration_test_in_memory(): dp = create_datapackage(fs=None, name="test-fixture", id_="fixture-42") - assert isinstance(dp.fs, MemoryFS) + assert isinstance(dp.fs, DictFS) add_data(dp) check_metadata(dp) @@ -348,7 +348,9 @@ def test_integration_test_in_memory(): def test_integration_test_directory(): dp = load_datapackage( - fs_or_obj=open_fs(str(Path(__file__).parent.resolve() / "fixtures" / "tfd")) + fs_or_obj=generic_directory_filesystem( + dirpath=Path(__file__).parent.resolve() / "fixtures" / "tfd" + ) ) check_metadata(dp, False) @@ -357,53 +359,54 @@ def test_integration_test_directory(): @pytest.mark.slow def test_integration_test_ftp(): - dp = load_datapackage(fs_or_obj=open_fs("ftp://brightway.dev/tfd/")) + dp = load_datapackage(fs_or_obj=FTPFileSystem(host="ftp://brightway.dev/tfd/")) check_metadata(dp, False) check_data(dp) -@pytest.mark.slow -def test_integration_test_s3(): - try: - import fs_s3fs - from botocore.exceptions import NoCredentialsError - except ImportError: - raise ImportError( - "https://github.com/PyFilesystem/s3fs must be installed for this test." 
- ) - try: - dp = load_datapackage(fs_or_obj=open_fs("s3://bwprocessing")) - check_metadata(dp, False) - check_data(dp) - except NoCredentialsError: - raise NoCredentialsError( - "Supply AWS credentials (https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)" - ) +# TBD: https://s3fs.readthedocs.io/en/latest/ +# @pytest.mark.slow +# def test_integration_test_s3(): +# try: +# import fs_s3fs +# from botocore.exceptions import NoCredentialsError +# except ImportError: +# raise ImportError( +# "https://github.com/PyFilesystem/s3fs must be installed for this test." +# ) +# try: +# dp = load_datapackage(fs_or_obj=open_fs("s3://bwprocessing")) +# check_metadata(dp, False) +# check_data(dp) +# except NoCredentialsError: +# raise NoCredentialsError( +# "Supply AWS credentials (https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html)" +# ) @pytest.mark.skipif(_windows, reason="Permission errors on Windows CI") def test_integration_test_fs_temp_directory(): with tempfile.TemporaryDirectory() as td: - dp = create_datapackage(fs=OSFS(td), name="test-fixture", id_="fixture-42") + dp = create_datapackage( + fs=generic_directory_filesystem(dirpath=Path(td)), name="test-fixture", id_="fixture-42" + ) add_data(dp) dp.finalize_serialization() check_metadata(dp) check_data(dp) - loaded = load_datapackage(OSFS(td)) + loaded = load_datapackage(generic_directory_filesystem(dirpath=Path(td))) check_metadata(loaded, False) check_data(loaded) - loaded.fs.close() - @pytest.mark.skipif(_windows, reason="Permission errors on Windows CI") def test_integration_test_new_zipfile(): with tempfile.TemporaryDirectory() as td: dp = create_datapackage( - fs=ZipFS(str(Path(td) / "foo.zip"), write=True), + fs=ZipFileSystem(Path(td) / "foo.zip", mode="w"), name="test-fixture", id_="fixture-42", ) @@ -413,7 +416,7 @@ def test_integration_test_new_zipfile(): check_metadata(dp) check_data(dp) - loaded = load_datapackage(ZipFS(str(Path(td) / "foo.zip"), write=False)) + loaded = load_datapackage(ZipFileSystem(Path(td) / "foo.zip", mode="r")) check_metadata(loaded, False) check_data(loaded) @@ -421,9 +424,9 @@ def test_integration_test_new_zipfile(): def test_integration_test_fixture_zipfile(): loaded = load_datapackage( - ZipFS( - str(Path(__file__).parent.resolve() / "fixtures" / "test-fixture.zip"), - write=False, + ZipFileSystem( + Path(__file__).parent.resolve() / "fixtures" / "test-fixture.zip", + mode="r", ) ) @@ -439,13 +442,15 @@ def test_integration_test_fixture_zipfile(): (dirpath / "tfd").mkdir(exist_ok=True) dp = create_datapackage( - fs=OSFS(str(dirpath / "tfd")), name="test-fixture", id_="fixture-42" + fs=generic_directory_filesystem(dirpath=dirpath / "tfd"), + name="test-fixture", + id_="fixture-42", ) add_data(dp) dp.finalize_serialization() dp = create_datapackage( - fs=ZipFS(str(dirpath / "test-fixture.zip"), write=True), + fs=ZipFileSystem(dirpath / "test-fixture.zip", mode="w"), name="test-fixture", id_="fixture-42", ) diff --git a/tests/interfaces.py b/tests/interfaces.py index e0e53fb..53fce33 100644 --- a/tests/interfaces.py +++ b/tests/interfaces.py @@ -1,7 +1,7 @@ from pathlib import Path import pytest -from fs.zipfs import ZipFS +from fsspec.implementations.zip import ZipFileSystem from bw_processing import load_datapackage @@ -25,7 +25,7 @@ def __getitem__(self, args): def test_list_dehydrated_interfaces(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) assert 
dp.dehydrated_interfaces() == ["sa-vector-interface", "sa-array-interface"] dp.rehydrate_interface("sa-vector-interface.data", Vector()) @@ -33,7 +33,7 @@ def test_list_dehydrated_interfaces(): def test_rehydrate_vector_interface(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) dp.rehydrate_interface("sa-vector-interface.data", Vector()) data, resource = dp.get_resource("sa-vector-interface.data") assert next(data) == 1 @@ -51,14 +51,14 @@ def test_rehydrate_vector_interface(): def test_rehydrate_vector_interface_fix_name(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) dp.rehydrate_interface("sa-vector-interface", Vector()) data, resource = dp.get_resource("sa-vector-interface.data") assert next(data) == 1 def test_rehydrate_vector_interface_config(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) data, resource = dp.get_resource("sa-vector-interface.data") resource["config"] = {"foo": "bar"} @@ -69,7 +69,7 @@ def test_rehydrate_vector_interface_config(): def test_rehydrate_vector_interface_config_keyerror(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) data, resource = dp.get_resource("sa-vector-interface.data") with pytest.raises(KeyError): @@ -77,7 +77,7 @@ def test_rehydrate_vector_interface_config_keyerror(): def test_rehydrate_array_interface(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) dp.rehydrate_interface("sa-array-interface.data", Array()) data, resource = dp.get_resource("sa-array-interface.data") assert data[7] == 7 @@ -95,7 +95,7 @@ def test_rehydrate_array_interface(): def test_rehydrate_array_interface_config(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) data, resource = dp.get_resource("sa-array-interface.data") resource["config"] = {"foo": "bar"} @@ -106,7 +106,7 @@ def test_rehydrate_array_interface_config(): def test_rehydrate_array_interface_config_keyerror(): - dp = load_datapackage(ZipFS(dirpath / "test-fixture.zip")) + dp = load_datapackage(ZipFileSystem(dirpath / "test-fixture.zip")) data, resource = dp.get_resource("sa-array-interface.data") with pytest.raises(KeyError): diff --git a/tests/io_parquet_helpers.py b/tests/io_parquet_helpers.py index 5a712b2..c4e5a19 100644 --- a/tests/io_parquet_helpers.py +++ b/tests/io_parquet_helpers.py @@ -14,10 +14,7 @@ ) from bw_processing.errors import WrongDatatype -from bw_processing.io_parquet_helpers import ( - load_ndarray_from_parquet, - save_arr_to_parquet, -) +from bw_processing.io_parquet_helpers import load_ndarray_from_parquet, save_arr_to_parquet ARR_LIST = [ ("indices_vector", "vector", "indices"), @@ -34,13 +31,9 @@ def test_save_load_parquet_file( arr = request.getfixturevalue(arr_fixture_name) # get fixture from name file = tmp_path_factory.mktemp("data") / (arr_fixture_name + ".parquet") - with file as fp: - save_arr_to_parquet( - file=fp, arr=arr, meta_object=meta_object, meta_type=meta_type - ) + save_arr_to_parquet(file=file, arr=arr, meta_object=meta_object, meta_type=meta_type) - with file as fp: - loaded_arr = load_ndarray_from_parquet(fp) + loaded_arr = load_ndarray_from_parquet(file) assert arr.dtype 
@@ -52,11 +45,9 @@ def test_save_load_parquet_file_data_vector(dtype, tmp_path_factory):
     arr = data_vector(dtype=dtype)
     file = tmp_path_factory.mktemp("data") / "data_vector.parquet"
 
-    with file as fp:
-        save_arr_to_parquet(file=fp, arr=arr, meta_object="vector", meta_type="generic")
+    save_arr_to_parquet(file=file, arr=arr, meta_object="vector", meta_type="generic")
 
-    with file as fp:
-        loaded_arr = load_ndarray_from_parquet(fp)
+    loaded_arr = load_ndarray_from_parquet(file)
 
     assert arr.dtype == loaded_arr.dtype and np.array_equal(arr, loaded_arr)
@@ -78,17 +69,13 @@ def test_save_load_parquet_file_data_matrix(dtype, tmp_path_factory):
 
 
 @pytest.mark.skipif(sys.version_info[:2] == (3, 8), reason="Doesn't work in CI filesystem")
-def test_save_load_parquet_file_distribution_vector(
-    distributions_vector, tmp_path_factory
-):
+def test_save_load_parquet_file_distribution_vector(distributions_vector, tmp_path_factory):
     arr = distributions_vector
     file = tmp_path_factory.mktemp("data") / "distributions_vector.parquet"
 
     with file as fp:
-        save_arr_to_parquet(
-            file=fp, arr=arr, meta_object="vector", meta_type="distributions"
-        )
+        save_arr_to_parquet(file=fp, arr=arr, meta_object="vector", meta_type="distributions")
 
     with file as fp:
         loaded_arr = load_ndarray_from_parquet(fp)
diff --git a/tests/io_pyarrow_helpers.py b/tests/io_pyarrow_helpers.py
index cae9dc9..6dbaba1 100644
--- a/tests/io_pyarrow_helpers.py
+++ b/tests/io_pyarrow_helpers.py
@@ -49,15 +49,11 @@ def test_double_conversion_flip_vector(flip_vector):
 
 
 def test_double_conversion_distribution_vector(distributions_vector):
-    table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(
-        distributions_vector
-    )
+    table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(distributions_vector)
     arr = pyarrow_distributions_vector_table_to_numpy_distributions_vector(table)
 
     assert arr.dtype == distributions_vector.dtype
-    assert vector_equal_with_uncertainty_dtype(
-        arr, distributions_vector, equal_nan=True
-    )
+    assert vector_equal_with_uncertainty_dtype(arr, distributions_vector, equal_nan=True)
 
 
 @pytest.mark.parametrize("dtype", [np.int8, np.int32, np.float64])
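The pyarrow helper tests check that a distributions vector survives a numpy → Arrow → numpy round trip with its structured dtype intact. A short sketch of that conversion; the import path `bw_processing.io_pyarrow_helpers` is assumed from the test module's naming, and the all-zero array is only a placeholder:

```python
import numpy as np

from bw_processing import UNCERTAINTY_DTYPE
from bw_processing.io_pyarrow_helpers import (
    numpy_distributions_vector_to_pyarrow_distributions_vector_table,
    pyarrow_distributions_vector_table_to_numpy_distributions_vector,
)

# All-zero distributions vector; real data would carry uncertainty parameters
distributions = np.zeros(3, dtype=UNCERTAINTY_DTYPE)

table = numpy_distributions_vector_to_pyarrow_distributions_vector_table(distributions)
roundtrip = pyarrow_distributions_vector_table_to_numpy_distributions_vector(table)

# The structured dtype survives the trip through Arrow unchanged
assert roundtrip.dtype == distributions.dtype
```

As in the test, only dtype preservation is asserted here; the test additionally compares values with `vector_equal_with_uncertainty_dtype`.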
diff --git a/tests/merging.py b/tests/merging.py
index 70def0a..c5cbaa5 100644
--- a/tests/merging.py
+++ b/tests/merging.py
@@ -3,9 +3,8 @@
 
 import numpy as np
 import pytest
-from fs.memoryfs import MemoryFS
-from fs.osfs import OSFS
-from fs.zipfs import ZipFS
+from fsspec.implementations.zip import ZipFileSystem
+from morefs.dict import DictFS
 
 from bw_processing import (
     INDICES_DTYPE,
@@ -15,15 +14,14 @@
     merge_datapackages_with_mask,
 )
 from bw_processing.errors import LengthMismatch
+from bw_processing.io_helpers import generic_directory_filesystem
 
 fixture_dir = Path(__file__).parent.resolve() / "fixtures"
 
 
 def test_basic_merging_functionality():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     result = merge_datapackages_with_mask(
         first_dp=first,
         first_resource_group_label="sa-data-vector",
@@ -32,7 +30,7 @@ def test_basic_merging_functionality():
         mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
     )
     assert isinstance(result, DatapackageBase)
-    assert isinstance(result.fs, MemoryFS)
+    assert isinstance(result.fs, DictFS)
     assert len(result.resources) == 5
 
     d, r = result.get_resource("sa-data-vector.data")
@@ -56,12 +54,10 @@ def test_basic_merging_functionality():
 
 
 def test_write_new_datapackage():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     with tempfile.TemporaryDirectory() as td:
-        temp_fs = OSFS(td)
+        temp_fs = generic_directory_filesystem(dirpath=Path(td))
         result = merge_datapackages_with_mask(
             first_dp=first,
             first_resource_group_label="sa-data-vector",
@@ -70,10 +66,10 @@ def test_write_new_datapackage():
             mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
             output_fs=temp_fs,
         )
-        result = load_datapackage(OSFS(td))
+        result = load_datapackage(generic_directory_filesystem(dirpath=Path(td)))
 
         assert isinstance(result, DatapackageBase)
-        assert not isinstance(result.fs, MemoryFS)
+        assert not isinstance(result.fs, DictFS)
         assert len(result.resources) == 5
 
         for suffix in {"indices", "data", "distributions", "flip"}:
@@ -106,10 +102,8 @@ def test_write_new_datapackage():
 
 
 def test_add_suffix():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(str(fixture_dir / "merging" / "merging_same_1.zip")))
+    second = load_datapackage(ZipFileSystem(str(fixture_dir / "merging" / "merging_same_2.zip")))
     with pytest.warns(UserWarning):
         result = merge_datapackages_with_mask(
             first_dp=first,
@@ -152,10 +146,8 @@ def test_add_suffix():
 
 
 def test_wrong_resource_group_name():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     with pytest.raises(ValueError):
         merge_datapackages_with_mask(
             first_dp=first,
@@ -189,9 +181,7 @@ def test_shape_mismatch_data():
     dp2 = create_datapackage()
 
     data_array = np.arange(5)
-    indices_array = np.array(
-        [(x, y) for x, y in zip(range(5), range(10, 15))], dtype=INDICES_DTYPE
-    )
+    indices_array = np.array([(x, y) for x, y in zip(range(5), range(10, 15))], dtype=INDICES_DTYPE)
     dp2.add_persistent_vector(
         matrix="sa_matrix",
         data_array=data_array,
@@ -209,10 +199,8 @@ def test_shape_mismatch_data():
 
 
 def test_shape_mismatch_mask():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     with pytest.raises(LengthMismatch):
         merge_datapackages_with_mask(
             first_dp=first,
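The merging tests above all follow the same pattern: load two zip-backed fixture datapackages, combine one resource group from each under a boolean mask, and optionally write the result to a directory filesystem. A condensed sketch under those assumptions — the fixture paths are illustrative, and the keyword names for the second datapackage (`second_dp`, `second_resource_group_label`) are inferred by symmetry with the first rather than quoted from the hunks:

```python
from pathlib import Path

import numpy as np
from fsspec.implementations.zip import ZipFileSystem

from bw_processing import load_datapackage, merge_datapackages_with_mask
from bw_processing.io_helpers import generic_directory_filesystem

# Hypothetical fixture paths, mirroring the zip fixtures used in the tests above
first = load_datapackage(ZipFileSystem("fixtures/merging/merging_first.zip"))
second = load_datapackage(ZipFileSystem("fixtures/merging/merging_second.zip"))

# Target directory for the merged result; assumed to be creatable here
output_dir = Path("merged-output")
output_dir.mkdir(parents=True, exist_ok=True)

merged = merge_datapackages_with_mask(
    first_dp=first,
    first_resource_group_label="sa-data-vector",
    second_dp=second,  # keyword name assumed by symmetry with first_dp
    second_resource_group_label="sa-data-vector",
    mask_array=np.array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0], dtype=bool),
    output_fs=generic_directory_filesystem(dirpath=output_dir),
)
```

Without `output_fs`, the merged datapackage lands on an in-memory `DictFS`, which is exactly what `test_basic_merging_functionality` asserts.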
@@ -224,10 +212,8 @@ def test_shape_mismatch_mask():
 
 
 def test_new_metadata():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     result = merge_datapackages_with_mask(
         first_dp=first,
         first_resource_group_label="sa-data-vector",
@@ -253,10 +239,8 @@ def test_new_metadata():
 
 
 def test_default_metadata():
-    first = load_datapackage(ZipFS(str(fixture_dir / "merging" / "merging_first.zip")))
-    second = load_datapackage(
-        ZipFS(str(fixture_dir / "merging" / "merging_second.zip"))
-    )
+    first = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_first.zip"))
+    second = load_datapackage(ZipFileSystem(fixture_dir / "merging" / "merging_second.zip"))
     result = merge_datapackages_with_mask(
         first_dp=first,
         first_resource_group_label="sa-data-vector",
@@ -313,7 +297,7 @@ def create_fixtures():
     (fixture_dir / "merging").mkdir(exist_ok=True)
 
     dp = create_datapackage(
-        fs=ZipFS(str(fixture_dir / "merging" / "merging_first.zip"), write=True),
+        fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_first.zip"), mode="w"),
         name="merging-fixture",
         id_="fixture-42",
     )
@@ -332,7 +316,7 @@ def create_fixtures():
     dp.finalize_serialization()
 
     dp = create_datapackage(
-        fs=ZipFS(str(fixture_dir / "merging" / "merging_second.zip"), write=True),
+        fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_second.zip"), mode="w"),
         name="merging-fixture",
         id_="fixture-42",
     )
@@ -347,7 +331,7 @@ def create_fixtures():
     dp.finalize_serialization()
 
     dp = create_datapackage(
-        fs=ZipFS(str(fixture_dir / "merging" / "merging_same_1.zip"), write=True),
+        fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_same_1.zip"), mode="w"),
         name="merging-fixture",
         id_="fixture-42",
     )
@@ -366,7 +350,7 @@ def create_fixtures():
     dp.finalize_serialization()
 
     dp = create_datapackage(
-        fs=ZipFS(str(fixture_dir / "merging" / "merging_same_2.zip"), write=True),
+        fs=ZipFileSystem(str(fixture_dir / "merging" / "merging_same_2.zip"), mode="w"),
         name="merging-fixture",
         id_="fixture-42",
     )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 2695baf..99841a1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,12 +7,7 @@
 
 from bw_processing import __version__
 from bw_processing.errors import InvalidName
-from bw_processing.utils import (
-    check_name,
-    check_suffix,
-    dictionary_formatter,
-    load_bytes,
-)
+from bw_processing.utils import check_name, check_suffix, dictionary_formatter, load_bytes
 
 
 def test_version():