From 65bbcfe86ad30902ed56f0d7d60174a66991d543 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 12:45:44 +0000 Subject: [PATCH 01/14] introduce yaml config file handling --- activestorage/active.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 00d5d09d..669d4004 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -1,8 +1,12 @@ import os import numpy as np +import yaml + +import activestorage #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset +from pathlib import Path from zarr.indexing import ( OrthogonalIndexer, ) @@ -10,6 +14,22 @@ from activestorage import netcdf_to_zarr as nz +def _read_config_file(storage_type): + """Read config user file and store settings in a dictionary.""" + base_path = Path(activestorage.__file__).parent + if storage_type == "S3": + config_file = base_path / Path("config-s3-storage.yml") + elif storage_type == "Posix": + config_file = base_path / Path("config-Posix-storage.yml") + if not config_file.exists(): + raise IOError(f'Config file `{config_file}` does not exist.') + + with open(config_file, 'r') as file: + cfg = yaml.safe_load(file) + + return cfg + + class Active: """ Instantiates an interface to active storage which contains either zarr files @@ -34,7 +54,9 @@ def __new__(cls, *args, **kwargs): } return instance - def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=None, valid_max=None): + def __init__(self, uri, ncvar, storage_type="Posix", + missing_value=None, fill_value=None, + valid_min=None, valid_max=None): """ Instantiate with a NetCDF4 dataset and the variable of interest within that file. (We need the variable, because we need variable specific metadata from within that @@ -52,7 +74,14 @@ def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=No raise ValueError("Must set a netCDF variable name to slice") self.zds = None - self._version = 1 + # storage type + self.storage_type = storage_type + + # read config file + self._config = _read_config_file(self.storage_type) + + # read version, components + self._version = self._config.get("version", 1) self._components = False self._method = None From 0e8a416cc2ff4aa229bfeb26c9561dd9d3735a40 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 12:46:26 +0000 Subject: [PATCH 02/14] s3 file --- activestorage/config-s3-storage.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 activestorage/config-s3-storage.yml diff --git a/activestorage/config-s3-storage.yml b/activestorage/config-s3-storage.yml new file mode 100644 index 00000000..b8255184 --- /dev/null +++ b/activestorage/config-s3-storage.yml @@ -0,0 +1 @@ +version: 1 From 9f178c7d4cad213b7ae97d0a59b98c48ad227daa Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 12:46:35 +0000 Subject: [PATCH 03/14] posix file --- activestorage/config-Posix-storage.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 activestorage/config-Posix-storage.yml diff --git a/activestorage/config-Posix-storage.yml b/activestorage/config-Posix-storage.yml new file mode 100644 index 00000000..b8255184 --- /dev/null +++ b/activestorage/config-Posix-storage.yml @@ -0,0 +1 @@ +version: 1 From 8d84cd4cb4b4330d72720b31593db2874178497c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 13:34:55 +0000 Subject: [PATCH 04/14] fix implementation --- activestorage/active.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 669d4004..7710d62f 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -41,19 +41,6 @@ class Active: Version 2 will add methods for actual active storage. """ - def __new__(cls, *args, **kwargs): - """Store reduction methods.""" - instance = super().__new__(cls) - instance._methods = { - "min": np.min, - "max": np.max, - "sum": np.sum, - # For the unweighted mean we calulate the sum and divide - # by the number of non-missing elements - "mean": np.sum, - } - return instance - def __init__(self, uri, ncvar, storage_type="Posix", missing_value=None, fill_value=None, valid_min=None, valid_max=None): @@ -80,8 +67,11 @@ def __init__(self, uri, ncvar, storage_type="Posix", # read config file self._config = _read_config_file(self.storage_type) - # read version, components + # read methods version, components self._version = self._config.get("version", 1) + self._methods = self._config.get("methods", None) + if not self._methods: + raise ValueError(f"Configuration dict {self._config} needs a valid methods group.") self._components = False self._method = None @@ -177,13 +167,14 @@ def method(self): ========== ================================================== """ - return self._methods.get(self._method) + method = self._methods.get(self._method, None) + if method: + return eval(method) @method.setter def method(self, value): if value is not None and value not in self._methods: raise ValueError(f"Bad 'method': {value}. Choose from min/max/mean/sum.") - self._method = value @property From 3c3b7d0a642f47ff0933b303732e654c8f3b2e90 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 13:35:06 +0000 Subject: [PATCH 05/14] fix config file --- activestorage/config-Posix-storage.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activestorage/config-Posix-storage.yml b/activestorage/config-Posix-storage.yml index b8255184..ccfaa808 100644 --- a/activestorage/config-Posix-storage.yml +++ b/activestorage/config-Posix-storage.yml @@ -1 +1,6 @@ version: 1 +methods: + min: np.min + max: np.max + sum: np.sum + mean: np.sum From 5fc56b5492092dbd8220b160452039a167ffa5ea Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 14:23:55 +0000 Subject: [PATCH 06/14] do not use eval; bad eval --- activestorage/active.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 7710d62f..c63816a7 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -30,6 +30,14 @@ def _read_config_file(storage_type): return cfg +def _extract_method(method): + """Extract functional method from string. Works like eval but more secure.""" + if method.split(".")[0] == "np" or method.split(".")[0] == "numpy": + return getattr(np, method.split(".")[1]) + else: + raise ValueError(f"Could not recognize {method} as permitted.") + + class Active: """ Instantiates an interface to active storage which contains either zarr files @@ -169,7 +177,7 @@ def method(self): """ method = self._methods.get(self._method, None) if method: - return eval(method) + return _extract_method(method) @method.setter def method(self, value): From 67aa2213534d7dbf782cb6922d8af4e1dd819621 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 14:26:53 +0000 Subject: [PATCH 07/14] toss a check --- activestorage/active.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index c63816a7..6391d9ae 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -33,7 +33,11 @@ def _read_config_file(storage_type): def _extract_method(method): """Extract functional method from string. Works like eval but more secure.""" if method.split(".")[0] == "np" or method.split(".")[0] == "numpy": - return getattr(np, method.split(".")[1]) + try: + func = getattr(np, method.split(".")[1]) + return func + except AttributeError: + raise AttributeError(f"Method {method} is not a valid Numpy method.") else: raise ValueError(f"Could not recognize {method} as permitted.") From f9674fbc7bca04352d61539f6f4667324edb213e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 14:48:32 +0000 Subject: [PATCH 08/14] unit test for conf --- tests/unit/test_active.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index 5de297e1..b61eb073 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -83,3 +83,23 @@ def test_active(): init = active.__init__(uri=uri, ncvar=ncvar, missing_value=True, fill_value=1e20, valid_min=-1, valid_max=1200) + + +def test_config_s3(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + active = Active(uri, ncvar=ncvar, storage_type="S3") + assert active._methods == {'max': 'max', 'mean': 'mean', + 'min': 'min', 'sum': 'dimsum'} + assert active.method is None + assert active._version == 1 + + +def test_config_Posix(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + active = Active(uri, ncvar=ncvar, storage_type="Posix") + assert active._methods == {'max': 'np.max', 'mean': 'np.sum', + 'min': 'np.min', 'sum': 'np.sum'} + assert active.method is None + assert active._version == 1 From 67bdff3dc9901efdce55b4d0b6a3a3569719d410 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 14:48:53 +0000 Subject: [PATCH 09/14] add dummy funcs for s3 --- activestorage/config-s3-storage.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/activestorage/config-s3-storage.yml b/activestorage/config-s3-storage.yml index b8255184..3bb535b0 100644 --- a/activestorage/config-s3-storage.yml +++ b/activestorage/config-s3-storage.yml @@ -1 +1,6 @@ version: 1 +methods: + min: min + max: max + sum: dimsum + mean: mean From 66606319ddf4aed477541e1f4887ad90becc59cc Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 14:59:08 +0000 Subject: [PATCH 10/14] change error message --- activestorage/active.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/activestorage/active.py b/activestorage/active.py index 6391d9ae..635690c5 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -39,7 +39,7 @@ def _extract_method(method): except AttributeError: raise AttributeError(f"Method {method} is not a valid Numpy method.") else: - raise ValueError(f"Could not recognize {method} as permitted.") + raise ValueError(f"Could not recognize method {method} as permitted.") class Active: From 0ff4fb8fa0e3cb3bf3989c47a256370ff21a3335 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 15:18:15 +0000 Subject: [PATCH 11/14] add check on storage type --- activestorage/active.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/activestorage/active.py b/activestorage/active.py index 635690c5..6fc3e382 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -21,6 +21,8 @@ def _read_config_file(storage_type): config_file = base_path / Path("config-s3-storage.yml") elif storage_type == "Posix": config_file = base_path / Path("config-Posix-storage.yml") + else: + raise ValueError(f"Storage type {storage_type} not known.") if not config_file.exists(): raise IOError(f'Config file `{config_file}` does not exist.') From c22280fe2dc19f8d9b23f4ccd5c69a43ba544c7c Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 16 Nov 2022 15:18:32 +0000 Subject: [PATCH 12/14] expand test cases --- tests/unit/test_active.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index b61eb073..a47caee2 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -94,6 +94,19 @@ def test_config_s3(): assert active.method is None assert active._version == 1 + active._version = 2 + + # statistical method can not be executed + active.method = "mean" + with pytest.raises(ValueError) as exc: + active[:] + assert str(exc.value) == "Could not recognize method mean as permitted." + + # bad name for statistical method + with pytest.raises(ValueError) as exc: + active.method = "meany" + assert str(exc.value) == "Bad 'method': meany. Choose from min/max/mean/sum." + def test_config_Posix(): uri = "tests/test_data/cesm2_native.nc" @@ -103,3 +116,23 @@ def test_config_Posix(): 'min': 'np.min', 'sum': 'np.sum'} assert active.method is None assert active._version == 1 + + active._version = 2 + + # usual run + active.method = "mean" # will exec np.mean from config + assert active[:] == 284.22694905598956 + + # passing wrong numpy method + active._methods["mean"] = "np.meany" + with pytest.raises(AttributeError) as exc: + active[:] + assert str(exc.value) == "Method np.meany is not a valid Numpy method." + + +def test_config_invalid_storage_type(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + with pytest.raises(ValueError) as exc: + Active(uri, ncvar=ncvar, storage_type="cowabunga") + assert str(exc.value) == "Storage type cowabunga not known." From 55fdefa64e64365ec87db892d26763baa5476fe5 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 23 Nov 2022 14:36:12 +0000 Subject: [PATCH 13/14] comment out unneeded error catchers --- activestorage/active.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/activestorage/active.py b/activestorage/active.py index 6fc3e382..b0db0912 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -23,8 +23,9 @@ def _read_config_file(storage_type): config_file = base_path / Path("config-Posix-storage.yml") else: raise ValueError(f"Storage type {storage_type} not known.") - if not config_file.exists(): - raise IOError(f'Config file `{config_file}` does not exist.') + # should not need this if conf file is at package-level + # if not config_file.exists(): + # raise IOError(f'Config file `{config_file}` does not exist.') with open(config_file, 'r') as file: cfg = yaml.safe_load(file) @@ -84,8 +85,9 @@ def __init__(self, uri, ncvar, storage_type="Posix", # read methods version, components self._version = self._config.get("version", 1) self._methods = self._config.get("methods", None) - if not self._methods: - raise ValueError(f"Configuration dict {self._config} needs a valid methods group.") + # should not need this if conf file is at package-level + # if not self._methods: + # raise ValueError(f"Configuration dict {self._config} needs a valid methods group.") self._components = False self._method = None From 8ae79991c322b74a7ced69e204c4e8c9dbce4846 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Wed, 23 Nov 2022 14:36:25 +0000 Subject: [PATCH 14/14] add to package level test --- tests/test_package.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_package.py b/tests/test_package.py index 3ee2f41c..51834a5c 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -1,6 +1,7 @@ import activestorage from activestorage import Active as act +from activestorage.active import _read_config_file as read_conf # test version @@ -29,3 +30,19 @@ def test_active_class_attrs(): assert hasattr(act, "components") assert hasattr(act, "method") assert hasattr(act, "ncvar") + + +# check validity of conf files +def test_read_config_file(): + """Test validity of package-level files.""" + posix_mandatory_keys = ["version", "methods"] + s3_mandatory_keys = ["version", "methods"] + posix_file = read_conf("Posix") + s3_file = read_conf("S3") + print(posix_file) + print(s3_file) + for mandatory_key in posix_mandatory_keys: + assert mandatory_key in posix_file + for mandatory_key in s3_mandatory_keys: + assert mandatory_key in s3_file +