diff --git a/activestorage/active.py b/activestorage/active.py index 00d5d09d..b0db0912 100644 --- a/activestorage/active.py +++ b/activestorage/active.py @@ -1,8 +1,12 @@ import os import numpy as np +import yaml + +import activestorage #FIXME: Consider using h5py throughout, for more generality from netCDF4 import Dataset +from pathlib import Path from zarr.indexing import ( OrthogonalIndexer, ) @@ -10,6 +14,37 @@ from activestorage import netcdf_to_zarr as nz +def _read_config_file(storage_type): + """Read config user file and store settings in a dictionary.""" + base_path = Path(activestorage.__file__).parent + if storage_type == "S3": + config_file = base_path / Path("config-s3-storage.yml") + elif storage_type == "Posix": + config_file = base_path / Path("config-Posix-storage.yml") + else: + raise ValueError(f"Storage type {storage_type} not known.") + # should not need this if conf file is at package-level + # if not config_file.exists(): + # raise IOError(f'Config file `{config_file}` does not exist.') + + with open(config_file, 'r') as file: + cfg = yaml.safe_load(file) + + return cfg + + +def _extract_method(method): + """Extract functional method from string. Works like eval but more secure.""" + if method.split(".")[0] == "np" or method.split(".")[0] == "numpy": + try: + func = getattr(np, method.split(".")[1]) + return func + except AttributeError: + raise AttributeError(f"Method {method} is not a valid Numpy method.") + else: + raise ValueError(f"Could not recognize method {method} as permitted.") + + class Active: """ Instantiates an interface to active storage which contains either zarr files @@ -21,20 +56,9 @@ class Active: Version 2 will add methods for actual active storage. 
""" - def __new__(cls, *args, **kwargs): - """Store reduction methods.""" - instance = super().__new__(cls) - instance._methods = { - "min": np.min, - "max": np.max, - "sum": np.sum, - # For the unweighted mean we calulate the sum and divide - # by the number of non-missing elements - "mean": np.sum, - } - return instance - - def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=None, valid_max=None): + def __init__(self, uri, ncvar, storage_type="Posix", + missing_value=None, fill_value=None, + valid_min=None, valid_max=None): """ Instantiate with a NetCDF4 dataset and the variable of interest within that file. (We need the variable, because we need variable specific metadata from within that @@ -52,7 +76,18 @@ def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=No raise ValueError("Must set a netCDF variable name to slice") self.zds = None - self._version = 1 + # storage type + self.storage_type = storage_type + + # read config file + self._config = _read_config_file(self.storage_type) + + # read methods version, components + self._version = self._config.get("version", 1) + self._methods = self._config.get("methods", None) + # should not need this if conf file is at package-level + # if not self._methods: + # raise ValueError(f"Configuration dict {self._config} needs a valid methods group.") self._components = False self._method = None @@ -148,13 +183,14 @@ def method(self): ========== ================================================== """ - return self._methods.get(self._method) + method = self._methods.get(self._method, None) + if method: + return _extract_method(method) @method.setter def method(self, value): if value is not None and value not in self._methods: raise ValueError(f"Bad 'method': {value}. 
Choose from min/max/mean/sum.") - self._method = value @property diff --git a/activestorage/config-Posix-storage.yml b/activestorage/config-Posix-storage.yml new file mode 100644 index 00000000..ccfaa808 --- /dev/null +++ b/activestorage/config-Posix-storage.yml @@ -0,0 +1,6 @@ +version: 1 +methods: + min: np.min + max: np.max + sum: np.sum + mean: np.sum diff --git a/activestorage/config-s3-storage.yml b/activestorage/config-s3-storage.yml new file mode 100644 index 00000000..3bb535b0 --- /dev/null +++ b/activestorage/config-s3-storage.yml @@ -0,0 +1,6 @@ +version: 1 +methods: + min: min + max: max + sum: dimsum + mean: mean diff --git a/tests/test_package.py b/tests/test_package.py index 3ee2f41c..51834a5c 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -1,6 +1,7 @@ import activestorage from activestorage import Active as act +from activestorage.active import _read_config_file as read_conf # test version @@ -29,3 +30,19 @@ def test_active_class_attrs(): assert hasattr(act, "components") assert hasattr(act, "method") assert hasattr(act, "ncvar") + + +# check validity of conf files +def test_read_config_file(): + """Test validity of package-level files.""" + posix_mandatory_keys = ["version", "methods"] + s3_mandatory_keys = ["version", "methods"] + posix_file = read_conf("Posix") + s3_file = read_conf("S3") + print(posix_file) + print(s3_file) + for mandatory_key in posix_mandatory_keys: + assert mandatory_key in posix_file + for mandatory_key in s3_mandatory_keys: + assert mandatory_key in s3_file + diff --git a/tests/unit/test_active.py b/tests/unit/test_active.py index 5de297e1..a47caee2 100644 --- a/tests/unit/test_active.py +++ b/tests/unit/test_active.py @@ -83,3 +83,56 @@ def test_active(): init = active.__init__(uri=uri, ncvar=ncvar, missing_value=True, fill_value=1e20, valid_min=-1, valid_max=1200) + + +def test_config_s3(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + active = Active(uri, ncvar=ncvar, 
storage_type="S3") + assert active._methods == {'max': 'max', 'mean': 'mean', + 'min': 'min', 'sum': 'dimsum'} + assert active.method is None + assert active._version == 1 + + active._version = 2 + + # statistical method can not be executed + active.method = "mean" + with pytest.raises(ValueError) as exc: + active[:] + assert str(exc.value) == "Could not recognize method mean as permitted." + + # bad name for statistical method + with pytest.raises(ValueError) as exc: + active.method = "meany" + assert str(exc.value) == "Bad 'method': meany. Choose from min/max/mean/sum." + + +def test_config_Posix(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + active = Active(uri, ncvar=ncvar, storage_type="Posix") + assert active._methods == {'max': 'np.max', 'mean': 'np.sum', + 'min': 'np.min', 'sum': 'np.sum'} + assert active.method is None + assert active._version == 1 + + active._version = 2 + + # usual run + active.method = "mean" # config maps "mean" to np.sum + assert active[:] == 284.22694905598956 + + # passing wrong numpy method + active._methods["mean"] = "np.meany" + with pytest.raises(AttributeError) as exc: + active[:] + assert str(exc.value) == "Method np.meany is not a valid Numpy method." + + +def test_config_invalid_storage_type(): + uri = "tests/test_data/cesm2_native.nc" + ncvar = "TREFHT" + with pytest.raises(ValueError) as exc: + Active(uri, ncvar=ncvar, storage_type="cowabunga") + assert str(exc.value) == "Storage type cowabunga not known."