Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configuration file handling and move stat method and version there #47

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
70 changes: 53 additions & 17 deletions activestorage/active.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,50 @@
import os
import numpy as np
import yaml

import activestorage

#FIXME: Consider using h5py throughout, for more generality
from netCDF4 import Dataset
from pathlib import Path
from zarr.indexing import (
OrthogonalIndexer,
)
from activestorage.storage import reduce_chunk
from activestorage import netcdf_to_zarr as nz


def _read_config_file(storage_type):
    """Read the packaged config file for ``storage_type`` into a dictionary.

    The file is looked up next to the installed ``activestorage`` package.

    Parameters
    ----------
    storage_type : str
        Either ``"S3"`` or ``"Posix"`` (case-sensitive).

    Returns
    -------
    dict
        The parsed YAML mapping.

    Raises
    ------
    ValueError
        If ``storage_type`` is not known, or if the file does not contain a
        YAML mapping (for example, if it is empty).
    """
    # NOTE(review): for "production" it would be better to pull in config from
    # a location outside of the Python installation.
    config_names = {
        "S3": "config-s3-storage.yml",
        "Posix": "config-Posix-storage.yml",
    }
    try:
        config_name = config_names[storage_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which can never be a known type.
        raise ValueError(f"Storage type {storage_type} not known.") from None

    config_file = Path(activestorage.__file__).parent / config_name

    # Explicit encoding so parsing does not depend on the platform locale.
    with open(config_file, "r", encoding="utf-8") as file:
        cfg = yaml.safe_load(file)

    # safe_load returns None for an empty document; callers use cfg.get(...),
    # so fail here with a clear message rather than later.
    if not isinstance(cfg, dict):
        raise ValueError(
            f"Config file `{config_file}` does not contain a YAML mapping."
        )

    return cfg


def _extract_method(method):
"""Extract functional method from string. Works like eval but more secure."""
if method.split(".")[0] == "np" or method.split(".")[0] == "numpy":
try:
func = getattr(np, method.split(".")[1])
return func
except AttributeError:
raise AttributeError(f"Method {method} is not a valid Numpy method.")
else:
raise ValueError(f"Could not recognize method {method} as permitted.")


class Active:
"""
Instantiates an interface to active storage which contains either zarr files
Expand All @@ -21,20 +56,9 @@ class Active:
Version 2 will add methods for actual active storage.

"""
def __new__(cls, *args, **kwargs):
"""Store reduction methods."""
instance = super().__new__(cls)
instance._methods = {
"min": np.min,
"max": np.max,
"sum": np.sum,
# For the unweighted mean we calculate the sum and divide
# by the number of non-missing elements
"mean": np.sum,
}
return instance

def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=None, valid_max=None):
def __init__(self, uri, ncvar, storage_type="Posix",
missing_value=None, fill_value=None,
valid_min=None, valid_max=None):
"""
Instantiate with a NetCDF4 dataset and the variable of interest within that file.
(We need the variable, because we need variable specific metadata from within that
Expand All @@ -52,7 +76,18 @@ def __init__(self, uri, ncvar, missing_value=None, fill_value=None, valid_min=No
raise ValueError("Must set a netCDF variable name to slice")
self.zds = None

self._version = 1
# storage type
self.storage_type = storage_type

# read config file
self._config = _read_config_file(self.storage_type)

# read methods version, components
self._version = self._config.get("version", 1)
self._methods = self._config.get("methods", None)
# should not need this if conf file is at package-level
# if not self._methods:
# raise ValueError(f"Configuration dict {self._config} needs a valid methods group.")
self._components = False
self._method = None

Expand Down Expand Up @@ -148,13 +183,14 @@ def method(self):
========== ==================================================

"""
return self._methods.get(self._method)
method = self._methods.get(self._method, None)
if method:
return _extract_method(method)

@method.setter
def method(self, value):
if value is not None and value not in self._methods:
raise ValueError(f"Bad 'method': {value}. Choose from min/max/mean/sum.")

self._method = value

@property
Expand Down
6 changes: 6 additions & 0 deletions activestorage/config-Posix-storage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 1
methods:
min: np.min

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps the defaults should live in Python, with YAML providing overrides only?

max: np.max
sum: np.sum
mean: np.sum
6 changes: 6 additions & 0 deletions activestorage/config-s3-storage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: 1
methods:
min: min
max: max
sum: dimsum
mean: mean
17 changes: 17 additions & 0 deletions tests/test_package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import activestorage

from activestorage import Active as act
from activestorage.active import _read_config_file as read_conf


# test version
Expand Down Expand Up @@ -29,3 +30,19 @@ def test_active_class_attrs():
assert hasattr(act, "components")
assert hasattr(act, "method")
assert hasattr(act, "ncvar")


# check validity of conf files
def test_read_config_file():
    """Check that each package-level config file has its mandatory keys."""
    mandatory_keys = {
        "Posix": ("version", "methods"),
        "S3": ("version", "methods"),
    }
    for storage_type, keys in mandatory_keys.items():
        config = read_conf(storage_type)
        for key in keys:
            # Name the storage type so a failure points at the right file.
            assert key in config, (
                f"{storage_type} config is missing mandatory key {key!r}"
            )

53 changes: 53 additions & 0 deletions tests/unit/test_active.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,56 @@ def test_active():
init = active.__init__(uri=uri, ncvar=ncvar, missing_value=True,
fill_value=1e20, valid_min=-1,
valid_max=1200)


def test_config_s3():
    """Check the S3 configuration: methods table, version and error paths."""
    data_file = "tests/test_data/cesm2_native.nc"
    variable = "TREFHT"
    active = Active(data_file, ncvar=variable, storage_type="S3")

    expected_methods = {
        'min': 'min',
        'max': 'max',
        'sum': 'dimsum',
        'mean': 'mean',
    }
    assert active._methods == expected_methods
    assert active.method is None
    assert active._version == 1

    active._version = 2

    # The S3 method names carry no numpy prefix, so the reduction is expected
    # to be refused when the data is accessed.
    active.method = "mean"
    with pytest.raises(ValueError) as not_permitted:
        active[:]
    assert str(not_permitted.value) == "Could not recognize method mean as permitted."

    # A name that is absent from the methods table is rejected by the setter.
    with pytest.raises(ValueError) as bad_name:
        active.method = "meany"
    assert str(bad_name.value) == "Bad 'method': meany. Choose from min/max/mean/sum."


def test_config_Posix():
    """Check the Posix configuration: methods table, version and a real run."""
    data_file = "tests/test_data/cesm2_native.nc"
    variable = "TREFHT"
    active = Active(data_file, ncvar=variable, storage_type="Posix")

    expected_methods = {
        'min': 'np.min',
        'max': 'np.max',
        'sum': 'np.sum',
        'mean': 'np.sum',
    }
    assert active._methods == expected_methods
    assert active.method is None
    assert active._version == 1

    active._version = 2

    # Usual run; note that the Posix config maps "mean" to np.sum.
    active.method = "mean"
    assert active[:] == 284.22694905598956

    # Point "mean" at a name Numpy does not provide.
    active._methods["mean"] = "np.meany"
    with pytest.raises(AttributeError) as bad_numpy:
        active[:]
    assert str(bad_numpy.value) == "Method np.meany is not a valid Numpy method."


def test_config_invalid_storage_type():
    """An unknown storage type must be rejected when Active is created."""
    data_file = "tests/test_data/cesm2_native.nc"
    variable = "TREFHT"
    with pytest.raises(ValueError) as unknown_type:
        Active(data_file, ncvar=variable, storage_type="cowabunga")
    assert str(unknown_type.value) == "Storage type cowabunga not known."