Skip to content

Commit

Permalink
Merge pull request #3 from ecmwf/feature/remote-matrices
Browse files Browse the repository at this point in the history
WIP: use remote matrices
  • Loading branch information
sandorkertesz authored Nov 29, 2023
2 parents 9a2b692 + 6dce4dc commit 09e713b
Show file tree
Hide file tree
Showing 44 changed files with 2,782 additions and 166 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,8 @@ tempCodeRunnerFile*
dev/

# data and matrix files
.grib
*.grib
*.grib1
*.grib2
*.json
*.npz
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,12 @@ repos:
- id: mdformat
exclude: cruft-update-template.md
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.4.0
rev: v2.11.0
hooks:
- id: pretty-format-yaml
args: [--autofix, --preserve-quotes]
- id: pretty-format-toml
args: [--autofix]
additional_dependencies: [toml-sort<0.22.0]
- repo: https://github.com/PyCQA/pydocstyle.git
rev: 6.1.1
hooks:
Expand Down
12 changes: 12 additions & 0 deletions docs/examples.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
.. _examples:

Examples
============

Here is a list of example notebooks to illustrate how to use earthkit-regrid.


.. toctree::
:maxdepth: 1

examples/interpolation.ipynb
184 changes: 184 additions & 0 deletions docs/examples/interpolation.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ Welcome to earthkit-regrid's documentation

**earthkit-regrid** is a Python package for regridding.

.. toctree::
:maxdepth: 1
:caption: Examples
:titlesonly:

examples

.. toctree::
:maxdepth: 1
:caption: Installation
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ Pygments>=2.6.1
Sphinx
sphinx-rtd-theme
setuptools
nbsphinx
104 changes: 13 additions & 91 deletions earthkit/regrid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json
import os

from scipy.sparse import load_npz

from .mir import mir_cached_matrix_to_file
# (C) Copyright 2023 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

try:
# NOTE: the `version.py` file must not be present in the git repository
Expand All @@ -14,89 +16,9 @@
__version__ = "999"


here = os.path.dirname(os.path.abspath(__file__))
MATRICES = os.path.join(here, "matrices")


def match(a, b):
    """Return whether two gridspec dicts carry an equal ``"grid"`` value.

    Proof-of-concept comparison only: every other key is ignored.
    """
    grid_a = a["grid"]
    grid_b = b["grid"]
    return grid_a == grid_b


def find_matrix(gridspec_in, gridspec_out):
    """Look up the matrix matching the given input and output gridspecs.

    Reads ``index.json`` from the ``MATRICES`` directory and scans its
    entries in order. For the first entry whose ``"input"`` and ``"output"``
    both ``match`` the requested gridspecs, the corresponding ``.npz`` file
    is loaded.

    Returns
    -------
    tuple
        ``(matrix, shape)`` where ``shape`` is the entry's output shape, or
        ``(None, None)`` when no entry matches.
    """
    index_file = os.path.join(MATRICES, "index.json")
    with open(index_file) as f:
        index = json.load(f)

    for entry in index.values():
        if not match(gridspec_in, entry["input"]):
            continue
        if not match(gridspec_out, entry["output"]):
            continue
        # The matrix should be hosted elsewhere
        npz_file = os.path.join(MATRICES, entry["name"] + ".npz")
        matrix = load_npz(npz_file)
        return matrix, entry["output"]["shape"]

    return None, None


def interpolate(x, gridspec_in, gridspec_out):
    """Apply the matrix found for the two gridspecs to ``x``.

    ``x`` is reshaped to a single column, multiplied by the matrix returned
    by ``find_matrix`` and the product is reshaped to the output shape
    reported alongside that matrix.

    Raises
    ------
    ValueError
        If ``find_matrix`` returns no matrix for the gridspec pair.
    """
    matrix, out_shape = find_matrix(gridspec_in, gridspec_out)
    if matrix is None:
        raise ValueError("No matrix found that matches the input and output gridspecs")

    # This should check for 1D (GG) and 2D (LL) matrices
    column = x.reshape(-1, 1)
    product = matrix @ column
    return product.reshape(out_shape)


def regular_ll(entry):
    """Build a gridspec dict from a regular lat-lon style entry.

    ``"grid"`` holds the ``west_east`` and ``south_north`` increments, in
    that order; ``"shape"`` holds ``[nj, ni]``.
    """
    grid = []
    for direction in ("west_east", "south_north"):
        grid.append(entry["increments"][direction])

    shape = [entry["nj"], entry["ni"]]
    return dict(grid=grid, shape=shape)


def reduced_gg(entry):
    """Build a gridspec dict from a reduced Gaussian style entry.

    The grid name is a one-letter prefix followed by ``entry["N"]``. The
    prefix is ``"O"`` when the first two ``pl`` values differ by exactly 4
    and ``"N"`` otherwise. ``"shape"`` is a one-element list holding the
    sum of ``pl``.
    """
    pl = entry["pl"]
    if pl[1] - pl[0] == 4:
        prefix = "O"
    else:
        prefix = "N"
    number = entry["N"]

    return dict(grid=f"{prefix}{number}", shape=[sum(pl)])


def make_matrix(path):
with open(path) as f:
entry = json.load(f)

cache_file = entry.pop("cache_file")
name, _ = os.path.splitext(os.path.basename(cache_file))

npz_file = os.path.join(MATRICES, name + ".npz")

mir_cached_matrix_to_file(cache_file, npz_file)

index_file = os.path.join(MATRICES, "index.json")
if os.path.exists(index_file):
with open(index_file) as f:
index = json.load(f)
else:
index = {}

def convert(x):
proc = globals()[x["type"]]
return proc(x)

index[name] = dict(
name=name,
input=convert(entry["input"]),
output=convert(entry["output"]),
)

with open(index_file, "w") as f:
json.dump(index, f, indent=4)
from earthkit.regrid.interpolate import interpolate

print("Written", npz_file)
print("Written", index_file)
__all__ = [
"interpolate",
"__version__",
]
146 changes: 146 additions & 0 deletions earthkit/regrid/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# (C) Copyright 2023 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

import json
import logging
import os
from contextlib import contextmanager

from scipy.sparse import load_npz

from earthkit.regrid.gridspec import GridSpec
from earthkit.regrid.utils import no_progress_bar
from earthkit.regrid.utils.download import download_and_cache

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

# Base URL that UrlAccessor is constructed with by default.
_URL = "https://get.ecmwf.int/repository/earthkit/regrid/matrices"
# File name of the matrix index; the accessors append it to their base
# location (URL or local directory) to find the index.
_INDEX_FILENAME = "index.json"


class UrlAccessor:
    """Resolve the index and matrix files below a base URL.

    Both lookups go through ``download_and_cache`` and return whatever path
    that helper returns.
    """

    def __init__(self, url):
        self.url = url

    def _url_for(self, name):
        """Return the URL of ``name`` directly below the base URL.

        The parts are joined with an explicit ``/``. ``os.path.join`` must
        not be used for URLs because it inserts the platform's path
        separator, which is a backslash on Windows.
        """
        return f"{self.url.rstrip('/')}/{name}"

    def index_path(self):
        """Return the path of the (re-)downloaded index file."""
        # checking the out of date status does not work for this file,
        # so we have to force the download.
        path = download_and_cache(
            self._url_for(_INDEX_FILENAME),
            owner="url",
            verify=True,
            force=True,
            chunk_size=1024 * 1024,
            http_headers=None,
            update_if_out_of_date=True,
            progress_bar=no_progress_bar,
        )
        return path

    def matrix_path(self, name):
        """Return the path of the matrix file called ``name``.

        Unlike the index, the download is not forced and the out-of-date
        check is disabled.
        """
        path = download_and_cache(
            self._url_for(name),
            owner="url",
            verify=True,
            force=None,
            chunk_size=1024 * 1024,
            http_headers=None,
            update_if_out_of_date=False,
        )
        return path


class LocalAccessor:
    """Accessor resolving the index and matrices inside a local directory.

    Only used for test purposes.
    """

    def __init__(self, path):
        self.path = path

    def matrix_path(self, name):
        """Return ``name`` joined onto the configured directory."""
        directory = self.path
        return os.path.join(directory, name)

    def index_path(self):
        """Return the location of the index file in the configured directory."""
        directory = self.path
        return os.path.join(directory, _INDEX_FILENAME)


@contextmanager
def _use_local_index(path):
    """Context manager for testing only. Allow using local index
    file and matrices.

    While the context is active, ``DB`` reads its index and matrices from
    the directory ``path`` through a ``LocalAccessor``. On exit the accessor
    that was active on entry is put back, so nested use and a customised
    accessor are left intact. The cached index is cleared on both entry and
    exit so that it is reloaded from the accessor then in effect.
    """
    previous_accessor = DB.accessor
    DB.clear_index()
    DB.accessor = LocalAccessor(path)
    try:
        yield None
    finally:
        DB.clear_index()
        DB.accessor = previous_accessor


class MatrixDb:
    """Lazily loaded index of interpolation matrices plus a matrix loader.

    File locations come from ``self.accessor``, which must provide
    ``index_path()`` and ``matrix_path(name)``.
    """

    def __init__(self):
        # None means "not loaded yet"; see the ``index`` property.
        self._index = None
        self.accessor = UrlAccessor(_URL)

    @property
    def index(self):
        """Return the index dict, loading it on first access."""
        if self._index is None:
            self.load_index()
        return self._index

    def load_index(self):
        """Read the index file and store its entries on the instance.

        The ``"input"`` and ``"output"`` members of every entry are replaced
        by the result of ``GridSpec.from_dict``.

        The new index is assembled in a local dict and only assigned once
        everything has succeeded. If fetching or parsing fails, the cached
        index is left untouched, so the next access retries instead of
        silently working with an empty index.
        """
        path = self.accessor.index_path()

        with open(path, "r", encoding="utf-8") as f:
            raw = json.load(f)

        index = {}
        for name, entry in raw.items():
            entry["input"] = GridSpec.from_dict(entry["input"])
            entry["output"] = GridSpec.from_dict(entry["output"])
            index[name] = entry

        self._index = index

    def clear_index(self):
        """For testing only"""
        self._index = None

    def find(self, gridspec_in, gridspec_out, matrix_version=None):
        """Find and load the matrix for a pair of gridspecs.

        Parameters
        ----------
        gridspec_in, gridspec_out
            Gridspecs in the form accepted by ``GridSpec.from_dict``.
        matrix_version
            Version to load. When None, the first item of the entry's
            sorted ``"versions"`` is used.

        Returns
        -------
        tuple
            ``(matrix, shape)`` with ``shape`` taken from the entry's output
            gridspec, or ``(None, None)`` when no entry matches.

        Raises
        ------
        ValueError
            If ``matrix_version`` is given but not listed for the entry.
        """
        entry = self.find_entry(gridspec_in, gridspec_out)
        if entry is None:
            return None, None

        versions = entry["versions"]
        if matrix_version is None:
            matrix_version = sorted(versions)[0]
        elif matrix_version not in versions:
            raise ValueError(f"Unsupported matrix_version={matrix_version}")

        z = self.load_matrix(entry["name"], matrix_version)
        return z, entry["output"]["shape"]

    def find_entry(self, gridspec_in, gridspec_out):
        """Return the first index entry equal to both gridspecs, or None."""
        gridspec_in = GridSpec.from_dict(gridspec_in)
        gridspec_out = GridSpec.from_dict(gridspec_out)

        for entry in self.index.values():
            if gridspec_in == entry["input"] and gridspec_out == entry["output"]:
                return entry

        return None

    def load_matrix(self, name, version):
        """Load the matrix stored in the file ``{name}-{version}.npz``."""
        name = f"{name}-{version}.npz"
        path = self.accessor.matrix_path(name)
        z = load_npz(path)
        return z


# Module-level database instance. _use_local_index() temporarily swaps its
# accessor and clears its cached index.
DB = MatrixDb()
Loading

0 comments on commit 09e713b

Please sign in to comment.