diff --git a/examples/example-item/example-item.json b/examples/example-item/example-item.json index 17cf5cf..002c3c6 100644 --- a/examples/example-item/example-item.json +++ b/examples/example-item/example-item.json @@ -1,87 +1,616 @@ { "type": "Feature", "stac_version": "1.0.0", - "id": "example-item", + "id": "20231010-short_range-conus-channel_rt-0-1", "properties": { - "proj:epsg": 32621, - "proj:transform": [ - 100.01126757344893, - 0.0, - 373185.0, - 0.0, - -100.01126757344893, - 8286015.0 - ], - "proj:shape": [ - 2667, - 2658 - ], - "custom_attribute": "foo", - "datetime": "2023-09-12T11:48:03.199913Z" - }, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -52.916275, - 72.229798 + "nwm:category": "short_range", + "nwm:region": "conus", + "nwm:product": "channel_rt", + "nwm:forecast_hour": 1, + "cube:dimensions": { + "time": { + "extent": [ + "2023-10-10T01:00:00Z", + "2023-10-10T01:00:00Z" ], - [ - -52.301599, - 74.613784 + "description": "valid output time", + "type": "temporal", + "kerchunk:zarray": { + "chunks": [ + 1024 + ], + "compressor": { + "id": "zlib", + "level": 2 + }, + "dtype": "=0.4.0"] +dependencies = ["stactools>=0.4.0", "xstac>=1.2.0", "fsspec[http]"] [project.optional-dependencies] dev = [ diff --git a/scripts/update-examples b/scripts/update-examples index 716d8f4..e627743 100755 --- a/scripts/update-examples +++ b/scripts/update-examples @@ -1,21 +1,33 @@ #!/usr/bin/env python - -import shutil +import json from pathlib import Path +from stactools.noaa_nwm import stac +import kerchunk.hdf + +href = "https://noaanwm.blob.core.windows.net/nwm/nwm.20231010/short_range/nwm.t00z.short_range.channel_rt.f001.conus.nc" +indices = kerchunk.hdf.SingleHdf5ToZarr(href).translate() +item = stac.create_item(href, kerchunk_indices=indices) + +with open("examples/example-item/example-item.json", "w") as f: + json.dump(item.to_dict(), f, indent=2) + + + +# import shutil + +# import stactools.noaa_nwm.stac +# from pystac import CatalogType + +# root = Path(__file__).parents[1] +# examples = root / "examples" -import stactools.noaa_nwm.stac -from pystac import CatalogType - -root = Path(__file__).parents[1] -examples = root / "examples" - -collection = stactools.noaa_nwm.stac.create_collection() -item = stactools.noaa_nwm.stac.create_item(str(root / "tests" / "data" / "asset.tif")) -collection.add_item(item) -collection.update_extent_from_items() -collection.normalize_hrefs(str(examples)) -collection.make_all_asset_hrefs_relative() -if examples.exists(): - shutil.rmtree(examples) - examples.mkdir() -collection.save(CatalogType.SELF_CONTAINED) +# collection = stactools.noaa_nwm.stac.create_collection() +# item = stactools.noaa_nwm.stac.create_item(str(root / "tests" / "data" / "asset.tif")) +# collection.add_item(item) +# collection.update_extent_from_items() +# collection.normalize_hrefs(str(examples)) +# collection.make_all_asset_hrefs_relative() +# if examples.exists(): +# shutil.rmtree(examples) +# examples.mkdir() +# collection.save(CatalogType.SELF_CONTAINED) diff --git a/src/stactools/noaa_nwm/stac.py b/src/stactools/noaa_nwm/stac.py index 346aaa5..581291c 100644 --- a/src/stactools/noaa_nwm/stac.py +++ b/src/stactools/noaa_nwm/stac.py @@ -1,67 +1,159 @@ -from datetime import datetime, timezone - -import stactools.core.create -from pystac import ( - Collection, - Extent, - Item, - SpatialExtent, - TemporalExtent, -) - - -def create_collection() -> Collection: - """Creates a STAC Collection. - - This function should create a collection for this dataset. See `the STAC - specification - `_ - for information about collection fields, and - `Collection`_ - for information about the PySTAC class. - - Returns: - Collection: STAC Collection object +import pystac +import re +import typing +import dataclasses +import datetime +import enum +import fsspec +import xarray as xr +import xstac + + +class Product(str, enum.Enum): + CHANNEL_RT = "channel_rt" + LAND = "land" + RESERVOIR = "reservoir" + TERRAIN_RT = "terrain_rt" + + +class Region(str, enum.Enum): + CONUS = "conus" + + +class Category(str, enum.Enum): + SHORT_RANGE = "short_range" + + +@dataclasses.dataclass +class NWMInfo: """ - extent = Extent( - SpatialExtent([[-180.0, 90.0, 180.0, -90.0]]), - TemporalExtent([[datetime.now(tz=timezone.utc), None]]), - ) + Examples + -------- + >>> info = NWMInfo.from_filename( + ... "nwm.20231010/short_range/nwm.t00z.short_range.channel_rt.f001.conus.nc" + ... ) + >>> info + """ + + date: datetime.datetime + category: Category # literal / enum + cycle_runtime: int # literal / enum + product: Product + forecast_hour: int + region: Region - collection = Collection( - id="example-collection", - title="Example collection", - description="An example collection", - extent=extent, - extra_fields={"custom_attribute": "foo"}, + pattern = re.compile( + "nwm\.(?P\d{8})/" + "(?Pshort_range)" + "/nwm\.t(?P" + "\d{2})z\.short_range\." + "(?P(channel_rt|land|reservoir|terrain_rt))\." + "f(?P\d{3})\." + "(?Pconus)\.nc" ) - return collection + @classmethod + def from_filename(cls, s: str): + m = cls.pattern.match(s) + assert m + d = m.groupdict() -def create_item(asset_href: str) -> Item: - """Creates a STAC item from a raster asset. + date = datetime.datetime.strptime(d["date"], "%Y%m%d") + category = Category(d["category"]) + cycle_runtime = int(d["cycle_runtime"]) + forecast_hour = int(d["forecast_hour"]) + region = Region(d["region"]) + product = Product(d["product"]) - This example function uses :py:func:`stactools.core.utils.create_item` to - generate an example item. Datasets should customize the item with - dataset-specific information, e.g. extracted from metadata files. + return cls( + date=date, + category=category, + cycle_runtime=cycle_runtime, + forecast_hour=forecast_hour, + region=region, + product=product, + ) - See `the STAC specification - `_ - for information about an item's fields, and - `Item`_ for - information on the PySTAC class. + @property + def filename(self): + # reconstruct it + ... - This function should be updated to take all hrefs needed to build the item. - It is an anti-pattern to assume that related files (e.g. metadata) are in - the same directory as the primary file. + @property + def id(self): + return "-".join( + [ + self.date.strftime("%Y%m%d"), + self.category, + self.region, + self.product, + str(self.cycle_runtime), + str(self.forecast_hour), + ] + ) - Args: - asset_href (str): The HREF pointing to an asset associated with the item + @property + def datetime(self): + # TODO: confirm this is correct + return self.date + datetime.timedelta(hours=self.forecast_hour) + + @property + def extra_properties(self): + # TODO: use the forecast extension + return { + "nwm:category": self.category.value, + "nwm:region": self.region.value, + "nwm:product": self.product.value, + "nwm:forecast_hour": self.forecast_hour, + } + + +def create_item( + href: str, + read_href_modifier=None, + kerchunk_indices: dict[str, typing.Any] | None = None, +): + path = "/".join(href.rsplit("/", 3)[-3:]) + info = NWMInfo.from_filename(path) + + ds = xr.open_dataset(fsspec.open(href).open()) + ds = xstac.fix_attrs(ds) + + # TODO: geometry from the region (conus, ...) + template = pystac.Item( + info.id, + geometry=None, + bbox=None, + datetime=info.datetime, + properties=dict(info.extra_properties), + ) + + additional_dimensions = { + "feature_id": { + "type": "ID", + "description": ds.feature_id.attrs["comment"], + "extent": [None, None], + }, + "reference_time": { + "type": "reference-time", + "description": ds.reference_time.attrs["long_name"], + "extent": [None, None], + }, + } + + item = xstac.xarray_to_stac( + ds, + template, + x_dimension=False, + y_dimension=False, + kerchunk_indices=kerchunk_indices, + **additional_dimensions, + ) + + item.assets["data"] = pystac.Asset(href=href, media_type="application/x-netcdf") - Returns: - Item: STAC Item object - """ - item = stactools.core.create.item(asset_href) - item.id = "example-item" - item.properties["custom_attribute"] = "foo" return item + + +def create_collection(): + ...