diff --git a/recipes/vnp46a2-h11v07/meta.yaml b/recipes/vnp46a2-h11v07/meta.yaml
new file mode 100644
index 0000000000..ecb71df7ed
--- /dev/null
+++ b/recipes/vnp46a2-h11v07/meta.yaml
@@ -0,0 +1,49 @@
+# Name for dataset. User chosen.
+title: 'VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)'
+# Description of dataset. User chosen, roughly 1 sentence in length.
+description: '
+  The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called
+  VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid.
+  Known by its short name, VNP46A2, this product contains seven Science Data Sets (SDS): DNB BRDF-Corrected NTL,
+  Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Retrieval, Mandatory Quality Flag, Cloud Mask Quality Flag, and Snow Flag.
+  VNP46A2 products are provided in the standard Hierarchical Data Format - Earth Observing System (HDF-EOS5) format.
+  The collection begins on January 19, 2012 and continues forward.
+
+  This Zarr version of the dataset currently includes only the grid tile at h11v07, around the Caribbean Sea.
+  '
+# Version of pangeo_forge_recipes library that was used
+pangeo_forge_version: '0.9.2'
+# The recipes section tells Pangeo Cloud where to find the recipes within your PR.
+# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below.
+# If your PR contains multiple recipes, you may add additional elements to the list below.
+recipes:
+  # User chosen name for recipe. Likely similar to dataset name, ~25 characters in length
+  - id: vnp46a2-h11v07
+    # The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format <filename>:<object_name>
+    #   <filename> is the name of the .py file where the Python recipe object is defined.
+    #   For example, if <filename> is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR.
+    #   <object_name> is the name of the recipe object (i.e. Python class instance) _within_ the specified file.
+    #   For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `'recipe:recipe'`
+    object: 'recipe:recipe'
+provenance:
+  # Data provider object. Follow STAC spec.
+  # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object
+  providers:
+    - name: 'NASA LAADS DAAC'
+      description: 'National Aeronautics and Space Administration Level-1 and Atmosphere Archive & Distribution System Distributed Active Archive Center'
+      roles:
+        - host
+        - producer
+        - licensor
+      url: https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/products/VNP46A2/
+  # This is a required field for provider. Follow STAC spec
+  # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license
+  license: 'CC-BY-NC-ND-4.0'
+maintainers:
+  # Information about recipe creator. name and github are required
+  - name: 'José M. Zavala González'
+    github: jzavala-gonzalez
+# The specific bakery (i.e. cloud infrastructure) that your recipe will run on.
+# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries
+bakery:
+  id: 'pangeo-ldeo-nsf-earthcube'
diff --git a/recipes/vnp46a2-h11v07/recipe.py b/recipes/vnp46a2-h11v07/recipe.py
new file mode 100644
index 0000000000..14bcb6f8a8
--- /dev/null
+++ b/recipes/vnp46a2-h11v07/recipe.py
@@ -0,0 +1,145 @@
+"""
+A recipe to move VNP46A2 to a cloud analysis-ready format.
+Based on briannapagan's GPM IMERG recipe.
+"""
+
+import datetime
+import functools
+
+import pandas as pd
+import xarray as xr
+from cmr import GranuleQuery
+
+from pangeo_forge_recipes.patterns import ConcatDim, FilePattern
+from pangeo_forge_recipes.recipes import XarrayZarrRecipe
+
+# Query VNP46A2 dataset files at h11v07
+api = GranuleQuery()
+vnp_api = api.short_name('VNP46A2').point(-66, 18)  # Near Puerto Rico (h11v07)
+granules = vnp_api.get_all()
+
+# Extract the download link corresponding to each granule
+downloadable_urls = []
+for g in granules:
+    for link in g['links']:
+        if link['rel'] == 'http://esipfed.org/ns/fedsearch/1.1/data#':
+            if 'h11v07' not in link['href']:
+                continue  # Double-check that we only capture h11v07
+            downloadable_urls.append(link['href'])
+            break
+
+print(f'{len(downloadable_urls)} URLs')
+
+
+# Dictionaries containing the same information about each granule;
+# they differ only in which value serves as the lookup key.
+vnp_date_dict = dict()  # Granule date as key
+href_date_dict = dict()  # Granule download link as key
+
+vnp_dates = []  # List of granule dates, passed to ConcatDim later on
+
+for i, href_orig in enumerate(downloadable_urls):
+
+    # Update broken links from the CMR for this dataset
+    href_new = (
+        href_orig.replace(
+            'https://ladsweb.modaps.eosdis.nasa.gov/archive/',
+            'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/',
+        )
+        + '.nc4'
+    )
+
+    # Convert the year/day-of-year segments of the URL to a Python date object
+    year_julian = '-'.join(href_new.split('/')[-3:-1])
+    date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date()
+
+    # Save this info into each dictionary and list
+    info_dict = dict(
+        i=i,
+        href=href_new,
+        date=date_href,
+        year_julian=year_julian,
+    )
+    vnp_date_dict[date_href] = info_dict
+    href_date_dict[href_new] = info_dict
+    vnp_dates.append(date_href)
+
+print('Earliest date:', min(vnp_dates).strftime('%Y-%m-%d'))
+print('Latest date:  ', max(vnp_dates).strftime('%Y-%m-%d'))
+
+
+def make_full_path(date: datetime.date, vnp_date_dict=None) -> str:
+    '''
+    Return the download URL collected for the given date.
+    '''
+    return vnp_date_dict[date]['href']
+
+
+make_full_path = functools.partial(make_full_path, vnp_date_dict=vnp_date_dict)
+
+
+# Concatenate files along the date dimension (one day per file)
+date_concat_dim = ConcatDim('date', vnp_dates, nitems_per_file=1)
+
+pattern = FilePattern(make_full_path, date_concat_dim)
+
+
+def add_date_dimension(ds: xr.Dataset, filename: str) -> xr.Dataset:
+    '''
+    Expand the dimensions of the input dataset to include a `date` dimension
+    holding that image's collection date (parsed from the file's URL).
+    '''
+    # The date could also be looked up via href_date_dict[filename]['date'],
+    # but parsing the URL keeps this function self-contained.
+    year_julian = '-'.join(filename.split('/')[-3:-1])
+    date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date()
+    date_index = pd.DatetimeIndex([date_href])
+    date_da = xr.DataArray(date_index, [('date', date_index)])
+    return ds.expand_dims(date=date_da)
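+
+
+# A quick sanity check for add_date_dimension (a sketch for local use only;
+# uncomment to try). The dataset and URL below are synthetic stand-ins, not a
+# real VNP46A2 granule: the function only reads the /YYYY/DDD/ segments of
+# the path.
+# import numpy as np
+# demo_ds = xr.Dataset({'demo_var': (('y', 'x'), np.zeros((2, 2)))})
+# demo_url = 'https://example.invalid/laads/VNP46A2/2012/019/VNP46A2.demo.h5.nc4'
+# print(add_date_dimension(demo_ds, demo_url))  # gains a 'date' dim of length 1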
+
+
+# Recipe!
+recipe = XarrayZarrRecipe(pattern, process_input=add_date_dimension)
+
+# -------------------------------------------------------------------
+# Only use the section below for LOCAL testing:
+
+# import os
+# from fsspec.implementations.local import LocalFileSystem
+# from pangeo_forge_recipes.storage import FSSpecTarget, StorageConfig, CacheFSSpecTarget
+
+# cwd = os.getcwd()  # Files are saved relative to the working directory
+
+# # Target directory for the generated Zarr dataset
+# target_fs = LocalFileSystem()
+# target_path = os.path.join(cwd, 'my-dataset.zarr')
+# target = FSSpecTarget(fs=target_fs, root_path=target_path)
+
+# # Cache directory for files downloaded from the provider
+# cache_fs = LocalFileSystem()
+# cache_path = os.path.join(cwd, 'cache_dir')
+# cache_spec = CacheFSSpecTarget(fs=cache_fs, root_path=cache_path)
+
+# # Configure the recipe to use both the target Zarr store and the cache
+# recipe.storage_config = StorageConfig(target, cache_spec)
+
+
+# from pangeo_forge_recipes.recipes import setup_logging
+# setup_logging()
+
+# recipe_pruned = recipe.copy_pruned()  # Prune to only download 2 files
+
+# print('Full recipe:  ', recipe.file_pattern)
+# print('Pruned recipe:', recipe_pruned.file_pattern)
+
+# run_function = recipe_pruned.to_function()
+# run_function()  # Run the pruned recipe
+
+# # Attempt opening the resulting dataset
+# ds = xr.open_zarr(recipe_pruned.target_mapper, consolidated=True)
+# print(ds)
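+
+# # Optional spot check (a sketch): verify that a couple of the rewritten
+# # URLs actually resolve before kicking off a full run. Assumes `requests`
+# # is installed; LAADS may require an Earthdata login/token, in which case
+# # expect a 401/403 here rather than a 200.
+# import requests
+# for href in list(href_date_dict)[:2]:
+#     resp = requests.head(href, allow_redirects=True, timeout=30)
+#     print(resp.status_code, href)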