Add VNP46A2-h11v07 recipe #217
Open
jzavala-gonzalez wants to merge 6 commits into pangeo-forge:master from jzavala-gonzalez:vnp46a2-h11v07
+194
−0
Changes from 5 commits. Commits:
a08c390 Initial recipe import (jzavala-gonzalez)
c5bdde7 attempt at fixing double-quotes (jzavala-gonzalez)
173028f one more yaml fix (jzavala-gonzalez)
f058d2a shorten line length (jzavala-gonzalez)
49fbc84 I think pre-commit fixed it this time (jzavala-gonzalez)
ffc4d2b Add functools partial call for pattern path (jzavala-gonzalez)
New file (+49 lines):

```yaml
# Name for dataset. User chosen.
title: 'VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)'
# Description of dataset. User chosen, roughly 1 sentence in length.
description: '
The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called
VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid.
Known by its short name, VNP46A2, this product contains seven Science Data Sets (SDS) that include DNB BRDF-Corrected NTL,
Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Retrieval, Mandatory Quality Flag, Cloud Mask Quality Flag, and Snow Flag.
VNP46A2 products are provided in standard Hierarchical Data Format-Earth Observing System (HDF-EOS5) format.
This collection starts on January 19, 2012 and continues forward.

This ZARR version of the dataset currently includes only the grid file at h11v07, around the Caribbean Sea.
'
# Version of pangeo_forge_recipes library that was used
pangeo_forge_version: '0.9.2'
# The recipes section tells Pangeo Cloud where to find the recipes within your PR.
# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below.
# If your PR contains multiple recipes, you may add additional elements to the list below.
recipes:
  # User chosen name for recipe. Likely similar to dataset name, ~25 characters in length
  - id: vnp46a2-h11v07
    # The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format <filename>:<object_name>
    # <filename> is the name of the .py file where the Python recipe object is defined.
    # For example, if <filename> is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR.
    # <object_name> is the name of the recipe object (i.e. Python class instance) _within_ the specified file.
    # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"`
    object: 'recipe:recipe'
provenance:
  # Data provider object. Follow STAC spec.
  # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object
  providers:
    - name: 'NASA LAADS DAAC'
      description: 'National Aeronautics and Space Administration Level-1 and Atmosphere Archive & Distribution System Distributed Active Archive Center'
      roles:
        - host
        - producer
        - licensor
      url: https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/products/VNP46A2/
  # This is a required field for provider. Follow STAC spec
  # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license
  license: 'CC-BY-NC-ND-4.0'
maintainers:
  # Information about recipe creator. name and github are required
  - name: 'José M. Zavala González'
    github: jzavala-gonzalez
# The specific bakery (i.e. cloud infrastructure) that your recipe will run on.
# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries
bakery:
  id: 'pangeo-ldeo-nsf-earthcube'
```
New file (+139 lines):

```python
"""
A recipe to move VNP46A2 to a cloud analysis-ready format.
Based on briannapagan's GPM IMERG recipe.
"""

import datetime

import pandas as pd
import xarray as xr
from cmr import GranuleQuery

from pangeo_forge_recipes.patterns import ConcatDim, FilePattern
from pangeo_forge_recipes.recipes import XarrayZarrRecipe

# Query VNP46A2 dataset files at h11v07
api = GranuleQuery()
vnp_api = api.short_name('VNP46A2').point(-66, 18)  # Near Puerto Rico (h11v07)
granules = vnp_api.get_all()

# Extract the link corresponding to each file
downloadable_urls = []
for g in granules:
    for link in g['links']:
        if link['rel'] == 'http://esipfed.org/ns/fedsearch/1.1/data#':
            # print('adding url: ' + link['href'])
            if 'h11v07' not in link['href']:
                continue  # Double-checking we only capture h11v07
            downloadable_urls.append(link['href'])
            break

print(f'{len(downloadable_urls)} urls')


# Dictionaries containing the same information about each granule;
# they only vary in which variable is used as the key to access them.
vnp_date_dict = dict()   # Use granule date as key
href_date_dict = dict()  # Granule download link as key

vnp_dates = []  # List of granule dates, which is passed to ConcatDim later on

for i in range(len(downloadable_urls)):

    # Update broken links from the CMR for this dataset
    href_orig = downloadable_urls[i]
    href_new = (
        href_orig.replace(
            'https://ladsweb.modaps.eosdis.nasa.gov/archive/',
            'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/',
        )
        + '.nc4'
    )

    # Convert Julian date string to Python date object
    year_julian = '-'.join(href_new.split('/')[-3:-1])
    date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date()

    # Save this info into each dictionary and list
    info_dict = dict(
        i=i,
        href=href_new,
        date=date_href,
        year_julian=year_julian,
    )
    vnp_date_dict[date_href] = info_dict
    href_date_dict[href_new] = info_dict
    vnp_dates.append(date_href)

print('Earliest date:', min(vnp_dates).strftime('%Y-%m-%d'))
print('Latest date: ', max(vnp_dates).strftime('%Y-%m-%d'))


def make_full_path(date: datetime.date) -> str:
    '''
    For each date, return the URL from the collected dictionary.
    '''
    return vnp_date_dict[date]['href']


# Concatenate files along the date dimension (one day per file)
date_concat_dim = ConcatDim('date', vnp_dates, nitems_per_file=1)

pattern = FilePattern(make_full_path, date_concat_dim)


def add_date_dimension(ds: xr.Dataset, filename: str) -> xr.Dataset:
    '''
    Expand the dimensions of the input dataset to include a date dimension
    which references that image's collection date.
    '''
    # print('Hello from', filename)
    hn = filename  # href_new
    date_href = href_date_dict[hn]['date']
    date_index = pd.DatetimeIndex([date_href])
    date_da = xr.DataArray(date_index, [('date', date_index)])
    ds = ds.expand_dims(date=date_da)
    return ds


# Recipe!
recipe = XarrayZarrRecipe(pattern, process_input=add_date_dimension)

# -------------------------------------------------------------------
# Only use below for LOCAL testing:

# import os
# from fsspec.implementations.local import LocalFileSystem
# from pangeo_forge_recipes.storage import FSSpecTarget, StorageConfig, CacheFSSpecTarget

# cwd = os.getcwd()  # Files are saved relative to working directory

# # Target directory for generated ZARR dataset
# target_fs = LocalFileSystem()
# target_path = os.path.join(cwd, 'my-dataset.zarr')
# target = FSSpecTarget(fs=target_fs, root_path=target_path)

# # Cache directory for files downloaded from provider
# cache_fs = LocalFileSystem()
# cache_path = os.path.join(cwd, 'cache_dir')
# cache_spec = CacheFSSpecTarget(fs=cache_fs, root_path=cache_path)

# # Config recipe to use both target ZARR and cache
# recipe.storage_config = StorageConfig(target, cache_spec)


# from pangeo_forge_recipes.recipes import setup_logging
# setup_logging()

# recipe_pruned = recipe.copy_pruned()  # Prune to only download 2 files

# print('Full recipe: ', recipe.file_pattern)
# print('Pruned recipe:', recipe_pruned.file_pattern)

# run_function = recipe_pruned.to_function()
# run_function()  # Run pruned recipe

# # Attempt opening the resulting dataset
# ds = xr.open_zarr(recipe_pruned.target_mapper, consolidated=True)
# print(ds)
```
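As an aside on the recipe's date handling: the URL's year and day-of-year path components (e.g. `2012/019`) are joined and parsed with the `%Y-%j` (Julian day) format. A quick standalone sketch of that step, using a hypothetical URL:

```python
import datetime

# Join the year and day-of-year path components the way the recipe does
# (e.g. .../2012/019/<granule>.nc4), then parse with the %Y-%j format.
href = 'https://example.com/laads/VNP46A2/2012/019/granule.h5.nc4'  # hypothetical URL
year_julian = '-'.join(href.split('/')[-3:-1])  # '2012-019'
date = datetime.datetime.strptime(year_julian, '%Y-%j').date()
print(date)  # 2012-01-19
```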
@jzavala-gonzalez, the latest test run failed due to a NameError. This isn't necessarily a Python problem but one of the quirks of apache-beam: accessing variables defined in the global scope from a function scope seems to be a problem for apache-beam. One solution is to find a way to move vnp_date_dict from the global scope into the make_full_path function scope, either by 1) defining vnp_date_dict inside make_full_path, or 2) passing vnp_date_dict as an argument to make_full_path.
If you decide to go with option 2), I'm curious as to whether the following would work:
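(The reviewer's suggested snippet itself did not survive the page export.) A minimal sketch of what the functools.partial approach might look like, using a toy lookup table and a hypothetical URL rather than the PR's actual CMR-derived data:

```python
import datetime
from functools import partial

# Toy stand-in for the recipe's CMR-derived lookup table (hypothetical URL).
vnp_date_dict = {
    datetime.date(2012, 1, 19): {'href': 'https://example.com/VNP46A2/2012/019/granule.h5.nc4'},
}

def make_full_path(date: datetime.date, date_dict: dict) -> str:
    '''Return the URL for a date from an explicitly passed lookup table.'''
    return date_dict[date]['href']

# Binding the dict with partial removes the global-scope reference that
# trips up apache-beam; the bound callable still takes only `date`.
make_full_path_bound = partial(make_full_path, date_dict=vnp_date_dict)
print(make_full_path_bound(datetime.date(2012, 1, 19)))
```

The bound function could then be handed to FilePattern in place of the original, assuming FilePattern only ever calls it with the `date` argument.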
Option 2 looks to be working for the pattern function as you wrote it. I suspected the same would happen for the process_input function, add_date_dimension, but during the init of the XarrayZarrRecipe it attempts, unsuccessfully, to JSON-serialize that partial function (TypeError: object of type partial not serializable). In that second function it's much easier to adjust to not call a global, so I applied option 1 for that instead.
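Option 1 for add_date_dimension could be sketched as parsing the date from the filename inside the function, so no global href_date_dict is needed. This is a sketch under the recipe's .../YYYY/DDD/<granule>.nc4 URL layout, not the PR's actual code:

```python
import datetime

import pandas as pd
import xarray as xr

def add_date_dimension(ds: xr.Dataset, filename: str) -> xr.Dataset:
    '''Add a date dimension parsed from the URL itself, with no global lookup.'''
    year_julian = '-'.join(filename.split('/')[-3:-1])
    date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date()
    date_index = pd.DatetimeIndex([date_href])
    return ds.expand_dims(date=date_index)

# Minimal check with a toy dataset and a hypothetical URL:
ds = xr.Dataset({'ntl': (('y', 'x'), [[1.0, 2.0]])})
out = add_date_dimension(ds, 'https://example.com/laads/VNP46A2/2012/019/granule.h5.nc4')
print(out.sizes)
```

Because the function now closes over nothing, it pickles cleanly for apache-beam and needs no JSON-serializable partial.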