From a08c390b7a4cce8aab9d0d72feca46572dad488f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 14:37:30 -0400 Subject: [PATCH 1/6] Initial recipe import --- recipes/vnp46a2-h11v07/meta.yaml | 49 +++++++++++ recipes/vnp46a2-h11v07/recipe.py | 135 +++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 recipes/vnp46a2-h11v07/meta.yaml create mode 100644 recipes/vnp46a2-h11v07/recipe.py diff --git a/recipes/vnp46a2-h11v07/meta.yaml b/recipes/vnp46a2-h11v07/meta.yaml new file mode 100644 index 0000000000..02b7fb6dd4 --- /dev/null +++ b/recipes/vnp46a2-h11v07/meta.yaml @@ -0,0 +1,49 @@ +# Name for dataset. User chosen. +title: "VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)" +# Description of dataset. User chosen, roughly 1 sentence in length. +description: " +The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called +VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid. +Known by its short-name, VNP46A2, this product contains seven Science Data Sets (SDS) that include DNB BRDF-Corrected NTL, +Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Retrieval, Mandatory Quality Flag, Cloud Mask Quality Flag, and Snow Flag. +VNP46A2 products are provided in standard Hierarchical Data Format-Earth Observing System (HDF-EOS5) format. +This collection starts from January 19, 2012 and forward. + +This ZARR version of the dataset currently only includes the grid file at h11v07, around the Caribbean Sea. +" +# Version of pangeo_forge_recipes library that was used +pangeo_forge_version: "0.9.2" +# The recipes section tells Pangeo Cloud where to find the recipes within your PR. +# Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below. +# If your PR contains multiple recipes, you may add additional elements to the list below. +recipes: + # User chosen name for recipe. Likely similiar to dataset name, ~25 characters in length + - id: vnp46a2-h11v07 + # The `object` below tells Pangeo Cloud specifically where your recipe instance(s) are located and uses the format : + # is name of .py file where the Python recipe object is defined. + # For example, if is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR. + # is the name of the recipe object (i.e. Python class instance) _within_ the specified file. + # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"` + object: "recipe:recipe" +provenance: + # Data provider object. Follow STAC spec. + # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object + providers: + - name: "NASA LAADS DAAC" + description: "National Aeronautics and Space Administration Level-1 and Atmosphere Archive & Distribution System Distributed Active Archive Center" + roles: + - host + - producer + - licensor + url: https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/products/VNP46A2/ + # This is a required field for provider. Follow STAC spec + # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license + license: "CC-BY-NC-ND-4.0" +maintainers: + # Information about recipe creator. name and github are required + - name: "José M. 
Zavala González" + github: jzavala-gonzalez +# The specific bakery (i.e. cloud infrastructure) that your recipe will run on. +# Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries +bakery: + id: "pangeo-ldeo-nsf-earthcube" \ No newline at end of file diff --git a/recipes/vnp46a2-h11v07/recipe.py b/recipes/vnp46a2-h11v07/recipe.py new file mode 100644 index 0000000000..aedb3a42eb --- /dev/null +++ b/recipes/vnp46a2-h11v07/recipe.py @@ -0,0 +1,135 @@ +""" +A recipe to move VNP46A2 to a cloud analysis ready format. +Based off briannapagan's GPM IMERG recipe. +""" + +import datetime + +from pangeo_forge_recipes.patterns import FilePattern, ConcatDim +from pangeo_forge_recipes.recipes import XarrayZarrRecipe + +from cmr import GranuleQuery +import pandas as pd +import xarray as xr + +# Query VNP46A2 dataset files at h11v07 +api = GranuleQuery() +vnp_api = ( + api.short_name("VNP46A2") + .point(-66, 18) # Cerca de Puerto Rico (h11v07) +) +granules = vnp_api.get_all() + +# Extract the link corresponding to each file +downloadable_urls = [] +for g in granules: + for link in (g['links']): + + if link['rel'] == 'http://esipfed.org/ns/fedsearch/1.1/data#': + # print('adding url: ' + link['href']) + if not ('h11v07' in link['href']): + continue # Double-checking we only capture h11v07 + downloadable_urls.append(link['href']) + break + +print(f"{len(downloadable_urls)} urls") + + +# Dictionaries containing the same information about each granule, +# they just vary what variable you use as key to access them. +vnp_date_dict = dict() # Use granule date as key +href_date_dict = dict() # Granule download link as key + +vnp_dates = [] # List of granule dates, which is passed to ConcatDim later on + +for i in range(len(downloadable_urls)): + + # Update broken links from the CMR for this dataset + href_orig = downloadable_urls[i] + href_new = href_orig.replace('https://ladsweb.modaps.eosdis.nasa.gov/archive/', + 'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/' + )+'.nc4' + + # Convert julian date string to Python date object + year_julian = '-'.join(href_new.split('/')[-3:-1]) + date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date() + + # Save this info into each dictionary and list + info_dict = dict( + i=i, + href=href_new, + date=date_href, + year_julian=year_julian, + ) + vnp_date_dict[date_href] = info_dict + href_date_dict[href_new] = info_dict + vnp_dates.append(date_href) + +print('Earliest date:', min(vnp_dates).strftime('%Y-%m-%d')) +print('Latest date: ', max(vnp_dates).strftime('%Y-%m-%d')) + +def make_full_path(date: datetime.date) -> str: + ''' + For each date, return the URL from the collected dictionary. + ''' + return vnp_date_dict[date]['href'] + + +# Concatenate files along the date dimension (one day per file) +date_concat_dim = ConcatDim('date', vnp_dates, nitems_per_file=1) + +pattern = FilePattern(make_full_path, date_concat_dim) + + +def add_date_dimension(ds : xr.Dataset, filename : str) -> xr.Dataset: + ''' + Expand the dimensions of the input dataset to include a date dimension which references that image's collection date. + ''' + # print('Hello from', filename) + hn = filename # href_new + date_href = href_date_dict[hn]['date'] + date_index = pd.DatetimeIndex([date_href]) + date_da = xr.DataArray( date_index, [('date', date_index)] ) + ds = ds.expand_dims(date=date_da) + return ds + +# Recipe! 
+recipe = XarrayZarrRecipe(pattern, process_input=add_date_dimension) + +# ------------------------------------------------------------------- +# Only use below for LOCAL testing: + +# import os +# from fsspec.implementations.local import LocalFileSystem +# from pangeo_forge_recipes.storage import FSSpecTarget, StorageConfig, CacheFSSpecTarget + +# cwd = os.getcwd() # Files are saved relative to working directory + +# # Target directory for generated ZARR dataset +# target_fs = LocalFileSystem() +# target_path = os.path.join(cwd, 'my-dataset.zarr') +# target = FSSpecTarget(fs=target_fs, root_path=target_path) + +# # Cache directory for files downloaded from provider +# cache_fs = LocalFileSystem() +# cache_path = os.path.join(cwd, 'cache_dir') +# cache_spec = CacheFSSpecTarget(fs=cache_fs, root_path=cache_path) + +# # Config recipe to use both target ZARR and cache +# recipe.storage_config = StorageConfig(target, cache_spec) + + +# from pangeo_forge_recipes.recipes import setup_logging +# setup_logging() + +# recipe_pruned = recipe.copy_pruned() # Prune to only download 2 files + +# print('Full recipe: ', recipe.file_pattern) +# print('Pruned recipe:', recipe_pruned.file_pattern) + +# run_function = recipe_pruned.to_function() +# run_function() # Run pruned recipe + +# # Attempt opening the resulting dataset +# ds = xr.open_zarr(recipe_pruned.target_mapper, consolidated=True) +# print(ds) From c5bdde786da4c77d28f61868663d10fbce7614be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 15:24:21 -0400 Subject: [PATCH 2/6] attempt at fixing double-quotes --- recipes/vnp46a2-h11v07/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/recipes/vnp46a2-h11v07/meta.yaml b/recipes/vnp46a2-h11v07/meta.yaml index 02b7fb6dd4..af247a1a98 100644 --- a/recipes/vnp46a2-h11v07/meta.yaml +++ b/recipes/vnp46a2-h11v07/meta.yaml @@ -1,7 +1,7 @@ # Name for dataset. User chosen. title: "VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)" # Description of dataset. User chosen, roughly 1 sentence in length. -description: " +description: | The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid. Known by its short-name, VNP46A2, this product contains seven Science Data Sets (SDS) that include DNB BRDF-Corrected NTL, @@ -9,8 +9,7 @@ Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Ret VNP46A2 products are provided in standard Hierarchical Data Format-Earth Observing System (HDF-EOS5) format. This collection starts from January 19, 2012 and forward. -This ZARR version of the dataset currently only includes the grid file at h11v07, around the Caribbean Sea. -" +This ZARR version of the dataset currently only includes the grid file at h11v07, around the Caribbean Sea. " # Version of pangeo_forge_recipes library that was used pangeo_forge_version: "0.9.2" # The recipes section tells Pangeo Cloud where to find the recipes within your PR. 
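
Patches 2 and 3 are both about YAML quoting for the multi-line description: the first attempt switches description to a block scalar but leaves the closing double-quote attached to the last line (and leaves the continuation lines unindented), and the follow-up patch below settles on a single-quoted scalar with indented continuation lines. A quick way to confirm that kind of fix before pushing is to parse the file locally; the snippet is a minimal sketch that assumes PyYAML is installed and that it runs from the repository root shown in the diff paths.

    import yaml

    # Sanity-check that meta.yaml parses and that the description collapses
    # into a single string (a single-quoted scalar folds the indented lines).
    with open('recipes/vnp46a2-h11v07/meta.yaml') as f:
        meta = yaml.safe_load(f)

    assert isinstance(meta['description'], str)
    print(meta['title'])
    print(meta['recipes'][0]['object'])  # expected: 'recipe:recipe'
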
From 173028fe81699e4a5c233620384928e1430659bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 15:31:32 -0400 Subject: [PATCH 3/6] one more yaml fix --- recipes/vnp46a2-h11v07/meta.yaml | 35 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/recipes/vnp46a2-h11v07/meta.yaml b/recipes/vnp46a2-h11v07/meta.yaml index af247a1a98..ecb71df7ed 100644 --- a/recipes/vnp46a2-h11v07/meta.yaml +++ b/recipes/vnp46a2-h11v07/meta.yaml @@ -1,17 +1,18 @@ # Name for dataset. User chosen. -title: "VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)" +title: 'VNP46A2 - VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily (h11v07)' # Description of dataset. User chosen, roughly 1 sentence in length. -description: | -The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called -VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid. -Known by its short-name, VNP46A2, this product contains seven Science Data Sets (SDS) that include DNB BRDF-Corrected NTL, -Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Retrieval, Mandatory Quality Flag, Cloud Mask Quality Flag, and Snow Flag. -VNP46A2 products are provided in standard Hierarchical Data Format-Earth Observing System (HDF-EOS5) format. -This collection starts from January 19, 2012 and forward. +description: ' + The second of the two VIIRS DNB-based datasets is a daily moonlight- and atmosphere-corrected Nighttime Lights (NTL) product called + VIIRS/NPP Gap-Filled Lunar BRDF-Adjusted Nighttime Lights Daily L3 Global 500m Linear Lat Lon Grid. + Known by its short-name, VNP46A2, this product contains seven Science Data Sets (SDS) that include DNB BRDF-Corrected NTL, + Gap-Filled DNB BRDF-Corrected NTL, DNB Lunar Irradiance, Latest High-Quality Retrieval, Mandatory Quality Flag, Cloud Mask Quality Flag, and Snow Flag. + VNP46A2 products are provided in standard Hierarchical Data Format-Earth Observing System (HDF-EOS5) format. + This collection starts from January 19, 2012 and forward. -This ZARR version of the dataset currently only includes the grid file at h11v07, around the Caribbean Sea. " -# Version of pangeo_forge_recipes library that was used -pangeo_forge_version: "0.9.2" + This ZARR version of the dataset currently only includes the grid file at h11v07, around the Caribbean Sea. + ' +# Version of pangeo_forge_recipes library that was used +pangeo_forge_version: '0.9.2' # The recipes section tells Pangeo Cloud where to find the recipes within your PR. # Many recipe PRs will have just 1 recipe, in which case this section will look similar to the example below. # If your PR contains multiple recipes, you may add additional elements to the list below. @@ -23,13 +24,13 @@ recipes: # For example, if is given as "recipe", Pangeo Cloud will expect a file named `recipe.py` to exist in your PR. # is the name of the recipe object (i.e. Python class instance) _within_ the specified file. # For example, if you have defined `recipe = XarrayZarrRecipe(...)` within a file named `recipe.py`, then your `object` below would be `"recipe:recipe"` - object: "recipe:recipe" + object: 'recipe:recipe' provenance: # Data provider object. Follow STAC spec. 
# https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object providers: - - name: "NASA LAADS DAAC" - description: "National Aeronautics and Space Administration Level-1 and Atmosphere Archive & Distribution System Distributed Active Archive Center" + - name: 'NASA LAADS DAAC' + description: 'National Aeronautics and Space Administration Level-1 and Atmosphere Archive & Distribution System Distributed Active Archive Center' roles: - host - producer @@ -37,12 +38,12 @@ provenance: url: https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/products/VNP46A2/ # This is a required field for provider. Follow STAC spec # https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#license - license: "CC-BY-NC-ND-4.0" + license: 'CC-BY-NC-ND-4.0' maintainers: # Information about recipe creator. name and github are required - - name: "José M. Zavala González" + - name: 'José M. Zavala González' github: jzavala-gonzalez # The specific bakery (i.e. cloud infrastructure) that your recipe will run on. # Available bakeries can be found on the Pangeo Forge website https://pangeo-forge.org/dashboard/bakeries bakery: - id: "pangeo-ldeo-nsf-earthcube" \ No newline at end of file + id: 'pangeo-ldeo-nsf-earthcube' From f058d2a1f64f6ae070e5dfd3672c8fa0b68ecba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 15:33:41 -0400 Subject: [PATCH 4/6] shorten line length --- recipes/vnp46a2-h11v07/recipe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/vnp46a2-h11v07/recipe.py b/recipes/vnp46a2-h11v07/recipe.py index aedb3a42eb..9505adc41a 100644 --- a/recipes/vnp46a2-h11v07/recipe.py +++ b/recipes/vnp46a2-h11v07/recipe.py @@ -83,7 +83,8 @@ def make_full_path(date: datetime.date) -> str: def add_date_dimension(ds : xr.Dataset, filename : str) -> xr.Dataset: ''' - Expand the dimensions of the input dataset to include a date dimension which references that image's collection date. + Expand the dimensions of the input dataset to include a date dimension + which references that image's collection date. 
''' # print('Hello from', filename) hn = filename # href_new From 49fbc845d2b32ee99f186c025abcf7dfd3af42d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 15:46:56 -0400 Subject: [PATCH 5/6] I think pre-commit fixed it this time --- recipes/vnp46a2-h11v07/recipe.py | 41 +++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/recipes/vnp46a2-h11v07/recipe.py b/recipes/vnp46a2-h11v07/recipe.py index 9505adc41a..7d45a224dc 100644 --- a/recipes/vnp46a2-h11v07/recipe.py +++ b/recipes/vnp46a2-h11v07/recipe.py @@ -5,50 +5,51 @@ import datetime -from pangeo_forge_recipes.patterns import FilePattern, ConcatDim -from pangeo_forge_recipes.recipes import XarrayZarrRecipe - -from cmr import GranuleQuery import pandas as pd import xarray as xr +from cmr import GranuleQuery + +from pangeo_forge_recipes.patterns import ConcatDim, FilePattern +from pangeo_forge_recipes.recipes import XarrayZarrRecipe # Query VNP46A2 dataset files at h11v07 api = GranuleQuery() -vnp_api = ( - api.short_name("VNP46A2") - .point(-66, 18) # Cerca de Puerto Rico (h11v07) -) +vnp_api = api.short_name('VNP46A2').point(-66, 18) # Cerca de Puerto Rico (h11v07) granules = vnp_api.get_all() # Extract the link corresponding to each file downloadable_urls = [] for g in granules: - for link in (g['links']): + for link in g['links']: if link['rel'] == 'http://esipfed.org/ns/fedsearch/1.1/data#': # print('adding url: ' + link['href']) if not ('h11v07' in link['href']): - continue # Double-checking we only capture h11v07 + continue # Double-checking we only capture h11v07 downloadable_urls.append(link['href']) break -print(f"{len(downloadable_urls)} urls") +print(f'{len(downloadable_urls)} urls') # Dictionaries containing the same information about each granule, # they just vary what variable you use as key to access them. vnp_date_dict = dict() # Use granule date as key -href_date_dict = dict() # Granule download link as key +href_date_dict = dict() # Granule download link as key -vnp_dates = [] # List of granule dates, which is passed to ConcatDim later on +vnp_dates = [] # List of granule dates, which is passed to ConcatDim later on for i in range(len(downloadable_urls)): # Update broken links from the CMR for this dataset href_orig = downloadable_urls[i] - href_new = href_orig.replace('https://ladsweb.modaps.eosdis.nasa.gov/archive/', - 'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/' - )+'.nc4' + href_new = ( + href_orig.replace( + 'https://ladsweb.modaps.eosdis.nasa.gov/archive/', + 'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/', + ) + + '.nc4' + ) # Convert julian date string to Python date object year_julian = '-'.join(href_new.split('/')[-3:-1]) @@ -68,6 +69,7 @@ print('Earliest date:', min(vnp_dates).strftime('%Y-%m-%d')) print('Latest date: ', max(vnp_dates).strftime('%Y-%m-%d')) + def make_full_path(date: datetime.date) -> str: ''' For each date, return the URL from the collected dictionary. @@ -81,19 +83,20 @@ def make_full_path(date: datetime.date) -> str: pattern = FilePattern(make_full_path, date_concat_dim) -def add_date_dimension(ds : xr.Dataset, filename : str) -> xr.Dataset: +def add_date_dimension(ds: xr.Dataset, filename: str) -> xr.Dataset: ''' Expand the dimensions of the input dataset to include a date dimension which references that image's collection date. 
''' # print('Hello from', filename) - hn = filename # href_new + hn = filename # href_new date_href = href_date_dict[hn]['date'] date_index = pd.DatetimeIndex([date_href]) - date_da = xr.DataArray( date_index, [('date', date_index)] ) + date_da = xr.DataArray(date_index, [('date', date_index)]) ds = ds.expand_dims(date=date_da) return ds + # Recipe! recipe = XarrayZarrRecipe(pattern, process_input=add_date_dimension) From ffc4d2b3035e98684133e60accbe0be05c651fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20M=2E=20Zavala=20Gonz=C3=A1lez?= Date: Fri, 4 Nov 2022 22:52:30 -0400 Subject: [PATCH 6/6] Add functools partial call for pattern path --- recipes/vnp46a2-h11v07/recipe.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/recipes/vnp46a2-h11v07/recipe.py b/recipes/vnp46a2-h11v07/recipe.py index 7d45a224dc..14bcb6f8a8 100644 --- a/recipes/vnp46a2-h11v07/recipe.py +++ b/recipes/vnp46a2-h11v07/recipe.py @@ -4,6 +4,7 @@ """ import datetime +import functools import pandas as pd import xarray as xr @@ -70,13 +71,16 @@ print('Latest date: ', max(vnp_dates).strftime('%Y-%m-%d')) -def make_full_path(date: datetime.date) -> str: +def make_full_path(date: datetime.date, vnp_date_dict=None) -> str: ''' For each date, return the URL from the collected dictionary. ''' return vnp_date_dict[date]['href'] +make_full_path = functools.partial(make_full_path, vnp_date_dict=vnp_date_dict) + + # Concatenate files along the date dimension (one day per file) date_concat_dim = ConcatDim('date', vnp_dates, nitems_per_file=1) @@ -90,7 +94,9 @@ def add_date_dimension(ds: xr.Dataset, filename: str) -> xr.Dataset: ''' # print('Hello from', filename) hn = filename # href_new - date_href = href_date_dict[hn]['date'] + # date_href = href_date_dict[hn]['date'] + year_julian = '-'.join(hn.split('/')[-3:-1]) + date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date() date_index = pd.DatetimeIndex([date_href]) date_da = xr.DataArray(date_index, [('date', date_index)]) ds = ds.expand_dims(date=date_da)
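
The final patch binds vnp_date_dict into make_full_path with functools.partial and re-derives each granule's date inside add_date_dimension from the year/julian-day segments of the filename, presumably so that neither function depends on module-level dictionaries when the recipe runs on the bakery. The URL rewrite and the '%Y-%j' parsing can be checked in isolation; the sketch below uses a made-up granule path that only imitates the year/day directory layout the recipe relies on.

    import datetime
    import functools

    # Illustrative granule path only: it mimics the year/julian-day layout
    # the recipe expects, but real URLs come from the CMR GranuleQuery results.
    href_orig = (
        'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/5000/'
        'VNP46A2/2012/019/VNP46A2.A2012019.h11v07.001.example.h5'
    )

    # Same rewrite the recipe applies to work around broken CMR links.
    href_new = href_orig.replace(
        'https://ladsweb.modaps.eosdis.nasa.gov/archive/',
        'https://ladsweb.modaps.eosdis.nasa.gov/opendap/RemoteResources/laads/',
    ) + '.nc4'

    # Same parsing that add_date_dimension now performs on the filename.
    year_julian = '-'.join(href_new.split('/')[-3:-1])
    date_href = datetime.datetime.strptime(year_julian, '%Y-%j').date()
    print(date_href)  # 2012-01-19

    # Mirror of the partial call that keeps the pattern function self-contained;
    # the one-entry dictionary here is only for illustration.
    def make_full_path(date: datetime.date, vnp_date_dict=None) -> str:
        return vnp_date_dict[date]['href']

    make_full_path = functools.partial(
        make_full_path, vnp_date_dict={date_href: {'href': href_new}}
    )
    print(make_full_path(date_href) == href_new)  # True

For an end-to-end check of the assembled recipe, the commented-out block at the end of recipe.py already shows the StorageConfig setup for a pruned local run.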