diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 943fd3a5..adca286a 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -83,3 +83,5 @@ jobs: } env: GOOGLE_APPLICATION_CREDENTIALS: "${{ steps.auth.outputs.credentials_file_path }}" + EARTHDATA_USERNAME: "${{ secrets.EARTHDATA_USERNAME }}" + EARTHDATA_PASSWORD: "${{ secrets.EARTHDATA_PASSWORD }}" diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..8b1cb727 --- /dev/null +++ b/environment.yml @@ -0,0 +1,14 @@ +name: data-management-dev +channels: + - conda-forge + - defaults +dependencies: + - python=3.10 + - fsspec + - xarray + - ipykernel + - requests + - aiohttp + - scipy + - netcdf4 + - h5netcdf diff --git a/feedstock/meta.yaml b/feedstock/meta.yaml index 001f25f1..d0e233fa 100644 --- a/feedstock/meta.yaml +++ b/feedstock/meta.yaml @@ -84,6 +84,18 @@ recipes: - licensor url: https://arxiv.org/abs/2306.08754 license: "Apache-2.0" + - id: MODIS_COSP + object: "modis-cosp:MODIS_COSP" + description: "MCD06COSP_M3_MODIS - MODIS (Aqua/Terra) Cloud Properties " + provenance: + providers: + - name: "NASA" + description: "NASA" + roles: + - producer + - host + url: https://ladsweb.modaps.eosdis.nasa.gov/missions-and-measurements/products/MCD06COSP_M3_MODIS#overview + license: "unknown" maintainers: - name: "Julius Busecke" orcid: 0000-0001-8571-865X diff --git a/feedstock/modis-cosp.py b/feedstock/modis-cosp.py new file mode 100644 index 00000000..952022b2 --- /dev/null +++ b/feedstock/modis-cosp.py @@ -0,0 +1,38 @@ +import os + +import aiohttp +import apache_beam as beam +from pangeo_forge_recipes.patterns import pattern_from_file_sequence +from pangeo_forge_recipes.transforms import OpenURLWithFSSpec, OpenWithXarray, StoreToZarr + +username, password = os.environ['EARTHDATA_USERNAME'], os.environ['EARTHDATA_PASSWORD'] +client_kwargs = { + 'auth': aiohttp.BasicAuth(username, password), + 'trust_env': True, # NOTE(review): presumably honors proxy/.netrc env settings; confirm +} + +# The URLs are a
bit hard to construct, so let's try with a few hardcoded ones +input_urls = [ + 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/62/MCD06COSP_M3_MODIS/2023/182/MCD06COSP_M3_MODIS.A2023182.062.2023223000656.nc', + 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/62/MCD06COSP_M3_MODIS/2023/213/MCD06COSP_M3_MODIS.A2023213.062.2023254000930.nc', + 'https://ladsweb.modaps.eosdis.nasa.gov/archive/allData/62/MCD06COSP_M3_MODIS/2023/244/MCD06COSP_M3_MODIS.A2023244.062.2023285000449.nc', +] + +# TODO: the files seem to have no time coordinate, so we may need to construct one. +# For now, just try to concatenate along a non-coordinate dimension (see concat_dim below). + +pattern = pattern_from_file_sequence(input_urls, concat_dim='time') + +MODIS_COSP = ( + beam.Create(pattern.items()) + | OpenURLWithFSSpec( + open_kwargs={'block_size': 0, 'client_kwargs': client_kwargs}, + max_concurrency=10, + ) + | OpenWithXarray() + | StoreToZarr( + target_chunks={'time': 3}, # presumably one chunk spanning all 3 hardcoded files; confirm + store_name='MODIS_COSP.zarr', + combine_dims=pattern.combine_dim_keys, + ) +) diff --git a/feedstock/modis-cosp/README.md b/feedstock/modis-cosp/README.md new file mode 100644 index 00000000..4af7493a --- /dev/null +++ b/feedstock/modis-cosp/README.md @@ -0,0 +1,6 @@ +- We need to get an earthdata login and store it as repo secret +- We are taking https://github.com/pangeo-forge/aqua-modis-feedstock as example: + - Deploy action setup: https://github.com/pangeo-forge/aqua-modis-feedstock/blob/main/.github/workflows/deploy.yaml + - Alternative auth: https://github.com/yuvipanda/pangeo-forge-earthdatalogin/blob/main/pangeo_forge_earthdatalogin/__init__.py + +> New PGF-docs: https://pangeo-forge.readthedocs.io/en/latest/composition/index.html diff --git a/feedstock/modis-cosp/meta.yaml b/feedstock/modis-cosp/meta.yaml new file mode 100644 index 00000000..e69de29b diff --git a/feedstock/modis-cosp/requirements.txt b/feedstock/modis-cosp/requirements.txt new file mode 100644 index 00000000..e69de29b