diff --git a/.binder/conda-lock.yml b/.binder/conda-lock.yml
index 3805a2f2..594f4785 100644
--- a/.binder/conda-lock.yml
+++ b/.binder/conda-lock.yml
@@ -15,9 +15,9 @@ metadata:
- url: conda-forge
used_env_vars: []
content_hash:
- linux-64: a9096c0ec9e0da28a616a7cd5195dab1031d21fb745df69a95feb6a283140daa
- osx-64: 89c1c35d776c0f1b3d62e1fd19c5976432944021d6ac9f353375f840c488f237
- win-64: 2562905301719ce1a1031bd3e8be2439edb55ff28fb5ae2de57cad0279b6ba28
+ linux-64: a06ab1f6b457a8e01ffc7a0c8389db5666da7a52528bfea3171b297122101e98
+ osx-64: ab65fbc2b501e3e5c402a28483b0fb11501ec06cae542e31b8d7179a96cc0e48
+ win-64: 1d8ac907bf70156023b42600406482d8e59b586e2b3c87dabd321f2b1c0348d3
platforms:
- linux-64
- osx-64
@@ -2079,6 +2079,18 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/linux-64/antlr-python-runtime-4.7.2-py311h38be061_1003.tar.bz2
version: 4.7.2
+- category: main
+ dependencies:
+ python: ''
+ hash:
+ md5: c0481c9de49f040272556e2cedf42816
+ sha256: b3e9369529fe7d721b66f18680ff4b561e20dbf6507e209e1f60eac277c97560
+ manager: conda
+ name: asciitree
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/noarch/asciitree-0.3.3-py_2.tar.bz2
+ version: 0.3.3
- category: main
dependencies:
python: '>2.7'
@@ -2417,6 +2429,18 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/noarch/executing-1.2.0-pyhd8ed1ab_0.tar.bz2
version: 1.2.0
+- category: main
+ dependencies:
+ python: '>=3.6'
+ hash:
+ md5: 348e27e78a5e39090031448c72f66d5e
+ sha256: 42be6ac8478051b26751d778490d6a71de12e5c6443e145ff3eddbc577d9bcda
+ manager: conda
+ name: fasteners
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/noarch/fasteners-0.17.3-pyhd8ed1ab_0.tar.bz2
+ version: 0.17.3
- category: main
dependencies:
python: '>=3.7'
@@ -2895,6 +2919,21 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_0.conda
version: 1.3.0
+- category: main
+ dependencies:
+ libgcc-ng: '>=12'
+ libstdcxx-ng: '>=12'
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ hash:
+ md5: 7415f24f8c44e44152623d93c5015000
+ sha256: 9dea30d75b5eb31dac447aee56bf0648b5d58438a686123a2e085a166ed69900
+ manager: conda
+ name: msgpack-python
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/linux-64/msgpack-python-1.0.5-py311ha3edf6b_0.conda
+ version: 1.0.5
- category: main
dependencies:
python: ''
@@ -3251,14 +3290,14 @@ package:
dependencies:
python: '>=3.6'
hash:
- md5: b8afba5fbf891019eae564c3edb28b9e
- sha256: 904c98c6bb45302f6349656d5e2f2743677da4254ac76be660475ecdd0fd0c6a
+ md5: de631f19ba156d224d80241e3fc7d32f
+ sha256: c0781a1aacc2227379c9614852bf92b967a0e8b52f66c04b5723e0b7b2bd4f1e
manager: conda
name: python-tzdata
optional: false
platform: linux-64
- url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2022.7-pyhd8ed1ab_0.conda
- version: '2022.7'
+ url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.2-pyhd8ed1ab_0.conda
+ version: '2023.2'
- category: main
dependencies:
libgcc-ng: '>=12'
@@ -4544,6 +4583,24 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/linux-64/numba-0.56.3-py310ha5257ce_0.tar.bz2
version: 0.56.3
+- category: main
+ dependencies:
+ entrypoints: ''
+ libgcc-ng: '>=12'
+ libstdcxx-ng: '>=12'
+ msgpack-python: ''
+ numpy: '>=1.7'
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ hash:
+ md5: ecdaf0772e524ed51218f6d52ef74424
+ sha256: 0837de92c8251f2ce6940bcb404f6cb8a2bf5f6cfbc1f65d1c823ef223bcc6ca
+ manager: conda
+ name: numcodecs
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/linux-64/numcodecs-0.11.0-py311hcafe171_1.conda
+ version: 0.11.0
- category: main
dependencies:
attrs: '>=19.2.0'
@@ -5283,6 +5340,19 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-6.0.0-h8e241bc_0.conda
version: 6.0.0
+- category: main
+ dependencies:
+ importlib-metadata: ''
+ python: '>=3.7'
+ hash:
+ md5: b2ec66de3480db2f5124f547cad7e7a4
+ sha256: 0569bf2c7ad1f647b4f4b83dd1152df093585ff9a2ba38af98d7a2a348b2b74a
+ manager: conda
+ name: humanize
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/noarch/humanize-4.6.0-pyhd8ed1ab_0.conda
+ version: 4.6.0
- category: main
dependencies:
importlib-metadata: '>=6.1.0,<6.1.1.0a0'
@@ -5740,6 +5810,22 @@ package:
platform: linux-64
url: https://conda.anaconda.org/conda-forge/noarch/wsproto-1.2.0-pyhd8ed1ab_0.tar.bz2
version: 1.2.0
+- category: main
+ dependencies:
+ asciitree: ''
+ fasteners: ''
+ numcodecs: '>=0.10.0'
+ numpy: '>=1.7'
+ python: '>=3.5'
+ hash:
+ md5: 0c5776fe65a12a421d7ddf90411a6c3f
+ sha256: 0f029f7efea00b8258782b5e68989fc140c227e6d9edd231d46fdd954b39d23f
+ manager: conda
+ name: zarr
+ optional: false
+ platform: linux-64
+ url: https://conda.anaconda.org/conda-forge/noarch/zarr-2.14.2-pyhd8ed1ab_0.conda
+ version: 2.14.2
- category: main
dependencies:
argon2-cffi-bindings: ''
@@ -9768,7 +9854,6 @@ package:
- category: main
dependencies:
joblib: '>=1.1.1'
- libcblas: '>=3.9.0,<4.0a0'
libgcc-ng: '>=12'
libstdcxx-ng: '>=12'
numpy: '>=1.23.5,<2.0a0'
@@ -9777,13 +9862,13 @@ package:
scipy: ''
threadpoolctl: '>=2.0.0'
hash:
- md5: c928bd2c1f3dfe6e5c89bbc5f03af284
- sha256: 344b84f89a2d537c250d4eff98ee85c4bfbfb0227174448c4e03bbf4e6126345
+ md5: 1a69529b0bcf0e3a03e6585903659df7
+ sha256: 7bf78d2c375b53e3a61fcd70847b12d2152a2699e43224ba9817badb199a62f1
manager: conda
name: scikit-learn
optional: false
platform: linux-64
- url: https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.2.2-py311h67c5ca5_0.conda
+ url: https://conda.anaconda.org/conda-forge/linux-64/scikit-learn-1.2.2-py311h103fc68_1.conda
version: 1.2.2
- category: main
dependencies:
@@ -12665,6 +12750,18 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/noarch/appnope-0.1.3-pyhd8ed1ab_0.tar.bz2
version: 0.1.3
+- category: main
+ dependencies:
+ python: ''
+ hash:
+ md5: c0481c9de49f040272556e2cedf42816
+ sha256: b3e9369529fe7d721b66f18680ff4b561e20dbf6507e209e1f60eac277c97560
+ manager: conda
+ name: asciitree
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/noarch/asciitree-0.3.3-py_2.tar.bz2
+ version: 0.3.3
- category: main
dependencies:
python: '>2.7'
@@ -13013,6 +13110,18 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/noarch/executing-1.2.0-pyhd8ed1ab_0.tar.bz2
version: 1.2.0
+- category: main
+ dependencies:
+ python: '>=3.6'
+ hash:
+ md5: 348e27e78a5e39090031448c72f66d5e
+ sha256: 42be6ac8478051b26751d778490d6a71de12e5c6443e145ff3eddbc577d9bcda
+ manager: conda
+ name: fasteners
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/noarch/fasteners-0.17.3-pyhd8ed1ab_0.tar.bz2
+ version: 0.17.3
- category: main
dependencies:
python: '>=3.7'
@@ -13407,6 +13516,20 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_0.conda
version: 1.3.0
+- category: main
+ dependencies:
+ libcxx: '>=14.0.6'
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ hash:
+ md5: d3a60c5422b7d61b2740c7c5df508c86
+ sha256: ebe4d269e0605e7de3b9a9199e1e20d96c66945ac67fe1fccf778177d1a615a7
+ manager: conda
+ name: msgpack-python
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/osx-64/msgpack-python-1.0.5-py311hd2070f0_0.conda
+ version: 1.0.5
- category: main
dependencies:
python: ''
@@ -13758,14 +13881,14 @@ package:
dependencies:
python: '>=3.6'
hash:
- md5: b8afba5fbf891019eae564c3edb28b9e
- sha256: 904c98c6bb45302f6349656d5e2f2743677da4254ac76be660475ecdd0fd0c6a
+ md5: de631f19ba156d224d80241e3fc7d32f
+ sha256: c0781a1aacc2227379c9614852bf92b967a0e8b52f66c04b5723e0b7b2bd4f1e
manager: conda
name: python-tzdata
optional: false
platform: osx-64
- url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2022.7-pyhd8ed1ab_0.conda
- version: '2022.7'
+ url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.2-pyhd8ed1ab_0.conda
+ version: '2023.2'
- category: main
dependencies:
python: '>=3.11,<3.12.0a0'
@@ -15636,6 +15759,19 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/osx-64/harfbuzz-6.0.0-h08f8713_0.conda
version: 6.0.0
+- category: main
+ dependencies:
+ importlib-metadata: ''
+ python: '>=3.7'
+ hash:
+ md5: b2ec66de3480db2f5124f547cad7e7a4
+ sha256: 0569bf2c7ad1f647b4f4b83dd1152df093585ff9a2ba38af98d7a2a348b2b74a
+ manager: conda
+ name: humanize
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/noarch/humanize-4.6.0-pyhd8ed1ab_0.conda
+ version: 4.6.0
- category: main
dependencies:
importlib-metadata: '>=6.1.0,<6.1.1.0a0'
@@ -16522,6 +16658,23 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/osx-64/numba-0.56.3-py310h62db5c2_0.tar.bz2
version: 0.56.3
+- category: main
+ dependencies:
+ entrypoints: ''
+ libcxx: '>=14.0.6'
+ msgpack-python: ''
+ numpy: '>=1.7'
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ hash:
+ md5: 21f2ae35161c19b8c4ad0791d12ef2a3
+ sha256: 27c7eb0a5f6b3726252c5331b6101d492b8280af0a041b1c8b5fcd45ee56268d
+ manager: conda
+ name: numcodecs
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/osx-64/numcodecs-0.11.0-py311h814d153_1.conda
+ version: 0.11.0
- category: main
dependencies:
libcxx: '>=14.0.6'
@@ -17043,6 +17196,22 @@ package:
platform: osx-64
url: https://conda.anaconda.org/conda-forge/noarch/xarray-2023.3.0-pyhd8ed1ab_0.conda
version: 2023.3.0
+- category: main
+ dependencies:
+ asciitree: ''
+ fasteners: ''
+ numcodecs: '>=0.10.0'
+ numpy: '>=1.7'
+ python: '>=3.5'
+ hash:
+ md5: 0c5776fe65a12a421d7ddf90411a6c3f
+ sha256: 0f029f7efea00b8258782b5e68989fc140c227e6d9edd231d46fdd954b39d23f
+ manager: conda
+ name: zarr
+ optional: false
+ platform: osx-64
+ url: https://conda.anaconda.org/conda-forge/noarch/zarr-2.14.2-pyhd8ed1ab_0.conda
+ version: 2.14.2
- category: main
dependencies:
python: '>=3.8'
@@ -18165,7 +18334,6 @@ package:
- category: main
dependencies:
joblib: '>=1.1.1'
- libcblas: '>=3.9.0,<4.0a0'
libcxx: '>=14.0.6'
llvm-openmp: '>=14.0.6'
numpy: '>=1.23.5,<2.0a0'
@@ -18174,13 +18342,13 @@ package:
scipy: ''
threadpoolctl: '>=2.0.0'
hash:
- md5: 7eedaa9f41426c4208d97ebea14b59b7
- sha256: 6765710a393afdde4e96d75047d81ca5276dc6c1b3a0390ee78c9a829c054b33
+ md5: e2dd2bd2dcf23b11d5af2d6df01904a6
+ sha256: 5595daa14a0d93752eef7b266b0bdf0a8c1b12c1260c6f1e6862c52ab030772b
manager: conda
name: scikit-learn
optional: false
platform: osx-64
- url: https://conda.anaconda.org/conda-forge/osx-64/scikit-learn-1.2.2-py311h087fafe_0.conda
+ url: https://conda.anaconda.org/conda-forge/osx-64/scikit-learn-1.2.2-py311hda7f639_1.conda
version: 1.2.2
- category: main
dependencies:
@@ -22877,6 +23045,18 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/win-64/antlr-python-runtime-4.7.2-py311h1ea47a8_1003.tar.bz2
version: 4.7.2
+- category: main
+ dependencies:
+ python: ''
+ hash:
+ md5: c0481c9de49f040272556e2cedf42816
+ sha256: b3e9369529fe7d721b66f18680ff4b561e20dbf6507e209e1f60eac277c97560
+ manager: conda
+ name: asciitree
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/noarch/asciitree-0.3.3-py_2.tar.bz2
+ version: 0.3.3
- category: main
dependencies:
python: '>2.7'
@@ -23225,6 +23405,18 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/noarch/executing-1.2.0-pyhd8ed1ab_0.tar.bz2
version: 1.2.0
+- category: main
+ dependencies:
+ python: '>=3.6'
+ hash:
+ md5: 348e27e78a5e39090031448c72f66d5e
+ sha256: 42be6ac8478051b26751d778490d6a71de12e5c6443e145ff3eddbc577d9bcda
+ manager: conda
+ name: fasteners
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/noarch/fasteners-0.17.3-pyhd8ed1ab_0.tar.bz2
+ version: 0.17.3
- category: main
dependencies:
python: '>=3.7'
@@ -23712,6 +23904,22 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_0.conda
version: 1.3.0
+- category: main
+ dependencies:
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ ucrt: '>=10.0.20348.0'
+ vc: '>=14.2,<15'
+ vs2015_runtime: '>=14.29.30139'
+ hash:
+ md5: 01a252f384a5d1ad338cff1184d9a9c0
+ sha256: 0df20f87b92ad3e86f4ed468d753901bf2da4e4661e395e8dbc5ee4f652ca9cc
+ manager: conda
+ name: msgpack-python
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/win-64/msgpack-python-1.0.5-py311h005e61a_0.conda
+ version: 1.0.5
- category: main
dependencies:
python: ''
@@ -24024,14 +24232,14 @@ package:
dependencies:
python: '>=3.6'
hash:
- md5: b8afba5fbf891019eae564c3edb28b9e
- sha256: 904c98c6bb45302f6349656d5e2f2743677da4254ac76be660475ecdd0fd0c6a
+ md5: de631f19ba156d224d80241e3fc7d32f
+ sha256: c0781a1aacc2227379c9614852bf92b967a0e8b52f66c04b5723e0b7b2bd4f1e
manager: conda
name: python-tzdata
optional: false
platform: win-64
- url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2022.7-pyhd8ed1ab_0.conda
- version: '2022.7'
+ url: https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.2-pyhd8ed1ab_0.conda
+ version: '2023.2'
- category: main
dependencies:
python: '>=3.11,<3.12.0a0'
@@ -26021,6 +26229,19 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/win-64/glib-2.74.1-h12be248_1.tar.bz2
version: 2.74.1
+- category: main
+ dependencies:
+ importlib-metadata: ''
+ python: '>=3.7'
+ hash:
+ md5: b2ec66de3480db2f5124f547cad7e7a4
+ sha256: 0569bf2c7ad1f647b4f4b83dd1152df093585ff9a2ba38af98d7a2a348b2b74a
+ manager: conda
+ name: humanize
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/noarch/humanize-4.6.0-pyhd8ed1ab_0.conda
+ version: 4.6.0
- category: main
dependencies:
importlib-metadata: '>=6.1.0,<6.1.1.0a0'
@@ -27342,6 +27563,25 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/win-64/numba-0.56.3-py310h19bcfe9_0.tar.bz2
version: 0.56.3
+- category: main
+ dependencies:
+ entrypoints: ''
+ msgpack-python: ''
+ numpy: '>=1.7'
+ python: '>=3.11,<3.12.0a0'
+ python_abi: 3.11.* *_cp311
+ ucrt: '>=10.0.20348.0'
+ vc: '>=14.2,<15'
+ vs2015_runtime: '>=14.29.30139'
+ hash:
+ md5: 9b66ec550742f950ba5fdc1fe650ce26
+ sha256: 07dc9ec52ddb18a70f8ada65a9be9a982a427b6bcd479725659cc0d34d9e8fa0
+ manager: conda
+ name: numcodecs
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/win-64/numcodecs-0.11.0-py311h12c1d0e_1.conda
+ version: 0.11.0
- category: main
dependencies:
dataclasses: ''
@@ -29471,6 +29711,22 @@ package:
platform: win-64
url: https://conda.anaconda.org/conda-forge/noarch/xarray-2023.3.0-pyhd8ed1ab_0.conda
version: 2023.3.0
+- category: main
+ dependencies:
+ asciitree: ''
+ fasteners: ''
+ numcodecs: '>=0.10.0'
+ numpy: '>=1.7'
+ python: '>=3.5'
+ hash:
+ md5: 0c5776fe65a12a421d7ddf90411a6c3f
+ sha256: 0f029f7efea00b8258782b5e68989fc140c227e6d9edd231d46fdd954b39d23f
+ manager: conda
+ name: zarr
+ optional: false
+ platform: win-64
+ url: https://conda.anaconda.org/conda-forge/noarch/zarr-2.14.2-pyhd8ed1ab_0.conda
+ version: 2.14.2
- category: main
dependencies:
geos: '>=3.11.2,<3.11.3.0a0'
@@ -30241,7 +30497,6 @@ package:
- category: main
dependencies:
joblib: '>=1.1.1'
- libcblas: '>=3.9.0,<4.0a0'
numpy: '>=1.23.5,<2.0a0'
python: '>=3.11,<3.12.0a0'
python_abi: 3.11.* *_cp311
@@ -30251,13 +30506,13 @@ package:
vc: '>=14.2,<15'
vs2015_runtime: '>=14.29.30139'
hash:
- md5: 05b85818eef8dd80bfd854b9bde11e09
- sha256: 0f7f139c9218348c0ab53057e0f6a78da240a911ebef0e9e21af3bdecd29f39b
+ md5: ce1dbe81f1199a0e2719c9876715f7d4
+ sha256: d362742096303d8d4eb0f052d53327074e9bbbae6ad297f7bde7a92c16f238ea
manager: conda
name: scikit-learn
optional: false
platform: win-64
- url: https://conda.anaconda.org/conda-forge/win-64/scikit-learn-1.2.2-py311h6619ee7_0.conda
+ url: https://conda.anaconda.org/conda-forge/win-64/scikit-learn-1.2.2-py311h142b183_1.conda
version: 1.2.2
- category: main
dependencies:
diff --git a/.binder/environment-python_and_r.yml b/.binder/environment-python_and_r.yml
index 01e961c3..0dd44b2c 100644
--- a/.binder/environment-python_and_r.yml
+++ b/.binder/environment-python_and_r.yml
@@ -23,6 +23,7 @@ dependencies:
- geopandas
- geoplot
- gridgeo
+ - humanize
- hvplot
- ioos_qc
- ipyleaflet
@@ -63,6 +64,7 @@ dependencies:
- xarray
- xlrd
- xmltodict
+ - zarr
# R packages.
- rpy2
- r-base=4
diff --git a/.binder/environment.yml b/.binder/environment.yml
index 3723d6d5..f6ba20cb 100644
--- a/.binder/environment.yml
+++ b/.binder/environment.yml
@@ -22,6 +22,7 @@ dependencies:
- geopandas
- geoplot
- gridgeo
+ - humanize
- hvplot
- ioos_qc
- ipyleaflet
@@ -62,3 +63,4 @@ dependencies:
- xarray
- xlrd
- xmltodict
+ - zarr
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3ab1c1fb..b0a06279 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -35,12 +35,12 @@ repos:
- id: add-trailing-comma
- repo: https://github.com/charliermarsh/ruff-pre-commit
- rev: v0.0.254
+ rev: v0.0.257
hooks:
- id: ruff
- repo: https://github.com/nbQA-dev/nbQA
- rev: 1.6.3
+ rev: 1.6.4
hooks:
# mdformat works on the CLI but not as pre-commit yet.
# Use `nbqa mdformat jupyterbook --nbqa-md` to run it locally.
diff --git a/jupyterbook/content/code_gallery/data_management_notebooks/2023-03-20-Reading_and_writing_zarr.ipynb b/jupyterbook/content/code_gallery/data_management_notebooks/2023-03-20-Reading_and_writing_zarr.ipynb
new file mode 100644
index 00000000..76be410b
--- /dev/null
+++ b/jupyterbook/content/code_gallery/data_management_notebooks/2023-03-20-Reading_and_writing_zarr.ipynb
@@ -0,0 +1,1681 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "29370978",
+ "metadata": {},
+ "source": [
+ "# Reading and writing zarr files with xarray\n",
+ "\n",
+ "\n",
+ "Created: 2023-03-20\n",
+ "\n",
+ "\n",
+ "\n",
+ "[The zarr format](https://zarr.readthedocs.io/en/stable/) is a file storage based specification for chunked, compressed, N-dimensional arrays. The format is based on an open-source specification and its main goal is to make cloud data read/write a bit easier and more effective.\n",
+ "\n",
+ "The main problems in data storage are:\n",
+ "\n",
+ "1. Read/write data that is larger than memory\n",
+ "1. Being able to parallelize computations\n",
+ "1. Reduce the I/O bottleneck\n",
+ "1. Compression\n",
+ "1. Speed\n",
+ "\n",
+ "One solution is to use a chunked\\* parallel computing framework and a chunked parallel storage library. Zarr helps us with the latter.\n",
+ "\n",
+ "In this example we will load ocean model data, stored as netCDF and served via THREDDS, subset it, and save it as zarr. Let's start by saving a single time step for the surface layer temperature and salinity.\n",
+ "\n",
+ "\n",
+ "\\* Many data formats can take advantage of storing the data in chunks for faster access. The zarr approach is different in that each chunk is a different object in cloud storage, making them better for parallel access. The chunks can be compressed to reduce their size and improve cloud performance even further. Zarr has a nice tutorial on how to balance chunk size for performance. Check it out: https://zarr.readthedocs.io/en/stable/tutorial.html#chunk-optimizations."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "2961833f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
<xarray.Dataset>\n",
+ "Dimensions: (time: 1, eta_rho: 106, xi_rho: 242)\n",
+ "Coordinates:\n",
+ " s_rho float64 -0.0125\n",
+ " lon_rho (eta_rho, xi_rho) float64 ...\n",
+ " lat_rho (eta_rho, xi_rho) float64 ...\n",
+ " * time (time) datetime64[ns] 2022-06-06T12:00:00\n",
+ " time_run (time) datetime64[ns] ...\n",
+ "Dimensions without coordinates: eta_rho, xi_rho\n",
+ "Data variables:\n",
+ " temp (time, eta_rho, xi_rho) float64 ...\n",
+ " salt (time, eta_rho, xi_rho) float64 ...\n",
+ "Attributes: (12/46)\n",
+ " file: doppio_avg_6280_0004.nc\n",
+ " format: netCDF-4/HDF5 file\n",
+ " Conventions: CF-1.4, SGRID-0.3\n",
+ " type: ROMS/TOMS nonlinear model averages file\n",
+ " title: ROMS doppio Real-Time Operational PSAS F...\n",
+ " var_info: ../Data/varinfo1040t_daily.dat\n",
+ " ... ...\n",
+ " his_base: doppio_his_6280\n",
+ " cdm_data_type: GRID\n",
+ " featureType: GRID\n",
+ " location: Proto fmrc:doppio_2017_da_avg\n",
+ " summary: doppio\n",
+ " DODS_EXTRA.Unlimited_Dimension: ocean_time Dimensions: time : 1eta_rho : 106xi_rho : 242
Coordinates: (5)
Data variables: (2)
Indexes: (1)
Attributes: (46)
file : doppio_avg_6280_0004.nc format : netCDF-4/HDF5 file Conventions : CF-1.4, SGRID-0.3 type : ROMS/TOMS nonlinear model averages file title : ROMS doppio Real-Time Operational PSAS Forecast System Version 1 FMRC Averages var_info : ../Data/varinfo1040t_daily.dat rst_file : tmp_doppio_rst_6280.nc avg_base : doppio_avg_6280 flt_file : doppio_flt_6280.nc grd_file : /home/om/roms/doppio/7km/grid_doppio_JJA_v13.nc ini_file : doppio_rst_6280.nc tide_file : /home/om/roms/doppio/7km/doppio_tide_7km.nc frc_file_01 : ../Data/lwrad_down_ncepnam_3hourly_MAB_and_GoM.nc frc_file_02 : ../Data/Pair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_03 : ../Data/Qair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_04 : ../Data/rain_ncepnam_3hourly_MAB_and_GoM.nc frc_file_05 : ../Data/swrad_daily_ncepnam_3hourly_MAB_and_GoM.nc frc_file_06 : ../Data/Tair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_07 : ../Data/Winds_ncepnam_3hourly_MAB_and_GoM.nc clm_file_01 : ../Data/doppio_clm.nc nud_file : /home/om/roms/doppio/7km/doppio_nudgcoef_7km_1500-2000_GS.nc script_file : nl_ocean_doppio.in fpos_file : floats.in NLM_TADV : \n",
+ "ADVECTION: HORIZONTAL VERTICAL \n",
+ "temp: Akima4 Akima4 \n",
+ "salt: Akima4 Akima4 NLM_LBC : \n",
+ "EDGE: WEST SOUTH EAST NORTH \n",
+ "zeta: Cha Cha Cha Clo \n",
+ "ubar: Fla Fla Fla Clo \n",
+ "vbar: Fla Fla Fla Clo \n",
+ "u: RadNud RadNud RadNud Clo \n",
+ "v: RadNud RadNud RadNud Clo \n",
+ "temp: Rad Rad Rad Clo \n",
+ "salt: Rad Rad Rad Clo \n",
+ "tke: Gra Gra Gra Clo svn_url : https://www.myroms.org/svn/src/trunk svn_rev : 1040 code_dir : /home/julia/ROMS/doppio/svn1040t header_dir : /home/julia/ROMS/doppio/Compile/fwd header_file : doppio.h os : Linux cpu : x86_64 compiler_system : ifort compiler_command : /opt/sw/apps/intel-18.0.1/openmpi/3.1.2/bin/mpif90 compiler_flags : -fp-model precise -heap-arrays -ip -O3 -traceback -check uninit tiling : 004x004 history : ROMS/TOMS, Version 3.9, Thursday - March 16, 2023 - 4:32:53 AM ;\n",
+ "FMRC Best Dataset ana_file : ROMS/Functionals/ana_btflux.h, ROMS/Functionals/ana_srflux.h CPP_options : DOPPIO, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, ASSUMED_SHAPE, ATM_PRESS, AVERAGES, !BOUNDARY_A BULK_FLUXES, !COLLECT_ALL..., CHARNOK, CRAIG_BANNER, CURVGRID, DEFLATE, DIURNAL_SRFLUX, DJ_GRADPS, DOUBLE_PRECISION, EMINUSP, FLOATS, FORWARD_WRITE, GLS_MIXING, HDF5, KANTHA_CLAYSON, LONGWAVE_OUT, MASKING, MIX_GEO_TS, MIX_S_UV, MPI, NONLINEAR, NONLIN_EOS, NO_LBC_ATT, N2S2_HORAVG, OUT_DOUBLE, POWER_LAW, PROFILE, K_GSCHEME, REDUCE_ALLREDUCE, !RST_SINGLE, SALINITY, SOLAR_SOURCE, SOLVE3D, SSH_TIDES, TS_DIF2, UV_ADV, UV_COR, UV_U3HADVECTION, UV_C4VADVECTION, UV_QDRAG, UV_TIDES, UV_VIS2, VAR_RHO_2D, WIND_MINUS_CURRENT _CoordSysBuilder : ucar.nc2.dataset.conv.CF1Convention his_base : doppio_his_6280 cdm_data_type : GRID featureType : GRID location : Proto fmrc:doppio_2017_da_avg summary : doppio DODS_EXTRA.Unlimited_Dimension : ocean_time "
+ ],
+ "text/plain": [
+ "\n",
+ "Dimensions: (time: 1, eta_rho: 106, xi_rho: 242)\n",
+ "Coordinates:\n",
+ " s_rho float64 -0.0125\n",
+ " lon_rho (eta_rho, xi_rho) float64 ...\n",
+ " lat_rho (eta_rho, xi_rho) float64 ...\n",
+ " * time (time) datetime64[ns] 2022-06-06T12:00:00\n",
+ " time_run (time) datetime64[ns] ...\n",
+ "Dimensions without coordinates: eta_rho, xi_rho\n",
+ "Data variables:\n",
+ " temp (time, eta_rho, xi_rho) float64 ...\n",
+ " salt (time, eta_rho, xi_rho) float64 ...\n",
+ "Attributes: (12/46)\n",
+ " file: doppio_avg_6280_0004.nc\n",
+ " format: netCDF-4/HDF5 file\n",
+ " Conventions: CF-1.4, SGRID-0.3\n",
+ " type: ROMS/TOMS nonlinear model averages file\n",
+ " title: ROMS doppio Real-Time Operational PSAS F...\n",
+ " var_info: ../Data/varinfo1040t_daily.dat\n",
+ " ... ...\n",
+ " his_base: doppio_his_6280\n",
+ " cdm_data_type: GRID\n",
+ " featureType: GRID\n",
+ " location: Proto fmrc:doppio_2017_da_avg\n",
+ " summary: doppio\n",
+ " DODS_EXTRA.Unlimited_Dimension: ocean_time"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import xarray as xr\n",
+ "\n",
+ "url = (\n",
+ " \"https://tds.marine.rutgers.edu/thredds/dodsC/roms/doppio/2017_da/avg/Averages_Best\"\n",
+ ")\n",
+ "\n",
+ "ds = xr.open_dataset(url)\n",
+ "\n",
+ "time_slice = {\"time\": \"2022-06-06\"}\n",
+ "surface = {\"s_rho\": -1}\n",
+ "\n",
+ "ds = ds[[\"temp\", \"salt\"]].sel(time_slice).isel(surface)\n",
+ "\n",
+ "ds"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "d8fa04f9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'820.9 kB'"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import humanize\n",
+ "\n",
+ "humanize.naturalsize(ds.nbytes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a661b199",
+ "metadata": {},
+ "source": [
+ "It is a small subset but it is enough to illustrate zarr's compression options.\n",
+ "\n",
+ "Now let's choose a compression level and save it as zarr."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d8e3601c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import zarr\n",
+ "\n",
+ "compressor = zarr.Blosc(clevel=2, shuffle=-1)\n",
+ "\n",
+ "\n",
+ "fname = \"doppio/doppio_compressed.zarr\"\n",
+ "\n",
+ "ds.to_zarr(\n",
+ " fname,\n",
+ " mode=\"w\",\n",
+ " safe_chunks=True,\n",
+ " consolidated=True,\n",
+ " encoding={var: {\"compressor\": compressor} for var in ds.variables},\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "d982caef",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[01;34mdoppio/doppio_compressed.zarr\u001b[0m\n",
+ "├── \u001b[01;34mlat_rho\u001b[0m\n",
+ "│ └── 0.0\n",
+ "├── \u001b[01;34mlon_rho\u001b[0m\n",
+ "│ └── 0.0\n",
+ "├── \u001b[01;34msalt\u001b[0m\n",
+ "│ └── 0.0.0\n",
+ "├── \u001b[01;34ms_rho\u001b[0m\n",
+ "│ └── 0\n",
+ "├── \u001b[01;34mtemp\u001b[0m\n",
+ "│ └── 0.0.0\n",
+ "├── \u001b[01;34mtime\u001b[0m\n",
+ "│ └── 0\n",
+ "└── \u001b[01;34mtime_run\u001b[0m\n",
+ " └── 0\n",
+ "\n",
+ "7 directories, 7 files\n",
+ "144K\tdoppio/doppio_compressed.zarr/salt\n",
+ "156K\tdoppio/doppio_compressed.zarr/lat_rho\n",
+ "148K\tdoppio/doppio_compressed.zarr/temp\n",
+ "16K\tdoppio/doppio_compressed.zarr/time\n",
+ "16K\tdoppio/doppio_compressed.zarr/s_rho\n",
+ "16K\tdoppio/doppio_compressed.zarr/time_run\n",
+ "156K\tdoppio/doppio_compressed.zarr/lon_rho\n",
+ "676K\tdoppio/doppio_compressed.zarr\n"
+ ]
+ }
+ ],
+ "source": [
+ "!tree doppio/*zarr\n",
+ "!du -h doppio/*zarr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e345b204",
+ "metadata": {},
+ "source": [
+ "The first thing to observe is that the zarr format is a directory-based storage. That structure should be familiar for HDF5 users. However, instead of being a filesystem inside a filesystem, zarr is laid out directly on the disk filesystem.\n",
+ "\n",
+ "Each variable and coordinate has its own directory and the data chunks are stored in subdirectories. For more information [check this awesome](https://www.youtube.com/watch?v=qyJXBlrdzBs&list=PLKbXDtRY2ZfU6OfZ8JQimBEY-u1WLCpwp) presentation from one of zarr's authors.\n",
+ "\n",
+ "Note that the stored size is quite a bit smaller too! We went from 820.9 kB to 676 kB. Zarr has many modern compression options available as plugins, [including some bitinformation-based methods](https://xbitinfo.readthedocs.io/en/latest/api/xbitinfo.save_compressed.ToCompressed_Zarr.html).\n",
+ "\n",
+ "The data attributes, groups, and metadata are stored in the `.zattrs`, `.zgroup`, and `.zmetadata` files. They are plain text JSON files and easy to parse:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "44822485",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'metadata': {'.zattrs': {'CPP_options': 'DOPPIO, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, ASSUMED_SHAPE, ATM_PRESS, AVERAGES, !BOUNDARY_A BULK_FLUXES, !COLLECT_ALL..., CHARNOK, CRAIG_BANNER, CURVGRID, DEFLATE, DIURNAL_SRFLUX, DJ_GRADPS, DOUBLE_PRECISION, EMINUSP, FLOATS, FORWARD_WRITE, GLS_MIXING, HDF5, KANTHA_CLAYSON, LONGWAVE_OUT, MASKING, MIX_GEO_TS, MIX_S_UV, MPI, NONLINEAR, NONLIN_EOS, NO_LBC_ATT, N2S2_HORAVG, OUT_DOUBLE, POWER_LAW, PROFILE, K_GSCHEME, REDUCE_ALLREDUCE, !RST_SINGLE, SALINITY, SOLAR_SOURCE, SOLVE3D, SSH_TIDES, TS_DIF2, UV_ADV, UV_COR, UV_U3HADVECTION, UV_C4VADVECTION, UV_QDRAG, UV_TIDES, UV_VIS2, VAR_RHO_2D, WIND_MINUS_CURRENT',\n",
+ " 'Conventions': 'CF-1.4, SGRID-0.3',\n",
+ " 'DODS_EXTRA.Unlimited_Dimension': 'ocean_time',\n",
+ " 'NLM_LBC': '\\nEDGE: WEST SOUTH EAST NORTH \\nzeta: Cha Cha Cha Clo \\nubar: Fla Fla Fla Clo \\nvbar: Fla Fla Fla Clo \\nu: RadNud RadNud RadNud Clo \\nv: RadNud RadNud RadNud Clo \\ntemp: Rad Rad Rad Clo \\nsalt: Rad Rad Rad Clo \\ntke: Gra Gra Gra Clo',\n",
+ " 'NLM_TADV': '\\nADVECTION: HORIZONTAL VERTICAL \\ntemp: Akima4 Akima4 \\nsalt: Akima4 Akima4',\n",
+ " '_CoordSysBuilder': 'ucar.nc2.dataset.conv.CF1Convention',\n",
+ " 'ana_file': 'ROMS/Functionals/ana_btflux.h, ROMS/Functionals/ana_srflux.h',\n",
+ " 'avg_base': 'doppio_avg_6280',\n",
+ " 'cdm_data_type': 'GRID',\n",
+ " 'clm_file_01': '../Data/doppio_clm.nc',\n",
+ " 'code_dir': '/home/julia/ROMS/doppio/svn1040t',\n",
+ " 'compiler_command': '/opt/sw/apps/intel-18.0.1/openmpi/3.1.2/bin/mpif90',\n",
+ " 'compiler_flags': '-fp-model precise -heap-arrays -ip -O3 -traceback -check uninit',\n",
+ " 'compiler_system': 'ifort',\n",
+ " 'cpu': 'x86_64',\n",
+ " 'featureType': 'GRID',\n",
+ " 'file': 'doppio_avg_6280_0004.nc',\n",
+ " 'flt_file': 'doppio_flt_6280.nc',\n",
+ " 'format': 'netCDF-4/HDF5 file',\n",
+ " 'fpos_file': 'floats.in',\n",
+ " 'frc_file_01': '../Data/lwrad_down_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_02': '../Data/Pair_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_03': '../Data/Qair_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_04': '../Data/rain_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_05': '../Data/swrad_daily_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_06': '../Data/Tair_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'frc_file_07': '../Data/Winds_ncepnam_3hourly_MAB_and_GoM.nc',\n",
+ " 'grd_file': '/home/om/roms/doppio/7km/grid_doppio_JJA_v13.nc',\n",
+ " 'header_dir': '/home/julia/ROMS/doppio/Compile/fwd',\n",
+ " 'header_file': 'doppio.h',\n",
+ " 'his_base': 'doppio_his_6280',\n",
+ " 'history': 'ROMS/TOMS, Version 3.9, Thursday - March 16, 2023 - 4:32:53 AM ;\\nFMRC Best Dataset',\n",
+ " 'ini_file': 'doppio_rst_6280.nc',\n",
+ " 'location': 'Proto fmrc:doppio_2017_da_avg',\n",
+ " 'nud_file': '/home/om/roms/doppio/7km/doppio_nudgcoef_7km_1500-2000_GS.nc',\n",
+ " 'os': 'Linux',\n",
+ " 'rst_file': 'tmp_doppio_rst_6280.nc',\n",
+ " 'script_file': 'nl_ocean_doppio.in',\n",
+ " 'summary': 'doppio',\n",
+ " 'svn_rev': '1040',\n",
+ " 'svn_url': 'https://www.myroms.org/svn/src/trunk',\n",
+ " 'tide_file': '/home/om/roms/doppio/7km/doppio_tide_7km.nc',\n",
+ " 'tiling': '004x004',\n",
+ " 'title': 'ROMS doppio Real-Time Operational PSAS Forecast System Version 1 FMRC Averages',\n",
+ " 'type': 'ROMS/TOMS nonlinear model averages file',\n",
+ " 'var_info': '../Data/varinfo1040t_daily.dat'},\n",
+ " '.zgroup': {'zarr_format': 2},\n",
+ " 'lat_rho/.zarray': {'chunks': [106, 242],\n",
+ " 'compressor': {'blocksize': 0,\n",
+ " 'clevel': 2,\n",
+ " 'cname': 'lz4',\n",
+ " 'id': 'blosc',\n",
+ " 'shuffle': -1},\n",
+ " 'dtype': '\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "<xarray.Dataset>\n",
+ "Dimensions: (eta_rho: 106, xi_rho: 242, time: 1)\n",
+ "Coordinates:\n",
+ " lat_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(106, 242), meta=np.ndarray>\n",
+ " lon_rho (eta_rho, xi_rho) float64 dask.array<chunksize=(106, 242), meta=np.ndarray>\n",
+ " s_rho float64 ...\n",
+ " * time (time) datetime64[ns] 2022-06-06T12:00:00\n",
+ " time_run (time) datetime64[ns] dask.array<chunksize=(1,), meta=np.ndarray>\n",
+ "Dimensions without coordinates: eta_rho, xi_rho\n",
+ "Data variables:\n",
+ " salt (time, eta_rho, xi_rho) float64 dask.array<chunksize=(1, 106, 242), meta=np.ndarray>\n",
+ " temp (time, eta_rho, xi_rho) float64 dask.array<chunksize=(1, 106, 242), meta=np.ndarray>\n",
+ "Attributes: (12/46)\n",
+ " CPP_options: DOPPIO, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX...\n",
+ " Conventions: CF-1.4, SGRID-0.3\n",
+ " DODS_EXTRA.Unlimited_Dimension: ocean_time\n",
+ " NLM_LBC: \\nEDGE: WEST SOUTH EAST NORTH \\nz...\n",
+ " NLM_TADV: \\nADVECTION: HORIZONTAL VERTICAL ...\n",
+ " _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention\n",
+ " ... ...\n",
+ " svn_url: https://www.myroms.org/svn/src/trunk\n",
+ " tide_file: /home/om/roms/doppio/7km/doppio_tide_7km.nc\n",
+ " tiling: 004x004\n",
+ " title: ROMS doppio Real-Time Operational PSAS F...\n",
+ " type: ROMS/TOMS nonlinear model averages file\n",
+ " var_info: ../Data/varinfo1040t_daily.dat Dimensions: eta_rho : 106xi_rho : 242time : 1
Coordinates: (5)
lat_rho
(eta_rho, xi_rho)
float64
dask.array<chunksize=(106, 242), meta=np.ndarray>
_ChunkSizes : [106, 242] _CoordinateAxisType : Lat field : lat_rho, scalar long_name : latitude of RHO-points standard_name : latitude units : degrees_north \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Array \n",
+ " Chunk \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes \n",
+ " 200.41 kiB \n",
+ " 200.41 kiB \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape \n",
+ " (106, 242) \n",
+ " (106, 242) \n",
+ " \n",
+ " \n",
+ " Dask graph \n",
+ " 1 chunks in 2 graph layers \n",
+ " \n",
+ " \n",
+ " Data type \n",
+ " float64 numpy.ndarray \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " 242 \n",
+ " 106 \n",
+ " \n",
+ " \n",
+ " \n",
+ "
lon_rho
(eta_rho, xi_rho)
float64
dask.array<chunksize=(106, 242), meta=np.ndarray>
_ChunkSizes : [106, 242] _CoordinateAxisType : Lon field : lon_rho, scalar long_name : longitude of RHO-points standard_name : longitude units : degrees_east \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Array \n",
+ " Chunk \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes \n",
+ " 200.41 kiB \n",
+ " 200.41 kiB \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape \n",
+ " (106, 242) \n",
+ " (106, 242) \n",
+ " \n",
+ " \n",
+ " Dask graph \n",
+ " 1 chunks in 2 graph layers \n",
+ " \n",
+ " \n",
+ " Data type \n",
+ " float64 numpy.ndarray \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " 242 \n",
+ " 106 \n",
+ " \n",
+ " \n",
+ " \n",
+ "
s_rho
()
float64
...
_CoordinateAxes : s_rho _CoordinateAxisType : GeoZ _CoordinateTransformType : Vertical _CoordinateZisPositive : up field : s_rho, scalar formula_terms : s: s_rho C: Cs_r eta: zeta depth: h depth_c: hc long_name : S-coordinate at RHO-points positive : up standard_name : ocean_s_coordinate_g2 units : valid_max : 0.0 valid_min : -1.0 [1 values with dtype=float64] time
(time)
datetime64[ns]
2022-06-06T12:00:00
_CoordinateAxisType : Time long_name : Forecast time for ForecastModelRunCollection standard_name : time array(['2022-06-06T12:00:00.000000000'], dtype='datetime64[ns]') time_run
(time)
datetime64[ns]
dask.array<chunksize=(1,), meta=np.ndarray>
_CoordinateAxisType : RunTime long_name : run times for coordinate = time standard_name : forecast_reference_time \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Array \n",
+ " Chunk \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes \n",
+ " 8 B \n",
+ " 8 B \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape \n",
+ " (1,) \n",
+ " (1,) \n",
+ " \n",
+ " \n",
+ " Dask graph \n",
+ " 1 chunks in 2 graph layers \n",
+ " \n",
+ " \n",
+ " Data type \n",
+ " datetime64[ns] numpy.ndarray \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " \n",
+ "
Data variables: (2)
salt
(time, eta_rho, xi_rho)
float64
dask.array<chunksize=(1, 106, 242), meta=np.ndarray>
_ChunkSizes : [1, 20, 53, 121] field : salinity, scalar, series grid : grid location : face long_name : time-averaged salinity standard_name : sea_water_practical_salinity time : ocean_time \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Array \n",
+ " Chunk \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes \n",
+ " 200.41 kiB \n",
+ " 200.41 kiB \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape \n",
+ " (1, 106, 242) \n",
+ " (1, 106, 242) \n",
+ " \n",
+ " \n",
+ " Dask graph \n",
+ " 1 chunks in 2 graph layers \n",
+ " \n",
+ " \n",
+ " Data type \n",
+ " float64 numpy.ndarray \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " 242 \n",
+ " 106 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " \n",
+ "
temp
(time, eta_rho, xi_rho)
float64
dask.array<chunksize=(1, 106, 242), meta=np.ndarray>
_ChunkSizes : [1, 20, 53, 121] field : temperature, scalar, series grid : grid location : face long_name : time-averaged potential temperature standard_name : sea_water_potential_temperature time : ocean_time units : Celsius \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Array \n",
+ " Chunk \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Bytes \n",
+ " 200.41 kiB \n",
+ " 200.41 kiB \n",
+ " \n",
+ " \n",
+ " \n",
+ " Shape \n",
+ " (1, 106, 242) \n",
+ " (1, 106, 242) \n",
+ " \n",
+ " \n",
+ " Dask graph \n",
+ " 1 chunks in 2 graph layers \n",
+ " \n",
+ " \n",
+ " Data type \n",
+ " float64 numpy.ndarray \n",
+ " \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ " 242 \n",
+ " 106 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " \n",
+ "
Indexes: (1)
Attributes: (46)
CPP_options : DOPPIO, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX, ANA_BTFLUX, ASSUMED_SHAPE, ATM_PRESS, AVERAGES, !BOUNDARY_A BULK_FLUXES, !COLLECT_ALL..., CHARNOK, CRAIG_BANNER, CURVGRID, DEFLATE, DIURNAL_SRFLUX, DJ_GRADPS, DOUBLE_PRECISION, EMINUSP, FLOATS, FORWARD_WRITE, GLS_MIXING, HDF5, KANTHA_CLAYSON, LONGWAVE_OUT, MASKING, MIX_GEO_TS, MIX_S_UV, MPI, NONLINEAR, NONLIN_EOS, NO_LBC_ATT, N2S2_HORAVG, OUT_DOUBLE, POWER_LAW, PROFILE, K_GSCHEME, REDUCE_ALLREDUCE, !RST_SINGLE, SALINITY, SOLAR_SOURCE, SOLVE3D, SSH_TIDES, TS_DIF2, UV_ADV, UV_COR, UV_U3HADVECTION, UV_C4VADVECTION, UV_QDRAG, UV_TIDES, UV_VIS2, VAR_RHO_2D, WIND_MINUS_CURRENT Conventions : CF-1.4, SGRID-0.3 DODS_EXTRA.Unlimited_Dimension : ocean_time NLM_LBC : \n",
+ "EDGE: WEST SOUTH EAST NORTH \n",
+ "zeta: Cha Cha Cha Clo \n",
+ "ubar: Fla Fla Fla Clo \n",
+ "vbar: Fla Fla Fla Clo \n",
+ "u: RadNud RadNud RadNud Clo \n",
+ "v: RadNud RadNud RadNud Clo \n",
+ "temp: Rad Rad Rad Clo \n",
+ "salt: Rad Rad Rad Clo \n",
+ "tke: Gra Gra Gra Clo NLM_TADV : \n",
+ "ADVECTION: HORIZONTAL VERTICAL \n",
+ "temp: Akima4 Akima4 \n",
+ "salt: Akima4 Akima4 _CoordSysBuilder : ucar.nc2.dataset.conv.CF1Convention ana_file : ROMS/Functionals/ana_btflux.h, ROMS/Functionals/ana_srflux.h avg_base : doppio_avg_6280 cdm_data_type : GRID clm_file_01 : ../Data/doppio_clm.nc code_dir : /home/julia/ROMS/doppio/svn1040t compiler_command : /opt/sw/apps/intel-18.0.1/openmpi/3.1.2/bin/mpif90 compiler_flags : -fp-model precise -heap-arrays -ip -O3 -traceback -check uninit compiler_system : ifort cpu : x86_64 featureType : GRID file : doppio_avg_6280_0004.nc flt_file : doppio_flt_6280.nc format : netCDF-4/HDF5 file fpos_file : floats.in frc_file_01 : ../Data/lwrad_down_ncepnam_3hourly_MAB_and_GoM.nc frc_file_02 : ../Data/Pair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_03 : ../Data/Qair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_04 : ../Data/rain_ncepnam_3hourly_MAB_and_GoM.nc frc_file_05 : ../Data/swrad_daily_ncepnam_3hourly_MAB_and_GoM.nc frc_file_06 : ../Data/Tair_ncepnam_3hourly_MAB_and_GoM.nc frc_file_07 : ../Data/Winds_ncepnam_3hourly_MAB_and_GoM.nc grd_file : /home/om/roms/doppio/7km/grid_doppio_JJA_v13.nc header_dir : /home/julia/ROMS/doppio/Compile/fwd header_file : doppio.h his_base : doppio_his_6280 history : ROMS/TOMS, Version 3.9, Thursday - March 16, 2023 - 4:32:53 AM ;\n",
+ "FMRC Best Dataset ini_file : doppio_rst_6280.nc location : Proto fmrc:doppio_2017_da_avg nud_file : /home/om/roms/doppio/7km/doppio_nudgcoef_7km_1500-2000_GS.nc os : Linux rst_file : tmp_doppio_rst_6280.nc script_file : nl_ocean_doppio.in summary : doppio svn_rev : 1040 svn_url : https://www.myroms.org/svn/src/trunk tide_file : /home/om/roms/doppio/7km/doppio_tide_7km.nc tiling : 004x004 title : ROMS doppio Real-Time Operational PSAS Forecast System Version 1 FMRC Averages type : ROMS/TOMS nonlinear model averages file var_info : ../Data/varinfo1040t_daily.dat "
+ ],
+ "text/plain": [
+ "\n",
+ "Dimensions: (eta_rho: 106, xi_rho: 242, time: 1)\n",
+ "Coordinates:\n",
+ " lat_rho (eta_rho, xi_rho) float64 dask.array\n",
+ " lon_rho (eta_rho, xi_rho) float64 dask.array\n",
+ " s_rho float64 ...\n",
+ " * time (time) datetime64[ns] 2022-06-06T12:00:00\n",
+ " time_run (time) datetime64[ns] dask.array\n",
+ "Dimensions without coordinates: eta_rho, xi_rho\n",
+ "Data variables:\n",
+ " salt (time, eta_rho, xi_rho) float64 dask.array\n",
+ " temp (time, eta_rho, xi_rho) float64 dask.array\n",
+ "Attributes: (12/46)\n",
+ " CPP_options: DOPPIO, ADD_FSOBC, ADD_M2OBC, ANA_BSFLUX...\n",
+ " Conventions: CF-1.4, SGRID-0.3\n",
+ " DODS_EXTRA.Unlimited_Dimension: ocean_time\n",
+ " NLM_LBC: \\nEDGE: WEST SOUTH EAST NORTH \\nz...\n",
+ " NLM_TADV: \\nADVECTION: HORIZONTAL VERTICAL ...\n",
+ " _CoordSysBuilder: ucar.nc2.dataset.conv.CF1Convention\n",
+ " ... ...\n",
+ " svn_url: https://www.myroms.org/svn/src/trunk\n",
+ " tide_file: /home/om/roms/doppio/7km/doppio_tide_7km.nc\n",
+ " tiling: 004x004\n",
+ " title: ROMS doppio Real-Time Operational PSAS F...\n",
+ " type: ROMS/TOMS nonlinear model averages file\n",
+ " var_info: ../Data/varinfo1040t_daily.dat"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "subset = xr.open_zarr(fname)\n",
+ "\n",
+ "subset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eeded07f",
+ "metadata": {},
+ "source": [
+ "And a quick plot to check the data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "fb7122f3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "subset[\"temp\"].squeeze().plot(x=\"lon_rho\", y=\"lat_rho\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0254598f",
+ "metadata": {},
+ "source": [
+ "What is the current workflow and what are the alternatives? Most ocean data are stored as modern netCDF files that are, under the hood, HDF5 files with a stricter metadata structure. HDF5 has some limitations, such as:\n",
+ "\n",
+ "- no thread-based parallelism\n",
+ "- cannot do parallel writes with compression\n",
+ "- no support for cloud object stores\n",
+ "\n",
+ "However, for most workflows what really matters is the chunking, not the data format. The parallelism, compression, and cloud support can then be built on top of it with `dask`, `numcodecs`, and `fsspec`, respectively. That raises the question: Should one convert all the existing data to `zarr`? Luckily no! We can adopt a less expensive workflow and use kerchunk to create virtual cloud-optimized CF-compliant datasets that access files in any format using the Zarr library.\n",
+ "\n",
+ "We can write the data in whatever format we need (maybe you are NASA and require HDF5, maybe you have R users who like netcdf, or want to use a visualization tool that only reads geotiff), then rechunk the data to best support the expected use cases."
+ ]
+ }
+ ],
+ "metadata": {
+ "_draft": {
+ "nbviewer_url": "https://gist.github.com/9fc25f2a1a1a653535bc15428798dfab"
+ },
+ "gist": {
+ "data": {
+ "description": "netcdf-c-zarr.ipynb",
+ "public": true
+ },
+ "id": "9fc25f2a1a1a653535bc15428798dfab"
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/pytest.ini b/pytest.ini
index 9f40dab6..b2fbea15 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -5,7 +5,7 @@ addopts =
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2016-12-22-boston_light_swim.ipynb"
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2017-03-21-ERDDAP_IOOS_Sensor_Map.ipynb"
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2017-07-25-HFRadar_currents.ipynb"
- --ignore="jupyterbook/content/code_gallery/data_access_notebooks/2017-09-09-hurricane_irma"
+ --ignore="jupyterbook/content/code_gallery/data_access_notebooks/2017-09-09-hurricane_irma.ipynb"
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2017-12-15-finding_HFRadar_currents.ipynb"
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2019-02-26-hurricane_gis_part01.ipynb"
--ignore="jupyterbook/content/code_gallery/data_access_notebooks/2019-03-08-grids-temperature"