From b644ec0b73a17179f28b4722a49cc8a7d97bdc24 Mon Sep 17 00:00:00 2001 From: Pier Lorenzo Marasco Date: Sun, 5 Nov 2023 18:40:50 +0100 Subject: [PATCH 1/5] UPDATE added the query in the catalog search --- .../part3/data_exploitability_pangeo.ipynb | 156 ++++-------------- 1 file changed, 29 insertions(+), 127 deletions(-) diff --git a/tutorial/part3/data_exploitability_pangeo.ipynb b/tutorial/part3/data_exploitability_pangeo.ipynb index 9f02b7a..d536746 100644 --- a/tutorial/part3/data_exploitability_pangeo.ipynb +++ b/tutorial/part3/data_exploitability_pangeo.ipynb @@ -59,9 +59,6 @@ "execution_count": null, "metadata": { "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], @@ -159,9 +156,6 @@ "execution_count": null, "metadata": { "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], @@ -183,9 +177,6 @@ "execution_count": null, "metadata": { "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], @@ -232,9 +223,6 @@ "execution_count": null, "metadata": { "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], @@ -264,9 +252,6 @@ "execution_count": null, "metadata": { "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], @@ -277,6 +262,7 @@ " intersects=aoi_geojson,\n", " collections=[\"sentinel-2-l2a\"],\n", " datetime=\"2019-02-01/2019-06-10\"\n", + " # query={\"eo:cloud_cover\": {\"lt\": 60}}, # to filter by cloud cover, uncomment this line and add a comma after the datetime argument above\n", ").item_collection()\n", "len(items)" ] @@ -293,10 +279,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -322,10 +305,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ 
@@ -351,10 +331,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -374,10 +351,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -411,10 +385,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -434,10 +405,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -479,10 +447,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -503,10 +468,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -525,10 +487,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -546,10 +505,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -576,10 +532,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -590,10 +543,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ 
-611,10 +561,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -625,10 +572,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -649,10 +593,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -802,10 +743,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -817,10 +755,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -832,10 +767,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -847,10 +779,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -873,10 +802,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -887,10 +813,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -901,10 +824,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -916,10 
+836,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -931,10 +848,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -945,10 +859,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -967,10 +878,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -988,10 +896,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ @@ -1005,10 +910,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } + "collapsed": false }, "outputs": [], "source": [ From 43344d95971a6382a396c8b12969c3594f178bb3 Mon Sep 17 00:00:00 2001 From: Pier Lorenzo Marasco Date: Sun, 5 Nov 2023 18:49:45 +0100 Subject: [PATCH 2/5] UPDATE added correct name for the bands --- tutorial/part3/data_exploitability_pangeo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/part3/data_exploitability_pangeo.ipynb b/tutorial/part3/data_exploitability_pangeo.ipynb index d536746..092eeba 100644 --- a/tutorial/part3/data_exploitability_pangeo.ipynb +++ b/tutorial/part3/data_exploitability_pangeo.ipynb @@ -295,7 +295,7 @@ "source": [ "#### Load data\n", "We will use the stackstac library to load the data. 
The stackstac library is a library that allows loading data from a STAC API into an xarray dataset.\n", - "Here we will load the green and swir16 bands, which are the bands we will use to calculate the snow cover. We will also load the scl band, which is the scene classification layer, which we will use to mask out clouds.\n", + "Here we will load the green and swir16 bands (named B03 and B11 in the original dataset), which are the bands we will use to calculate the snow cover. We will also load the scl band, which is the scene classification layer, which we will use to mask out clouds.\n", "Spatial resolution of 20m is selected for the analysis. The data is loaded in chunks of 2048x2048 pixels.\n", "\n", "[Stackstac](https://stackstac.readthedocs.io/en/latest/) is not the only way to create a xarray dataset from a STAC API. Other libraries can be used, such as [xpystac](https://github.com/stac-utils/xpystac) or [odc.stac](https://github.com/opendatacube/odc-stac). The choice of the library depends on the use case and specific needs." From 0803e6f15e97b81e8156bffca49eedee9ebe99c5 Mon Sep 17 00:00:00 2001 From: Pier Lorenzo Marasco Date: Sun, 5 Nov 2023 19:17:32 +0100 Subject: [PATCH 3/5] UPDATE added noted on the stack.stac --- .../part3/data_exploitability_pangeo.ipynb | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tutorial/part3/data_exploitability_pangeo.ipynb b/tutorial/part3/data_exploitability_pangeo.ipynb index 092eeba..e2e5800 100644 --- a/tutorial/part3/data_exploitability_pangeo.ipynb +++ b/tutorial/part3/data_exploitability_pangeo.ipynb @@ -308,13 +308,40 @@ "collapsed": false }, "outputs": [], + "source": [ + "stackstac.stack(items)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "When the results of the STAC query are compiled into an xarray dataset, the result is a four-dimensional dataset: time, band, x, and y. 
The 'band' dimension comprises the various spectral bands, while the 'x' and 'y' dimensions represent the spatial information. By examining the dataset's visual representation, we can quickly estimate its total size. Without any filtering, we expect the dataset to be around 5.42 terabytes.\n", + "\n", + "Since we require only certain bands and are focused on the Area of Interest (AOI), we will apply additional filters to the dataset to pare down the data volume to what is strictly necessary.\n", + "\n", + "- The 'bounds_latlon' parameter defines the Area of Interest with four values: the minimum and maximum longitudes and latitudes. We will input the catchment's boundaries to set our area of interest.\n", + "- The 'resolution' parameter determines the dataset's spatial resolution, requiring a single value. We will select a resolution of 20 meters.\n", + "- The 'chunksize' parameter sets the dimensions for data chunking, accepting one value to define chunk size. We will opt for chunks that are 2048 by 2048 pixels. Dask will handle the data chunking as per our specifications, while GDAL reads the underlying raster files during the loading process.\n", + "- Lastly, the 'assets' parameter selects the data bands to be loaded, requiring a list of the band names as strings. 
We will load the 'green' and 'swir16' bands for snow cover analysis, along with the 'scl' band, the scene classification layer, to filter out clouds.\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], "source": [ "ds = stackstac.stack(items,\n", " bounds_latlon=aoi.iloc[0].geometry.bounds,\n", " resolution=20,\n", " chunksize=2048,\n", " assets=['green', 'swir16', 'scl'])" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "markdown", From 8795470ea0f6e32007cabb5110ead12ce5702c42 Mon Sep 17 00:00:00 2001 From: Pier Lorenzo Marasco Date: Sun, 5 Nov 2023 19:20:20 +0100 Subject: [PATCH 4/5] UPDATE removed the alternatives --- tutorial/part3/data_exploitability_pangeo.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tutorial/part3/data_exploitability_pangeo.ipynb b/tutorial/part3/data_exploitability_pangeo.ipynb index e2e5800..4bbaec5 100644 --- a/tutorial/part3/data_exploitability_pangeo.ipynb +++ b/tutorial/part3/data_exploitability_pangeo.ipynb @@ -455,7 +455,7 @@ }, "outputs": [], "source": [ - "mask = np.logical_not(scl.isin([8, 9, 3])) # alternative you can use the mask = (scl != 8) & (scl != 9) & (scl != 3) \n", + "mask = np.logical_not(scl.isin([8, 9, 3])) \n", "snow_cloud = xr.where(mask, snowmap, 2)" ] }, @@ -550,7 +550,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Data aggregation is a very important step in the analysis. It allows to reduce the amount of data and to make the analysis more efficient. Moreover as in this case we are going to aggregate the date to daily values, this will allow use to compute statistic on the data at the basin scale later on.\n", + "Data aggregation is a very important step in the analysis. It allows us to reduce the amount of data and makes the analysis more efficient. 
Moreover, since in this case we are going to aggregate the data to daily values, this will allow us to compute statistics on the data at the basin scale later on.\n", "\n", "The `groupby` method allows to group the data by a specific dimension. We will group the data by the time dimension, aggregating to the date and removing the time information, once the group is obtained we will aggregate the data by taking the maximum value." ] From 64763280483eb1bfe5db95994324ad054d867f0c Mon Sep 17 00:00:00 2001 From: Pier Lorenzo Marasco Date: Sun, 5 Nov 2023 19:38:28 +0100 Subject: [PATCH 5/5] UPDATE added link to xarray tutorial --- tutorial/part3/data_exploitability_pangeo.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tutorial/part3/data_exploitability_pangeo.ipynb b/tutorial/part3/data_exploitability_pangeo.ipynb index 4bbaec5..8109ec9 100644 --- a/tutorial/part3/data_exploitability_pangeo.ipynb +++ b/tutorial/part3/data_exploitability_pangeo.ipynb @@ -44,7 +44,8 @@ "### Relevant resources\n", "\n", "* More information on Pangeo can be found here: https://pangeo.io/\n", - "* More information on the STAC specification can be found here: https://stacspec.org/\n" + "* More information on the STAC specification can be found here: https://stacspec.org/\n", + "* More examples on how to use xarray can be found here: https://tutorial.xarray.dev/en/latest/\n" ] }, {