From 61ab51ac181674b73ea5af7d910d5977664f4776 Mon Sep 17 00:00:00 2001 From: Tina Odaka <46813815+tinaok@users.noreply.github.com> Date: Wed, 25 Oct 2023 19:11:33 +0200 Subject: [PATCH] update dask --- tutorial/part3/scaling_dask.ipynb | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tutorial/part3/scaling_dask.ipynb b/tutorial/part3/scaling_dask.ipynb index e819b96..7fb11c7 100755 --- a/tutorial/part3/scaling_dask.ipynb +++ b/tutorial/part3/scaling_dask.ipynb @@ -317,14 +317,16 @@ "import xarray as xr\n", "\n", "catalogue=\"https://object-store.cloud.muni.cz/swift/v1/foss4g-catalogue/c_gls_NDVI-LTS_1999-2019.json\"\n", - "LTS = xr.open_mfdataset(\n", + "#catalogue=\"test.json\"\n", + "\n", + "LTS = xr.open_dataset(\n", " \"reference://\", engine=\"zarr\",\n", " backend_kwargs={\n", " \"storage_options\": {\n", " \"fo\":catalogue\n", " },\n", " \"consolidated\": False\n", - " }\n", + " },chunks={}\n", ")\n", "LTS" ] @@ -347,7 +349,7 @@ "outputs": [], "source": [ "save = LTS.sel(lat=45.50, lon=9.36, method='nearest')['min'].mean()\n", - "save.data" + "save" ] }, { @@ -359,7 +361,9 @@ "\n", "We didn't 'compute' anything. We just built a Dask task graph with it's size indicated as count above, but did not ask Dask to return a result.\n", "\n", - "But the 'task Count' we see above is more than 6000 for just computing a mean on 36 temporal steps. This is too much. If you have such case, to avoid unecessary operations, you can optimize the task using `dask.optimize`. \n", + "Here, you can check the 'Dask graph' entry to see how many graph layers you have, which helps estimate the complexity of your computation.\n", + "\n", + "It indicates that you have '7 graph layers'. This can be optimised with the following step.\n", "\n", "Lets try to plot the dask graph before computation and understand what dask workers will do to compute the value we asked for. 
" ] @@ -375,8 +379,10 @@ { "cell_type": "code", "execution_count": null, - "id": "22c6888b-de87-4989-8975-50a0d2a1fcbe", - "metadata": {}, + "id": "c0a8c5ab-eda3-4d1c-a2dd-2e616c0d9ade", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import dask\n", @@ -389,7 +395,7 @@ "id": "537cd461-8f9d-4651-9190-73d5eb6a40ef", "metadata": {}, "source": [ - "Now our task is reduced to about 100. Lets try to visualise it:" + "Now our graph is reduced 1. Lets try to visualise it:" ] }, { @@ -976,7 +982,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.11.6" } }, "nbformat": 4,