From 91da0495c3064367f949a05b112bcbf4a54b651d Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 16 May 2024 16:50:38 -0700 Subject: [PATCH 1/3] Allow writing layers --- city_metrix/layers/layer.py | 51 ++++++++++++++++++++++++++++- notebooks/tutorial/get layers.ipynb | 2 +- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index 0b2da49..71a1e2d 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -1,7 +1,11 @@ +import os from abc import abstractmethod -from typing import Union, Tuple, List +from typing import Union, Tuple +from uuid import uuid4 +from osgeo import gdal import ee +from dask.bytes.tests.test_s3 import boto3 from dask.diagnostics import ProgressBar from ee import ImageCollection from geocube.api.core import make_geocube @@ -52,6 +56,33 @@ def groupby(self, zones, layer=None): """ return LayerGroupBy(self.aggregate, zones, layer, self.masks) + def write(self, bbox, output_path, tile_degrees=None): + """ + Write the layer to a path. Does not apply masks. + + :param bbox: (min x, min y, max x, max y) + :param output_path: local or s3 path to output to + :param tile_degrees: optional param to tile the results into multiple files with a VRT. + Degrees to tile by. `output_path` must be a valid folder if provided. + :return: + """ + + if tile_degrees is not None: + tiles = create_fishnet_grid(*bbox, tile_degrees) + + file_names = [] + for tile in tiles["geometry"]: + data = self.aggregate.get_data(*tile.bounds) + file_name = f"{output_path}/{uuid4()}.tif" + file_names.append(file_name) + + write_layer(file_name, data) + + gdal.BuildVRT(output_path, file_names) + else: + data = self.aggregate.get_data(bbox) + write_layer(output_path, data) + class LayerGroupBy: def __init__(self, aggregate, zones, layer=None, masks=[]): @@ -240,3 +271,21 @@ def get_image_collection( return data + +def write_layer(path, data): + if isinstance(data, xr.DataArray): + # for rasters, need to write to locally first then copy to cloud storage + if path.startswith("s3://"): + tmp_path = f"{uuid4()}.tif" + data.rio.to_raster(raster_path=tmp_path, driver="COG") + + s3 = boto3.client('s3') + s3.upload_file(tmp_path, path.split('/')[2], '/'.join(path.split('/')[3:])) + + os.remove(tmp_path) + else: + data.rio.to_raster(raster_path=path, driver="COG") + elif isinstance(data, gpd.GeoDataFrame): + data.to_file(path, driver="GeoJSON") + else: + raise NotImplementedError("Can only write DataArray or GeoDataFrame") diff --git a/notebooks/tutorial/get layers.ipynb b/notebooks/tutorial/get layers.ipynb index 8382493..3de8b97 100644 --- a/notebooks/tutorial/get layers.ipynb +++ b/notebooks/tutorial/get layers.ipynb @@ -4432,4 +4432,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From c8a0b734f4b9b084788df187fdc08d22a5f09884 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 16 May 2024 17:21:15 -0700 Subject: [PATCH 2/3] Fix issues --- city_metrix/layers/layer.py | 12 ++- environment.yml | 1 + notebooks/tutorial/get layers.ipynb | 149 ++++++++++++++++++++++------ setup.py | 1 + 4 files changed, 127 insertions(+), 36 deletions(-) diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index 71a1e2d..38fa56c 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -5,7 +5,7 @@ from osgeo import gdal import ee -from dask.bytes.tests.test_s3 import boto3 +import boto3 from dask.diagnostics import ProgressBar from ee import ImageCollection from geocube.api.core import make_geocube @@ -63,22 +63,26 @@ def write(self, bbox, output_path, tile_degrees=None): :param bbox: (min x, min y, max x, max y) :param output_path: local or s3 path to output to :param tile_degrees: optional param to tile the results into multiple files with a VRT. - Degrees to tile by. `output_path` must be a valid folder if provided. + Degrees to tile by. `output_path` should be a folder path to store the tiles. :return: """ if tile_degrees is not None: tiles = create_fishnet_grid(*bbox, tile_degrees) + if not os.path.exists(output_path): + os.makedirs(output_path) + file_names = [] for tile in tiles["geometry"]: - data = self.aggregate.get_data(*tile.bounds) + data = self.aggregate.get_data(tile.bounds) + file_name = f"{output_path}/{uuid4()}.tif" file_names.append(file_name) write_layer(file_name, data) - gdal.BuildVRT(output_path, file_names) + gdal.BuildVRT(f"{output_path}.vrt", file_names) else: data = self.aggregate.get_data(bbox) write_layer(output_path, data) diff --git a/environment.yml b/environment.yml index b00d08f..6ca0397 100644 --- a/environment.yml +++ b/environment.yml @@ -17,6 +17,7 @@ dependencies: - dask[complete]=2023.11.0 - matplotlib=3.8.2 - jupyterlab=4.0.10 + - s3fs=2024.5.0 - pip=23.3.1 - pip: - cartoframes==1.2.5 diff --git a/notebooks/tutorial/get layers.ipynb b/notebooks/tutorial/get layers.ipynb index 3de8b97..175bba5 100644 --- a/notebooks/tutorial/get layers.ipynb +++ b/notebooks/tutorial/get layers.ipynb @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "7ed2c665-e6d8-4e98-95ac-41aab749493f", "metadata": {}, "outputs": [], @@ -57,17 +57,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "602a6217-fd80-4cec-b40b-20de68b8f62b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'C:\\\\Users\\\\Saif.Shabou\\\\OneDrive - World Resources Institute\\\\Documents\\\\cities-indicators-framework\\\\citymetrix\\\\cities-cif'" + "'/Users/jt/dev/cities-cif'" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -78,9 +78,25 @@ "os.getcwd()" ] }, + { + "cell_type": "markdown", + "id": "5c506fca", + "metadata": {}, + "source": [] + }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, + "id": "286103d4", + "metadata": {}, + "outputs": [], + "source": [ + "import boto3" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "id": "d4f01f2f-5164-4a05-9997-f70b2abe6b37", "metadata": {}, "outputs": [], @@ -92,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "014804d3-5473-48c1-919a-8275a7cba083", "metadata": {}, "outputs": [ @@ -110,7 +126,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "53554a74-6fa9-4030-8ee7-dd1df79f0d75", "metadata": {}, "outputs": [ @@ -165,7 +181,7 @@ "0 2022-08-03 MULTIPOLYGON (((-38.50135 -13.01134, -38.50140... " ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -189,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "5eb883fd-c533-47b7-8735-65393afca89d", "metadata": {}, "outputs": [], @@ -199,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "0703cfe0-201d-4cae-ab43-7a4d0e5082cf", "metadata": {}, "outputs": [ @@ -207,8 +223,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Extracting tree cover layer:\n", - "[########################################] | 100% Completed | 19.58 s\n" + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 9.24 ss\n" ] }, { @@ -577,40 +593,40 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray 'ttc' (y: 2878, x: 3721)>\n",
-       "array([[ nan,  nan,  nan, ..., 100., 100.,  90.],\n",
-       "       [ nan,  nan,  nan, ...,  90.,  90.,  90.],\n",
+       "
<xarray.DataArray 'ttc' (y: 2878, x: 3722)>\n",
+       "array([[ nan,  nan,  nan, ..., 100.,  90.,  90.],\n",
        "       [ nan,  nan,  nan, ...,  90.,  90.,  90.],\n",
+       "       [ nan,  nan,  nan, ...,  90.,  90.,  80.],\n",
        "       ...,\n",
        "       [ nan,  nan,  nan, ...,  nan,  nan,  nan],\n",
        "       [ nan,  nan,  nan, ...,  nan,  nan,  nan],\n",
        "       [ nan,  nan,  nan, ...,  nan,  nan,  nan]])\n",
        "Coordinates:\n",
-       "    time     int32 0\n",
+       "    time     int64 0\n",
        "  * x        (x) float32 -38.65 -38.65 -38.65 -38.65 ... -38.3 -38.3 -38.3 -38.3\n",
        "  * y        (y) float32 -12.76 -12.76 -12.76 -12.76 ... -13.02 -13.02 -13.02\n",
        "Attributes:\n",
        "    id:             ttc\n",
        "    data_type:      {'type': 'PixelType', 'precision': 'double', 'min': 0, 'm...\n",
        "    crs:            EPSG:4326\n",
-       "    crs_transform:  [1, 0, 0, 0, 1, 0]
  • id :
    ttc
    data_type :
    {'type': 'PixelType', 'precision': 'double', 'min': 0, 'max': 255}
    crs :
    EPSG:4326
    crs_transform :
    [1, 0, 0, 0, 1, 0]
  • " ], "text/plain": [ - "\n", - "array([[ nan, nan, nan, ..., 100., 100., 90.],\n", - " [ nan, nan, nan, ..., 90., 90., 90.],\n", + "\n", + "array([[ nan, nan, nan, ..., 100., 90., 90.],\n", " [ nan, nan, nan, ..., 90., 90., 90.],\n", + " [ nan, nan, nan, ..., 90., 90., 80.],\n", " ...,\n", " [ nan, nan, nan, ..., nan, nan, nan],\n", " [ nan, nan, nan, ..., nan, nan, nan],\n", " [ nan, nan, nan, ..., nan, nan, nan]])\n", "Coordinates:\n", - " time int32 0\n", + " time int64 0\n", " * x (x) float32 -38.65 -38.65 -38.65 -38.65 ... -38.3 -38.3 -38.3 -38.3\n", " * y (y) float32 -12.76 -12.76 -12.76 -12.76 ... -13.02 -13.02 -13.02\n", "Attributes:\n", @@ -641,7 +657,7 @@ " crs_transform: [1, 0, 0, 0, 1, 0]" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -652,6 +668,75 @@ "city_TreeCover" ] }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8c2477dc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 8.91 ss\n" + ] + } + ], + "source": [ + "city_TreeCover = TreeCover().write(city_gdf.total_bounds, \"output.tif\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "cb58b256", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.63 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.52 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.53 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.38 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.35 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 2.47 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.53 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.45 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.46 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.54 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.69 sms\n", + "Extracting layer tree cover from Google Earth Engine:\n", + "[########################################] | 100% Completed | 1.59 sms\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/anaconda3/envs/cities-cif/lib/python3.10/site-packages/osgeo/gdal.py:287: FutureWarning: Neither gdal.UseExceptions() nor gdal.DontUseExceptions() has been explicitly called. In GDAL 4.0, exceptions will be enabled by default.\n", + " warnings.warn(\n", + "ERROR 4: Failed to open output to write.\n" + ] + } + ], + "source": [ + "city_TreeCover = TreeCover().write(city_gdf.total_bounds, \"output\", tile_degrees=0.1)" + ] + }, { "cell_type": "code", "execution_count": 9, diff --git a/setup.py b/setup.py index bff48fe..c2d24d8 100644 --- a/setup.py +++ b/setup.py @@ -20,5 +20,6 @@ "utm", "osmnx", "geopandas", + "s3fs", ], ) From 3ba9456cac72845c89db93c85e9dd2e43435e6b9 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 6 Jun 2024 16:24:16 -0700 Subject: [PATCH 3/3] Don't create VRT --- city_metrix/layers/layer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index 38fa56c..2bfba79 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -81,8 +81,6 @@ def write(self, bbox, output_path, tile_degrees=None): file_names.append(file_name) write_layer(file_name, data) - - gdal.BuildVRT(f"{output_path}.vrt", file_names) else: data = self.aggregate.get_data(bbox) write_layer(output_path, data)