Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIF-165] Write layers directly #42

Merged
merged 5 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import os
from abc import abstractmethod
from typing import Union, Tuple, List
from typing import Union, Tuple
from uuid import uuid4
from osgeo import gdal

import ee
import boto3
from dask.diagnostics import ProgressBar
from ee import ImageCollection
from geocube.api.core import make_geocube
Expand Down Expand Up @@ -52,6 +56,37 @@ def groupby(self, zones, layer=None):
"""
return LayerGroupBy(self.aggregate, zones, layer, self.masks)

def write(self, bbox, output_path, tile_degrees=None):
    """
    Write the layer to a path. Does not apply masks.

    :param bbox: (min x, min y, max x, max y)
    :param output_path: local or s3 path to output to. When `tile_degrees` is
        None this is the path of the single output file (including its suffix);
        when `tile_degrees` is set it is a folder path that receives the tiles.
    :param tile_degrees: optional param to tile the results into multiple files with a VRT.
        Degrees to tile by. `output_path` should be a folder path to store the tiles.
    :return:
    """
    # NOTE(review): a reviewer asked whether MAX_TILE_SIZE could be used as the
    # default for tile_degrees -- not changed here, confirm before adopting.
    # NOTE(review): a reviewer pointed out that output_path must be a folder name
    # for tiled output but a file name with a suffix for single-file output;
    # either document this in the notebook or make the code consistent.

    if tile_degrees is None:
        # Single-file output: fetch the whole bbox and write it in one go.
        data = self.aggregate.get_data(bbox)
        write_layer(output_path, data)
        return

    tiles = create_fishnet_grid(*bbox, tile_degrees)

    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_path, exist_ok=True)

    file_names = []
    for tile in tiles["geometry"]:
        data = self.aggregate.get_data(tile.bounds)

        # Each tile gets a unique, random file name inside the output folder.
        file_name = f"{output_path}/{uuid4()}.tif"
        file_names.append(file_name)

        write_layer(file_name, data)

    # The VRT indexing all tiles is written next to (not inside) the folder.
    gdal.BuildVRT(f"{output_path}.vrt", file_names)


class LayerGroupBy:
def __init__(self, aggregate, zones, layer=None, masks=[]):
Expand Down Expand Up @@ -240,3 +275,21 @@ def get_image_collection(

return data


def write_layer(path, data):
    """
    Write layer data to a local path or, for rasters, to an S3 URI.

    :param path: destination path; a raster destination may be an
        ``s3://bucket/key`` URI, in which case the file is uploaded with boto3
    :param data: an ``xr.DataArray`` (written as a COG raster) or a
        ``gpd.GeoDataFrame`` (written as GeoJSON)
    :raises NotImplementedError: if ``data`` is neither a DataArray nor a GeoDataFrame
    """
    if isinstance(data, xr.DataArray):
        if path.startswith("s3://"):
            # for rasters, need to write locally first then copy to cloud storage
            tmp_path = f"{uuid4()}.tif"
            # "s3://bucket/some/key" -> bucket="bucket", key="some/key"
            bucket, _, key = path[len("s3://"):].partition("/")
            try:
                data.rio.to_raster(raster_path=tmp_path, driver="COG")
                boto3.client("s3").upload_file(tmp_path, bucket, key)
            finally:
                # Always clean up the temporary raster, even if the write or
                # the upload failed; it may not exist if to_raster failed early.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
        else:
            data.rio.to_raster(raster_path=path, driver="COG")
    elif isinstance(data, gpd.GeoDataFrame):
        data.to_file(path, driver="GeoJSON")
    else:
        raise NotImplementedError("Can only write DataArray or GeoDataFrame")
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- dask[complete]=2023.11.0
- matplotlib=3.8.2
- jupyterlab=4.0.10
- s3fs=2024.5.0
- pip=23.3.1
- pip:
- cartoframes==1.2.5
Loading