From 250a25b17565eb7c5b40ab175cda4825450ff4cc Mon Sep 17 00:00:00 2001 From: Alex Leith Date: Thu, 4 Jan 2024 11:31:39 +1100 Subject: [PATCH] Simplify data loading by using geometry directly --- .pre-commit-config.yaml | 10 ++++++++++ dep_tools/loaders.py | 17 ++++------------- 2 files changed, 14 insertions(+), 13 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..309c112 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black diff --git a/dep_tools/loaders.py b/dep_tools/loaders.py index dadc8a6..4cd166f 100644 --- a/dep_tools/loaders.py +++ b/dep_tools/loaders.py @@ -167,25 +167,16 @@ def _get_xr( items, areas: GeoDataFrame, ) -> DataArray | Dataset: - # For most EO data native dtype is int. Loading as such saves space but - # the only more-or-less universally accepted nodata value is nan, - # which is not available for int types. So we need to load as float and - # then replace existing nodata values (usually 0) with nan. At least - # I _think_ all this is necessary and there's not an easier way I didn't - # see in the docs. - areas_proj = areas.to_crs(self._current_epsg) - bounds = areas_proj.total_bounds.tolist() - data_type = "uint16" if self.keep_ints else "float32" xr = load( items, + geopolygon=areas, crs=self._current_epsg, chunks=self.dask_chunksize, - x=(bounds[0], bounds[2]), - y=(bounds[1], bounds[3]), - **self.odc_load_kwargs, dtype=data_type, + nodata=self.nodata, + **self.odc_load_kwargs, ) if self.nodata is not None: @@ -212,7 +203,7 @@ def _get_xr( .rio.write_crs(self._current_epsg) .rio.write_nodata(float("nan")) .rio.clip( - areas_proj.geometry, + areas.to_crs(self._current_epsg).geometry, all_touched=True, from_disk=True, )