Include large internal lakes (#268)
* Fixes to make river estimation more robust to weird or incomplete geometries

* Added lakes and added test coverage

* Renamed functions to make them more general

* fixup: Format Python code with Black

* Update version

* Update tests.yml after sunsetting of Mambaforge

Migrate from Mambaforge

* Updated tests for new Westport tiles

* Fixed spelling of patches.

---------

Co-authored-by: github-actions <[email protected]>
rosepearson and github-actions authored Nov 11, 2024
1 parent 90e6de2 commit dcc694f
Showing 16 changed files with 211 additions and 98 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/tests.yml
@@ -39,14 +39,15 @@ jobs:
- name: Setup miniconda
uses: conda-incubator/setup-miniconda@v2
with:
auto-update-conda: true
miniforge-variant: Mambaforge
auto-update-conda: true # false
miniforge-version: latest
channels: conda-forge # defaults automatically added
python-version: ${{ matrix.python-version }}
activate-environment: geofabrics_CI
environment-file: environment_CI.yml
use-mamba: true
auto-activate-base: false
# use-only-tar-bz2: true


- name: Conda list
shell: pwsh
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "geofabrics"
version = "1.1.23"
version = "1.1.24"
description = "A package for creating geofabrics for flood modelling."
readme = "README.md"
authors = [{ name = "Rose pearson", email = "[email protected]" }]
2 changes: 1 addition & 1 deletion src/geofabrics/bathymetry_estimation.py
@@ -1925,6 +1925,7 @@ def _create_flat_water_polygon(self, cross_sections: geopandas.GeoDataFrame):
start_xy = geopandas.GeoDataFrame(
geometry=[shapely.geometry.LineString(start_xy)], crs=cross_sections.crs
)

start_xy = Channel(start_xy, resolution=self.cross_section_spacing)
start_xy_spline = start_xy.get_parametric_spline_fit_points()
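For context, a generic illustration of parametric spline resampling of a centreline, which is roughly what `get_parametric_spline_fit_points()` provides here. This is a SciPy-based sketch with made-up coordinates, not the internal `Channel` implementation:

```python
import numpy
from scipy import interpolate

# Made-up channel centreline vertices
x = numpy.array([0.0, 10.0, 20.0, 30.0, 40.0])
y = numpy.array([0.0, 5.0, 3.0, 8.0, 6.0])

# Fit a parametric spline through the vertices and resample it densely
tck, _ = interpolate.splprep([x, y], s=0)
sampled_x, sampled_y = interpolate.splev(numpy.linspace(0, 1, 50), tck)
print(len(sampled_x))  # 50 points along the smoothed centreline
```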

@@ -2296,7 +2297,6 @@ def estimate_width_and_slope(
maximum_threshold=max_threshold,
min_channel_width=min_channel_width,
)

# generate a flat water polygon
river_polygon = self._create_flat_water_polygon(
cross_sections=cross_sections,
100 changes: 50 additions & 50 deletions src/geofabrics/dem.py
@@ -300,6 +300,7 @@ class DemBase(abc.ABC):
"patch": 6,
"stopbanks": 7,
"masked feature": 8,
"lakes": 9,
"interpolated": 0,
"no data": -1,
}
@@ -1106,9 +1107,9 @@ def clip_within_polygon(self, polygon_paths: list, label: str):
f"No clipping. Polygons {polygon_paths} do not overlap DEM."
)

def interpolate_elevations_within_polygon(
def add_points_within_polygon_chunked(
self,
elevations: geometry.EstimatedElevationPoints,
elevations: geometry.ElevationPoints,
method: str,
cache_path: pathlib.Path,
label: str,
@@ -1171,7 +1172,7 @@ def interpolate_elevations_within_polygon(
point_cloud = numpy.concatenate([edge_points, point_cloud])

# Save river points in a temporary laz file
lidar_file = cache_path / "waterways_points.laz"
lidar_file = cache_path / f"{label}_points.laz"
pdal_pipeline_instructions = [
{
"type": "writers.las",
@@ -1196,7 +1197,7 @@ def interpolate_elevations_within_polygon(
self.logger.info(f"Preparing {[len(chunked_dim_x), len(chunked_dim_y)]} chunks")

# cycle through index chunks - and collect in a delayed array
self.logger.info("Running over ocean chunked")
self.logger.info(f"Running over {label} chunked")
delayed_chunked_matrix = []
for i, dim_y in enumerate(chunked_dim_y):
delayed_chunked_x = []
@@ -1246,11 +1247,12 @@ def interpolate_elevations_within_polygon(
)
self._write_netcdf_conventions_in_place(self._dem, self.catchment_geometry.crs)

def interpolate_rivers(
def add_points_within_polygon_nearest_chunked(
self,
elevations: geometry.EstimatedElevationPoints,
elevations: geometry.ElevationPoints,
method: str,
cache_path: pathlib.Path,
label: str,
k_nearest_neighbours: int = 100,
) -> xarray.Dataset:
"""Performs interpolation from estimated bathymetry points within a polygon
@@ -1273,23 +1275,21 @@
# Define the region to rasterise
region_to_rasterise = elevations.polygons

# Extract and save river elevations
river_points = elevations.points_array
river_points_file = cache_path / "river_points.laz"
# Temporarily save the points to add
points = elevations.points_array
points_file = cache_path / f"{label}_points.laz"
pdal_pipeline_instructions = [
{
"type": "writers.las",
"a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}",
"filename": str(river_points_file),
"filename": str(points_file),
"compression": "laszip",
}
]
pdal_pipeline = pdal.Pipeline(
json.dumps(pdal_pipeline_instructions), [river_points]
)
pdal_pipeline = pdal.Pipeline(json.dumps(pdal_pipeline_instructions), [points])
pdal_pipeline.execute()
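For reference, a minimal standalone version of the writers.las pattern used above, with made-up coordinates, an illustrative filename, and purely illustrative EPSG codes:

```python
import json

import numpy
import pdal

# Hypothetical structured point array with the X/Y/Z dimensions PDAL expects
points = numpy.zeros(
    3, dtype=[("X", numpy.float64), ("Y", numpy.float64), ("Z", numpy.float64)]
)
points["X"] = [1571000.0, 1571005.0, 1571010.0]
points["Y"] = [5390000.0, 5390005.0, 5390010.0]
points["Z"] = [1.2, 1.1, 0.9]

# writers.las with laszip compression produces a .laz file, mirroring the
# f"{label}_points.laz" write above (EPSG codes and filename are illustrative)
pdal_pipeline_instructions = [
    {
        "type": "writers.las",
        "a_srs": "EPSG:2193+7839",
        "filename": "lakes_points.laz",
        "compression": "laszip",
    }
]
pdal.Pipeline(json.dumps(pdal_pipeline_instructions), [points]).execute()
```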

# Extract and save river/fan adjacent elevations from DEM
# Temporarily save the adjacent points from the DEM
edge_dem = self._dem.rio.clip(
region_to_rasterise.dissolve().buffer(self.catchment_geometry.resolution),
drop=True,
@@ -1306,19 +1306,19 @@
drop=True,
)

# Define the river and mouth edge points
# Save provided points
grid_x, grid_y = numpy.meshgrid(edge_dem.x, edge_dem.y)
flat_x = grid_x.flatten()
flat_y = grid_y.flatten()
flat_z = edge_dem.z.values.flatten()
mask_z = ~numpy.isnan(flat_z)

# Interpolate the estimated river bank heights along only the river
# Interpolate the estimated bank heights around the polygon if they exist
if elevations.bank_heights_exist():
# Get the estimated river bank heights and define a mask where nan
river_bank_points = elevations.bank_height_points()
river_bank_nan_mask = numpy.logical_not(numpy.isnan(river_bank_points["Z"]))
# Interpolate from the estimated river bank heights
# Get the estimated bank heights and define a mask where nan
bank_points = elevations.bank_height_points()
bank_nan_mask = numpy.logical_not(numpy.isnan(bank_points["Z"]))
# Interpolate from the estimated bank heights
xy_out = numpy.concatenate(
[[flat_x[mask_z]], [flat_y[mask_z]]], axis=0
).transpose()
@@ -1328,19 +1328,17 @@
"method": "linear",
"strict": False,
}
estimated_river_edge_z = elevation_from_points(
point_cloud=river_bank_points[river_bank_nan_mask],
estimated_edge_z = elevation_from_points(
point_cloud=bank_points[bank_nan_mask],
xy_out=xy_out,
options=options,
)

# Use the estimated bank heights where lower than the DEM edge values
mask_z_river_edge = mask_z.copy()
mask_z_river_edge[:] = False
mask_z_river_edge[mask_z] = flat_z[mask_z] > estimated_river_edge_z
flat_z[mask_z_river_edge] = estimated_river_edge_z[
flat_z[mask_z] > estimated_river_edge_z
]
mask_z_edge = mask_z.copy()
mask_z_edge[:] = False
mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z
flat_z[mask_z_edge] = estimated_edge_z[flat_z[mask_z] > estimated_edge_z]
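The masking above only ever lowers DEM edge heights to the estimated bank heights, never raises them. A tiny worked example of the same pattern with made-up numbers:

```python
import numpy as np

# Where the DEM edge is higher than the interpolated bank estimate, pull it
# down to the estimate; NaN cells and already-lower cells are left alone.
flat_z = np.array([2.0, np.nan, 5.0, 1.0])
mask_z = ~np.isnan(flat_z)                    # valid DEM edge cells
estimated_edge_z = np.array([3.0, 4.0, 0.5])  # one estimate per valid cell

mask_z_edge = mask_z.copy()
mask_z_edge[:] = False
mask_z_edge[mask_z] = flat_z[mask_z] > estimated_edge_z
flat_z[mask_z_edge] = estimated_edge_z[flat_z[mask_z] > estimated_edge_z]
print(flat_z)  # [2.0, nan, 4.0, 0.5]
```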

# Use the flat_x/y/z to define edge points and heights
edge_points = numpy.empty(
Expand All @@ -1355,12 +1353,12 @@ def interpolate_rivers(
edge_points["Y"] = flat_y[mask_z]
edge_points["Z"] = flat_z[mask_z]

river_edge_file = cache_path / "river_edge_points.laz"
edge_file = cache_path / f"{label}_edge_points.laz"
pdal_pipeline_instructions = [
{
"type": "writers.las",
"a_srs": f"EPSG:" f"{crs['horizontal']}+" f"{crs['vertical']}",
"filename": str(river_edge_file),
"filename": str(edge_file),
"compression": "laszip",
}
]
@@ -1370,18 +1368,18 @@
pdal_pipeline.execute()

if (
len(river_points) < k_nearest_neighbours
or len(edge_points) < k_nearest_neighbours
len(points) < raster_options["k_nearest_neighbours"]
or len(edge_points) < raster_options["k_nearest_neighbours"]
):
logging.info(
f"Fewer river or edge points than the default expected {k_nearest_neighbours}. "
f"Updating k_nearest_neighbours to {min(len(river_points), len(edge_points))}."
f"Fewer points or edge points than the default expected {raster_options['k_nearest_neighbours']}. "
f"Updating k_nearest_neighbours to {min(len(points), len(edge_points))}."
)
k_nearest_neighbours = min(len(river_points), len(edge_points))
if k_nearest_neighbours < 3:
raster_options["k_nearest_neighbours"] = min(len(points), len(edge_points))
if raster_options["k_nearest_neighbours"] < 3:
logging.warning(
f"Not enough river or edge points to meaningfully include {k_nearest_neighbours}. "
f"Exiting without including the river and edge points."
f"Not enough points or edge points to meaningfully include {raster_options['k_nearest_neighbours']}. "
f"Exiting without including the points and edge points."
)
return

@@ -1396,22 +1394,24 @@
self.logger.info(f"Preparing {[len(chunked_dim_x), len(chunked_dim_y)]} chunks")

# cycle through index chunks - and collect in a delayed array
self.logger.info("Running over ocean chunked")
self.logger.info(
"Running over points chunked - nearest of points & edge points"
)
delayed_chunked_matrix = []
for i, dim_y in enumerate(chunked_dim_y):
delayed_chunked_x = []
for j, dim_x in enumerate(chunked_dim_x):
self.logger.debug(f"\tLiDAR chunk {[i, j]}")
# Load in points
river_points = delayed_load_tiles_in_chunk(
lidar_files=[river_points_file],
points = delayed_load_tiles_in_chunk(
lidar_files=[points_file],
source_crs=raster_options["crs"],
chunk_region_to_tile=None,
crs=raster_options["crs"],
)

river_edge_points = delayed_load_tiles_in_chunk(
lidar_files=[river_edge_file],
edge_points = delayed_load_tiles_in_chunk(
lidar_files=[edge_file],
source_crs=raster_options["crs"],
chunk_region_to_tile=None,
crs=raster_options["crs"],
Expand All @@ -1423,8 +1423,8 @@ def interpolate_rivers(
delayed_elevation_over_chunk_from_nearest(
dim_x=dim_x,
dim_y=dim_y,
points=river_points,
edge_points=river_edge_points,
points=points,
edge_points=edge_points,
options=raster_options,
),
shape=(len(dim_y), len(dim_x)),
Expand All @@ -1437,17 +1437,17 @@ def interpolate_rivers(
elevations = dask.array.block(delayed_chunked_matrix)

# Update DEM layers - copy everywhere within the region to rasterise
rivers_mask = clip_mask(
polygon_mask = clip_mask(
self._dem.z,
region_to_rasterise.geometry,
self.chunk_size,
)
rivers_mask.load()
self._dem["z"] = self._dem.z.where(~rivers_mask, elevations)
mask = ~(rivers_mask & self._dem.z.notnull())
polygon_mask.load()
self._dem["z"] = self._dem.z.where(~polygon_mask, elevations)
mask = ~(polygon_mask & self._dem.z.notnull())
self._dem["data_source"] = self._dem.data_source.where(
mask,
self.SOURCE_CLASSIFICATION["rivers and fans"],
self.SOURCE_CLASSIFICATION[label],
)
self._dem["lidar_source"] = self._dem.lidar_source.where(
mask, self.SOURCE_CLASSIFICATION["no data"]
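For reference, the masked update at the end of this method writes the interpolated elevations only inside the polygon and tags those cells in data_source. A toy one-dimensional sketch of that pattern with hypothetical values, where code 9 is the new "lakes" classification added above:

```python
import xarray

# Toy stand-in for the DEM layers: elevations, data_source codes, and the
# rasterised polygon mask.
z = xarray.DataArray([10.0, 11.0, 12.0, 13.0], dims="x")
data_source = xarray.DataArray([1, 1, 1, 1], dims="x")
polygon_mask = xarray.DataArray([False, True, True, False], dims="x")
new_elevations = xarray.DataArray([0.0, 2.0, 2.5, 0.0], dims="x")

# Keep z outside the mask, take the new elevations inside it
z = z.where(~polygon_mask, new_elevations)   # -> [10.0, 2.0, 2.5, 13.0]

# Tag the updated, non-NaN cells with the new classification code
mask = ~(polygon_mask & z.notnull())
data_source = data_source.where(mask, 9)     # SOURCE_CLASSIFICATION["lakes"]
print(data_source.values)                    # [1 9 9 1]
```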
43 changes: 40 additions & 3 deletions src/geofabrics/geometry.py
@@ -553,7 +553,7 @@ def sample(self) -> numpy.ndarray:
return points


class EstimatedElevationPoints:
class ElevationPoints:
"""A class for accessing estimated or measured river, mouth and waterway
elevations as points. Paired elevation and polygon files are expected.
The elevations are used to interpolate elevations within the polygons.
@@ -630,7 +630,7 @@ def _set_up(
polygon_list.append(polygon_i)
# Set CRS, clip to size and reset index
if len(points_list) == 0:
self.logger.warning("No waterways elevations. Ignoring.")
self.logger.warning("No elevations. Ignoring.")
self._points = []
self._polygon = []
return
@@ -758,6 +758,43 @@ def z(self):
return self._z


class ElevationContours(ElevationPoints):
"""Resample at spatial resolution at points"""

def __init__(
self,
points_files: list,
polygon_files: list,
catchment_geometry: CatchmentGeometry,
z_labels: list = None,
):
super(ElevationContours, self).__init__(
points_files=points_files,
polygon_files=polygon_files,
catchment_geometry=catchment_geometry,
filter_osm_ids=[],
z_labels=z_labels,
)
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

# convert contours to sample points at resolution
self.sample_contours(self.catchment_geometry.resolution)

def sample_contours(self, resolution: float) -> numpy.ndarray:
"""Sample the contours at the specified resolution."""

# convert contours to multipoints
self._points.loc[:, "geometry"] = self._points.geometry.apply(
lambda row: shapely.geometry.MultiPoint(
[
row.interpolate(i * resolution)
for i in range(int(numpy.ceil(row.length / resolution)))
]
)
)
self._points = self._points.explode(index_parts=True, ignore_index=True)
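A self-contained sketch of what sample_contours does to each contour line, using made-up geometry: every LineString becomes points spaced resolution apart along its length, and the resulting MultiPoints are exploded to one point per row.

```python
import geopandas
import numpy
import shapely.geometry

resolution = 10.0
# Hypothetical 35 m long contour line (CRS is illustrative)
contour = shapely.geometry.LineString([(0, 0), (0, 35)])
points = geopandas.GeoDataFrame(geometry=[contour], crs="EPSG:2193")

# Replace each contour with points every `resolution` metres along it
points.loc[:, "geometry"] = points.geometry.apply(
    lambda row: shapely.geometry.MultiPoint(
        [
            row.interpolate(i * resolution)
            for i in range(int(numpy.ceil(row.length / resolution)))
        ]
    )
)
points = points.explode(index_parts=True, ignore_index=True)
print(len(points))  # 4 points, at 0, 10, 20 and 30 m along the contour
```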


class TileInfo:
"""A class for working with tiling information."""

@@ -854,7 +891,7 @@ def _get_mouth_alignment(self):

# Get the river alignment and clip to the river polygon
aligned_channel = geopandas.read_file(self.aligned_channel_file)
river_polygon = geopandas.read_file(self.river_polygon_file)
river_polygon = geopandas.read_file(self.river_polygon_file).make_valid()
aligned_channel = aligned_channel.clip(river_polygon)
# Explode in case the aligned channel is clipped into a MultiLineString
(x, y) = aligned_channel.explode().iloc[0].geometry.xy
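The make_valid() call guards against invalid (for example self-intersecting) river polygons that can make clip() fail or return empty geometry. A small illustration with a hypothetical bow-tie polygon, assuming GeoPandas >= 0.12 with Shapely 2 so that GeoSeries.make_valid is available:

```python
import geopandas
import shapely.geometry

# A bow-tie polygon is self-intersecting and therefore invalid
bow_tie = shapely.geometry.Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
river_polygon = geopandas.GeoSeries([bow_tie], crs="EPSG:2193")

print(river_polygon.is_valid.iloc[0])               # False
print(river_polygon.make_valid().is_valid.iloc[0])  # True - safe to clip against
```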