diff --git a/city_metrix/layers/layer.py b/city_metrix/layers/layer.py index 4a96b7a..83fa9eb 100644 --- a/city_metrix/layers/layer.py +++ b/city_metrix/layers/layer.py @@ -19,7 +19,7 @@ import shapely.geometry as geometry import pandas as pd -MAX_TILE_SIDE_SIZE_METERS = 60000 # Approx. 0.5 degrees latitudinal offset. TODO How selected? +MAX_TILE_SIZE_DEGREES = 0.5 # TODO Why was this value selected? class Layer: def __init__(self, aggregate=None, masks=[]): @@ -55,22 +55,22 @@ def groupby(self, zones, layer=None): """ return LayerGroupBy(self.aggregate, zones, layer, self.masks) - def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=None, **kwargs): + def write(self, bbox, output_path, tile_degrees=None, buffer_size=None, **kwargs): """ Write the layer to a path. Does not apply masks. :param bbox: (min x, min y, max x, max y) :param output_path: local or s3 path to output to - :param tile_side_meters: optional param to tile the results into multiple files specified as tile length on a side in meters - : param tile_buffer_meters: tile buffer distance in meters + :param tile_degrees: optional param to tile the results into multiple files specified as tile length on a side + :param buffer_size: tile buffer distance :return: """ - if tile_side_meters is None: + if tile_degrees is None: clipped_data = self.aggregate.get_data(bbox) write_layer(output_path, clipped_data) else: - tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters) + tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_degrees, buffer_size) # write raster data to files if not os.path.exists(output_path): @@ -92,13 +92,13 @@ def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=Non _write_tile_grid(unbuffered_tile_grid, output_path, 'tile_grid_unbuffered.geojson') -def _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters): - has_buffer = True if tile_buffer_meters is not None and tile_buffer_meters != 0 else False +def _get_tile_boundaries(bbox, tile_degrees, buffer_size): + has_buffer = True if buffer_size is not None and buffer_size != 0 else False if has_buffer: - tiles = create_fishnet_grid(*bbox, tile_side_meters, tile_buffer_meters) - unbuffered_tiles = create_fishnet_grid(*bbox, tile_side_meters) + tiles = create_fishnet_grid(*bbox, tile_degrees, buffer_size) + unbuffered_tiles = create_fishnet_grid(*bbox, tile_degrees) else: - tiles = create_fishnet_grid(*bbox, tile_side_meters) + tiles = create_fishnet_grid(*bbox, tile_degrees) unbuffered_tiles = None tile_grid = [] @@ -136,7 +136,7 @@ def count(self): return self._zonal_stats("count") def _zonal_stats(self, stats_func): - if box(*self.zones.total_bounds).area <= MAX_TILE_SIDE_SIZE_METERS**2: + if box(*self.zones.total_bounds).area <= MAX_TILE_SIZE_DEGREES**2: stats = self._zonal_stats_tile(self.zones, [stats_func]) else: stats = self._zonal_stats_fishnet(stats_func) @@ -160,7 +160,7 @@ def group_layer_values(df): def _zonal_stats_fishnet(self, stats_func): # fishnet GeoDataFrame into smaller tiles - fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIDE_SIZE_METERS) + fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIZE_DEGREES) # spatial join with fishnet grid and then intersect geometries with fishnet tiles joined = self.zones.sjoin(fishnet) @@ -257,17 +257,29 @@ def get_utm_zone_epsg(bbox) -> str: return f"EPSG:{epsg}" -def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, tile_side_meters, tile_buffer_meters=0): +def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, cell_size, buffer_size=0, tile_units_in_degrees=True): lon_coord, lat_coord = (min_lon, min_lat) geom_array = [] - center_lat = (min_lat + max_lat) / 2 - lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, tile_side_meters) - if tile_buffer_meters == 0: - lon_buffer_offset = 0 - lat_buffer_offset = 0 + if tile_units_in_degrees: + if cell_size > 0.5: + raise Exception('Value for cell_size must be < 0.5 degrees.') + + lon_side_offset = cell_size + lat_side_offset = cell_size + lon_buffer_offset = buffer_size + lat_buffer_offset = buffer_size else: - lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, tile_buffer_meters) + if cell_size < 10: + raise Exception('Value for cell_size must be >= 10 meters.') + + center_lat = (min_lat + max_lat) / 2 + lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, cell_size) + if buffer_size == 0: + lon_buffer_offset = 0 + lat_buffer_offset = 0 + else: + lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, buffer_size) # Polygon Size while lat_coord < max_lat: diff --git a/tests/conftest.py b/tests/conftest.py index b56318c..992894d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,38 +1,19 @@ from city_metrix.layers import Layer - -import shapely.geometry as geometry -import geopandas as gpd +from city_metrix.layers.layer import create_fishnet_grid from geocube.api.core import make_geocube - -def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size): - x, y = (min_x, min_y) - geom_array = [] - - # Polygon Size - while y < (max_y - 0.000001): - while x < (max_x - 0.000001): - geom = geometry.Polygon( - [ - (x, y), - (x, y + cell_size), - (x + cell_size, y + cell_size), - (x + cell_size, y), - (x, y), - ] - ) - geom_array.append(geom) - x += cell_size - x = min_x - y += cell_size - - fishnet = gpd.GeoDataFrame(geom_array, columns=["geometry"]).set_crs("EPSG:4326") +def test_create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size): + # Slightly reduce aoi to avoid partial cells + reduction = 0.000001 + max_y = (max_y - reduction) + max_x = (max_x - reduction) + fishnet = create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size) + fishnet.drop('fishnet_geometry', axis=1, inplace=True) return fishnet - -# Test zones of a regular 0.01x0.01 grid over a 0.1x0.1 extent -ZONES = create_fishnet_grid(106.7, -6.3, 106.8, -6.2, 0.01).reset_index() -LARGE_ZONES = create_fishnet_grid(106, -7, 107, -6, 0.1).reset_index() +# Test zones of a regular 0.01x0.01 grid over a 0.1x0.1 extent by degrees +ZONES = test_create_fishnet_grid(106.7, -6.3, 106.8, -6.2, 0.01).reset_index() +LARGE_ZONES = test_create_fishnet_grid(106, -7, 107, -6, 0.1).reset_index() class MockLayer(Layer): @@ -54,7 +35,8 @@ class MockMaskLayer(Layer): Simple layer where even indices are masked """ def get_data(self, bbox): - mask_gdf = create_fishnet_grid(*bbox, 0.01).reset_index() + cell_size_degrees = 0.01 + mask_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index() mask_gdf['index'] = mask_gdf['index'] % 2 mask = make_geocube( vector_data=mask_gdf, @@ -72,7 +54,8 @@ class MockGroupByLayer(Layer): Simple categorical layer with alternating 1s and 2s """ def get_data(self, bbox): - group_by_gdf = create_fishnet_grid(*bbox, 0.001).reset_index() + cell_size_degrees = 0.001 + group_by_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index() group_by_gdf['index'] = (group_by_gdf['index'] % 2) + 1 group_by = make_geocube( vector_data=group_by_gdf, @@ -104,7 +87,8 @@ class MockLargeGroupByLayer(Layer): """ def get_data(self, bbox): - group_by_gdf = create_fishnet_grid(*bbox, 0.01).reset_index() + cell_size_degrees = 0.01 + group_by_gdf = test_create_fishnet_grid(*bbox, cell_size_degrees).reset_index() group_by_gdf['index'] = (group_by_gdf['index'] % 2) + 1 group_by = make_geocube( vector_data=group_by_gdf, @@ -113,4 +97,4 @@ def get_data(self, bbox): output_crs=4326, ).index - return group_by \ No newline at end of file + return group_by diff --git a/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py index 1467cd0..9c38211 100644 --- a/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py +++ b/tests/resources/layer_dumps_for_br_lauro_de_freitas/test_write_layers_to_qgis_files.py @@ -31,7 +31,7 @@ def test_write_albedo(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'albedo.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) - Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') @@ -39,22 +39,22 @@ def test_write_albedo_tiled_unbuffered(target_folder, bbox_info, target_spatial_ file_path = prep_output_path(target_folder, 'albedo_tiled.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) (Albedo(spatial_resolution=target_resolution). - write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=None)) + write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_size=None)) file_count = get_file_count_in_folder(file_path) - expected_file_count = 7 # includes 6 tiles and one geojson file + expected_file_count = 5 # includes 4 tiles and one geojson file assert file_count == expected_file_count @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_albedo_tiled_buffered(target_folder, bbox_info, target_spatial_resolution_multiplier): - buffer_meters = 500 + buffer_degrees = 0.001 file_path = prep_output_path(target_folder, 'albedo_tiled_buffered.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo()) (Albedo(spatial_resolution=target_resolution). - write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=buffer_meters)) + write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_size=buffer_degrees)) file_count = get_file_count_in_folder(file_path) - expected_file_count = 8 # includes 6 tiles and two geojson files + expected_file_count = 6 # includes 4 tiles and two geojson files assert file_count == expected_file_count @@ -62,42 +62,42 @@ def test_write_albedo_tiled_buffered(target_folder, bbox_info, target_spatial_re def test_write_alos_dsm(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'alos_dsm.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AlosDSM()) - AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_average_net_building_height(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'average_net_building_height.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AverageNetBuildingHeight()) - AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_esa_world_cover(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'esa_world_cover.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(EsaWorldCover()) - EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_high_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'high_land_surface_temperature.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(HighLandSurfaceTemperature()) - HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_impervious_surface(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'impervious_surface.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(ImperviousSurface()) - LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'land_surface_temperature.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(LandSurfaceTemperature()) - LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) # TODO Class is no longer used, but may be useful later @@ -112,27 +112,27 @@ def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial def test_write_nasa_dem(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'nasa_dem.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NasaDEM()) - NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_natural_areas(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'natural_areas.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NaturalAreas()) - NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_ndvi_sentinel2_gee(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'ndvi_sentinel2_gee.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NdviSentinel2()) - NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'open_buildings.geojson') - OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_side_meters=None) + OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) # TODO Class write is not functional. Is class still needed or have we switched to overture? @@ -145,7 +145,7 @@ def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'overture_buildings.geojson') - OvertureBuildings().write(bbox_info.bounds, file_path, tile_side_meters=None) + OvertureBuildings().write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) # TODO Class is no longer used, but may be useful later @@ -160,33 +160,33 @@ def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resol def test_write_smart_surface_lulc(target_folder, bbox_info, target_spatial_resolution_multiplier): # Note: spatial_resolution not implemented for this raster class file_path = prep_output_path(target_folder, 'smart_surface_lulc.tif') - SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_side_meters=None) + SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_tree_canopy_height(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'tree_canopy_height.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCanopyHeight()) - TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_tree_cover(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'tree_cover.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCover()) - TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_urban_land_use(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'urban_land_use.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(UrbanLandUse()) - UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) @pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False') def test_write_world_pop(target_folder, bbox_info, target_spatial_resolution_multiplier): file_path = prep_output_path(target_folder, 'world_pop.tif') target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(WorldPop()) - WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None) + WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None) assert verify_file_is_populated(file_path) diff --git a/tests/test_methods.py b/tests/test_methods.py index bb7ad39..5fc6cc9 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from city_metrix.layers.layer import create_fishnet_grid, offset_meters_to_geographic_degrees from .conftest import ( LARGE_ZONES, @@ -10,7 +11,6 @@ MockMaskLayer, ) - def test_count(): counts = MockLayer().groupby(ZONES).count() assert counts.size == 100 @@ -33,7 +33,7 @@ def test_fishnetted_mean(): assert means.size == 100 assert all([mean == i for i, mean in enumerate(means)]) -def test_fishnet(): +def test_fishnet_in_meters(): min_x = -38.3 min_y = -80.1 max_x = -38.2 @@ -41,7 +41,7 @@ def test_fishnet(): tile_side_meters = 1000 tile_buffer_meters = 100 result_fishnet = ( - create_fishnet_grid(min_x, min_y, max_x, max_y, tile_side_meters, tile_buffer_meters)) + create_fishnet_grid(min_x, min_y, max_x, max_y, tile_side_meters, tile_buffer_meters, tile_units_in_degrees=False)) actual_count = result_fishnet.geometry.count() expected_count = 24 @@ -77,3 +77,23 @@ def test_group_by_large_layer(): MockLargeLayer().groupby(LARGE_ZONES, layer=MockLargeGroupByLayer()).count() ) assert all([count == {1: 50.0, 2: 50.0} for count in counts]) + +def test_extreme_large_degrees(): + min_x = 100 + min_y = 45 + max_x = 100.5 + max_y = 45.5 + cell_size_degrees = 1 + + with pytest.raises(Exception): + create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size_degrees) + +def test_extreme_small_meters(): + min_x = 100 + min_y = 45 + max_x = 100.5 + max_y = 45.5 + cell_size_meters= 1 + + with pytest.raises(Exception) as e_info: + create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size_meters, tile_units_in_degrees=False)