Skip to content

Commit

Permalink
Remove Rasterio (#4079)
Browse files Browse the repository at this point in the history
- removes rasterio as dependency
   - uses gdal/affine instead
- removes some unused code: get_surrounding_grid
- closes Remove Rasterio #4080
  • Loading branch information
brettedw authored Nov 12, 2024
1 parent c661d8c commit 4105c3e
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 321 deletions.
66 changes: 26 additions & 40 deletions api/app/c_haines/c_haines_index.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" Logic pertaining to the generation of c_haines index from GDAL datasets.
"""
"""Logic pertaining to the generation of c_haines index from GDAL datasets."""

import logging
import struct
from typing import Final, Tuple
Expand All @@ -8,16 +8,15 @@
from osgeo import gdal
from affine import Affine
from app.geospatial import NAD83_CRS
from app.weather_models.process_grib import (
calculate_geographic_coordinate, get_dataset_geometry, get_transformer)
from app.weather_models.process_grib import calculate_geographic_coordinate, get_dataset_transform, get_transformer
from app.c_haines import GDALData


logger = logging.getLogger(__name__)


def calculate_c_haines_index(t700: float, t850: float, d850: float) -> float:
""" Given temperature and dew points values, calculate c-haines.
"""Given temperature and dew points values, calculate c-haines.
Based on original work:
Graham A. Mills and Lachlan McCaw (2010). Atmospheric Stability Environments
and Fire Weather in Australia – extending the Haines Index.
Expand Down Expand Up @@ -66,7 +65,7 @@ def calculate_c_haines_index(t700: float, t850: float, d850: float) -> float:


def read_scanline(band, yoff):
""" Read a band scanline (up to the y-offset), returning an array of values.
"""Read a band scanline (up to the y-offset), returning an array of values.
A raster (image) may consist of multiple bands (e.g. for a colour image, one may have a band for
red, green, blue, and alpha).
Expand All @@ -75,26 +74,23 @@ def read_scanline(band, yoff):
band, definition: https://gdal.org/user/raster_data_model.html#raster-band
fetching a raster band: https://gdal.org/tutorials/raster_api_tut.html#fetching-a-raster-band
"""
scanline = band.ReadRaster(xoff=0, yoff=yoff,
xsize=band.XSize, ysize=1,
buf_xsize=band.XSize, buf_ysize=1,
buf_type=gdal.GDT_Float32)
return struct.unpack('f' * band.XSize, scanline)
scanline = band.ReadRaster(xoff=0, yoff=yoff, xsize=band.XSize, ysize=1, buf_xsize=band.XSize, buf_ysize=1, buf_type=gdal.GDT_Float32)
return struct.unpack("f" * band.XSize, scanline)


def get_geographic_bounding_box() -> Tuple[float]:
""" Get the geographical area (the bounding box) that we want to generate data for.
Currently hard coded to be an area around B.C. """
"""Get the geographical area (the bounding box) that we want to generate data for.
Currently hard coded to be an area around B.C."""
# S->N 46->70
# E->W -110->-140
top_left_geo = (-140.0, 70.0)
bottom_right_geo = (-110.0, 46.0)
return (top_left_geo, bottom_right_geo)


class BoundingBoxChecker():
""" Class used to check if a given raster coordinate lies within a specified
geographic bounding box. """
class BoundingBoxChecker:
"""Class used to check if a given raster coordinate lies within a specified
geographic bounding box."""

def __init__(self, transform: Affine, raster_to_geo_transformer):
self.geo_bounding_box = get_geographic_bounding_box()
Expand All @@ -103,12 +99,9 @@ def __init__(self, transform: Affine, raster_to_geo_transformer):

@lru_cache(maxsize=None)
def is_inside(self, raster_x, raster_y):
""" Check if raster coordinate is inside the geographic bounding box """
"""Check if raster coordinate is inside the geographic bounding box"""
# Calculate lat/long and check bounds.
lon, lat = calculate_geographic_coordinate(
(raster_x, raster_y),
self.transform,
self.raster_to_geo_transformer)
lon, lat = calculate_geographic_coordinate((raster_x, raster_y), self.transform, self.raster_to_geo_transformer)
lon0 = self.geo_bounding_box[0][0]
lat0 = self.geo_bounding_box[0][1]
lon1 = self.geo_bounding_box[1][0]
Expand All @@ -118,30 +111,26 @@ def is_inside(self, raster_x, raster_y):
return False


class CHainesGenerator():
""" Class for generating c_haines data """
class CHainesGenerator:
"""Class for generating c_haines data"""

def __init__(self):
self.bound_checker: BoundingBoxChecker = None

def _prepare_bound_checker(self, grib_tmp_700: gdal.Dataset, grib_tmp_700_filename: str):
""" Prepare the boundary checker. """
"""Prepare the boundary checker."""
if not self.bound_checker:
logger.info('Creating bound checker.')
transform: Affine = get_dataset_geometry(grib_tmp_700_filename)
logger.info("Creating bound checker.")
transform: Affine = get_dataset_transform(grib_tmp_700_filename)
crs = CRS.from_string(grib_tmp_700.GetProjection())
# Create a transformer to go from whatever the raster is, to geographic coordinates.
raster_to_geo_transformer = get_transformer(crs, NAD83_CRS)
self.bound_checker = BoundingBoxChecker(transform, raster_to_geo_transformer)
else:
logger.info('Re-using bound checker.')

def calculate_row_data(self,
tmp_700_raster_band: gdal.Band,
tmp_850_raster_band: gdal.Band,
dew_850_raster_band: gdal.Band,
y_row_index: int):
""" Calculate a row of c-haines raster data """
logger.info("Re-using bound checker.")

def calculate_row_data(self, tmp_700_raster_band: gdal.Band, tmp_850_raster_band: gdal.Band, dew_850_raster_band: gdal.Band, y_row_index: int):
"""Calculate a row of c-haines raster data"""
c_haines_row: Final = []
# Read the scanlines.
row_tmp_700: Final = read_scanline(tmp_700_raster_band, y_row_index)
Expand All @@ -150,15 +139,14 @@ def calculate_row_data(self,

# Iterate through values in row.
for x_column_index, (t700, t850, d850) in enumerate(zip(row_tmp_700, row_tmp_850, row_dew_850)):

if self.bound_checker.is_inside(x_column_index, y_row_index):
c_haines_row.append(calculate_c_haines_index(t700, t850, d850))
else:
c_haines_row.append(0)
return c_haines_row

def generate_c_haines(self, source_data: GDALData):
""" Given grib data sources, generate c_haines data. """
"""Given grib data sources, generate c_haines data."""

# Prepare the boundary checker.
# Boundary checking is very slow. We have to repeatedly convert a raster coordinate to a geographic
Expand All @@ -174,11 +162,9 @@ def generate_c_haines(self, source_data: GDALData):
c_haines_data: Final = []

# Iterate through rows (assuming all the raster bands have the same amount of rows)
logger.info('Generating c-haines index data.')
logger.info("Generating c-haines index data.")
for y_row_index in range(tmp_850_raster_band.YSize):

c_haines_row = self.calculate_row_data(
tmp_700_raster_band, tmp_850_raster_band, dew_850_raster_band, y_row_index)
c_haines_row = self.calculate_row_data(tmp_700_raster_band, tmp_850_raster_band, dew_850_raster_band, y_row_index)

c_haines_data.append(c_haines_row)

Expand Down
4 changes: 2 additions & 2 deletions api/app/c_haines/severity_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from app.c_haines.object_store import ObjectTypeEnum, generate_full_object_store_path
from app.c_haines.kml import save_as_kml_to_s3
from app import config
from app.weather_models.process_grib import get_dataset_geometry
from app.weather_models.process_grib import get_dataset_transform


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -401,7 +401,7 @@ def _generate_c_haines_data(self, payload: EnvCanadaPayload):
c_haines_data = self.c_haines_generator.generate_c_haines(source_data)
# Store the projection and geotransform for later.
projection = source_data.grib_tmp_700.GetProjection()
geotransform: Affine = get_dataset_geometry(source_data.grib_tmp_700_filename)
geotransform: Affine = get_dataset_transform(source_data.grib_tmp_700_filename)
# Store the dimensions for later.
band = source_data.grib_tmp_700.GetRasterBand(1)
rows = band.YSize
Expand Down
31 changes: 1 addition & 30 deletions api/app/tests/weather_models/test_grib_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,43 +40,14 @@ def read_file_contents(filename):
)
def test_get_dataset_geometry(filename, origin, pixel_size):
grib_path = get_grib_file_path(filename)
dataset_geometry = process_grib.get_dataset_geometry(grib_path)
dataset_geometry = process_grib.get_dataset_transform(grib_path)
geotransform = dataset_geometry.to_gdal()
actual_origin = itemgetter(0, 3)(geotransform)
actual_pixel_size = itemgetter(1, 5)(geotransform)
assert actual_origin == origin
assert actual_pixel_size == pixel_size


@pytest.mark.parametrize(
"filename,raster_coordinate,points,values",
[
(
"CMC_glb_RH_TGL_2_latlon.15x.15_2020071300_P000.grib2",
(10, 10),
[[10, 10], [11, 10], [11, 11], [10, 11]],
[91.99049377441406, 91.99049377441406, 92.24049377441406, 92.24049377441406],
),
(
"CMC_hrdps_continental_RH_TGL_2_ps2.5km_2020100700_P007-00.grib2",
(694, 1262),
[[694, 1262], [695, 1262], [695, 1263], [694, 1263]],
[44.272186279296875, 42.796443939208984, 44.272186279296875, 44.272186279296875],
),
],
)
def test_get_surrounding_grid(filename, raster_coordinate, points, values):
dataset = open_grib_file(filename)
raster_band = dataset.GetRasterBand(1)
x, y = raster_coordinate
surrounding_grid = process_grib.get_surrounding_grid(raster_band, x, y)
# Check points
assert surrounding_grid[0] == points
# Check values
assert surrounding_grid[1] == values
dataset = None


@pytest.mark.parametrize(
"geotransform,wkt_projection_string,geographic_coordinate,raster_coordinate",
[
Expand Down
5 changes: 3 additions & 2 deletions api/app/tests/weather_models/test_process_grib.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ def test_convert_mps_to_kph_zero_wind_speed():
kilometres_per_hour_speed = process_grib.convert_mps_to_kph(metres_per_second_speed)
assert kilometres_per_hour_speed == 0


def test_read_single_raster_value(monkeypatch: pytest.MonkeyPatch):
"""
Verified with gdallocationinfo CMC_reg_RH_TGL_2_ps10km_2020110500_P034.grib2 -wgs84 -120.4816667 50.6733333
"""
monkeypatch.setattr(ClientSession, "get", default_mock_client_get)
filename = os.path.join(os.path.dirname(__file__), 'CMC_reg_RH_TGL_2_ps10km_2020110500_P034.grib2')
filename = os.path.join(os.path.dirname(__file__), "CMC_reg_RH_TGL_2_ps10km_2020110500_P034.grib2")
dataset = gdal.Open(filename, gdal.GA_ReadOnly)

# Ensure that grib file uses EPSG:4269 (NAD83) coordinate system
Expand All @@ -34,7 +35,7 @@ def test_read_single_raster_value(monkeypatch: pytest.MonkeyPatch):
crs = CRS.from_string(wkt)
raster_to_geo_transformer = process_grib.get_transformer(crs, NAD83_CRS)
geo_to_raster_transformer = process_grib.get_transformer(NAD83_CRS, crs)
padf_transform = process_grib.get_dataset_geometry(filename)
padf_transform = process_grib.get_dataset_transform(filename)

processor = process_grib.GribFileProcessor(padf_transform, raster_to_geo_transformer, geo_to_raster_transformer)

Expand Down
Loading

0 comments on commit 4105c3e

Please sign in to comment.