From ddf179b270da2d6ca7cbde4289960f68ac9bd478 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 9 Sep 2024 20:13:32 +1200 Subject: [PATCH 1/4] :heavy_plus_sign: Add num-traits Numeric traits for generic mathematics! Repo at https://github.com/rust-num/num-traits --- Cargo.lock | 5 +++-- Cargo.toml | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 846734a..f184355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -159,6 +159,7 @@ dependencies = [ "bytes", "geo", "ndarray", + "num-traits", "numpy", "object_store", "pyo3", @@ -770,9 +771,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", diff --git a/Cargo.toml b/Cargo.toml index 43531d6..fa9e898 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ crate-type = ["cdylib", "rlib"] bytes = "1.5.0" geo = { git = "https://github.com/georust/geo.git", version = "0.28.0", rev = "481196b4e50a488442b3919e02496ad909fc5412" } ndarray = "0.15.6" +num-traits = "0.2.19" numpy = "0.21.0" object_store = { version = "0.9.0", features = ["http"] } pyo3 = { version = "0.21.1", features = ["abi3-py310", "extension-module"] } From d152e51de16b91e6c86c87248de93bcaa6662391 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 9 Sep 2024 21:07:04 +1200 Subject: [PATCH 2/4] :sparkles: Support reading uint/int/float dtypes Add support on the Rust side for reading u8/u16/u32/u64/i8/i16/i32/i64/f32/f64 dtypes via a num_traits::FromPrimitive bound. Different dtypes can be selected via the turbofish operator e.g. by calling `.ndarray::<u16>()`. Added a unit test to check that reading a uint16 tif file works. 
--- src/io/geotiff.rs | 75 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 14 deletions(-) diff --git a/src/io/geotiff.rs b/src/io/geotiff.rs index 6a68854..3c40eda 100644 --- a/src/io/geotiff.rs +++ b/src/io/geotiff.rs @@ -2,6 +2,7 @@ use std::io::{Read, Seek}; use geo::AffineTransform; use ndarray::{Array, Array1, Array3}; +use num_traits::FromPrimitive; use tiff::decoder::{Decoder, DecodingResult, Limits}; use tiff::tags::Tag; use tiff::{ColorType, TiffError, TiffFormatError, TiffResult, TiffUnsupportedError}; @@ -23,7 +24,7 @@ impl CogReader { } /// Decode GeoTIFF image to an [`ndarray::Array`] - pub fn ndarray(&mut self) -> TiffResult> { + pub fn ndarray(&mut self) -> TiffResult> { // Count number of bands let color_type = self.decoder.colortype()?; let num_bands: usize = match color_type { @@ -44,19 +45,45 @@ impl CogReader { // Get image pixel data let decode_result = self.decoder.read_image()?; - let image_data: Vec = match decode_result { - DecodingResult::F32(img_data) => img_data, - _ => { - return Err(TiffError::UnsupportedError( - TiffUnsupportedError::UnsupportedDataType, - )) + let image_data: Vec = match decode_result { + DecodingResult::U8(img_data) => { + img_data.iter().map(|v| T::from_u8(*v).unwrap()).collect() + } + DecodingResult::U16(img_data) => { + img_data.iter().map(|v| T::from_u16(*v).unwrap()).collect() + } + DecodingResult::U32(img_data) => { + img_data.iter().map(|v| T::from_u32(*v).unwrap()).collect() + } + DecodingResult::U64(img_data) => { + img_data.iter().map(|v| T::from_u64(*v).unwrap()).collect() + } + DecodingResult::I8(img_data) => { + img_data.iter().map(|v| T::from_i8(*v).unwrap()).collect() + } + DecodingResult::I16(img_data) => { + img_data.iter().map(|v| T::from_i16(*v).unwrap()).collect() + } + DecodingResult::I32(img_data) => { + img_data.iter().map(|v| T::from_i32(*v).unwrap()).collect() + } + DecodingResult::I64(img_data) => { + img_data.iter().map(|v| 
T::from_i64(*v).unwrap()).collect() + } + DecodingResult::F32(img_data) => { + img_data.iter().map(|v| T::from_f32(*v).unwrap()).collect() + } + DecodingResult::F64(img_data) => { + img_data.iter().map(|v| T::from_f64(*v).unwrap()).collect() } }; // Put image pixel data into an ndarray - let array_data = - Array3::from_shape_vec((num_bands, height as usize, width as usize), image_data) - .map_err(|_| TiffFormatError::InconsistentSizesEncountered)?; + let array_data: Array3 = Array3::from_shape_vec( + (num_bands, height as usize, width as usize), + image_data.into(), + ) + .map_err(|_| TiffFormatError::InconsistentSizesEncountered)?; Ok(array_data) } @@ -138,12 +165,14 @@ impl CogReader { } /// Synchronously read a GeoTIFF file into an [`ndarray::Array`] -pub fn read_geotiff(stream: R) -> TiffResult> { +pub fn read_geotiff( + stream: R, +) -> TiffResult> { // Open TIFF stream with decoder let mut reader = CogReader::new(stream)?; // Decode TIFF into ndarray - let array_data: Array3 = reader.ndarray()?; + let array_data: Array3 = reader.ndarray()?; Ok(array_data) } @@ -205,7 +234,25 @@ mod tests { let array = reader.ndarray().unwrap(); assert_eq!(array.dim(), (2, 512, 512)); - assert_eq!(array.mean(), Some(225.17654)); + assert_eq!(array.mean(), Some(225.17439122416545)); + } + + #[tokio::test] + async fn test_read_geotiff_uint16_dtype() { + let cog_url: &str = + "https://github.com/OSGeo/gdal/raw/v3.9.2/autotest/gcore/data/uint16.tif"; + let tif_url = Url::parse(cog_url).unwrap(); + let (store, location) = parse_url(&tif_url).unwrap(); + + let result = store.get(&location).await.unwrap(); + let bytes = result.bytes().await.unwrap(); + let stream = Cursor::new(bytes); + + let mut reader = CogReader::new(stream).unwrap(); + let array = reader.ndarray::().unwrap(); + + assert_eq!(array.dim(), (1, 20, 20)); + assert_eq!(array.mean(), Some(126)); } #[tokio::test] @@ -219,7 +266,7 @@ mod tests { let stream = Cursor::new(bytes); let mut reader = 
CogReader::new(stream).unwrap(); - let array = reader.ndarray().unwrap(); + let array = reader.ndarray::().unwrap(); assert_eq!(array.shape(), [1, 2, 3]); assert_eq!(array, array![[[1.41, 1.23, 0.78], [0.32, -0.23, -1.88]]]) From 3e737d02515dd0cb6cf862d463b97ae7b347e025 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 9 Sep 2024 21:39:57 +1200 Subject: [PATCH 3/4] :memo: Tick off multi-dtype support in roadmap and update timeline Took longer than expected, but support for reading multiple dtypes finally landed (albeit only in the Rust bindings). Stretched out the timeline in the roadmap further into the future, and mentioning aiocogeo-rs under related crates. --- README.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0e3c3c5..2a8d2ea 100644 --- a/README.md +++ b/README.md @@ -91,9 +91,9 @@ assert dataarray.dtype == "float32" ``` > [!NOTE] -> Currently, this crate/library only supports reading single or multi-band float32 -> GeoTIFF files, i.e. other dtypes (e.g. uint16) don't work yet. See roadmap below on -> future plans. +> Currently, the Python library supports reading single or multi-band GeoTIFF files into +> a float32 array only, i.e. other dtypes (e.g. uint16) don't work yet. There is support +> for reading into different dtypes in the Rust crate via a turbofish operator though! 
## Roadmap @@ -104,19 +104,20 @@ Short term (Q1 2024): - [x] Read from HTTP remote storage (using [`object-store`](https://github.com/apache/arrow-rs/tree/object_store_0.9.0/object_store)) -Medium term (Q2 2024): +Medium term (Q2-Q4 2024): - [x] Integration with `xarray` as a [`BackendEntrypoint`](https://docs.xarray.dev/en/v2024.02.0/internals/how-to-add-new-backend.html) -- [ ] Implement single-band GeoTIFF reader for multiple dtypes (uint/int/float) (relying - on [`geotiff`](https://github.com/georust/geotiff) crate) +- [x] Implement single-band GeoTIFF reader for multiple dtypes (uint/int/float) (based + on [`geotiff`](https://github.com/georust/geotiff) crate, Rust-only) -Longer term (Q3-Q4 2024): +Longer term (2025): - [ ] Parallel reader (TBD on multi-threaded or asynchronous) - [ ] Direct-to-GPU loading ## Related crates +- https://github.com/developmentseed/aiocogeo-rs - https://github.com/georust/geotiff - https://github.com/jblindsay/whitebox-tools - https://github.com/pka/georaster From 3cbc6f23c37f1c20d9a73b36c0b59a79d556897b Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:01:32 +1200 Subject: [PATCH 4/4] :memo: Document how to set output dtype using turbofish operator Show how the turbofish operator (e.g. `::<f32>`) can be used to set the output dtype from the `read_geotiff` function. Mention all supported dtypes in crate-level docs at src/lib.rs. 
--- README.md | 2 +- src/lib.rs | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2a8d2ea..2d65726 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ async fn main() { }; // Read GeoTIFF into an ndarray::Array - let arr: Array3 = read_geotiff(stream).unwrap(); + let arr: Array3 = read_geotiff::(stream).unwrap(); assert_eq!(arr.dim(), (1, 549, 549)); assert_eq!(arr[[0, 500, 500]], 0.13482364); } diff --git a/src/lib.rs b/src/lib.rs index 1cec2fc..9c250f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,11 +38,16 @@ //! Cursor::new(bytes) //! }; //! -//! let arr: Array3 = read_geotiff(stream).unwrap(); +//! let arr: Array3 = read_geotiff::(stream).unwrap(); //! assert_eq!(arr.dim(), (1, 549, 549)); //! assert_eq!(arr[[0, 500, 500]], 0.13482364); //! } //! ``` +//! +//! Note that the output dtype can be specified either by using a type hint +//! (`let arr: Array3<f32>`) or via the turbofish operator (`read_geotiff::<f32>`). +//! Currently supported dtypes include uint (u8/u16/u32/u64), int (i8/i16/i32/i64) and +//! float (f32/f64). /// Modules for handling Input/Output of GeoTIFF data pub mod io;