Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ST_Read_Meta() mismatch type error on repeated calls #451

Open
rcoup opened this issue Nov 7, 2024 · 1 comment · May be fixed by #452
Open

ST_Read_Meta() mismatch type error on repeated calls #451

rcoup opened this issue Nov 7, 2024 · 1 comment · May be fixed by #452

Comments

@rcoup
Copy link

rcoup commented Nov 7, 2024

After calling ST_Read_Meta('my.shp') against a vanilla shapefile, subsequent calls to ST_Read_Meta() for the same or a different shapefile regularly throw a type mismatch error:

Mismatch Type Error: Type STRUCT("name" VARCHAR, "type" VARCHAR, nullable BOOLEAN) does not match with STRUCT("name" VARCHAR, "type" VARCHAR, nullable BOOLEAN, crs STRUCT("name" VARCHAR, auth_name VARCHAR, auth_code VARCHAR, wkt VARCHAR, proj4 VARCHAR, projjson VARCHAR)). Cannot cast STRUCTs of different size

Sometimes it'll work several times in a row, then fail one or more times, then work again.

CLI Reproducer

$ duckdb
v1.1.3 19864453f7
Enter ".help" for usage hints.
Connected to a transient in-memory database.
Use ".open FILENAME" to reopen on a persistent database.
D LOAD spatial;
D .mode line
D select * from ST_Read_Meta('./osm_buildings_a.shp');
        file_name = ./osm_buildings_a.shp
driver_short_name = ESRI Shapefile
 driver_long_name = ESRI Shapefile
           layers = [{'name': osm_buildings_a, 'feature_count': 3108045, 'geometry_fields': [{'name': geom, 'type': Polygon, 'nullable': true, 'crs': {'name': WGS 84, 'auth_name': EPSG, 'auth_code': 4326, 'wkt': GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]], 'proj4': +proj=longlat +datum=WGS84 +no_defs, 'projjson': {
  "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json",
  "type": "GeographicCRS",
  "name": "WGS 84",
  "datum": {
    "type": "GeodeticReferenceFrame",
    "name": "World Geodetic System 1984",
    "ellipsoid": {
      "name": "WGS 84",
      "semi_major_axis": 6378137,
      "inverse_flattening": 298.257223563
    }
  },
  "coordinate_system": {
    "subtype": "ellipsoidal",
    "axis": [
      {
        "name": "Latitude",
        "abbreviation": "lat",
        "direction": "north",
        "unit": "degree"
      },
      {
        "name": "Longitude",
        "abbreviation": "lon",
        "direction": "east",
        "unit": "degree"
      }
    ]
  },
  "id": {
    "authority": "EPSG",
    "code": 4326
  }
}}}], 'fields': [{'name': osm_id, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 12, 'precision': 0}, {'name': code, 'type': Integer, 'subtype': None, 'nullable': true, 'unique': false, 'width': 4, 'precision': 0}, {'name': fclass, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 28, 'precision': 0}, {'name': name, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 100, 'precision': 0}, {'name': type, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 20, 'precision': 0}]}]
D select * from ST_Read_Meta('./osm_buildings_a.shp');
Mismatch Type Error: Type STRUCT("name" VARCHAR, "type" VARCHAR, nullable BOOLEAN) does not match with STRUCT("name" VARCHAR, "type" VARCHAR, nullable BOOLEAN, crs STRUCT("name" VARCHAR, auth_name VARCHAR, auth_code VARCHAR, wkt VARCHAR, proj4 VARCHAR, projjson VARCHAR)). Cannot cast STRUCTs of different size
D select * from ST_Read_Meta('./osm_buildings_a.shp');
        file_name = ./osm_buildings_a.shp
driver_short_name = ESRI Shapefile
 driver_long_name = ESRI Shapefile
           layers = [{'name': osm_buildings_a, 'feature_count': 3108045, 'geometry_fields': [{'name': geom, 'type': Polygon, 'nullable': true, 'crs': {'name': WGS 84, 'auth_name': EPSG, 'auth_code': 4326, 'wkt': GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]], 'proj4': +proj=longlat +datum=WGS84 +no_defs, 'projjson': {
  "$schema": "https://proj.org/schemas/v0.5/projjson.schema.json",
  "type": "GeographicCRS",
  "name": "WGS 84",
  "datum": {
    "type": "GeodeticReferenceFrame",
    "name": "World Geodetic System 1984",
    "ellipsoid": {
      "name": "WGS 84",
      "semi_major_axis": 6378137,
      "inverse_flattening": 298.257223563
    }
  },
  "coordinate_system": {
    "subtype": "ellipsoidal",
    "axis": [
      {
        "name": "Latitude",
        "abbreviation": "lat",
        "direction": "north",
        "unit": "degree"
      },
      {
        "name": "Longitude",
        "abbreviation": "lon",
        "direction": "east",
        "unit": "degree"
      }
    ]
  },
  "id": {
    "authority": "EPSG",
    "code": 4326
  }
}}}], 'fields': [{'name': osm_id, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 12, 'precision': 0}, {'name': code, 'type': Integer, 'subtype': None, 'nullable': true, 'unique': false, 'width': 4, 'precision': 0}, {'name': fclass, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 28, 'precision': 0}, {'name': name, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 100, 'precision': 0}, {'name': type, 'type': String, 'subtype': None, 'nullable': true, 'unique': false, 'width': 20, 'precision': 0}]}]

ogrinfo output for the shapefile

$ ogrinfo ./osm_buildings_a.shp -so -al
INFO: Open of `./osm_buildings_a.shp'
      using driver `ESRI Shapefile' successful.

Layer name: osm_buildings_a
Geometry: Polygon
Feature Count: 3108045
Extent: (73.392011, -54.772455) - (167.994444, -9.229781)
Layer SRS WKT:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]
Data axis to CRS axis mapping: 2,1
osm_id: String (12.0)
code: Integer (4.0)
fclass: String (28.0)
name: String (100.0)
type: String (20.0)

Software/OS details

  • DuckDB v1.1.3 19864453f7
  • Spatial extension version 7ea79b6
  • Linux, Debian Bookworm, Docker environment

Reproducing

  • Reproducible with different shapefiles
  • Reproducible on both container & bind-mounted filesystems
  • Reproducible on both aarch64 and amd64
  • Reproducible in both in-memory and persistent modes
  • Haven't reproduced on macOS with the same duckdb & extension versions.
  • Haven't reproduced with GeoPackage data format.
@rcoup
Copy link
Author

rcoup commented Nov 7, 2024

Running a debug build yields some different information:

root@5e913d8be5fa:/src# yes "select * from st_read_meta('/mnt/osm_buildings_a.shp');" | $DUCKDB -json -echo -cmd 'load spatial' | cut -c "1-$(tput cols)"

load spatial
select * from st_read_meta('/mnt/osm_buildings_a.shp');
[{"file_name":"/mnt/osm_buildings_a.shp","driver_short_name":"ESRI Shapefile","driver_long_name":"ESRI Shap
select * from st_read_meta('/mnt/osm_buildings_a.shp');
[{"file_name":"/mnt/osm_buildings_a.shp","driver_short_name":"ESRI Shapefile","driver_long_name":"ESRI Shap
select * from st_read_meta('/mnt/osm_buildings_a.shp');
[{"file_name":"/mnt/osm_buildings_a.shp","driver_short_name":"ESRI Shapefile","driver_long_name":"ESRI Shap
/src/spatial/src/spatial/gdal/file_handler.cpp:87:10: runtime error: load of value 32, which is not a valid value for type 'bool'

(with a similar pattern of successes/failures as in the SQL errors above)

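The patch below seems to fix it in some limited testing. The constructor does not initialise the is_eof member, so I can see an undefined-behaviour path when DuckDBFileHandle::Eof() is called before any call to DuckDBFileHandle::Seek(): the flag appears to be read while it still holds an indeterminate value, which matches the sanitizer report above.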

diff --git a/spatial/src/spatial/gdal/file_handler.cpp b/spatial/src/spatial/gdal/file_handler.cpp
index 2111de0..bde8661 100644
--- a/spatial/src/spatial/gdal/file_handler.cpp
+++ b/spatial/src/spatial/gdal/file_handler.cpp
@@ -24,7 +24,7 @@ private:
 	bool is_eof;

 public:
-	explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)) {
+	explicit DuckDBFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(std::move(file_handle_p)), is_eof(false) {
 	}

 	vsi_l_offset Tell() override {

@rcoup rcoup linked a pull request Nov 7, 2024 that will close this issue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

1 participant