Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue #678 support shapely in load collection spatial extent #682

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
- Argument `spatial_extent` in `load_collection` supports Shapely objects and loading GeoJSON from a local path. ([#678](https://github.com/Open-EO/openeo-python-client/issues/678))

### Changed

Expand Down
13 changes: 11 additions & 2 deletions openeo/rest/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] =
def load_collection(
self,
collection_id: Union[str, Parameter],
spatial_extent: Union[Dict[str, float], Parameter, None] = None,
spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None,
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[None, List[str], Parameter] = None,
properties: Union[
Expand All @@ -1243,7 +1243,13 @@ def load_collection(
Load a DataCube by collection id.

:param collection_id: image collection identifier
:param spatial_extent: limit data to specified bounding box or polygons
:param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways:
- a bounding box dictionary
- a shapely geometry
ElienVandermaesenVITO marked this conversation as resolved.
Show resolved Hide resolved
- a GeoJSON-style dictionary,
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.api.process.Parameter` instance.
:param temporal_extent: limit data to specified temporal interval.
Typically, just a two-item list or tuple containing start and end date.
See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
Expand All @@ -1262,6 +1268,9 @@ def load_collection(

.. versionchanged:: 0.26.0
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.

.. versionchanged:: 0.37.0
Add support for shapely geometry and local path to GeoJSON file to spatial_extent argument.
"""
return DataCube.load_collection(
collection_id=collection_id,
Expand Down
186 changes: 106 additions & 80 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def load_collection(
cls,
collection_id: Union[str, Parameter],
connection: Optional[Connection] = None,
spatial_extent: Union[Dict[str, float], Parameter, None] = None,
spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None,
ElienVandermaesenVITO marked this conversation as resolved.
Show resolved Hide resolved
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[None, List[str], Parameter] = None,
fetch_metadata: bool = True,
Expand All @@ -158,7 +158,13 @@ def load_collection(
:param collection_id: image collection identifier
:param connection: The backend connection to use.
Can be ``None`` to work without connection and collection metadata.
:param spatial_extent: limit data to specified bounding box or polygons
:param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways:
- a bounding box dictionary
- a shapely geometry
- a GeoJSON-style dictionary,
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.api.process.Parameter` instance.
:param temporal_extent: limit data to specified temporal interval.
Typically, just a two-item list or tuple containing start and end date.
See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
Expand All @@ -177,6 +183,9 @@ def load_collection(

.. versionchanged:: 0.26.0
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.

.. versionchanged:: 0.37.0
Add support for shapely geometry and local path to GeoJSON file to spatial_extent argument.
"""
if temporal_extent:
temporal_extent = cls._get_temporal_extent(extent=temporal_extent)
Expand All @@ -187,9 +196,19 @@ def load_collection(
"Unexpected parameterized `spatial_extent` in `load_collection`:"
f" expected schema with type 'object' but got {spatial_extent.schema!r}."
)
elif spatial_extent is None or (
isinstance(spatial_extent, dict) and spatial_extent.keys() & {"west", "east", "north", "south"}
):
pass
else:
valid_geojson_types = [
"Polygon", "MultiPolygon", "Feature", "FeatureCollection"
]
spatial_extent = _get_geometry_argument(argument=spatial_extent, valid_geojson_types=valid_geojson_types,
connection=connection)
ElienVandermaesenVITO marked this conversation as resolved.
Show resolved Hide resolved

arguments = {
'id': collection_id,
# TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here.
'spatial_extent': spatial_extent,
'temporal_extent': temporal_extent,
}
Expand Down Expand Up @@ -628,10 +647,16 @@ def filter_spatial(
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
valid_geojson_types = [
"Point", "MultiPoint", "LineString", "MultiLineString",
ElienVandermaesenVITO marked this conversation as resolved.
Show resolved Hide resolved
"Polygon", "MultiPolygon", "GeometryCollection", "FeatureCollection"
"Point",
"MultiPoint",
"LineString",
"MultiLineString",
"Polygon",
"MultiPolygon",
"GeometryCollection",
"FeatureCollection",
]
geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=None)
geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection=self.connection, crs=None)
return self.process(
process_id='filter_spatial',
arguments={
Expand Down Expand Up @@ -1058,75 +1083,6 @@ def _merge_operator_binary_cubes(
}
))

def _get_geometry_argument(
    self,
    argument: Union[
        shapely.geometry.base.BaseGeometry,
        dict,
        str,
        pathlib.Path,
        Parameter,
        _FromNodeMixin,
    ],
    valid_geojson_types: List[str],
    crs: Optional[str] = None,
) -> Union[dict, Parameter, PGNode]:
    """
    Convert input to a geometry as "geojson" subtype object or vectorcube.

    :param argument: the geometry, given as one of:
        a :py:class:`Parameter` (returned as-is),
        an object with a ``from_node()`` method (its node is returned),
        a ``http(s)://`` URL (loaded through ``load_url`` of this cube's connection),
        a path to an existing local ``.json``/``.geojson`` file (loaded client-side),
        a shapely geometry, or a GeoJSON-style dictionary.
    :param valid_geojson_types: GeoJSON ``type`` values that are accepted
        for geometries that are resolved client-side.
    :param crs: value that encodes a coordinate reference system.
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :return: GeoJSON-style dictionary, or the parameter/node that represents the geometry.
    :raises OpenEoClientException: on an unsupported argument, a GeoJSON type
        that is not in ``valid_geojson_types``, or a URL argument when this cube has no connection.
    """
    if isinstance(argument, Parameter):
        return argument
    if isinstance(argument, _FromNodeMixin):
        return argument.from_node()

    if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
        # Geometry provided as URL: load with `load_url` (with best-effort format guess)
        if self.connection is None:
            # Fail with a clear client error instead of an AttributeError on `None.load_url`.
            raise OpenEoClientException(f"Loading geometry from URL {argument!r} requires a connection.")
        url = urllib.parse.urlparse(argument)
        suffix = pathlib.Path(url.path.lower()).suffix
        # Unknown suffixes fall back to the bare extension (without the dot).
        url_format = {
            ".json": "GeoJSON",
            ".geojson": "GeoJSON",
            ".pq": "Parquet",
            ".parquet": "Parquet",
            ".geoparquet": "Parquet",
        }.get(suffix, suffix.split(".")[-1])
        return self.connection.load_url(url=argument, format=url_format)

    if (
        isinstance(argument, (str, pathlib.Path))
        and pathlib.Path(argument).is_file()
        and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
    ):
        geometry = load_json(argument)
    elif isinstance(argument, shapely.geometry.base.BaseGeometry):
        geometry = mapping(argument)
    elif isinstance(argument, dict):
        geometry = argument
    else:
        raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

    geometry_type = geometry.get("type")
    if geometry_type not in valid_geojson_types:
        raise OpenEoClientException(
            f"Invalid geometry type {geometry_type!r}, must be one of {valid_geojson_types}"
        )
    if crs:
        # TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
        warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
        # TODO #204 alternative for non-standard CRS in GeoJSON object?
        epsg_code = normalize_crs(crs)
        if epsg_code is not None:
            # proj did recognize the CRS
            crs_name = f"EPSG:{epsg_code}"
        else:
            # proj did not recognise this CRS
            warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.")
            crs_name = crs
        # Build a new dict instead of assigning into `geometry`,
        # so a GeoJSON dict passed in by the caller is not modified as a side effect.
        geometry = {**geometry, "crs": {"type": "name", "properties": {"name": crs_name}}}
    return geometry

@openeo_process
def aggregate_spatial(
self,
Expand Down Expand Up @@ -1198,7 +1154,7 @@ def aggregate_spatial(
"Point", "MultiPoint", "LineString", "MultiLineString",
"Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"
]
geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs)
geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection= self.connection, crs=crs)
reducer = build_child_callback(reducer, parent_parameters=["data"])
return VectorCube(
graph=self._build_pgnode(
Expand Down Expand Up @@ -1478,8 +1434,8 @@ def chunk_polygon(
"Feature",
"FeatureCollection",
]
chunks = self._get_geometry_argument(
chunks, valid_geojson_types=valid_geojson_types
chunks = _get_geometry_argument(
chunks, valid_geojson_types=valid_geojson_types, connection=self.connection
)
mask_value = float(mask_value) if mask_value is not None else None
return self.process(
Expand Down Expand Up @@ -1568,7 +1524,7 @@ def apply_polygon(

process = build_child_callback(process, parent_parameters=["data"], connection=self.connection)
valid_geojson_types = ["Polygon", "MultiPolygon", "Feature", "FeatureCollection"]
geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types)
geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection=self.connection)
mask_value = float(mask_value) if mask_value is not None else None
return self.process(
process_id="apply_polygon",
Expand Down Expand Up @@ -2056,7 +2012,7 @@ def mask_polygon(
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)
mask = _get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, connection=self.connection, crs=srs)
return self.process(
process_id="mask_polygon",
arguments=dict_no_none(
Expand Down Expand Up @@ -2860,3 +2816,73 @@ def unflatten_dimension(self, dimension: str, target_dimensions: List[str], labe
label_separator=label_separator,
),
)


def _get_geometry_argument(
    argument: Union[
        shapely.geometry.base.BaseGeometry,
        dict,
        str,
        pathlib.Path,
        Parameter,
        _FromNodeMixin,
    ],
    valid_geojson_types: List[str],
    connection: Optional[Connection] = None,
    crs: Optional[str] = None,
) -> Union[dict, Parameter, PGNode]:
    """
    Convert input to a geometry as "geojson" subtype object or vectorcube.

    :param argument: the geometry, given as one of:
        a :py:class:`Parameter` (returned as-is),
        an object with a ``from_node()`` method (its node is returned),
        a ``http(s)://`` URL (loaded through ``connection.load_url``),
        a path to an existing local ``.json``/``.geojson`` file (loaded client-side),
        a shapely geometry, or a GeoJSON-style dictionary.
    :param valid_geojson_types: GeoJSON ``type`` values that are accepted
        for geometries that are resolved client-side.
    :param connection: connection used to build the ``load_url`` node
        when the geometry is given as a URL. Only required in that case.
    :param crs: value that encodes a coordinate reference system.
        See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
    :return: GeoJSON-style dictionary, or the parameter/node that represents the geometry.
    :raises OpenEoClientException: on an unsupported argument, a GeoJSON type
        that is not in ``valid_geojson_types``, or a URL argument without ``connection``.
    """
    if isinstance(argument, Parameter):
        return argument
    if isinstance(argument, _FromNodeMixin):
        return argument.from_node()

    if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
        # Geometry provided as URL: load with `load_url` (with best-effort format guess)
        if connection is None:
            # Fail with a clear client error instead of an AttributeError on `None.load_url`.
            raise OpenEoClientException(f"Loading geometry from URL {argument!r} requires a connection.")
        url = urllib.parse.urlparse(argument)
        suffix = pathlib.Path(url.path.lower()).suffix
        # Unknown suffixes fall back to the bare extension (without the dot).
        url_format = {
            ".json": "GeoJSON",
            ".geojson": "GeoJSON",
            ".pq": "Parquet",
            ".parquet": "Parquet",
            ".geoparquet": "Parquet",
        }.get(suffix, suffix.split(".")[-1])
        return connection.load_url(url=argument, format=url_format)

    if (
        isinstance(argument, (str, pathlib.Path))
        and pathlib.Path(argument).is_file()
        and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
    ):
        geometry = load_json(argument)
    elif isinstance(argument, shapely.geometry.base.BaseGeometry):
        geometry = mapping(argument)
    elif isinstance(argument, dict):
        geometry = argument
    else:
        raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

    geometry_type = geometry.get("type")
    if geometry_type not in valid_geojson_types:
        raise OpenEoClientException(
            f"Invalid geometry type {geometry_type!r}, must be one of {valid_geojson_types}"
        )
    if crs:
        # TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
        warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
        # TODO #204 alternative for non-standard CRS in GeoJSON object?
        epsg_code = normalize_crs(crs)
        if epsg_code is not None:
            # proj did recognize the CRS
            crs_name = f"EPSG:{epsg_code}"
        else:
            # proj did not recognise this CRS
            warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.")
            crs_name = crs
        # Build a new dict instead of assigning into `geometry`,
        # so a GeoJSON dict passed in by the caller is not modified as a side effect.
        geometry = {**geometry, "crs": {"type": "name", "properties": {"name": crs_name}}}
    return geometry
27 changes: 27 additions & 0 deletions tests/rest/datacube/test_datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,33 @@ def test_load_collection_connectionless_temporal_extent_shortcut(self):
}
}

def test_load_collection_connectionless_shapely_spatial_extent(self):
    """A shapely polygon given as ``spatial_extent`` ends up as a GeoJSON Polygon in the process graph."""
    # Local import so the `shapely.geometry` submodule is guaranteed to be loaded.
    # `shapely.geometry.Polygon` works on both Shapely 1.x and 2.x,
    # while the top-level `shapely.Polygon` alias only exists since Shapely 2.0.
    import shapely.geometry

    ring = ((0.0, 1.0), (2.0, 1.0), (3.0, 2.0), (1.5, 0.0), (0.0, 1.0))
    polygon = shapely.geometry.Polygon(ring)
    cube = DataCube.load_collection("T3", spatial_extent=polygon)
    assert cube.flat_graph() == {
        "loadcollection1": {
            "arguments": {
                "id": "T3",
                # The expected coordinates are nested tuples: one exterior ring, no holes.
                "spatial_extent": {"coordinates": (ring,), "type": "Polygon"},
                "temporal_extent": None,
            },
            "process_id": "load_collection",
            "result": True,
        }
    }

ElienVandermaesenVITO marked this conversation as resolved.
Show resolved Hide resolved
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
def test_load_collection_connectionless_local_path_spatial_extent(self, path_factory, test_data):
    """A local GeoJSON path (as ``str`` or ``Path``) given as ``spatial_extent`` is loaded client-side."""
    geojson_path = path_factory(test_data.get_path("geojson/polygon02.json"))
    # Geometry the test expects to find in the process graph for that file.
    expected_geometry = {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]}
    expected_node = {
        "process_id": "load_collection",
        "arguments": {"id": "T3", "spatial_extent": expected_geometry, "temporal_extent": None},
        "result": True,
    }

    graph = DataCube.load_collection("T3", spatial_extent=geojson_path).flat_graph()

    assert graph == {"loadcollection1": expected_node}

def test_load_collection_connectionless_save_result(self):
cube = DataCube.load_collection("T3").save_result(format="GTiff")
assert cube.flat_graph() == {
Expand Down
Loading