Skip to content

Commit

Permalink
Add collection_property helper to simplify building property filter…
Browse files Browse the repository at this point in the history
…s for load_collection
  • Loading branch information
soxofaan committed Nov 14, 2023
1 parent d1ce448 commit 40be725
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Support new UDF signature: `def apply_datacube(cube: DataArray, context: dict) -> DataArray`
([#310](https://github.com/Open-EO/openeo-python-client/issues/310))
- Add `collection_property()` helper to easily build collection metadata property filters for `Connection.load_collection()`
([#331](https://github.com/Open-EO/openeo-python-client/pull/331))

### Changed

Expand Down
7 changes: 6 additions & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,13 @@ openeo.processes
:members: process


openeo.internal
Graph building
----------------

Various utilities and helpers to simplify the construction of openEO process graphs.

.. automodule:: openeo.rest.graph_building
:members: collection_property, CollectionProperty

.. automodule:: openeo.internal.graph_building
:members: PGNode, FlatGraphableMixin
1 change: 1 addition & 0 deletions openeo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class BaseOpenEoException(Exception):
from openeo._version import __version__
from openeo.rest.connection import Connection, connect, session
from openeo.rest.datacube import UDF, DataCube
from openeo.rest.graph_building import collection_property
from openeo.rest.job import BatchJob, RESTJob


Expand Down
5 changes: 4 additions & 1 deletion openeo/internal/graph_building.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""
Internal openEO process graph building utilities
''''''''''''''''''''''''''''''''''''''''''''''''''
Internal functionality for abstracting, building, manipulating and processing openEO process graphs.
Functionality for abstracting, building, manipulating and processing openEO process graphs.
"""

from __future__ import annotations
Expand Down
13 changes: 10 additions & 3 deletions openeo/rest/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
OidcResourceOwnerPasswordAuthenticator,
)
from openeo.rest.datacube import DataCube, InputDate
from openeo.rest.graph_building import CollectionProperty
from openeo.rest.job import BatchJob, RESTJob
from openeo.rest.mlmodel import MlModel
from openeo.rest.rest_capabilities import RESTCapabilities
Expand Down Expand Up @@ -1142,7 +1143,9 @@ def load_collection(
spatial_extent: Optional[Dict[str, float]] = None,
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[None, List[str], Parameter] = None,
properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
properties: Union[
None, Dict[str, Union[str, PGNode, Callable]], List[CollectionProperty], CollectionProperty
] = None,
max_cloud_cover: Optional[float] = None,
fetch_metadata=True,
) -> DataCube:
Expand All @@ -1154,8 +1157,9 @@ def load_collection(
:param temporal_extent: limit data to specified temporal interval.
Typically, just a two-item list or tuple containing start and end date.
See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
:param bands: only add the specified bands
:param properties: limit data by metadata property predicates
:param bands: only add the specified bands.
:param properties: limit data by collection metadata property predicates.
See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
:param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property)
:return: a datacube containing the requested data
Expand All @@ -1165,6 +1169,9 @@ def load_collection(
.. versionchanged:: 0.23.0
Argument ``temporal_extent``: add support for year/month shorthand notation
as discussed at :ref:`date-shorthand-handling`.
.. versionchanged:: 0.26.0
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
"""
return DataCube.load_collection(
collection_id=collection_id,
Expand Down
30 changes: 23 additions & 7 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@
from openeo.internal.documentation import openeo_process
from openeo.internal.graph_building import PGNode, ReduceNode, _FromNodeMixin
from openeo.internal.jupyter import in_jupyter_context
from openeo.internal.processes.builder import ProcessBuilderBase
from openeo.internal.processes.builder import ProcessBuilderBase, convert_callable_to_pgnode, get_parameter_names
from openeo.internal.warnings import UserDeprecationWarning, deprecated, legacy_alias
from openeo.metadata import Band, CollectionMetadata
from openeo.metadata import Band, BandDimension, CollectionMetadata, SpatialDimension, TemporalDimension
from openeo.processes import ProcessBuilder
from openeo.rest import BandMathException, OpenEoClientException, OperatorException
from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
from openeo.rest.job import BatchJob
from openeo.rest.graph_building import CollectionProperty
from openeo.rest.job import BatchJob, RESTJob
from openeo.rest.mlmodel import MlModel
from openeo.rest.service import Service
from openeo.rest.udp import RESTUserDefinedProcess
Expand Down Expand Up @@ -123,7 +125,9 @@ def load_collection(
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[None, List[str], Parameter] = None,
fetch_metadata: bool = True,
properties: Optional[Dict[str, Union[str, PGNode, typing.Callable]]] = None,
properties: Union[
None, Dict[str, Union[str, PGNode, typing.Callable]], List[CollectionProperty], CollectionProperty
] = None,
max_cloud_cover: Optional[float] = None,
) -> DataCube:
"""
Expand All @@ -135,8 +139,9 @@ def load_collection(
:param temporal_extent: limit data to specified temporal interval.
Typically, just a two-item list or tuple containing start and end date.
See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation.
:param bands: only add the specified bands
:param properties: limit data by metadata property predicates
:param bands: only add the specified bands.
:param properties: limit data by metadata property predicates.
See :py:func:`~openeo.rest.graph_building.collection_property` for easy construction of such predicates.
:param max_cloud_cover: shortcut to set maximum cloud cover ("eo:cloud_cover" collection property)
:return: new DataCube containing the collection
Expand All @@ -146,6 +151,9 @@ def load_collection(
.. versionchanged:: 0.23.0
Argument ``temporal_extent``: add support for year/month shorthand notation
as discussed at :ref:`date-shorthand-handling`.
.. versionchanged:: 0.26.0
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
"""
if temporal_extent:
temporal_extent = cls._get_temporal_extent(extent=temporal_extent)
Expand All @@ -169,8 +177,15 @@ def load_collection(
bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands]
metadata = metadata.filter_bands(bands)
arguments['bands'] = bands

if isinstance(properties, list):
# TODO: warn about items that are not CollectionProperty objects instead of silently dropping them.
properties = {p.name: p.from_node() for p in properties if isinstance(p, CollectionProperty)}
if isinstance(properties, CollectionProperty):
properties = {properties.name: properties.from_node()}
elif properties is None:
properties = {}
if max_cloud_cover:
properties = properties or {}
properties["eo:cloud_cover"] = lambda v: v <= max_cloud_cover
if properties:
summaries = metadata and metadata.get("summaries") or {}
Expand All @@ -184,6 +199,7 @@ def load_collection(
arguments["properties"] = {
prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
}

pg = PGNode(
process_id='load_collection',
arguments=arguments
Expand Down
78 changes: 78 additions & 0 deletions openeo/rest/graph_building.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Public openEO process graph building utilities
'''''''''''''''''''''''''''''''''''''''''''''''
"""
from __future__ import annotations

from typing import Optional

from openeo.internal.graph_building import PGNode, _FromNodeMixin
from openeo.processes import ProcessBuilder


class CollectionProperty(_FromNodeMixin):
    """
    Helper object to easily create simple collection metadata property filters
    to be used with :py:meth:`Connection.load_collection() <openeo.rest.connection.Connection.load_collection>`.

    The comparison operators (``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``) do not return a boolean.
    Each one returns a new :py:class:`CollectionProperty` for the same property name,
    wrapping the result of applying that operator to the underlying process builder.

    .. note:: This class should not be used directly by end user code.
        Use the :py:func:`~openeo.rest.graph_building.collection_property` factory instead.

    .. warning:: this is an experimental feature, naming might change.

    :param name: name of the collection property to filter on
    :param _builder: (internal) process builder holding the expression built so far.
        When omitted, a builder referencing the ``value`` parameter is used as starting point.
    """

    def __init__(self, name: str, _builder: Optional[ProcessBuilder] = None):
        self.name = name
        # Check for `None` explicitly instead of relying on truthiness:
        # the builder is an operator-overloading object, so "is it falsy?" is not
        # a reliable way to ask "was a builder provided?".
        if _builder is None:
            _builder = ProcessBuilder(pgnode={"from_parameter": "value"})
        self._builder = _builder

    def from_node(self) -> PGNode:
        """Return the process graph node of the wrapped builder expression."""
        return self._builder.from_node()

    # Note: the rich comparison methods below intentionally build a new filter object
    # (instead of returning a bool), which enables `collection_property("x") <= 75` style usage.

    def __eq__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder == other)

    def __ne__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder != other)

    def __gt__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder > other)

    def __ge__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder >= other)

    def __lt__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder < other)

    def __le__(self, other) -> CollectionProperty:
        return CollectionProperty(self.name, _builder=self._builder <= other)


def collection_property(name: str) -> CollectionProperty:
    """
    Create a filter helper for a single collection metadata property,
    to be used with :py:meth:`Connection.load_collection() <openeo.rest.connection.Connection.load_collection>`.

    Usage example:

    .. code-block:: python

        from openeo import collection_property
        ...

        connection.load_collection(
            ...
            properties=[
                collection_property("eo:cloud_cover") <= 75,
                collection_property("platform") == "Sentinel-2B",
            ]
        )

    .. warning:: this is an experimental feature, naming might change.

    .. versionadded:: 0.26.0

    :param name: name of the collection property to filter on
    :return: an object that supports operators like ``<=``, ``==`` to easily build simple property filters.
    """
    property_filter = CollectionProperty(name=name)
    return property_filter
107 changes: 107 additions & 0 deletions tests/rest/datacube/test_datacube100.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import openeo.metadata
import openeo.processes
from openeo import collection_property
from openeo.api.process import Parameter
from openeo.capabilities import ComparableVersion
from openeo.internal.graph_building import PGNode
Expand Down Expand Up @@ -1645,6 +1646,112 @@ def test_load_collection_max_cloud_cover_summaries_warning(
assert len(recwarn.list) == 0


def test_load_collection_with_collection_properties(con100):
    """A list of `collection_property` predicates is translated to per-property `load_collection` filters."""
    property_filters = [
        collection_property("eo:cloud_cover") <= 75,
        collection_property("platform") == "Sentinel-2B",
    ]
    cube = con100.load_collection("S2", properties=property_filters)

    load_arguments = cube.flat_graph()["loadcollection1"]["arguments"]
    expected_cloud_cover = {
        "process_graph": {
            "lte1": {
                "process_id": "lte",
                "arguments": {"x": {"from_parameter": "value"}, "y": 75},
                "result": True,
            }
        }
    }
    expected_platform = {
        "process_graph": {
            "eq1": {
                "process_id": "eq",
                "arguments": {"x": {"from_parameter": "value"}, "y": "Sentinel-2B"},
                "result": True,
            }
        }
    }
    assert load_arguments["properties"] == {
        "eo:cloud_cover": expected_cloud_cover,
        "platform": expected_platform,
    }


def test_load_collection_with_collection_properties_and_cloud_cover(con100):
    """A `collection_property` list combined with `max_cloud_cover` yields both property filters."""
    property_filters = [
        collection_property("platform") == "Sentinel-2B",
    ]
    cube = con100.load_collection("S2", properties=property_filters, max_cloud_cover=66)

    load_arguments = cube.flat_graph()["loadcollection1"]["arguments"]
    expected_cloud_cover = {
        "process_graph": {
            "lte1": {
                "process_id": "lte",
                "arguments": {"x": {"from_parameter": "value"}, "y": 66},
                "result": True,
            }
        }
    }
    expected_platform = {
        "process_graph": {
            "eq1": {
                "process_id": "eq",
                "arguments": {"x": {"from_parameter": "value"}, "y": "Sentinel-2B"},
                "result": True,
            }
        }
    }
    assert load_arguments["properties"] == {
        "eo:cloud_cover": expected_cloud_cover,
        "platform": expected_platform,
    }


def test_load_collection_with_single_collection_property(con100):
    """A single `collection_property` predicate (not wrapped in a list) is accepted as `properties`."""
    platform_filter = collection_property("platform") == "Sentinel-2B"
    cube = con100.load_collection("S2", properties=platform_filter)

    load_arguments = cube.flat_graph()["loadcollection1"]["arguments"]
    expected_platform = {
        "process_graph": {
            "eq1": {
                "process_id": "eq",
                "arguments": {"x": {"from_parameter": "value"}, "y": "Sentinel-2B"},
                "result": True,
            }
        }
    }
    assert load_arguments["properties"] == {"platform": expected_platform}


def test_load_collection_with_single_collection_property_and_cloud_cover(con100):
    """A single `collection_property` predicate combined with `max_cloud_cover` yields both property filters."""
    platform_filter = collection_property("platform") == "Sentinel-2B"
    cube = con100.load_collection("S2", properties=platform_filter, max_cloud_cover=66)

    load_arguments = cube.flat_graph()["loadcollection1"]["arguments"]
    expected_cloud_cover = {
        "process_graph": {
            "lte1": {
                "process_id": "lte",
                "arguments": {"x": {"from_parameter": "value"}, "y": 66},
                "result": True,
            }
        }
    }
    expected_platform = {
        "process_graph": {
            "eq1": {
                "process_id": "eq",
                "arguments": {"x": {"from_parameter": "value"}, "y": "Sentinel-2B"},
                "result": True,
            }
        }
    }
    assert load_arguments["properties"] == {
        "eo:cloud_cover": expected_cloud_cover,
        "platform": expected_platform,
    }


def test_load_collection_temporal_extent_process_builder_function(con100):
from openeo.processes import date_shift

Expand Down

0 comments on commit 40be725

Please sign in to comment.