Skip to content

Commit

Permalink
fix: remove use of JSON LD from CQL JSON parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
recalcitrantsupplant committed Dec 4, 2024
1 parent 7327709 commit 47447a7
Show file tree
Hide file tree
Showing 111 changed files with 264 additions and 291 deletions.
17 changes: 6 additions & 11 deletions prez/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,8 @@ async def cql_post_parser_dependency(
) -> CQLParser:
try:
body = await request.json()
context = json.load(
(Path(__file__).parent / "reference_data/cql/default_context.json").open()
)
cql_parser = CQLParser(
cql=body, context=context, queryable_props=queryable_props
cql=body, queryable_props=queryable_props
)
cql_parser.generate_jsonld()
cql_parser.parse()
Expand All @@ -175,16 +172,14 @@ async def cql_get_parser_dependency(
try:
crs = query_params.filter_crs
query = json.loads(query_params.filter)
context = json.load(
(
Path(__file__).parent / "reference_data/cql/default_context.json"
).open()
)
cql_parser = CQLParser(
cql=query, context=context, crs=crs, queryable_props=queryable_props
cql=query, crs=crs, queryable_props=queryable_props
)
cql_parser.generate_jsonld()
cql_parser.parse()
try:
cql_parser.parse()
except Exception as e:
raise e
return cql_parser
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="Invalid JSON format.")
Expand Down
4 changes: 2 additions & 2 deletions prez/examples/cql/geo_contains_inverse.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@
"op": "=",
"args": [
{
"property": "^http://www.w3.org/2000/01/rdf-schema#member"
"property": "http://www.w3.org/2000/01/rdf-schema#member"
},
{ "@id": "http://example.com/datasets/sandgate/facilities" }
"http://example.com/datasets/sandgate/facilities"
]
}
]
Expand Down
38 changes: 10 additions & 28 deletions prez/examples/cql/geo_crosses.json
Original file line number Diff line number Diff line change
@@ -1,35 +1,17 @@
{
"op": "s_crosses",
"args": [
{
"property": "geometry"
},
{ "property": "geometry" },
{
"type": "LineString",
"coordinates": [
[
[
153.06307,
-27.3151243
],
[
153.069877,
-27.3151243
],
[
153.069877,
-27.2859541
],
[
153.06307,
-27.2859541
],
[
153.06307,
-27.3151243
]
]
]
"coordinates": [ [ 172.03086, 1.5 ],
[ 1.1, -90.0 ],
[ -159.757695, 0.99999 ],
[ -180.0, 0.5 ],
[ -12.111235, 81.336403 ],
[ -0.5, 64.43958 ],
[ 0.0, 81.991815 ],
[ -155.93831, 90.0 ] ]
}
]
}
}
19 changes: 11 additions & 8 deletions prez/repositories/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,17 @@ async def send_queries(
tabular_queries: List[Tuple[URIRef | None, str]] = None,
) -> Tuple[Graph, List]:
# Common logic to send both query types in parallel
results = await asyncio.gather(
*[self.rdf_query_to_graph(query) for query in rdf_queries if query],
*[
self.tabular_query_to_table(query, context)
for context, query in tabular_queries
if query
],
)
try:
results = await asyncio.gather(
*[self.rdf_query_to_graph(query) for query in rdf_queries if query],
*[
self.tabular_query_to_table(query, context)
for context, query in tabular_queries
if query
],
)
except Exception as e:
print(e)
g = Graph(namespace_manager=prefix_graph.namespace_manager)
tabular_results = []
for result in results:
Expand Down
105 changes: 45 additions & 60 deletions prez/services/query_generation/cql.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ class CQLParser:
def __init__(
self,
cql=None,
context: dict = None,
cql_json: dict = None,
crs=None,
queryable_props=None,
Expand All @@ -98,7 +97,6 @@ def __init__(
self.inner_select_gpnt_list = None
self.inner_select_vars: list[Var] = []
self.cql: dict = cql
self.context = context
self.cql_json = cql_json
self.var_counter = 0
self.query_object = None
Expand All @@ -110,8 +108,7 @@ def __init__(
self.queryable_props = queryable_props

def generate_jsonld(self):
combined = {"@context": self.context, **self.cql}
self.cql_json = jsonld.expand(combined, options={"base": "h"})[0]
self.cql_json = self.cql

def parse(self):
root = self.cql_json
Expand Down Expand Up @@ -142,8 +139,8 @@ def parse(self):
def parse_logical_operators(
self, element, existing_ggps=None
) -> Generator[GroupGraphPatternSub, None, None]:
operator = element.get(str(CQL.operator))[0].get("@value")
args = element.get(str(CQL.args))
operator = element.get("op")
args = element.get("args")

ggps = existing_ggps if existing_ggps is not None else GroupGraphPatternSub()

Expand Down Expand Up @@ -208,11 +205,11 @@ def _add_triple(self, ggps, subject, predicate, object):
def _handle_comparison(self, operator, args, existing_ggps=None):
ggps, object = self._add_tss_tssp(args, existing_ggps)

val = args[1].get("@value")
val = args[1]
if not val: # then should be an IRI
val = args[1].get("@id")
val = args[1]
value = IRI(value=val)
elif val.startswith("http"): # hack
elif isinstance(val, str) and val.startswith("http"): # hack
value = IRI(value=val)
elif isinstance(val, str): # literal string
value = RDFLiteral(value=val)
Expand Down Expand Up @@ -240,7 +237,7 @@ def _handle_comparison(self, operator, args, existing_ggps=None):
def _add_tss_tssp(self, args, existing_ggps):
self.var_counter += 1
ggps = existing_ggps if existing_ggps is not None else GroupGraphPatternSub()
prop = args[0].get(str(CQL.property))[0].get("@id")
prop = args[0].get("property")
if prop in self.queryable_props:
object = self._handle_shacl_defined_prop(prop)
else:
Expand All @@ -255,7 +252,6 @@ def _handle_like(self, args, existing_ggps=None):

value = (
args[1]
.get("@value")
.replace("%", ".*")
.replace("_", ".")
.replace("\\", "\\\\")
Expand Down Expand Up @@ -286,16 +282,16 @@ def _handle_spatial(self, operator, args, existing_ggps=None):
self.var_counter += 1
ggps = existing_ggps if existing_ggps is not None else GroupGraphPatternSub()

coordinates_list = args[1].get("http://example.com/vocab/coordinates")
coordinates, geom_type = self._extract_spatial_info(coordinates_list, args)
if geom_type in ["Polygon", "MultiPolygon"]:
coordinates = [coordinates]
coordinates = args[1].get("coordinates")
geom_type = args[1].get("type")
if args[1].get("bbox"):
geom_type = "Polygon"

if coordinates:
wkt = get_wkt_from_coords(coordinates, geom_type)
wkt_with_crs = f"<{self.crs}> {wkt}"
prop = args[0].get(str(CQL.property))[0].get("@id")
if prop == "http://example.com/geometry":
prop = args[0].get("property")
if prop == "geometry":
subject = Var(value="focus_node")
else:
subject = IRI(value=prop)
Expand Down Expand Up @@ -327,11 +323,17 @@ def _handle_spatial(self, operator, args, existing_ggps=None):
def _handle_in(self, args, existing_ggps=None):
ggps, object = self._add_tss_tssp(args, existing_ggps)

literal_values = [item["@value"] for item in args if "@value" in item]
uri_values = [item["@id"] for item in args if "@id" in item]
for i, lit_val in enumerate(literal_values):
if lit_val.startswith("http"): # hack
uri_values.append(literal_values.pop(i))
uri_values = []
literal_values = []
numeric_values = []
for arg in args[1]:
if isinstance(arg, str) and arg.startswith("http"):
uri_values.append(arg)
elif isinstance(arg, (int, float)):
numeric_values.append(arg)
else:
literal_values.append(arg)

grammar_uri_values = [IRI(value=URIRef(value)) for value in uri_values]
grammar_literal_values = []
for val in literal_values:
Expand Down Expand Up @@ -367,15 +369,16 @@ def _extract_spatial_info(self, coordinates_list, args):
coordinates = []
geom_type = None
if coordinates_list:
coordinates = [
[coordinates_list[i]["@value"], coordinates_list[i + 1]["@value"]]
for i in range(0, len(coordinates_list), 2)
]
geom_type = args[1]["http://www.opengis.net/ont/sf#type"][0]["@value"]
bbox_list = args[1].get("http://example.com/vocab/bbox")
# coordinates = [
# [coordinates_list[i], coordinates_list[i + 1]]
# for i in range(0, len(coordinates_list), 2)
# ]
coordinates = coordinates_list
geom_type = args[1].get("type")
bbox_list = args[1].get("bbox")
if bbox_list:
geom_type = "Polygon"
bbox_values = [item["@value"] for item in bbox_list]
bbox_values = [item for item in bbox_list]
coordinates = format_coordinates_as_wkt(bbox_values, coordinates)
return coordinates, geom_type

Expand All @@ -390,35 +393,34 @@ def _handle_temporal(self, comp_func, args, existing_ggps=None):
operands = {}
for i, arg in enumerate(args, start=1):
# check if the arg is an interval
interval_list = arg.get(str(CQL.interval))
interval_list = arg.get("interval")
if interval_list:
for n, item in enumerate(interval_list):
label = "start" if n == 0 else "end"
prop = item.get(str(CQL.property))
if prop:
self._triple_for_time_prop(ggps, i, label, prop, operands)
date_val = item.get("@value")
if date_val:
self._dt_to_rdf_literal(i, date_val, label, operands)
if isinstance(item, dict):
prop = item.get("property")
if prop:
self._triple_for_time_prop(ggps, i, label, prop, operands)
elif isinstance(item, str):
self._dt_to_rdf_literal(i, item, label, operands)
continue

# handle instants - prop and date
label = "instant"
# check if the arg is a property
prop = arg.get(str(CQL.property))
prop = arg.get("property")
if prop:
self._triple_for_time_prop(ggps, i, label, prop, operands)
continue

# check if the arg is a date
date = (
arg.get(str(CQL.date))
or arg.get(str(CQL.datetime))
or arg.get(str(CQL.timestamp))
arg.get("date")
or arg.get("datetime")
or arg.get("timestamp")
)
if date:
date_val = date[0].get("@value")
self._dt_to_rdf_literal(i, date_val, label, operands)
self._dt_to_rdf_literal(i, date, label, operands)

gpnt = self.process_temporal_function(comp_func, operands)

Expand Down Expand Up @@ -479,28 +481,11 @@ def process_temporal_function(self, comp_func, operands):
)

def _triple_for_time_prop(self, ggps, i, label, prop, operands):
prop_uri = prop[0].get("@id")
value = IRI(value=prop_uri)
value = IRI(value=prop)
var = Var(value=f"dt_{i}_{label}")
operands[f"t{i}_{label}"] = var
self._add_triple(ggps, Var(value="focus_node"), value, var)

def _handle_interval_list(self, all_args, comparator_args, interval_list):
for item in interval_list:
if item.get(str(CQL.property)):
prop = item.get(str(CQL.property))[0].get("@id")
comparator_args.append(IRI(value=prop))
elif item.get("@value"):
val = item.get("@value")
# self._dt_to_rdf_literal(comparator_args, val)
dt, _ = parse_datetime(val)
comparator_args.append(
RDFLiteral(
value=dt.isoformat(),
datatype=IRI(value="http://www.w3.org/2001/XMLSchema#dateTime"),
)
)
all_args.append(comparator_args)

def _dt_to_rdf_literal(self, i, dt_str, label, operands):
if dt_str == "..":
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CONSTRUCT {
?focus_node <http://example.com/updated_at> ?dt_1_instant
?focus_node <ex:updated_at> ?dt_1_instant
}
WHERE {
?focus_node <http://example.com/updated_at> ?dt_1_instant
?focus_node <ex:updated_at> ?dt_1_instant
FILTER (?dt_1_instant > "2012-08-10T05:30:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> || ?dt_1_instant < "2012-08-10T05:30:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime>)
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
CONSTRUCT {
?focus_node <http://example.com/ends_at> ?dt_1_end .
?focus_node <http://example.com/starts_at> ?dt_1_start
?focus_node <ex:ends_at> ?dt_1_end .
?focus_node <ex:starts_at> ?dt_1_start
}
WHERE {
?focus_node <http://example.com/ends_at> ?dt_1_end .
?focus_node <http://example.com/starts_at> ?dt_1_start
?focus_node <ex:ends_at> ?dt_1_end .
?focus_node <ex:starts_at> ?dt_1_start

FILTER (?dt_1_start > "2017-06-10T07:30:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> && ?dt_1_end < "2017-06-11T10:30:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime>)
}
4 changes: 2 additions & 2 deletions test_data/cql/expected_generated_queries/clause7_12.rq
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
CONSTRUCT {
?focus_node <http://example.com/event_time> ?dt_1_instant
?focus_node <ex:event_time> ?dt_1_instant
}
WHERE {
?focus_node <http://example.com/event_time> ?dt_1_instant
?focus_node <ex:event_time> ?dt_1_instant
FILTER (! (?dt_1_instant > "1969-07-24T16:50:35+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> || ?dt_1_instant < "1969-07-16T05:32:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime>))
}
8 changes: 4 additions & 4 deletions test_data/cql/expected_generated_queries/clause7_13.rq
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
CONSTRUCT {
?focus_node <http://example.com/liftOff> ?dt_1_end .
?focus_node <http://example.com/touchdown> ?dt_1_start
?focus_node <ex:liftOff> ?dt_1_end .
?focus_node <ex:touchdown> ?dt_1_start
}
WHERE {
?focus_node <http://example.com/liftOff> ?dt_1_end .
?focus_node <http://example.com/touchdown> ?dt_1_start
?focus_node <ex:liftOff> ?dt_1_end .
?focus_node <ex:touchdown> ?dt_1_start

FILTER (?dt_1_start > "1969-07-16T13:32:00+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime> && ?dt_1_end < "1969-07-24T16:50:35+00:00"^^<http://www.w3.org/2001/XMLSchema#dateTime>)
}
Loading

0 comments on commit 47447a7

Please sign in to comment.