Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QueryBuilder: Implementation of contains Filter Operator applied to JSON Fields for SQLite backend #6619

Open
wants to merge 33 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
a9165f9
add tests to `contains` filter operator on PostgreSQL backend
dependabot[bot] Nov 6, 2024
ab88f00
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
518cc6b
Merge branch 'main' into test-json-contains
rabbull Nov 19, 2024
ec36aae
temp
rabbull Nov 19, 2024
474dd26
add tests for nested arrays
rabbull Nov 19, 2024
a759d43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
5b7b62b
update
rabbull Nov 19, 2024
e23ec32
custom function
rabbull Nov 19, 2024
e530f03
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 19, 2024
6df03e3
cleanup
rabbull Nov 19, 2024
84c50f6
catchup
rabbull Nov 19, 2024
f787404
fix compilation error on py39
rabbull Nov 20, 2024
36c7102
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2024
dcd3cf9
add benchmark
rabbull Nov 20, 2024
d293d18
ignore benchmark results
rabbull Nov 20, 2024
a34623f
Merge branch 'test-json-contains' into sqlite-json-contains
rabbull Nov 20, 2024
079cc32
remove requires_psql marks for sqlite tests
rabbull Nov 20, 2024
598f821
temp
rabbull Nov 21, 2024
93ad037
add benchmark
rabbull Nov 23, 2024
93966cd
Merge branch 'sqlite-json-contains' of github.com:rabbull/aiida-core …
rabbull Nov 23, 2024
9293c67
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 23, 2024
180bec0
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Nov 26, 2024
edfe2b2
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Nov 26, 2024
2189a81
migrate sqlite filter tests to orm
rabbull Nov 26, 2024
ffa0b11
add comment on impl for psql of_type
rabbull Nov 26, 2024
1ca1d3c
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Dec 5, 2024
c699484
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2024
9d56802
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Dec 10, 2024
3eca114
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Dec 10, 2024
af544e7
add tests for custom functions
rabbull Dec 10, 2024
fbb7ee3
enable sqlite database backend testing in github actions
rabbull Dec 10, 2024
57beea7
add sqlite to coverage report workflow
rabbull Dec 10, 2024
7e78b22
Merge branch 'main' of github.com:rabbull/aiida-core into sqlite-json…
rabbull Dec 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@ pplot_out/

# docker
docker-bake.override.json

# benchmark
.benchmarks/
3 changes: 1 addition & 2 deletions src/aiida/storage/sqlite_zip/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,7 @@
return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False)

if operator == 'contains':
# to-do, see: https://github.com/sqlalchemy/sqlalchemy/discussions/7836
raise NotImplementedError('The operator `contains` is not implemented for SQLite-based storage plugins.')
return func.json_contains(database_entity, json.dumps(value))

Check warning on line 288 in src/aiida/storage/sqlite_zip/orm.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/orm.py#L288

Added line #L288 was not covered by tests

if operator == 'has_key':
return (
Expand Down
32 changes: 32 additions & 0 deletions src/aiida/storage/sqlite_zip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,44 @@
cursor.close()


def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
rabbull marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(lhs, dict) and isinstance(rhs, dict):
for key in rhs:
if key not in lhs or not _contains(lhs[key], rhs[key]):
return False
return True

Check warning on line 56 in src/aiida/storage/sqlite_zip/utils.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/utils.py#L52-L56

Added lines #L52 - L56 were not covered by tests

elif isinstance(lhs, list) and isinstance(rhs, list):
for item in rhs:
if not any(_contains(e, item) for e in lhs):
return False
return True

Check warning on line 62 in src/aiida/storage/sqlite_zip/utils.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/utils.py#L58-L62

Added lines #L58 - L62 were not covered by tests
else:
return lhs == rhs

Check warning on line 64 in src/aiida/storage/sqlite_zip/utils.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/utils.py#L64

Added line #L64 was not covered by tests


def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
try:
if isinstance(lhs, (str, bytes, bytearray)):
lhs = json.loads(lhs)
if isinstance(rhs, (str, bytes, bytearray)):
rhs = json.loads(rhs)
except json.JSONDecodeError:
return 0
return int(_contains(lhs, rhs))

Check warning on line 75 in src/aiida/storage/sqlite_zip/utils.py

View check run for this annotation

Codecov / codecov/patch

src/aiida/storage/sqlite_zip/utils.py#L68-L75

Added lines #L68 - L75 were not covered by tests


def register_json_contains(dbapi_connection, _):
    """Expose :func:`_json_contains` to SQL as the two-argument function ``json_contains``.

    Intended to be attached to the engine's ``connect`` event so every new
    DBAPI connection gets the function registered.
    """
    name, num_params = 'json_contains', 2
    dbapi_connection.create_function(name, num_params, _json_contains)


def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine:
    """Create a new engine instance.

    :param path: filesystem path of the SQLite database file.
    :param enforce_foreign_keys: when ``True``, enable foreign-key enforcement on each new connection.
    :param kwargs: forwarded verbatim to :func:`sqlalchemy.create_engine`.
    :return: the configured SQLAlchemy engine.
    """
    uri = f'sqlite:///{path}'
    engine = create_engine(uri, json_serializer=json.dumps, json_deserializer=json.loads, **kwargs)
    # Per-connection hooks; `json_contains` backs the `contains` filter operator.
    event.listen(engine, 'connect', sqlite_case_sensitive_like)
    if enforce_foreign_keys:
        event.listen(engine, 'connect', sqlite_enforce_foreign_keys)
    event.listen(engine, 'connect', register_json_contains)
    return engine


Expand Down
138 changes: 138 additions & 0 deletions tests/benchmark/test_json_contains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import random
import string

import pytest

from aiida import orm
from aiida.orm.querybuilder import QueryBuilder

# Benchmark group shared by every test in this module.
GROUP_NAME = 'json-contains'


# Parameter spaces for the benchmarks below.
# NOTE(review): COMPLEX_JSON_DEPTH_RANGE and COMPLEX_JSON_BREADTH_RANGE are not
# referenced by the parametrized tests in this file (they use literal lists in
# their `parametrize` marks) — confirm whether they are still needed.
COMPLEX_JSON_DEPTH_RANGE = [2**i for i in range(4)]
COMPLEX_JSON_BREADTH_RANGE = [2**i for i in range(4)]
LARGE_TABLE_SIZE_RANGE = [2**i for i in range(1, 11)]


def gen_json(depth: int, breadth: int):
    """Generate a random JSON-serializable value of the given shape.

    ``depth == 0`` yields a random primitive (int, string, bool, or None);
    otherwise a dict or list (chosen at random) with ``breadth`` children, each
    generated recursively with ``depth - 1``.
    """

    def gen_str(length: int, with_digits: bool = True) -> str:
        alphabet = string.ascii_letters
        if with_digits:
            alphabet += string.digits
        return ''.join(random.choices(alphabet, k=length))

    if depth == 0:
        # Real numbers are excluded: their equivalence under `contains` is tricky.
        candidates = [
            random.randint(-114, 514),  # integers
            gen_str(6),  # strings
            random.choice([True, False]),  # booleans
            None,  # nulls
        ]
        return random.choice(candidates)

    as_dict = random.choice([True, False])
    children = [gen_json(depth - 1, breadth) for _ in range(breadth)]
    if not as_dict:
        return children
    keys: set = set()
    while len(keys) < breadth:  # loop until `breadth` distinct keys are drawn
        keys.add(gen_str(6, False))
    return dict(zip(list(keys), children))


def extract_component(data, p: float = -1):
    """Return a randomly chosen sub-structure of ``data``.

    With probability ``p`` the value is returned whole; otherwise one random
    key (for a non-empty dict) or one random element (for a non-empty list) is
    followed.  Recursive calls use the default ``p = -1`` (never triggers), so
    once descent starts it continues all the way down to a leaf.
    """
    if random.random() < p:
        return data

    if isinstance(data, dict) and data:
        chosen_key = random.choice(list(data.keys()))
        return {chosen_key: extract_component(data[chosen_key])}
    if isinstance(data, list) and data:
        chosen_item = random.choice(data)
        return [extract_component(chosen_item)]
    return data


@pytest.mark.benchmark(group=GROUP_NAME)
@pytest.mark.parametrize('depth', [1, 2, 4, 8])
@pytest.mark.parametrize('breadth', [1, 2, 4])
@pytest.mark.usefixtures('aiida_profile_clean')
def test_deep_json(benchmark, depth, breadth):
    """Benchmark the `contains` filter on a single deeply nested JSON document."""
    document = gen_json(depth, breadth)
    needle = extract_component(document, p=1.0 / depth)
    # Sanity check: the clean profile starts with no Dict nodes.
    assert not QueryBuilder().append(orm.Dict).all()

    orm.Dict({'id': f'{depth}-{breadth}', 'data': document}).store()
    builder = QueryBuilder().append(
        orm.Dict,
        filters={'attributes.data': {'contains': needle}},
        project=['attributes.id'],
    )
    builder.all()  # warm-up run, excluded from the measurement
    matches = benchmark(builder.all)
    assert len(matches) == 1


@pytest.mark.benchmark(group=GROUP_NAME)
@pytest.mark.parametrize('depth', [2])
@pytest.mark.parametrize('breadth', [1, 10, 100])
@pytest.mark.usefixtures('aiida_profile_clean')
def test_wide_json(benchmark, depth, breadth):
    """Benchmark the `contains` filter on a single wide (high-breadth) JSON document."""
    document = gen_json(depth, breadth)
    needle = extract_component(document, p=1.0 / depth)
    # Sanity check: the clean profile starts with no Dict nodes.
    assert not QueryBuilder().append(orm.Dict).all()

    orm.Dict({'id': f'{depth}-{breadth}', 'data': document}).store()
    builder = QueryBuilder().append(
        orm.Dict,
        filters={'attributes.data': {'contains': needle}},
        project=['attributes.id'],
    )
    builder.all()  # warm-up run, excluded from the measurement
    matches = benchmark(builder.all)
    assert len(matches) == 1


@pytest.mark.benchmark(group=GROUP_NAME)
@pytest.mark.parametrize('num_entries', LARGE_TABLE_SIZE_RANGE)
@pytest.mark.usefixtures('aiida_profile_clean')
def test_large_table(benchmark, num_entries):
    """Benchmark the `contains` filter when the table holds many matching documents."""
    document = gen_json(2, 10)
    needle = extract_component(document)
    # Sanity check: the clean profile starts with no Dict nodes.
    assert not QueryBuilder().append(orm.Dict).all()

    # Store `num_entries` copies of the same document; all of them must match.
    for index in range(num_entries):
        orm.Dict({'id': f'N={num_entries}, i={index}', 'data': document}).store()
    builder = QueryBuilder().append(
        orm.Dict,
        filters={'attributes.data': {'contains': needle}},
        project=['attributes.id'],
    )
    builder.all()  # warm-up run, excluded from the measurement
    matches = benchmark(builder.all)
    assert len(matches) == num_entries
Loading
Loading