From a9165f9e08d331ccad8f02ee0b3d8f1ddfbc7012 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 6 Nov 2024 20:57:06 +0100 Subject: [PATCH 01/23] add tests to `contains` filter operator on PostgreSQL backend --- tests/orm/test_querybuilder.py | 112 +++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py index 862474bc76..c9e7c6a4d9 100644 --- a/tests/orm/test_querybuilder.py +++ b/tests/orm/test_querybuilder.py @@ -14,6 +14,7 @@ from collections import defaultdict from datetime import date, datetime, timedelta from itertools import chain +import json import pytest from aiida import orm, plugins @@ -1703,3 +1704,114 @@ def test_statistics_default_class(self, aiida_localhost): # data are correct res = next(iter(qb.dict()[0].values())) assert res == expected_dict + + +class TestJsonFilters: + @pytest.mark.parametrize( + 'data,filters,is_match', + ( + # contains different types of element + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), + + # contains multiple elements of various types + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), + + # contains non-exist elements + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), + + # contains empty set + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), + ({'arr': []}, {'attributes.arr': {'contains': []}}, True), + + # negations + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), + + # TODO: these pass, but why? are these behaviors expected? + # non-exist `attr_key`s + ({'foo': []}, {'attributes.arr': {'contains': []}}, False), + ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + @pytest.mark.requires_psql + def test_json_filters_contains_arrays(self, data, filters, is_match): + """Test QueryBuilder filter `contains` for JSON array fields""" + orm.Dict(data).store() + qb = orm.QueryBuilder().append(orm.Dict, filters=filters) + assert qb.count() in {0, 1} + found = (qb.count() == 1) + assert found == is_match + + @pytest.mark.parametrize( + 'data,filters,is_match', + ( + # contains different types of values + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'contains': {'k1': 1}}}, True), + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, True), + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'contains': {'k3': None}}}, True), + + # contains empty set + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'contains': {}}}, True), + + # doesn't contain non-exist entries + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, False), + + # negations + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'!contains': {'k1': 1}}}, False), + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, True), + ({'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + }}, {'attributes.dict': {'!contains': {}}}, False), + + # TODO: these pass, but why? are these behaviors expected? + # non-exist `attr_key`s + ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), + ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + @pytest.mark.requires_psql + def test_json_filters_contains_object(self, data, filters, is_match): + """Test QueryBuilder filter `contains` for JSON object fields""" + orm.Dict(data).store() + qb = orm.QueryBuilder().append(orm.Dict, filters=filters) + assert qb.count() in {0, 1} + found = (qb.count() == 1) + assert found == is_match From ab88f00b2c7e3f1621ac17fc83796444b36edd47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 12:30:18 +0000 Subject: [PATCH 02/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/orm/test_querybuilder.py | 143 +++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 52 deletions(-) diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py index c9e7c6a4d9..069a893ebb 100644 --- a/tests/orm/test_querybuilder.py +++ b/tests/orm/test_querybuilder.py @@ -9,12 +9,12 @@ """Tests for the QueryBuilder.""" import copy +import json import uuid import warnings from collections import defaultdict from datetime import date, datetime, timedelta from itertools import chain -import json import pytest from aiida import orm, plugins @@ -1714,23 +1714,18 @@ class TestJsonFilters: ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), - # contains multiple elements of various types ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), - # contains non-exist elements ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), - # contains empty set ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), ({'arr': []}, {'attributes.arr': {'contains': []}}, True), - # negations ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), - # TODO: these pass, but why? are these behaviors expected? # non-exist `attr_key`s ({'foo': []}, {'attributes.arr': {'contains': []}}, False), @@ -1745,60 +1740,104 @@ def test_json_filters_contains_arrays(self, data, filters, is_match): orm.Dict(data).store() qb = orm.QueryBuilder().append(orm.Dict, filters=filters) assert qb.count() in {0, 1} - found = (qb.count() == 1) + found = qb.count() == 1 assert found == is_match @pytest.mark.parametrize( 'data,filters,is_match', ( # contains different types of values - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'contains': {'k1': 1}}}, True), - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, True), - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'contains': {'k3': None}}}, True), - + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k3': None}}}, + True, + ), # contains empty set - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'contains': {}}}, True), - + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {}}}, + True, + ), # doesn't contain non-exist entries - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, False), - + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, + False, + ), # negations - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'!contains': {'k1': 1}}}, False), - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, True), - ({'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - }}, {'attributes.dict': {'!contains': {}}}, False), - + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {'k1': 1}}}, + False, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {}}}, + False, + ), # TODO: these pass, but why? are these behaviors expected? # non-exist `attr_key`s ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), @@ -1813,5 +1852,5 @@ def test_json_filters_contains_object(self, data, filters, is_match): orm.Dict(data).store() qb = orm.QueryBuilder().append(orm.Dict, filters=filters) assert qb.count() in {0, 1} - found = (qb.count() == 1) + found = qb.count() == 1 assert found == is_match From ec36aae73a8573d11a217ef343ae3dfe9bc19a98 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 19 Nov 2024 14:36:05 +0100 Subject: [PATCH 03/23] temp --- src/aiida/storage/sqlite_zip/orm.py | 28 ++++++++++++++++++--- tests/storage/sqlite/test_orm.py | 38 +++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py index 0f51c12534..f4dd63e8fc 100644 --- a/src/aiida/storage/sqlite_zip/orm.py +++ b/src/aiida/storage/sqlite_zip/orm.py @@ -17,7 +17,7 @@ from functools import singledispatch from typing import Any, List, Optional, Tuple, Union -from sqlalchemy import JSON, case, func, select +from sqlalchemy import JSON, case, func, select, true, not_ from sqlalchemy.orm.util import AliasedClass from sqlalchemy.sql import ColumnElement @@ -209,7 +209,7 @@ def _get_projectable_entity( @staticmethod def get_filter_expr_from_jsonb( - operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None + operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None, negation=None ): """Return a filter expression. @@ -285,8 +285,28 @@ def _cast_json_type(comparator: JSON.Comparator, value: Any) -> Tuple[ColumnElem return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False) if operator == 'contains': - # to-do, see: https://github.com/sqlalchemy/sqlalchemy/discussions/7836 - raise NotImplementedError('The operator `contains` is not implemented for SQLite-based storage plugins.') + if isinstance(value, list): + if not value or len(value) == 0: + if len(attr_key) == 0: + filter = true() + else: + filter = SqliteQueryBuilder.get_filter_expr_from_jsonb( + 'has_key', attr_key[-1], attr_key[:-1], column) + if negation: filter = not_(filter) # negation should not work for this operation + return filter + + subq = select(database_entity) \ + .where(func.json_each(database_entity) \ + .table_valued('value', joins_implicitly=True) \ + .c.value.in_(value)) \ + .correlate_except() + subsubq = select(func.count()).select_from(subq).scalar_subquery() + return subsubq == len(value) + + elif isinstance(value, dict): + raise NotImplementedError + else: + raise TypeError("contains filters can only have as a parameter a list (when matching against lists) or dictionaries (when matching against dictionaries)") if operator == 'has_key': return ( diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py index 0d859d6bac..ffdb24f272 100644 --- a/tests/storage/sqlite/test_orm.py +++ b/tests/storage/sqlite/test_orm.py @@ -129,6 +129,44 @@ def test_qb_json_filters(filters, matches): assert qbuilder.count() == matches +@pytest.mark.parametrize( + 'data,filters,is_match', + ( + # contains different types of element + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), + + # contains multiple elements of various types + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), + + # contains non-exist elements + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), + + # contains empty set + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), + ({'arr': []}, {'attributes.arr': {'contains': []}}, True), + ({'foo': []}, {'attributes.arr': {'contains': []}}, False), + + # negations + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), + ({'foo': [114, 514]}, {'attributes.arr': {'!contains': []}}, False), + ), + ids=json.dumps, +) +def test_qb_json_filters_contains_arrays(data, filters, is_match): + """Test QueryBuilder filter `contains` for JSON array fields""" + profile = SqliteTempBackend.create_profile(debug=False) + backend = SqliteTempBackend(profile) + Dict(data, backend=backend).store() + qb = QueryBuilder(backend=backend).append(Dict, filters=filters) + assert qb.count() in {0, 1} + found = (qb.count() == 1) + assert found == is_match + @pytest.mark.parametrize( 'filters,matches', ( From 474dd26727f865d2a999bfd04c5df644a614290a Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 19 Nov 2024 15:41:25 +0100 Subject: [PATCH 04/23] add tests for nested arrays --- tests/orm/test_querybuilder.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py index 0b2da78062..90419fb325 100644 --- a/tests/orm/test_querybuilder.py +++ b/tests/orm/test_querybuilder.py @@ -1720,6 +1720,15 @@ class TestJsonFilters: # contains empty set ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), ({'arr': []}, {'attributes.arr': {'contains': []}}, True), + + # nested arrays + ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), + ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter + ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False), + + # TODO: the test below is supposed to pass but currently doesn't + # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False), + # negations ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), From a759d43bbf73236bc9318da514ad37048a192bad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:41:47 +0000 Subject: [PATCH 05/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/orm/test_querybuilder.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py index 90419fb325..3be548e2ea 100644 --- a/tests/orm/test_querybuilder.py +++ b/tests/orm/test_querybuilder.py @@ -1720,15 +1720,12 @@ class TestJsonFilters: # contains empty set ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), ({'arr': []}, {'attributes.arr': {'contains': []}}, True), - # nested arrays ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), - ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter + ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False), - # TODO: the test below is supposed to pass but currently doesn't # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False), - # negations ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), From 5b7b62b0704c0f3711cb569d75cb33b21f652c02 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 19 Nov 2024 17:52:06 +0100 Subject: [PATCH 06/23] update --- tests/storage/sqlite/test_orm.py | 192 +++++++++++++++++++++++++------ 1 file changed, 157 insertions(+), 35 deletions(-) diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py index ffdb24f272..2ca95a74d2 100644 --- a/tests/storage/sqlite/test_orm.py +++ b/tests/storage/sqlite/test_orm.py @@ -129,43 +129,165 @@ def test_qb_json_filters(filters, matches): assert qbuilder.count() == matches -@pytest.mark.parametrize( - 'data,filters,is_match', - ( - # contains different types of element - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), - - # contains multiple elements of various types - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), - - # contains non-exist elements - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), +class TestJsonFilters: + @pytest.mark.parametrize( + 'data,filters,is_match', + ( + # contains different types of element + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), + # contains multiple elements of various types + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), + # contains non-exist elements + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), + # contains empty set + ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), + ({'arr': []}, {'attributes.arr': {'contains': []}}, True), + # nested arrays + ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), + ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter + ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False), + # TODO: the test below is supposed to pass but currently doesn't + # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False), + # negations + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), + ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), + # TODO: these pass, but why? are these behaviors expected? + # non-exist `attr_key`s + ({'foo': []}, {'attributes.arr': {'contains': []}}, False), + ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + @pytest.mark.requires_psql + def test_json_filters_contains_arrays(self, data, filters, is_match): + """Test QueryBuilder filter `contains` for JSON array fields""" + profile = SqliteTempBackend.create_profile(debug=False) + backend = SqliteTempBackend(profile) + Dict(data, backend=backend).store() + qb = QueryBuilder(backend=backend).append(Dict, filters=filters) + assert qb.count() in {0, 1} + found = qb.count() == 1 + assert found == is_match - # contains empty set - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), - ({'arr': []}, {'attributes.arr': {'contains': []}}, True), - ({'foo': []}, {'attributes.arr': {'contains': []}}, False), + @pytest.mark.parametrize( + 'data,filters,is_match', + ( + # contains different types of values + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k3': None}}}, + True, + ), + # contains empty set + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {}}}, + True, + ), + # doesn't contain non-exist entries + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, + False, + ), + # negations + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {'k1': 1}}}, + False, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, + True, + ), + ( + { + 'dict': { + 'k1': 1, + 'k2': '2', + 'k3': None, + } + }, + {'attributes.dict': {'!contains': {}}}, + False, + ), + # TODO: these pass, but why? are these behaviors expected? + # non-exist `attr_key`s + ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), + ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + @pytest.mark.requires_psql + def test_json_filters_contains_object(self, data, filters, is_match): + """Test QueryBuilder filter `contains` for JSON object fields""" + profile = SqliteTempBackend.create_profile(debug=False) + backend = SqliteTempBackend(profile) + Dict(data, backend=backend).store() + qb = QueryBuilder(backend=backend).append(Dict, filters=filters) + assert qb.count() in {0, 1} + found = qb.count() == 1 + assert found == is_match - # negations - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), - ({'foo': [114, 514]}, {'attributes.arr': {'!contains': []}}, False), - ), - ids=json.dumps, -) -def test_qb_json_filters_contains_arrays(data, filters, is_match): - """Test QueryBuilder filter `contains` for JSON array fields""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - Dict(data, backend=backend).store() - qb = QueryBuilder(backend=backend).append(Dict, filters=filters) - assert qb.count() in {0, 1} - found = (qb.count() == 1) - assert found == is_match @pytest.mark.parametrize( 'filters,matches', From e23ec32a0a8c943ef17ef4dc1438da3f11c4459a Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 19 Nov 2024 18:10:16 +0100 Subject: [PATCH 07/23] custom function --- src/aiida/storage/sqlite_zip/orm.py | 23 +--------------------- src/aiida/storage/sqlite_zip/utils.py | 28 ++++++++++++++++++++++++++- tests/storage/sqlite/test_orm.py | 4 ++-- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py index f4dd63e8fc..d0b04608d7 100644 --- a/src/aiida/storage/sqlite_zip/orm.py +++ b/src/aiida/storage/sqlite_zip/orm.py @@ -285,28 +285,7 @@ def _cast_json_type(comparator: JSON.Comparator, value: Any) -> Tuple[ColumnElem return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False) if operator == 'contains': - if isinstance(value, list): - if not value or len(value) == 0: - if len(attr_key) == 0: - filter = true() - else: - filter = SqliteQueryBuilder.get_filter_expr_from_jsonb( - 'has_key', attr_key[-1], attr_key[:-1], column) - if negation: filter = not_(filter) # negation should not work for this operation - return filter - - subq = select(database_entity) \ - .where(func.json_each(database_entity) \ - .table_valued('value', joins_implicitly=True) \ - .c.value.in_(value)) \ - .correlate_except() - subsubq = select(func.count()).select_from(subq).scalar_subquery() - return subsubq == len(value) - - elif isinstance(value, dict): - raise NotImplementedError - else: - raise TypeError("contains filters can only have as a parameter a list (when matching against lists) or dictionaries (when matching against dictionaries)") + return func.json_contains(database_entity, json.dumps(value)) if operator == 'has_key': return ( diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index 2438c18fcb..bbc35e8ae8 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -11,7 +11,7 @@ import json import zipfile from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Union, AnyStr from sqlalchemy import event from sqlalchemy.future.engine import Engine, create_engine @@ -47,6 +47,31 @@ def sqlite_case_sensitive_like(dbapi_connection, _): cursor.execute('PRAGMA case_sensitive_like=ON;') cursor.close() +def _contains(lhs: dict | list, rhs: dict | list): + if isinstance(lhs, dict) and isinstance(rhs, dict): + for key in rhs: + if key not in lhs or not _contains(lhs[key], rhs[key]): + return False + return True + elif isinstance(lhs, list) and isinstance(rhs, list): + for item in rhs: + if not any(_contains(element, item) for element in lhs): + return False + return True + else: + return lhs == rhs + +def _json_contains(json1_str: AnyStr, json2_str: AnyStr): + try: + json1 = json.loads(json1_str) + json2 = json.loads(json2_str) + except json.JSONDecodeError: + return 0 + return int(_contains(json1, json2)) + +def register_json_contains(dbapi_connection, _): + dbapi_connection.create_function('json_contains', 2, _json_contains) + def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine: """Create a new engine instance.""" @@ -54,6 +79,7 @@ def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = T event.listen(engine, 'connect', sqlite_case_sensitive_like) if enforce_foreign_keys: event.listen(engine, 'connect', sqlite_enforce_foreign_keys) + event.listen(engine, 'connect', register_json_contains) return engine diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py index 2ca95a74d2..7a73b30eb8 100644 --- a/tests/storage/sqlite/test_orm.py +++ b/tests/storage/sqlite/test_orm.py @@ -158,7 +158,7 @@ class TestJsonFilters: # TODO: these pass, but why? are these behaviors expected? # non-exist `attr_key`s ({'foo': []}, {'attributes.arr': {'contains': []}}, False), - ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), + # ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), ), ids=json.dumps, ) @@ -272,7 +272,7 @@ def test_json_filters_contains_arrays(self, data, filters, is_match): # TODO: these pass, but why? are these behaviors expected? # non-exist `attr_key`s ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), - ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), + # ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), ), ids=json.dumps, ) From e530f033d178276c023a531847e80276727176a9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:35:33 +0000 Subject: [PATCH 08/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/storage/sqlite_zip/orm.py | 2 +- src/aiida/storage/sqlite_zip/utils.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py index d0b04608d7..81e3c8d540 100644 --- a/src/aiida/storage/sqlite_zip/orm.py +++ b/src/aiida/storage/sqlite_zip/orm.py @@ -17,7 +17,7 @@ from functools import singledispatch from typing import Any, List, Optional, Tuple, Union -from sqlalchemy import JSON, case, func, select, true, not_ +from sqlalchemy import JSON, case, func, select from sqlalchemy.orm.util import AliasedClass from sqlalchemy.sql import ColumnElement diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index bbc35e8ae8..e04650e717 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -11,7 +11,7 @@ import json import zipfile from pathlib import Path -from typing import Any, Dict, Optional, Union, AnyStr +from typing import Any, AnyStr, Dict, Optional, Union from sqlalchemy import event from sqlalchemy.future.engine import Engine, create_engine @@ -47,6 +47,7 @@ def sqlite_case_sensitive_like(dbapi_connection, _): cursor.execute('PRAGMA case_sensitive_like=ON;') cursor.close() + def _contains(lhs: dict | list, rhs: dict | list): if isinstance(lhs, dict) and isinstance(rhs, dict): for key in rhs: @@ -61,6 +62,7 @@ def _contains(lhs: dict | list, rhs: dict | list): else: return lhs == rhs + def _json_contains(json1_str: AnyStr, json2_str: AnyStr): try: json1 = json.loads(json1_str) @@ -69,6 +71,7 @@ def _json_contains(json1_str: AnyStr, json2_str: AnyStr): return 0 return int(_contains(json1, json2)) + def register_json_contains(dbapi_connection, _): dbapi_connection.create_function('json_contains', 2, _json_contains) From 6df03e32673643e4cba4142e847967d70c6c6246 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 19 Nov 2024 18:36:08 +0100 Subject: [PATCH 09/23] cleanup --- src/aiida/storage/sqlite_zip/orm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py index d0b04608d7..e0532bf04a 100644 --- a/src/aiida/storage/sqlite_zip/orm.py +++ b/src/aiida/storage/sqlite_zip/orm.py @@ -209,7 +209,7 @@ def _get_projectable_entity( @staticmethod def get_filter_expr_from_jsonb( - operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None, negation=None + operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None ): """Return a filter expression. From f7874049d0c03f23599aa14e3d3c6e11e448f773 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Wed, 20 Nov 2024 01:10:02 +0100 Subject: [PATCH 10/23] fix compilation error on py39 --- src/aiida/storage/sqlite_zip/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index e04650e717..304ef7fa08 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -48,7 +48,7 @@ def sqlite_case_sensitive_like(dbapi_connection, _): cursor.close() -def _contains(lhs: dict | list, rhs: dict | list): +def _contains(lhs: Union[dict, list], rhs: Union[dict, list]): if isinstance(lhs, dict) and isinstance(rhs, dict): for key in rhs: if key not in lhs or not _contains(lhs[key], rhs[key]): @@ -63,13 +63,15 @@ def _contains(lhs: dict | list, rhs: dict | list): return lhs == rhs -def _json_contains(json1_str: AnyStr, json2_str: AnyStr): +def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]): try: - json1 = json.loads(json1_str) - json2 = json.loads(json2_str) + if isinstance(lhs, (str, bytes, bytearray)): + lhs = json.loads(lhs) + if isinstance(rhs, (str, bytes, bytearray)): + rhs = json.loads(rhs) except json.JSONDecodeError: return 0 - return int(_contains(json1, json2)) + return int(_contains(lhs, rhs)) def register_json_contains(dbapi_connection, _): From 36c71028fa937edeb670eb45515082307a00afac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Nov 2024 00:12:13 +0000 Subject: [PATCH 11/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/aiida/storage/sqlite_zip/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index 304ef7fa08..e92f6dcf00 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -11,7 +11,7 @@ import json import zipfile from pathlib import Path -from typing import Any, AnyStr, Dict, Optional, Union +from typing import Any, Dict, Optional, Union from sqlalchemy import event from sqlalchemy.future.engine import Engine, create_engine From dcd3cf9736f1f81f9023e8ab583dae695ccc5e6d Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Wed, 20 Nov 2024 03:45:32 +0100 Subject: [PATCH 12/23] add benchmark --- tests/benchmark/test_json_contains.py | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/benchmark/test_json_contains.py diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py new file mode 100644 index 0000000000..e95bd68b76 --- /dev/null +++ b/tests/benchmark/test_json_contains.py @@ -0,0 +1,102 @@ +import functools + +import pytest +import random +import string + +from aiida import orm +from aiida.orm.querybuilder import QueryBuilder + +GROUP_NAME = 'json-contains' + + +COMPLEX_JSON_DEPTH_RANGE=[2**i for i in range(4)] +COMPLEX_JSON_BREADTH_RANGE=[2**i for i in range(4)] +LARGE_TABLE_SIZE_RANGE=[2**i for i in range(1, 11)] + + +def gen_json(depth: int, breadth: int): + def gen_str(n: int, with_digits: bool = True): + population = string.ascii_letters + if with_digits: population += string.digits + return ''.join(random.choices(population, k=n)) + + if depth == 0: # random primitive value + # real numbers are not included as their equivalence is tricky + return random.choice([ + random.randint(-114, 514), # integers + gen_str(6), # strings + random.choice([True, False]), # booleans + None, # nulls + ]) + + else: + gen_dict = random.choice([True, False]) + data = [gen_json(depth - 1, breadth) for _ in range(breadth)] + if gen_dict: + keys = set() + while len(keys) < breadth: + keys.add(gen_str(6, False)) + data = dict(zip(list(keys), data)) + return data + + +def extract_component(data, p: float = -1): + if random.random() < p: + return data + + if isinstance(data, dict) and data: + key = random.choice(list(data.keys())) + return {key: extract_component(data[key])} + elif isinstance(data, list) and data: + element = random.choice(data) + return [extract_component(element)] + else: + return data + + +@pytest.mark.benchmark(group=GROUP_NAME) +@pytest.mark.parametrize('depth', COMPLEX_JSON_DEPTH_RANGE) +@pytest.mark.parametrize('breadth', COMPLEX_JSON_BREADTH_RANGE) +@pytest.mark.usefixtures('aiida_profile_clean') +def test_complex_json(benchmark, depth, breadth): + lhs = gen_json(depth, breadth) + rhs = extract_component(lhs, p=1./depth) + assert 0 == len(QueryBuilder().append(orm.Dict).all()) + + orm.Dict({ + 'id': f'{depth}-{breadth}', + 'data': lhs, + }).store() + qb = QueryBuilder().append(orm.Dict, filters={ + 'attributes.data': {'contains': rhs}, + }, project=[ + 'attributes.id' + ]) + qb.all() + result = benchmark(qb.all) + assert len(result) == 1 + + +@pytest.mark.benchmark(group=GROUP_NAME) +@pytest.mark.parametrize('num_entries', LARGE_TABLE_SIZE_RANGE) +@pytest.mark.usefixtures('aiida_profile_clean') +def test_large_table(benchmark, num_entries): + data = gen_json(2, 10) + rhs = extract_component(data) + assert 0 == len(QueryBuilder().append(orm.Dict).all()) + + for i in range(num_entries): + orm.Dict({ + 'id': f'N={num_entries}, i={i}', + 'data': data, + }).store() + qb = QueryBuilder().append(orm.Dict, filters={ + 'attributes.data': {'contains': rhs}, + }, project=[ + 'attributes.id' + ]) + qb.all() + result = benchmark(qb.all) + assert len(result) == num_entries + From d293d188acb365a1e349487d1be7f2c7bf413b0b Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Wed, 20 Nov 2024 03:46:19 +0100 Subject: [PATCH 13/23] ignore benchmark results --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index a4fdd01ebc..d5719a1208 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ pplot_out/ # docker docker-bake.override.json + +# benchmark +.benchmarks/ \ No newline at end of file From 079cc32f5e28d801a5514d3f8aa6c985cc43660e Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Wed, 20 Nov 2024 05:17:55 +0100 Subject: [PATCH 14/23] remove requires_psql marks for sqlite tests --- tests/storage/sqlite/test_orm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py index 7a73b30eb8..6fb9dc077a 100644 --- a/tests/storage/sqlite/test_orm.py +++ b/tests/storage/sqlite/test_orm.py @@ -163,7 +163,6 @@ class TestJsonFilters: ids=json.dumps, ) @pytest.mark.usefixtures('aiida_profile_clean') - @pytest.mark.requires_psql def test_json_filters_contains_arrays(self, data, filters, is_match): """Test QueryBuilder filter `contains` for JSON array fields""" profile = SqliteTempBackend.create_profile(debug=False) @@ -277,7 +276,6 @@ def test_json_filters_contains_arrays(self, data, filters, is_match): ids=json.dumps, ) @pytest.mark.usefixtures('aiida_profile_clean') - @pytest.mark.requires_psql def test_json_filters_contains_object(self, data, filters, is_match): """Test QueryBuilder filter `contains` for JSON object fields""" profile = SqliteTempBackend.create_profile(debug=False) From 598f821e00109b517fb99f4126af7f9bc0549fc2 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Thu, 21 Nov 2024 19:18:45 +0100 Subject: [PATCH 15/23] temp --- src/aiida/storage/sqlite_zip/utils.py | 96 ++++++++++++++++++++------- tests/benchmark/test_json_contains.py | 1 - 2 files changed, 73 insertions(+), 24 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index 304ef7fa08..2027685fd7 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -11,7 +11,8 @@ import json import zipfile from pathlib import Path -from typing import Any, AnyStr, Dict, Optional, Union +from typing import Any, Dict, Optional, Union +from collections import deque from sqlalchemy import event from sqlalchemy.future.engine import Engine, create_engine @@ -48,30 +49,79 @@ def sqlite_case_sensitive_like(dbapi_connection, _): cursor.close() -def _contains(lhs: Union[dict, list], rhs: Union[dict, list]): - if isinstance(lhs, dict) and isinstance(rhs, dict): - for key in rhs: - if key not in lhs or not _contains(lhs[key], rhs[key]): - return False - return True - elif isinstance(lhs, list) and isinstance(rhs, list): - for item in rhs: - if not any(_contains(element, item) for element in lhs): - return False - return True - else: - return lhs == rhs - - -def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]): - try: - if isinstance(lhs, (str, bytes, bytearray)): +# def _contains(lhs: Union[dict, list], rhs: Union[dict, list]): +# if isinstance(lhs, dict) and isinstance(rhs, dict): +# for key in rhs: +# if key not in lhs or not _contains(lhs[key], rhs[key]): +# return False +# return True +# elif isinstance(lhs, list) and isinstance(rhs, list): +# for item in rhs: +# if not any(_contains(element, item) for element in lhs): +# return False +# return True +# else: +# return lhs == rhs + + +# def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]): +# try: +# if isinstance(lhs, (str, bytes, bytearray)): +# lhs = json.loads(lhs) +# if isinstance(rhs, (str, bytes, bytearray)): +# rhs = json.loads(rhs) +# except json.JSONDecodeError: +# return 0 +# return int(_contains(lhs, rhs)) + +def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], + rhs: Union[str, bytes, bytearray, dict, list]) -> int: + if isinstance(lhs, (str, bytes, bytearray)): + try: lhs = json.loads(lhs) - if isinstance(rhs, (str, bytes, bytearray)): + except json.JSONDecodeError: + return 0 + if isinstance(rhs, (str, bytes, bytearray)): + try: rhs = json.loads(rhs) - except json.JSONDecodeError: - return 0 - return int(_contains(lhs, rhs)) + except json.JSONDecodeError: + return 0 + + stack = deque() + stack.append((lhs, rhs)) + while stack.count() > 0: + l, r = stack.popleft() + if isinstance(l, dict): + if not isinstance(r, dict): + return 0 + for key, value in r.items(): + if key not in l: return 0 + stack.append((l[key], value)) + elif isinstance(l, list): + if not isinstance(r, list): + return 0 + lp, lo = set(), [] + for e in l: + if isinstance(e, (dict, list)): + lo.append(e) + else: + lp.add(e) + rp, ro = set(), [] + for e in r: + if isinstance(e, (dict, list)): + ro.append(e) + else: + rp.add(e) + if not lp.issuperset(rp): + return 0 + for le in lo: + for re in ro: + stack.append((le, re)) + else: + return int(l == r) + return 1 + + def register_json_contains(dbapi_connection, _): diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py index e95bd68b76..4d6f0e2002 100644 --- a/tests/benchmark/test_json_contains.py +++ b/tests/benchmark/test_json_contains.py @@ -99,4 +99,3 @@ def test_large_table(benchmark, num_entries): qb.all() result = benchmark(qb.all) assert len(result) == num_entries - From 93ad037cb06eb82af9517f7b777ba28395c28ee3 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Sat, 23 Nov 2024 23:31:12 +0100 Subject: [PATCH 16/23] add benchmark --- src/aiida/storage/sqlite_zip/utils.py | 94 +++++++-------------------- tests/benchmark/test_json_contains.py | 31 +++++++-- 2 files changed, 49 insertions(+), 76 deletions(-) diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py index 2027685fd7..c2a2a0ace2 100644 --- a/src/aiida/storage/sqlite_zip/utils.py +++ b/src/aiida/storage/sqlite_zip/utils.py @@ -49,79 +49,31 @@ def sqlite_case_sensitive_like(dbapi_connection, _): cursor.close() -# def _contains(lhs: Union[dict, list], rhs: Union[dict, list]): -# if isinstance(lhs, dict) and isinstance(rhs, dict): -# for key in rhs: -# if key not in lhs or not _contains(lhs[key], rhs[key]): -# return False -# return True -# elif isinstance(lhs, list) and isinstance(rhs, list): -# for item in rhs: -# if not any(_contains(element, item) for element in lhs): -# return False -# return True -# else: -# return lhs == rhs - - -# def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]): -# try: -# if isinstance(lhs, (str, bytes, bytearray)): -# lhs = json.loads(lhs) -# if isinstance(rhs, (str, bytes, bytearray)): -# rhs = json.loads(rhs) -# except json.JSONDecodeError: -# return 0 -# return int(_contains(lhs, rhs)) - -def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], - rhs: Union[str, bytes, bytearray, dict, list]) -> int: - if isinstance(lhs, (str, bytes, bytearray)): - try: - lhs = json.loads(lhs) - except json.JSONDecodeError: - return 0 - if isinstance(rhs, (str, bytes, bytearray)): - try: - rhs = json.loads(rhs) - except json.JSONDecodeError: - return 0 - - stack = deque() - stack.append((lhs, rhs)) - while stack.count() > 0: - l, r = stack.popleft() - if isinstance(l, dict): - if not isinstance(r, dict): - return 0 - for key, value in r.items(): - if key not in l: return 0 - stack.append((l[key], value)) - elif isinstance(l, list): - if not isinstance(r, list): - return 0 - lp, lo = set(), [] - for e in l: - if isinstance(e, (dict, list)): - lo.append(e) - else: - lp.add(e) - rp, ro = set(), [] - for e in r: - if isinstance(e, (dict, list)): - ro.append(e) - else: - rp.add(e) - if not lp.issuperset(rp): - return 0 - for le in lo: - for re in ro: - stack.append((le, re)) - else: - return int(l == r) - return 1 +def _contains(lhs: Union[dict, list], rhs: Union[dict, list]): + if isinstance(lhs, dict) and isinstance(rhs, dict): + for key in rhs: + if key not in lhs or not _contains(lhs[key], rhs[key]): + return False + return True + + elif isinstance(lhs, list) and isinstance(rhs, list): + for item in rhs: + if not any(_contains(e, item) for e in lhs): + return False + return True + else: + return lhs == rhs +def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]): + try: + if isinstance(lhs, (str, bytes, bytearray)): + lhs = json.loads(lhs) + if isinstance(rhs, (str, bytes, bytearray)): + rhs = json.loads(rhs) + except json.JSONDecodeError: + return 0 + return int(_contains(lhs, rhs)) def register_json_contains(dbapi_connection, _): diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py index 4d6f0e2002..40afd515be 100644 --- a/tests/benchmark/test_json_contains.py +++ b/tests/benchmark/test_json_contains.py @@ -1,5 +1,3 @@ -import functools - import pytest import random import string @@ -56,10 +54,33 @@ def extract_component(data, p: float = -1): @pytest.mark.benchmark(group=GROUP_NAME) -@pytest.mark.parametrize('depth', COMPLEX_JSON_DEPTH_RANGE) -@pytest.mark.parametrize('breadth', COMPLEX_JSON_BREADTH_RANGE) +@pytest.mark.parametrize('depth', [1, 2, 4, 8]) +@pytest.mark.parametrize('breadth', [1, 2, 4]) +@pytest.mark.usefixtures('aiida_profile_clean') +def test_deep_json(benchmark, depth, breadth): + lhs = gen_json(depth, breadth) + rhs = extract_component(lhs, p=1./depth) + assert 0 == len(QueryBuilder().append(orm.Dict).all()) + + orm.Dict({ + 'id': f'{depth}-{breadth}', + 'data': lhs, + }).store() + qb = QueryBuilder().append(orm.Dict, filters={ + 'attributes.data': {'contains': rhs}, + }, project=[ + 'attributes.id' + ]) + qb.all() + result = benchmark(qb.all) + assert len(result) == 1 + + +@pytest.mark.benchmark(group=GROUP_NAME) +@pytest.mark.parametrize('depth', [2]) +@pytest.mark.parametrize('breadth', [1, 10, 100]) @pytest.mark.usefixtures('aiida_profile_clean') -def test_complex_json(benchmark, depth, breadth): +def test_wide_json(benchmark, depth, breadth): lhs = gen_json(depth, breadth) rhs = extract_component(lhs, p=1./depth) assert 0 == len(QueryBuilder().append(orm.Dict).all()) From 9293c67b98c5f95b3a603d98cd70eae52f51ffea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 23 Nov 2024 22:32:22 +0000 Subject: [PATCH 17/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .gitignore | 2 +- tests/benchmark/test_json_contains.py | 95 ++++++++++++++++----------- 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/.gitignore b/.gitignore index d5719a1208..975a88cd25 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,4 @@ pplot_out/ docker-bake.override.json # benchmark -.benchmarks/ \ No newline at end of file +.benchmarks/ diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py index 40afd515be..87ecd3f3a2 100644 --- a/tests/benchmark/test_json_contains.py +++ b/tests/benchmark/test_json_contains.py @@ -1,32 +1,35 @@ -import pytest import random import string +import pytest from aiida import orm from aiida.orm.querybuilder import QueryBuilder GROUP_NAME = 'json-contains' -COMPLEX_JSON_DEPTH_RANGE=[2**i for i in range(4)] -COMPLEX_JSON_BREADTH_RANGE=[2**i for i in range(4)] -LARGE_TABLE_SIZE_RANGE=[2**i for i in range(1, 11)] +COMPLEX_JSON_DEPTH_RANGE = [2**i for i in range(4)] +COMPLEX_JSON_BREADTH_RANGE = [2**i for i in range(4)] +LARGE_TABLE_SIZE_RANGE = [2**i for i in range(1, 11)] def gen_json(depth: int, breadth: int): def gen_str(n: int, with_digits: bool = True): population = string.ascii_letters - if with_digits: population += string.digits + if with_digits: + population += string.digits return ''.join(random.choices(population, k=n)) if depth == 0: # random primitive value # real numbers are not included as their equivalence is tricky - return random.choice([ - random.randint(-114, 514), # integers - gen_str(6), # strings - random.choice([True, False]), # booleans - None, # nulls - ]) + return random.choice( + [ + random.randint(-114, 514), # integers + gen_str(6), # strings + random.choice([True, False]), # booleans + None, # nulls + ] + ) else: gen_dict = random.choice([True, False]) @@ -59,18 +62,22 @@ def extract_component(data, p: float = -1): @pytest.mark.usefixtures('aiida_profile_clean') def test_deep_json(benchmark, depth, breadth): lhs = gen_json(depth, breadth) - rhs = extract_component(lhs, p=1./depth) + rhs = extract_component(lhs, p=1.0 / depth) assert 0 == len(QueryBuilder().append(orm.Dict).all()) - orm.Dict({ - 'id': f'{depth}-{breadth}', - 'data': lhs, - }).store() - qb = QueryBuilder().append(orm.Dict, filters={ - 'attributes.data': {'contains': rhs}, - }, project=[ - 'attributes.id' - ]) + orm.Dict( + { + 'id': f'{depth}-{breadth}', + 'data': lhs, + } + ).store() + qb = QueryBuilder().append( + orm.Dict, + filters={ + 'attributes.data': {'contains': rhs}, + }, + project=['attributes.id'], + ) qb.all() result = benchmark(qb.all) assert len(result) == 1 @@ -82,18 +89,22 @@ def test_deep_json(benchmark, depth, breadth): @pytest.mark.usefixtures('aiida_profile_clean') def test_wide_json(benchmark, depth, breadth): lhs = gen_json(depth, breadth) - rhs = extract_component(lhs, p=1./depth) + rhs = extract_component(lhs, p=1.0 / depth) assert 0 == len(QueryBuilder().append(orm.Dict).all()) - orm.Dict({ - 'id': f'{depth}-{breadth}', - 'data': lhs, - }).store() - qb = QueryBuilder().append(orm.Dict, filters={ - 'attributes.data': {'contains': rhs}, - }, project=[ - 'attributes.id' - ]) + orm.Dict( + { + 'id': f'{depth}-{breadth}', + 'data': lhs, + } + ).store() + qb = QueryBuilder().append( + orm.Dict, + filters={ + 'attributes.data': {'contains': rhs}, + }, + project=['attributes.id'], + ) qb.all() result = benchmark(qb.all) assert len(result) == 1 @@ -108,15 +119,19 @@ def test_large_table(benchmark, num_entries): assert 0 == len(QueryBuilder().append(orm.Dict).all()) for i in range(num_entries): - orm.Dict({ - 'id': f'N={num_entries}, i={i}', - 'data': data, - }).store() - qb = QueryBuilder().append(orm.Dict, filters={ - 'attributes.data': {'contains': rhs}, - }, project=[ - 'attributes.id' - ]) + orm.Dict( + { + 'id': f'N={num_entries}, i={i}', + 'data': data, + } + ).store() + qb = QueryBuilder().append( + orm.Dict, + filters={ + 'attributes.data': {'contains': rhs}, + }, + project=['attributes.id'], + ) qb.all() result = benchmark(qb.all) assert len(result) == num_entries From 2189a81ba63ea39664891f5640041f8b63e7d563 Mon Sep 17 00:00:00 2001 From: Zisen Liu Date: Tue, 26 Nov 2024 11:55:03 +0100 Subject: [PATCH 18/23] migrate sqlite filter tests to orm --- .../storage/psql_dos/orm/querybuilder/main.py | 12 +- tests/orm/test_querybuilder.py | 258 ++++++++++++- tests/storage/sqlite/test_orm.py | 358 ------------------ 3 files changed, 247 insertions(+), 381 deletions(-) delete mode 100644 tests/storage/sqlite/test_orm.py diff --git a/src/aiida/storage/psql_dos/orm/querybuilder/main.py b/src/aiida/storage/psql_dos/orm/querybuilder/main.py index cf18134c0f..743b397760 100644 --- a/src/aiida/storage/psql_dos/orm/querybuilder/main.py +++ b/src/aiida/storage/psql_dos/orm/querybuilder/main.py @@ -625,7 +625,7 @@ def cast_according_to_type(path_in_json, value): elif isinstance(value, dict) or value is None: type_filter = jsonb_typeof(path_in_json) == 'object' casted_entity = path_in_json.astext.cast(JSONB) # BOOLEANS? - elif isinstance(value, dict): + elif isinstance(value, list): type_filter = jsonb_typeof(path_in_json) == 'array' casted_entity = path_in_json.astext.cast(JSONB) # BOOLEANS? elif isinstance(value, str): @@ -661,10 +661,16 @@ def cast_according_to_type(path_in_json, value): elif operator == 'of_type': # http://www.postgresql.org/docs/9.5/static/functions-json.html # Possible types are object, array, string, number, boolean, and null. - valid_types = ('object', 'array', 'string', 'number', 'boolean', 'null') + value_types = ('object', 'array', 'string', 'number', 'boolean') + null_types = ('null',) + valid_types = value_types + null_types if value not in valid_types: raise ValueError(f'value {value} for of_type is not among valid types\n{valid_types}') - expr = jsonb_typeof(database_entity) == value + if value in value_types: + expr = jsonb_typeof(database_entity) == value + elif value in null_types: + tp = jsonb_typeof(database_entity) + expr = or_(tp == 'null', tp.is_(None)) elif operator == 'like': type_filter, casted_entity = cast_according_to_type(database_entity, value) expr = case((type_filter, casted_entity.like(value)), else_=False) diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py index 8797fe4e03..9cc780e4e9 100644 --- a/tests/orm/test_querybuilder.py +++ b/tests/orm/test_querybuilder.py @@ -1706,6 +1706,14 @@ def test_statistics_default_class(self, aiida_localhost): class TestJsonFilters: + @staticmethod + def assert_match(data, filters, is_match): + orm.Dict(data).store() + qb = orm.QueryBuilder().append(orm.Dict, filters=filters) + assert qb.count() in {0, 1} + found = qb.count() == 1 + assert found == is_match + @pytest.mark.parametrize( 'data,filters,is_match', ( @@ -1735,22 +1743,13 @@ class TestJsonFilters: ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), - # TODO: these pass, but why? are these behaviors expected? - # non-exist `attr_key`s - ({'foo': []}, {'attributes.arr': {'contains': []}}, False), - ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), ), ids=json.dumps, ) @pytest.mark.usefixtures('aiida_profile_clean') - @pytest.mark.requires_psql def test_json_filters_contains_arrays(self, data, filters, is_match): """Test QueryBuilder filter `contains` for JSON array fields""" - orm.Dict(data).store() - qb = orm.QueryBuilder().append(orm.Dict, filters=filters) - assert qb.count() in {0, 1} - found = qb.count() == 1 - assert found == is_match + self.assert_match(data, filters, is_match) @pytest.mark.parametrize( 'data,filters,is_match', @@ -1801,6 +1800,50 @@ def test_json_filters_contains_arrays(self, data, filters, is_match): {'attributes.dict': {'contains': {}}}, True, ), + # nested dicts + ( + {'dict': {'k1': {'k2': {'kx': 1, 'k3': 'secret'}, 'kxx': None}, 'kxxx': 'vxxx'}}, + {'attributes.dict': {'contains': {'k1': {'k2': {'k3': 'secret'}}}}}, + True, + ), + ( + { + 'dict': { + 'k1': [ + 0, + 1, + { + 'k2': [ + '0', + { + 'kkk': 'vvv', + 'k3': 'secret', + }, + '2', + ] + }, + 3, + ], + 'kkk': 'vvv', + } + }, + { + 'attributes.dict': { + 'contains': { + 'k1': [ + { + 'k2': [ + { + 'k3': 'secret', + } + ] + } + ] + } + } + }, + True, + ), # doesn't contain non-exist entries ( { @@ -1847,19 +1890,194 @@ def test_json_filters_contains_arrays(self, data, filters, is_match): {'attributes.dict': {'!contains': {}}}, False, ), - # TODO: these pass, but why? are these behaviors expected? - # non-exist `attr_key`s - ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), - ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), ), ids=json.dumps, ) @pytest.mark.usefixtures('aiida_profile_clean') - @pytest.mark.requires_psql def test_json_filters_contains_object(self, data, filters, is_match): """Test QueryBuilder filter `contains` for JSON object fields""" - orm.Dict(data).store() - qb = orm.QueryBuilder().append(orm.Dict, filters=filters) - assert qb.count() in {0, 1} - found = qb.count() == 1 - assert found == is_match + self.assert_match(data, filters, is_match) + + @pytest.mark.parametrize( + 'data,filters,is_match', + ( + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k1'}}, True), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k2'}}, True), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k3'}}, True), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k1'}}, False), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k2'}}, False), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k3'}}, False), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'non-exist'}}, False), + ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'non-exist'}}, True), + ({'dict': 0xFA15ED1C7}, {'attributes.dict': {'has_key': 'dict'}}, False), + ({'dict': 0xFA15ED1C7}, {'attributes.dict': {'!has_key': 'dict'}}, True), + ), + ) + @pytest.mark.usefixtures('aiida_profile_clean') + def test_json_filters_has_key(self, data, filters, is_match): + self.assert_match(data, filters, is_match) + + @pytest.mark.parametrize( + 'filters,matches', + ( + # type match + ({'attributes.text': {'of_type': 'string'}}, 1), + ({'attributes.integer': {'of_type': 'number'}}, 1), + ({'attributes.float': {'of_type': 'number'}}, 1), + ({'attributes.true': {'of_type': 'boolean'}}, 1), + ({'attributes.false': {'of_type': 'boolean'}}, 1), + ({'attributes.null': {'of_type': 'null'}}, 2), + ({'attributes.list': {'of_type': 'array'}}, 1), + ({'attributes.dict': {'of_type': 'object'}}, 1), + # equality match + ({'attributes.text': {'==': 'abcXYZ'}}, 1), + ({'attributes.integer': {'==': 1}}, 1), + ({'attributes.float': {'==': 1.1}}, 1), + ({'attributes.true': {'==': True}}, 1), + ({'attributes.false': {'==': False}}, 1), + ({'attributes.list': {'==': [1, 2]}}, 1), + ({'attributes.list2': {'==': ['a', 'b']}}, 1), + ({'attributes.dict': {'==': {'key-1': 1, 'key-none': None}}}, 1), + # equality non-match + ({'attributes.text': {'==': 'lmn'}}, 0), + ({'attributes.integer': {'==': 2}}, 0), + ({'attributes.float': {'==': 2.2}}, 0), + ({'attributes.true': {'==': False}}, 0), + ({'attributes.false': {'==': True}}, 0), + ({'attributes.list': {'==': [1, 3]}}, 0), + # text regexes + ({'attributes.text': {'like': 'abcXYZ'}}, 1), + ({'attributes.text': {'like': 'abcxyz'}}, 0), + ({'attributes.text': {'ilike': 'abcxyz'}}, 1), + ({'attributes.text': {'like': 'abc%'}}, 1), + ({'attributes.text': {'like': 'abc_YZ'}}, 1), + ( + { + 'attributes.text2': { + 'like': 'abc\\_XYZ' # Literal match + } + }, + 1, + ), + ({'attributes.text2': {'like': 'abc_XYZ'}}, 2), + # integer comparisons + ({'attributes.float': {'<': 1}}, 0), + ({'attributes.float': {'<': 2}}, 1), + ({'attributes.float': {'>': 2}}, 0), + ({'attributes.float': {'>': 0}}, 1), + ({'attributes.integer': {'<': 1}}, 0), + ({'attributes.integer': {'<': 2}}, 1), + ({'attributes.integer': {'>': 2}}, 0), + ({'attributes.integer': {'>': 0}}, 1), + # float comparisons + ({'attributes.float': {'<': 0.99}}, 0), + ({'attributes.float': {'<': 2.01}}, 1), + ({'attributes.float': {'>': 2.01}}, 0), + ({'attributes.float': {'>': 0.01}}, 1), + ({'attributes.integer': {'<': 0.99}}, 0), + ({'attributes.integer': {'<': 2.01}}, 1), + ({'attributes.integer': {'>': 2.01}}, 0), + ({'attributes.integer': {'>': 0.01}}, 1), + # array operators + ({'attributes.list': {'of_length': 0}}, 0), + ({'attributes.list': {'of_length': 2}}, 1), + ({'attributes.list': {'longer': 3}}, 0), + ({'attributes.list': {'longer': 1}}, 1), + ({'attributes.list': {'shorter': 1}}, 0), + ({'attributes.list': {'shorter': 3}}, 1), + # in operator + ({'attributes.text': {'in': ['x', 'y', 'z']}}, 0), + ({'attributes.text': {'in': ['x', 'y', 'abcXYZ']}}, 1), + ({'attributes.integer': {'in': [5, 6, 7]}}, 0), + ({'attributes.integer': {'in': [1, 2, 3]}}, 1), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + def test_json_filters(self, filters, matches): + """Test QueryBuilder filtering for JSON fields.""" + orm.Dict( + { + 'text': 'abcXYZ', + 'text2': 'abc_XYZ', + 'integer': 1, + 'float': 1.1, + 'true': True, + 'false': False, + 'null': None, + 'list': [1, 2], + 'list2': ['a', 'b'], + 'dict': { + 'key-1': 1, + 'key-none': None, + }, + }, + ).store() + orm.Dict({'text2': 'abcxXYZ'}).store() + + qbuilder = orm.QueryBuilder() + qbuilder.append(orm.Dict, filters=filters) + assert qbuilder.count() == matches + + @pytest.mark.parametrize( + 'filters,matches', + ( + ({'label': {'like': 'abc_XYZ'}}, 2), + ({'label': {'like': 'abc\\_XYZ'}}, 1), + ({'label': {'like': 'abcxXYZ'}}, 1), + ({'label': {'like': 'abc%XYZ'}}, 2), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + def test_column_filters(self, filters, matches): + """Test querying directly those stored in the columns""" + dict1 = orm.Dict( + { + 'text2': 'abc_XYZ', + } + ).store() + dict2 = orm.Dict({'text2': 'abcxXYZ'}).store() + dict1.label = 'abc_XYZ' + dict2.label = 'abcxXYZ' + qbuilder = orm.QueryBuilder() + qbuilder.append(orm.Dict, filters=filters) + assert qbuilder.count() == matches + + @pytest.mark.parametrize( + 'key,cast_type', + ( + ('text', 't'), + ('integer', 'i'), + ('float', 'f'), + ), + ) + @pytest.mark.usefixtures('aiida_profile_clean') + def test_json_order_by(self, key, cast_type): + """Test QueryBuilder ordering by JSON field keys.""" + dict1 = orm.Dict( + { + 'text': 'b', + 'integer': 2, + 'float': 2.2, + } + ).store() + dict2 = orm.Dict( + { + 'text': 'a', + 'integer': 1, + 'float': 1.1, + } + ).store() + dict3 = orm.Dict( + { + 'text': 'c', + 'integer': 3, + 'float': 3.3, + } + ).store() + qbuilder = orm.QueryBuilder() + qbuilder.append(orm.Dict, tag='dict', project=['id']).order_by( + {'dict': {f'attributes.{key}': {'order': 'asc', 'cast': cast_type}}} + ) + assert qbuilder.all(flat=True) == [dict2.pk, dict1.pk, dict3.pk] diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py deleted file mode 100644 index 6fb9dc077a..0000000000 --- a/tests/storage/sqlite/test_orm.py +++ /dev/null @@ -1,358 +0,0 @@ -########################################################################### -# Copyright (c), The AiiDA team. All rights reserved. # -# This file is part of the AiiDA code. # -# # -# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core # -# For further information on the license, see the LICENSE.txt file # -# For further information please visit http://www.aiida.net # -########################################################################### -"""Test for the ORM implementation.""" - -import json - -import pytest -from aiida.orm import Dict, QueryBuilder -from aiida.storage.sqlite_temp import SqliteTempBackend - - -@pytest.mark.parametrize( - 'filters,matches', - ( - # type match - ({'attributes.text': {'of_type': 'string'}}, 1), - ({'attributes.integer': {'of_type': 'number'}}, 1), - ({'attributes.float': {'of_type': 'number'}}, 1), - ({'attributes.true': {'of_type': 'boolean'}}, 1), - ({'attributes.false': {'of_type': 'boolean'}}, 1), - ({'attributes.null': {'of_type': 'null'}}, 3), - ({'attributes.list': {'of_type': 'array'}}, 1), - ({'attributes.dict': {'of_type': 'object'}}, 1), - # equality match - ({'attributes.text': {'==': 'abcXYZ'}}, 1), - ({'attributes.integer': {'==': 1}}, 1), - ({'attributes.float': {'==': 1.1}}, 1), - ({'attributes.true': {'==': True}}, 1), - ({'attributes.false': {'==': False}}, 1), - ({'attributes.list': {'==': [1, 2]}}, 1), - ({'attributes.list2': {'==': ['a', 'b']}}, 1), - ({'attributes.dict': {'==': {'key-1': 1, 'key-none': None}}}, 1), - # equality non-match - ({'attributes.text': {'==': 'lmn'}}, 0), - ({'attributes.integer': {'==': 2}}, 0), - ({'attributes.float': {'==': 2.2}}, 0), - ({'attributes.true': {'==': False}}, 0), - ({'attributes.false': {'==': True}}, 0), - ({'attributes.list': {'==': [1, 3]}}, 0), - # text regexes - ({'attributes.text': {'like': 'abcXYZ'}}, 1), - ({'attributes.text': {'like': 'abcxyz'}}, 0), - ({'attributes.text': {'ilike': 'abcxyz'}}, 1), - ({'attributes.text': {'like': 'abc%'}}, 1), - ({'attributes.text': {'like': 'abc_YZ'}}, 1), - ( - { - 'attributes.text2': { - 'like': 'abc\\_XYZ' # Literal match - } - }, - 1, - ), - ({'attributes.text2': {'like': 'abc_XYZ'}}, 2), - # integer comparisons - ({'attributes.float': {'<': 1}}, 0), - ({'attributes.float': {'<': 2}}, 1), - ({'attributes.float': {'>': 2}}, 0), - ({'attributes.float': {'>': 0}}, 1), - ({'attributes.integer': {'<': 1}}, 0), - ({'attributes.integer': {'<': 2}}, 1), - ({'attributes.integer': {'>': 2}}, 0), - ({'attributes.integer': {'>': 0}}, 1), - # float comparisons - ({'attributes.float': {'<': 0.99}}, 0), - ({'attributes.float': {'<': 2.01}}, 1), - ({'attributes.float': {'>': 2.01}}, 0), - ({'attributes.float': {'>': 0.01}}, 1), - ({'attributes.integer': {'<': 0.99}}, 0), - ({'attributes.integer': {'<': 2.01}}, 1), - ({'attributes.integer': {'>': 2.01}}, 0), - ({'attributes.integer': {'>': 0.01}}, 1), - # array operators - ({'attributes.list': {'of_length': 0}}, 0), - ({'attributes.list': {'of_length': 2}}, 1), - ({'attributes.list': {'longer': 3}}, 0), - ({'attributes.list': {'longer': 1}}, 1), - ({'attributes.list': {'shorter': 1}}, 0), - ({'attributes.list': {'shorter': 3}}, 1), - # in operator - ({'attributes.text': {'in': ['x', 'y', 'z']}}, 0), - ({'attributes.text': {'in': ['x', 'y', 'abcXYZ']}}, 1), - ({'attributes.integer': {'in': [5, 6, 7]}}, 0), - ({'attributes.integer': {'in': [1, 2, 3]}}, 1), - # object operators - ({'attributes.dict': {'has_key': 'non-exist'}}, 0), - ({'attributes.dict': {'!has_key': 'non-exist'}}, 3), - ({'attributes.dict': {'has_key': 'key-1'}}, 1), - ({'attributes.dict': {'has_key': 'key-none'}}, 1), - ({'attributes.dict': {'!has_key': 'key-none'}}, 2), - ), - ids=json.dumps, -) -def test_qb_json_filters(filters, matches): - """Test QueryBuilder filtering for JSON fields.""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - Dict( - { - 'text': 'abcXYZ', - 'text2': 'abc_XYZ', - 'integer': 1, - 'float': 1.1, - 'true': True, - 'false': False, - 'null': None, - 'list': [1, 2], - 'list2': ['a', 'b'], - 'dict': { - 'key-1': 1, - 'key-none': None, - }, - }, - backend=backend, - ).store() - Dict({'text2': 'abcxXYZ'}, backend=backend).store() - - # a false dict, added to test `has_key`'s behavior when key is not of json type - Dict({'dict': 0xFA15ED1C7}, backend=backend).store() - - qbuilder = QueryBuilder(backend=backend) - qbuilder.append(Dict, filters=filters) - assert qbuilder.count() == matches - - -class TestJsonFilters: - @pytest.mark.parametrize( - 'data,filters,is_match', - ( - # contains different types of element - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True), - # contains multiple elements of various types - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True), - # contains non-exist elements - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False), - # contains empty set - ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True), - ({'arr': []}, {'attributes.arr': {'contains': []}}, True), - # nested arrays - ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), - ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter - ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False), - # TODO: the test below is supposed to pass but currently doesn't - # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False), - # negations - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True), - ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True), - # TODO: these pass, but why? are these behaviors expected? - # non-exist `attr_key`s - ({'foo': []}, {'attributes.arr': {'contains': []}}, False), - # ({'foo': []}, {'attributes.arr': {'!contains': []}}, False), - ), - ids=json.dumps, - ) - @pytest.mark.usefixtures('aiida_profile_clean') - def test_json_filters_contains_arrays(self, data, filters, is_match): - """Test QueryBuilder filter `contains` for JSON array fields""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - Dict(data, backend=backend).store() - qb = QueryBuilder(backend=backend).append(Dict, filters=filters) - assert qb.count() in {0, 1} - found = qb.count() == 1 - assert found == is_match - - @pytest.mark.parametrize( - 'data,filters,is_match', - ( - # contains different types of values - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'contains': {'k1': 1}}}, - True, - ), - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, - True, - ), - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'contains': {'k3': None}}}, - True, - ), - # contains empty set - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'contains': {}}}, - True, - ), - # doesn't contain non-exist entries - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, - False, - ), - # negations - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'!contains': {'k1': 1}}}, - False, - ), - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, - True, - ), - ( - { - 'dict': { - 'k1': 1, - 'k2': '2', - 'k3': None, - } - }, - {'attributes.dict': {'!contains': {}}}, - False, - ), - # TODO: these pass, but why? are these behaviors expected? - # non-exist `attr_key`s - ({'map': {}}, {'attributes.dict': {'contains': {}}}, False), - # ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False), - ), - ids=json.dumps, - ) - @pytest.mark.usefixtures('aiida_profile_clean') - def test_json_filters_contains_object(self, data, filters, is_match): - """Test QueryBuilder filter `contains` for JSON object fields""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - Dict(data, backend=backend).store() - qb = QueryBuilder(backend=backend).append(Dict, filters=filters) - assert qb.count() in {0, 1} - found = qb.count() == 1 - assert found == is_match - - -@pytest.mark.parametrize( - 'filters,matches', - ( - ({'label': {'like': 'abc_XYZ'}}, 2), - ({'label': {'like': 'abc\\_XYZ'}}, 1), - ({'label': {'like': 'abcxXYZ'}}, 1), - ({'label': {'like': 'abc%XYZ'}}, 2), - ), - ids=json.dumps, -) -def test_qb_column_filters(filters, matches): - """Test querying directly those stored in the columns""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - dict1 = Dict( - { - 'text2': 'abc_XYZ', - }, - backend=backend, - ).store() - dict2 = Dict({'text2': 'abcxXYZ'}, backend=backend).store() - dict1.label = 'abc_XYZ' - dict2.label = 'abcxXYZ' - qbuilder = QueryBuilder(backend=backend) - qbuilder.append(Dict, filters=filters) - assert qbuilder.count() == matches - - -@pytest.mark.parametrize( - 'key,cast_type', - ( - ('text', 't'), - ('integer', 'i'), - ('float', 'f'), - ), -) -def test_qb_json_order_by(key, cast_type): - """Test QueryBuilder ordering by JSON field keys.""" - profile = SqliteTempBackend.create_profile(debug=False) - backend = SqliteTempBackend(profile) - dict1 = Dict( - { - 'text': 'b', - 'integer': 2, - 'float': 2.2, - }, - backend=backend, - ).store() - dict2 = Dict( - { - 'text': 'a', - 'integer': 1, - 'float': 1.1, - }, - backend=backend, - ).store() - dict3 = Dict( - { - 'text': 'c', - 'integer': 3, - 'float': 3.3, - }, - backend=backend, - ).store() - qbuilder = QueryBuilder(backend=backend) - qbuilder.append(Dict, tag='dict', project=['id']).order_by( - {'dict': {f'attributes.{key}': {'order': 'asc', 'cast': cast_type}}} - ) - assert qbuilder.all(flat=True) == [dict2.pk, dict1.pk, dict3.pk] From ffa0b111a0b48dc4d2af2dab2371e0f7c20ef71c Mon Sep 17 00:00:00 2001 From: Zisen Liu Date: Tue, 26 Nov 2024 11:58:59 +0100 Subject: [PATCH 19/23] add comment on impl for psql of_type --- src/aiida/storage/psql_dos/orm/querybuilder/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/aiida/storage/psql_dos/orm/querybuilder/main.py b/src/aiida/storage/psql_dos/orm/querybuilder/main.py index 743b397760..490114f5f5 100644 --- a/src/aiida/storage/psql_dos/orm/querybuilder/main.py +++ b/src/aiida/storage/psql_dos/orm/querybuilder/main.py @@ -669,6 +669,9 @@ def cast_according_to_type(path_in_json, value): if value in value_types: expr = jsonb_typeof(database_entity) == value elif value in null_types: + # https://www.postgresql.org/docs/current/functions-json.html + # json_typeof('null'::json) → null + # json_typeof(NULL::json) IS NULL → t tp = jsonb_typeof(database_entity) expr = or_(tp == 'null', tp.is_(None)) elif operator == 'like': From c69948460c23a9adb287283331382a1c4b1beb9a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 10:13:24 +0000 Subject: [PATCH 20/23] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/benchmark/test_json_contains.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py index 87ecd3f3a2..3ec2393b17 100644 --- a/tests/benchmark/test_json_contains.py +++ b/tests/benchmark/test_json_contains.py @@ -2,6 +2,7 @@ import string import pytest + from aiida import orm from aiida.orm.querybuilder import QueryBuilder From af544e7389173018fd72280159bb89df66e24e73 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 10 Dec 2024 16:22:30 +0100 Subject: [PATCH 21/23] add tests for custom functions --- tests/storage/sqlite_zip/test_utils.py | 131 +++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 tests/storage/sqlite_zip/test_utils.py diff --git a/tests/storage/sqlite_zip/test_utils.py b/tests/storage/sqlite_zip/test_utils.py new file mode 100644 index 0000000000..5069b4612a --- /dev/null +++ b/tests/storage/sqlite_zip/test_utils.py @@ -0,0 +1,131 @@ +import json + +import pytest + +from aiida.storage.sqlite_zip.utils import _contains, _json_contains + + +class TestCustomFunction: + @pytest.mark.parametrize( + 'lhs,rhs,is_match', + ( + # contains different types of element + ([1, '2', None], [1], True), + ([1, '2', None], ['2'], True), + ([1, '2', None], [None], True), + # contains multiple elements of various types + ([1, '2', None], [1, None], True), + # contains non-exist elements + ([1, '2', None], [114514], False), + # contains empty set + ([1, '2', None], [], True), + ([], [], True), + # nested arrays + ([[1, 0], [0, 2]], [[1, 0]], True), + ([[2, 3], [0, 1], []], [[1, 0]], True), + ([[2, 3], [1]], [[4]], False), + ([[1, 0], [0, 2]], [[3]], False), + ([[1, 0], [0, 2]], [3], False), + ([[1, 0], [0, 2]], [[2]], True), + ([[1, 0], [0, 2]], [2], False), + ([[1, 0], [0, 2], 3], [[3]], False), + ([[1, 0], [0, 2], 3], [3], True), + # contains different types of values + ( + { + 'k1': 1, + 'k2': '2', + 'k3': None, + }, + {'k1': 1}, + True, + ), + ( + { + 'k1': 1, + 'k2': '2', + 'k3': None, + }, + {'k1': 1, 'k2': '2'}, + True, + ), + ( + { + 'k1': 1, + 'k2': '2', + 'k3': None, + }, + {'k3': None}, + True, + ), + # contains empty set + ( + { + 'k1': 1, + 'k2': '2', + 'k3': None, + }, + {}, + True, + ), + # nested dicts + ( + {'k1': {'k2': {'kx': 1, 'k3': 'secret'}, 'kxx': None}, 'kxxx': 'vxxx'}, + {'k1': {'k2': {'k3': 'secret'}}}, + True, + ), + ( + { + 'k1': [ + 0, + 1, + { + 'k2': [ + '0', + { + 'kkk': 'vvv', + 'k3': 'secret', + }, + '2', + ] + }, + 3, + ], + 'kkk': 'vvv', + }, + { + 'k1': [ + { + 'k2': [ + { + 'k3': 'secret', + } + ] + } + ] + }, + True, + ), + # doesn't contain non-exist entries + ( + { + 'k1': 1, + 'k2': '2', + 'k3': None, + }, + {'k1': 1, 'k': 'v'}, + False, + ), + ), + ids=json.dumps, + ) + @pytest.mark.usefixtures('aiida_profile_clean') + def test_json_contains(self, lhs, rhs, is_match): + """Test QueryBuilder filter `contains` for JSON array fields""" + lhs_json = json.dumps(lhs) + rhs_json = json.dumps(rhs) + assert is_match == _contains(lhs, rhs) + assert is_match == _json_contains(lhs, rhs) + assert is_match == _json_contains(lhs_json, rhs) + assert is_match == _json_contains(lhs, rhs_json) + assert is_match == _json_contains(lhs_json, rhs_json) From fbb7ee332fc921673f9c4dd89dd362babfa675a3 Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 10 Dec 2024 17:03:08 +0100 Subject: [PATCH 22/23] enable sqlite database backend testing in github actions --- .github/workflows/test-install.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-install.yml b/.github/workflows/test-install.yml index 7a0076fbbd..b8cf7d8e97 100644 --- a/.github/workflows/test-install.yml +++ b/.github/workflows/test-install.yml @@ -163,6 +163,7 @@ jobs: fail-fast: false matrix: python-version: ['3.9', '3.10', '3.11', '3.12'] + database-backend: [psql, sqlite] services: postgres: @@ -208,4 +209,4 @@ jobs: env: AIIDA_TEST_PROFILE: test_aiida AIIDA_WARN_v3: 1 - run: pytest -n auto --db-backend psql tests -m 'not nightly' tests/ + run: pytest -n auto --db-backend ${{ matrix.database-backend }} tests -m 'not nightly' tests/ From 57beea7c2e0de6c6cdd012e7c82d809a72650a7b Mon Sep 17 00:00:00 2001 From: Karl Liu Date: Tue, 10 Dec 2024 17:19:38 +0100 Subject: [PATCH 23/23] add sqlite to coverage report workflow --- .github/workflows/ci-code.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-code.yml b/.github/workflows/ci-code.yml index 8346d9fdea..b08f6157b5 100644 --- a/.github/workflows/ci-code.yml +++ b/.github/workflows/ci-code.yml @@ -27,6 +27,7 @@ jobs: fail-fast: false matrix: python-version: ['3.9', '3.12'] + database-backend: [psql, sqlite] services: postgres: @@ -73,7 +74,7 @@ jobs: AIIDA_WARN_v3: 1 # Python 3.12 has a performance regression when running with code coverage # so run code coverage only for python 3.9. - run: uv run pytest -n auto --db-backend psql -m 'not nightly' tests/ ${{ matrix.python-version == '3.9' && '--cov aiida' || '' }} + run: uv run pytest -n auto --db-backend ${{ matrix.database-backend }} -m 'not nightly' tests/ ${{ matrix.python-version == '3.9' && '--cov aiida' || '' }} - name: Upload coverage report if: matrix.python-version == 3.9 && github.repository == 'aiidateam/aiida-core'