From a9165f9e08d331ccad8f02ee0b3d8f1ddfbc7012 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 6 Nov 2024 20:57:06 +0100
Subject: [PATCH 01/23] add tests for the `contains` filter operator on the
 PostgreSQL backend

---
 tests/orm/test_querybuilder.py | 112 +++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py
index 862474bc76..c9e7c6a4d9 100644
--- a/tests/orm/test_querybuilder.py
+++ b/tests/orm/test_querybuilder.py
@@ -14,6 +14,7 @@
 from collections import defaultdict
 from datetime import date, datetime, timedelta
 from itertools import chain
+import json
 
 import pytest
 from aiida import orm, plugins
@@ -1703,3 +1704,114 @@ def test_statistics_default_class(self, aiida_localhost):
         # data are correct
         res = next(iter(qb.dict()[0].values()))
         assert res == expected_dict
+
+
+class TestJsonFilters:
+    @pytest.mark.parametrize(
+        'data,filters,is_match',
+        (
+            # contains different types of element
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
+
+            # contains multiple elements of various types
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
+
+            # contains non-exist elements
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
+
+            # contains empty set
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
+            ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
+
+            # negations
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
+
+            # TODO: these pass, but why? are these behaviors expected?
+            # non-exist `attr_key`s
+            ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
+            ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    @pytest.mark.requires_psql
+    def test_json_filters_contains_arrays(self, data, filters, is_match):
+        """Test QueryBuilder filter `contains` for JSON array fields"""
+        orm.Dict(data).store()
+        qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
+        assert qb.count() in {0, 1}
+        found = (qb.count() == 1)
+        assert found == is_match
+
+    @pytest.mark.parametrize(
+        'data,filters,is_match',
+        (
+            # contains different types of values
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'contains': {'k1': 1}}}, True),
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, True),
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'contains': {'k3': None}}}, True),
+
+            # contains empty set
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'contains': {}}}, True),
+
+            # doesn't contain non-exist entries
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, False),
+
+            # negations
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'!contains': {'k1': 1}}}, False),
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, True),
+            ({'dict': {
+                'k1': 1,
+                'k2': '2',
+                'k3': None,
+            }}, {'attributes.dict': {'!contains': {}}}, False),
+
+            # TODO: these pass, but why? are these behaviors expected?
+            # non-exist `attr_key`s
+            ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
+            ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    @pytest.mark.requires_psql
+    def test_json_filters_contains_object(self, data, filters, is_match):
+        """Test QueryBuilder filter `contains` for JSON object fields"""
+        orm.Dict(data).store()
+        qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
+        assert qb.count() in {0, 1}
+        found = (qb.count() == 1)
+        assert found == is_match

From ab88f00b2c7e3f1621ac17fc83796444b36edd47 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 12:30:18 +0000
Subject: [PATCH 02/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/orm/test_querybuilder.py | 143 +++++++++++++++++++++------------
 1 file changed, 91 insertions(+), 52 deletions(-)

diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py
index c9e7c6a4d9..069a893ebb 100644
--- a/tests/orm/test_querybuilder.py
+++ b/tests/orm/test_querybuilder.py
@@ -9,12 +9,12 @@
 """Tests for the QueryBuilder."""
 
 import copy
+import json
 import uuid
 import warnings
 from collections import defaultdict
 from datetime import date, datetime, timedelta
 from itertools import chain
-import json
 
 import pytest
 from aiida import orm, plugins
@@ -1714,23 +1714,18 @@ class TestJsonFilters:
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
-
             # contains multiple elements of various types
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
-
             # contains non-exist elements
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
-
             # contains empty set
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
             ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
-
             # negations
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
-
             # TODO: these pass, but why? are these behaviors expected?
             # non-exist `attr_key`s
             ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
@@ -1745,60 +1740,104 @@ def test_json_filters_contains_arrays(self, data, filters, is_match):
         orm.Dict(data).store()
         qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
         assert qb.count() in {0, 1}
-        found = (qb.count() == 1)
+        found = qb.count() == 1
         assert found == is_match
 
     @pytest.mark.parametrize(
         'data,filters,is_match',
         (
             # contains different types of values
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'contains': {'k1': 1}}}, True),
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}}, True),
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'contains': {'k3': None}}}, True),
-
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k3': None}}},
+                True,
+            ),
             # contains empty set
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'contains': {}}}, True),
-
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {}}},
+                True,
+            ),
             # doesn't contain non-exist entries
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}}, False),
-
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}},
+                False,
+            ),
             # negations
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'!contains': {'k1': 1}}}, False),
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}}, True),
-            ({'dict': {
-                'k1': 1,
-                'k2': '2',
-                'k3': None,
-            }}, {'attributes.dict': {'!contains': {}}}, False),
-
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {'k1': 1}}},
+                False,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {}}},
+                False,
+            ),
             # TODO: these pass, but why? are these behaviors expected?
             # non-exist `attr_key`s
             ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
@@ -1813,5 +1852,5 @@ def test_json_filters_contains_object(self, data, filters, is_match):
         orm.Dict(data).store()
         qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
         assert qb.count() in {0, 1}
-        found = (qb.count() == 1)
+        found = qb.count() == 1
         assert found == is_match

From ec36aae73a8573d11a217ef343ae3dfe9bc19a98 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 19 Nov 2024 14:36:05 +0100
Subject: [PATCH 03/23] WIP: implement `contains` for JSON arrays in the SQLite query builder

---
 src/aiida/storage/sqlite_zip/orm.py | 28 ++++++++++++++++++---
 tests/storage/sqlite/test_orm.py    | 38 +++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py
index 0f51c12534..f4dd63e8fc 100644
--- a/src/aiida/storage/sqlite_zip/orm.py
+++ b/src/aiida/storage/sqlite_zip/orm.py
@@ -17,7 +17,7 @@
 from functools import singledispatch
 from typing import Any, List, Optional, Tuple, Union
 
-from sqlalchemy import JSON, case, func, select
+from sqlalchemy import JSON, case, func, select, true, not_
 from sqlalchemy.orm.util import AliasedClass
 from sqlalchemy.sql import ColumnElement
 
@@ -209,7 +209,7 @@ def _get_projectable_entity(
 
     @staticmethod
     def get_filter_expr_from_jsonb(
-        operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None
+        operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None, negation=None
     ):
         """Return a filter expression.
 
@@ -285,8 +285,28 @@ def _cast_json_type(comparator: JSON.Comparator, value: Any) -> Tuple[ColumnElem
             return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False)
 
         if operator == 'contains':
-            # to-do, see: https://github.com/sqlalchemy/sqlalchemy/discussions/7836
-            raise NotImplementedError('The operator `contains` is not implemented for SQLite-based storage plugins.')
+            if isinstance(value, list):
+                if not value or len(value) == 0:
+                    if len(attr_key) == 0:
+                        filter = true()
+                    else: 
+                        filter = SqliteQueryBuilder.get_filter_expr_from_jsonb(
+                                    'has_key', attr_key[-1], attr_key[:-1], column)
+                    if negation: filter = not_(filter) # negation should not work for this operation
+                    return filter
+
+                subq = select(database_entity) \
+                        .where(func.json_each(database_entity) \
+                                .table_valued('value', joins_implicitly=True) \
+                                .c.value.in_(value)) \
+                        .correlate_except()
+                subsubq = select(func.count()).select_from(subq).scalar_subquery()
+                return subsubq == len(value)
+
+            elif isinstance(value, dict):
+                raise NotImplementedError
+            else:
+                raise TypeError("contains filters can only have as a parameter a list (when matching against lists) or dictionaries (when matching against dictionaries)")
 
         if operator == 'has_key':
             return (
diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py
index 0d859d6bac..ffdb24f272 100644
--- a/tests/storage/sqlite/test_orm.py
+++ b/tests/storage/sqlite/test_orm.py
@@ -129,6 +129,44 @@ def test_qb_json_filters(filters, matches):
     assert qbuilder.count() == matches
 
 
+@pytest.mark.parametrize(
+    'data,filters,is_match',
+    (
+        # contains different types of element
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
+
+        # contains multiple elements of various types
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
+
+        # contains non-exist elements
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
+
+        # contains empty set
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
+        ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
+        ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
+
+        # negations
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
+        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
+        ({'foo': [114, 514]}, {'attributes.arr': {'!contains': []}}, False),
+    ),
+    ids=json.dumps,
+)
+def test_qb_json_filters_contains_arrays(data, filters, is_match):
+    """Test QueryBuilder filter `contains` for JSON array fields"""
+    profile = SqliteTempBackend.create_profile(debug=False)
+    backend = SqliteTempBackend(profile)
+    Dict(data, backend=backend).store()
+    qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
+    assert qb.count() in {0, 1}
+    found = (qb.count() == 1)
+    assert found == is_match
+
 @pytest.mark.parametrize(
     'filters,matches',
     (

From 474dd26727f865d2a999bfd04c5df644a614290a Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 19 Nov 2024 15:41:25 +0100
Subject: [PATCH 04/23] add tests for nested arrays

---
 tests/orm/test_querybuilder.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py
index 0b2da78062..90419fb325 100644
--- a/tests/orm/test_querybuilder.py
+++ b/tests/orm/test_querybuilder.py
@@ -1720,6 +1720,15 @@ class TestJsonFilters:
             # contains empty set
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
             ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
+            
+            # nested arrays
+            ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),
+            ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter
+            ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False),
+
+            # TODO: the test below is supposed to pass but currently doesn't
+            # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False),
+
             # negations
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),

From a759d43bbf73236bc9318da514ad37048a192bad Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 14:41:47 +0000
Subject: [PATCH 05/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/orm/test_querybuilder.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py
index 90419fb325..3be548e2ea 100644
--- a/tests/orm/test_querybuilder.py
+++ b/tests/orm/test_querybuilder.py
@@ -1720,15 +1720,12 @@ class TestJsonFilters:
             # contains empty set
             ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
             ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
-            
             # nested arrays
             ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),
-            ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True), # order doesn't matter
+            ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),  # order doesn't matter
             ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False),
-
             # TODO: the test below is supposed to pass but currently doesn't
             # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False),
-
             # negations
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),

From 5b7b62b0704c0f3711cb569d75cb33b21f652c02 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 19 Nov 2024 17:52:06 +0100
Subject: [PATCH 06/23] move SQLite `contains` tests into `TestJsonFilters` and add object cases

---
 tests/storage/sqlite/test_orm.py | 192 +++++++++++++++++++++++++------
 1 file changed, 157 insertions(+), 35 deletions(-)

diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py
index ffdb24f272..2ca95a74d2 100644
--- a/tests/storage/sqlite/test_orm.py
+++ b/tests/storage/sqlite/test_orm.py
@@ -129,43 +129,165 @@ def test_qb_json_filters(filters, matches):
     assert qbuilder.count() == matches
 
 
-@pytest.mark.parametrize(
-    'data,filters,is_match',
-    (
-        # contains different types of element
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
-
-        # contains multiple elements of various types
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
-
-        # contains non-exist elements
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
+class TestJsonFilters:
+    @pytest.mark.parametrize(
+        'data,filters,is_match',
+        (
+            # contains different types of element
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
+            # contains multiple elements of various types
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
+            # contains non-exist elements
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
+            # contains empty set
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
+            ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
+            # nested arrays
+            ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),
+            ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),  # order doesn't matter
+            ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False),
+            # TODO: the test below is supposed to pass but currently doesn't
+            # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False),
+            # negations
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
+            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
+            # TODO: these pass, but why? are these behaviors expected?
+            # non-exist `attr_key`s
+            ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
+            ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    @pytest.mark.requires_psql
+    def test_json_filters_contains_arrays(self, data, filters, is_match):
+        """Test QueryBuilder filter `contains` for JSON array fields"""
+        profile = SqliteTempBackend.create_profile(debug=False)
+        backend = SqliteTempBackend(profile)
+        Dict(data, backend=backend).store()
+        qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
+        assert qb.count() in {0, 1}
+        found = qb.count() == 1
+        assert found == is_match
 
-        # contains empty set
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
-        ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
-        ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
+    @pytest.mark.parametrize(
+        'data,filters,is_match',
+        (
+            # contains different types of values
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k3': None}}},
+                True,
+            ),
+            # contains empty set
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {}}},
+                True,
+            ),
+            # doesn't contain non-exist entries
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}},
+                False,
+            ),
+            # negations
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {'k1': 1}}},
+                False,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': 1,
+                        'k2': '2',
+                        'k3': None,
+                    }
+                },
+                {'attributes.dict': {'!contains': {}}},
+                False,
+            ),
+            # TODO: these pass, but why? are these behaviors expected?
+            # non-exist `attr_key`s
+            ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
+            ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    @pytest.mark.requires_psql
+    def test_json_filters_contains_object(self, data, filters, is_match):
+        """Test QueryBuilder filter `contains` for JSON object fields"""
+        profile = SqliteTempBackend.create_profile(debug=False)
+        backend = SqliteTempBackend(profile)
+        Dict(data, backend=backend).store()
+        qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
+        assert qb.count() in {0, 1}
+        found = qb.count() == 1
+        assert found == is_match
 
-        # negations
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
-        ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
-        ({'foo': [114, 514]}, {'attributes.arr': {'!contains': []}}, False),
-    ),
-    ids=json.dumps,
-)
-def test_qb_json_filters_contains_arrays(data, filters, is_match):
-    """Test QueryBuilder filter `contains` for JSON array fields"""
-    profile = SqliteTempBackend.create_profile(debug=False)
-    backend = SqliteTempBackend(profile)
-    Dict(data, backend=backend).store()
-    qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
-    assert qb.count() in {0, 1}
-    found = (qb.count() == 1)
-    assert found == is_match
 
 @pytest.mark.parametrize(
     'filters,matches',

From e23ec32a0a8c943ef17ef4dc1438da3f11c4459a Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 19 Nov 2024 18:10:16 +0100
Subject: [PATCH 07/23] custom function

---
 src/aiida/storage/sqlite_zip/orm.py   | 23 +---------------------
 src/aiida/storage/sqlite_zip/utils.py | 28 ++++++++++++++++++++++++++-
 tests/storage/sqlite/test_orm.py      |  4 ++--
 3 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py
index f4dd63e8fc..d0b04608d7 100644
--- a/src/aiida/storage/sqlite_zip/orm.py
+++ b/src/aiida/storage/sqlite_zip/orm.py
@@ -285,28 +285,7 @@ def _cast_json_type(comparator: JSON.Comparator, value: Any) -> Tuple[ColumnElem
             return case((type_filter, casted_entity.ilike(value, escape='\\')), else_=False)
 
         if operator == 'contains':
-            if isinstance(value, list):
-                if not value or len(value) == 0:
-                    if len(attr_key) == 0:
-                        filter = true()
-                    else: 
-                        filter = SqliteQueryBuilder.get_filter_expr_from_jsonb(
-                                    'has_key', attr_key[-1], attr_key[:-1], column)
-                    if negation: filter = not_(filter) # negation should not work for this operation
-                    return filter
-
-                subq = select(database_entity) \
-                        .where(func.json_each(database_entity) \
-                                .table_valued('value', joins_implicitly=True) \
-                                .c.value.in_(value)) \
-                        .correlate_except()
-                subsubq = select(func.count()).select_from(subq).scalar_subquery()
-                return subsubq == len(value)
-
-            elif isinstance(value, dict):
-                raise NotImplementedError
-            else:
-                raise TypeError("contains filters can only have as a parameter a list (when matching against lists) or dictionaries (when matching against dictionaries)")
+            return func.json_contains(database_entity, json.dumps(value))
 
         if operator == 'has_key':
             return (
diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index 2438c18fcb..bbc35e8ae8 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -11,7 +11,7 @@
 import json
 import zipfile
 from pathlib import Path
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union, AnyStr
 
 from sqlalchemy import event
 from sqlalchemy.future.engine import Engine, create_engine
@@ -47,6 +47,31 @@ def sqlite_case_sensitive_like(dbapi_connection, _):
     cursor.execute('PRAGMA case_sensitive_like=ON;')
     cursor.close()
 
+def _contains(lhs: dict | list, rhs: dict | list):
+    if isinstance(lhs, dict) and isinstance(rhs, dict):
+        for key in rhs:
+            if key not in lhs or not _contains(lhs[key], rhs[key]):
+                return False
+        return True
+    elif isinstance(lhs, list) and isinstance(rhs, list):
+        for item in rhs:
+            if not any(_contains(element, item) for element in lhs):
+                return False
+        return True
+    else:
+        return lhs == rhs
+
+def _json_contains(json1_str: AnyStr, json2_str: AnyStr):
+    try:
+        json1 = json.loads(json1_str)
+        json2 = json.loads(json2_str)
+    except json.JSONDecodeError:
+        return 0
+    return int(_contains(json1, json2))
+
+def register_json_contains(dbapi_connection, _):
+    dbapi_connection.create_function('json_contains', 2, _json_contains)
+
 
 def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = True, **kwargs) -> Engine:
     """Create a new engine instance."""
@@ -54,6 +79,7 @@ def create_sqla_engine(path: Union[str, Path], *, enforce_foreign_keys: bool = T
     event.listen(engine, 'connect', sqlite_case_sensitive_like)
     if enforce_foreign_keys:
         event.listen(engine, 'connect', sqlite_enforce_foreign_keys)
+    event.listen(engine, 'connect', register_json_contains)
     return engine
 
 
diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py
index 2ca95a74d2..7a73b30eb8 100644
--- a/tests/storage/sqlite/test_orm.py
+++ b/tests/storage/sqlite/test_orm.py
@@ -158,7 +158,7 @@ class TestJsonFilters:
             # TODO: these pass, but why? are these behaviors expected?
             # non-exist `attr_key`s
             ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
-            ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
+            # ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
         ),
         ids=json.dumps,
     )
@@ -272,7 +272,7 @@ def test_json_filters_contains_arrays(self, data, filters, is_match):
             # TODO: these pass, but why? are these behaviors expected?
             # non-exist `attr_key`s
             ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
-            ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
+            # ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
         ),
         ids=json.dumps,
     )

From e530f033d178276c023a531847e80276727176a9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 19 Nov 2024 17:35:33 +0000
Subject: [PATCH 08/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/aiida/storage/sqlite_zip/orm.py   | 2 +-
 src/aiida/storage/sqlite_zip/utils.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py
index d0b04608d7..81e3c8d540 100644
--- a/src/aiida/storage/sqlite_zip/orm.py
+++ b/src/aiida/storage/sqlite_zip/orm.py
@@ -17,7 +17,7 @@
 from functools import singledispatch
 from typing import Any, List, Optional, Tuple, Union
 
-from sqlalchemy import JSON, case, func, select, true, not_
+from sqlalchemy import JSON, case, func, select
 from sqlalchemy.orm.util import AliasedClass
 from sqlalchemy.sql import ColumnElement
 
diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index bbc35e8ae8..e04650e717 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -11,7 +11,7 @@
 import json
 import zipfile
 from pathlib import Path
-from typing import Any, Dict, Optional, Union, AnyStr
+from typing import Any, AnyStr, Dict, Optional, Union
 
 from sqlalchemy import event
 from sqlalchemy.future.engine import Engine, create_engine
@@ -47,6 +47,7 @@ def sqlite_case_sensitive_like(dbapi_connection, _):
     cursor.execute('PRAGMA case_sensitive_like=ON;')
     cursor.close()
 
+
 def _contains(lhs: dict | list, rhs: dict | list):
     if isinstance(lhs, dict) and isinstance(rhs, dict):
         for key in rhs:
@@ -61,6 +62,7 @@ def _contains(lhs: dict | list, rhs: dict | list):
     else:
         return lhs == rhs
 
+
 def _json_contains(json1_str: AnyStr, json2_str: AnyStr):
     try:
         json1 = json.loads(json1_str)
@@ -69,6 +71,7 @@ def _json_contains(json1_str: AnyStr, json2_str: AnyStr):
         return 0
     return int(_contains(json1, json2))
 
+
 def register_json_contains(dbapi_connection, _):
     dbapi_connection.create_function('json_contains', 2, _json_contains)
 

From 6df03e32673643e4cba4142e847967d70c6c6246 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 19 Nov 2024 18:36:08 +0100
Subject: [PATCH 09/23] cleanup: drop `negation` parameter from `get_filter_expr_from_jsonb`

---
 src/aiida/storage/sqlite_zip/orm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aiida/storage/sqlite_zip/orm.py b/src/aiida/storage/sqlite_zip/orm.py
index d0b04608d7..e0532bf04a 100644
--- a/src/aiida/storage/sqlite_zip/orm.py
+++ b/src/aiida/storage/sqlite_zip/orm.py
@@ -209,7 +209,7 @@ def _get_projectable_entity(
 
     @staticmethod
     def get_filter_expr_from_jsonb(
-        operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None, negation=None
+        operator: str, value, attr_key: List[str], column=None, column_name=None, alias=None
     ):
         """Return a filter expression.
 

From f7874049d0c03f23599aa14e3d3c6e11e448f773 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Wed, 20 Nov 2024 01:10:02 +0100
Subject: [PATCH 10/23] fix compilation error on py39

---
 src/aiida/storage/sqlite_zip/utils.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index e04650e717..304ef7fa08 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -48,7 +48,7 @@ def sqlite_case_sensitive_like(dbapi_connection, _):
     cursor.close()
 
 
-def _contains(lhs: dict | list, rhs: dict | list):
+def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
     if isinstance(lhs, dict) and isinstance(rhs, dict):
         for key in rhs:
             if key not in lhs or not _contains(lhs[key], rhs[key]):
@@ -63,13 +63,15 @@ def _contains(lhs: dict | list, rhs: dict | list):
         return lhs == rhs
 
 
-def _json_contains(json1_str: AnyStr, json2_str: AnyStr):
+def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
     try:
-        json1 = json.loads(json1_str)
-        json2 = json.loads(json2_str)
+        if isinstance(lhs, (str, bytes, bytearray)):
+            lhs = json.loads(lhs)
+        if isinstance(rhs, (str, bytes, bytearray)):
+            rhs = json.loads(rhs)
     except json.JSONDecodeError:
         return 0
-    return int(_contains(json1, json2))
+    return int(_contains(lhs, rhs))
 
 
 def register_json_contains(dbapi_connection, _):

From 36c71028fa937edeb670eb45515082307a00afac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 20 Nov 2024 00:12:13 +0000
Subject: [PATCH 11/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/aiida/storage/sqlite_zip/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index 304ef7fa08..e92f6dcf00 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -11,7 +11,7 @@
 import json
 import zipfile
 from pathlib import Path
-from typing import Any, AnyStr, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
 
 from sqlalchemy import event
 from sqlalchemy.future.engine import Engine, create_engine

From dcd3cf9736f1f81f9023e8ab583dae695ccc5e6d Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Wed, 20 Nov 2024 03:45:32 +0100
Subject: [PATCH 12/23] add benchmark for JSON `contains` filter

---
 tests/benchmark/test_json_contains.py | 102 ++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 tests/benchmark/test_json_contains.py

diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py
new file mode 100644
index 0000000000..e95bd68b76
--- /dev/null
+++ b/tests/benchmark/test_json_contains.py
@@ -0,0 +1,102 @@
+import functools
+
+import pytest
+import random
+import string
+
+from aiida import orm
+from aiida.orm.querybuilder import QueryBuilder
+
+GROUP_NAME = 'json-contains'
+
+
+COMPLEX_JSON_DEPTH_RANGE=[2**i for i in range(4)]
+COMPLEX_JSON_BREADTH_RANGE=[2**i for i in range(4)]
+LARGE_TABLE_SIZE_RANGE=[2**i for i in range(1, 11)]
+
+
+def gen_json(depth: int, breadth: int):
+    def gen_str(n: int, with_digits: bool = True):
+        population = string.ascii_letters
+        if with_digits: population += string.digits
+        return ''.join(random.choices(population, k=n))
+
+    if depth == 0:  # random primitive value
+        # real numbers are not included as their equivalence is tricky
+        return random.choice([
+            random.randint(-114, 514),  # integers
+            gen_str(6),  # strings
+            random.choice([True, False]),  # booleans
+            None,  # nulls
+        ])
+
+    else:
+        gen_dict = random.choice([True, False])
+        data = [gen_json(depth - 1, breadth) for _ in range(breadth)]
+        if gen_dict:
+            keys = set()
+            while len(keys) < breadth:
+                keys.add(gen_str(6, False))
+            data = dict(zip(list(keys), data))
+        return data
+
+
+def extract_component(data, p: float = -1):
+    if random.random() < p:
+        return data
+
+    if isinstance(data, dict) and data:
+        key = random.choice(list(data.keys()))
+        return {key: extract_component(data[key])}
+    elif isinstance(data, list) and data:
+        element = random.choice(data)
+        return [extract_component(element)]
+    else:
+        return data
+
+
+@pytest.mark.benchmark(group=GROUP_NAME)
+@pytest.mark.parametrize('depth', COMPLEX_JSON_DEPTH_RANGE)
+@pytest.mark.parametrize('breadth', COMPLEX_JSON_BREADTH_RANGE)
+@pytest.mark.usefixtures('aiida_profile_clean')
+def test_complex_json(benchmark, depth, breadth):
+    lhs = gen_json(depth, breadth)
+    rhs = extract_component(lhs, p=1./depth)
+    assert 0 == len(QueryBuilder().append(orm.Dict).all())
+
+    orm.Dict({
+        'id': f'{depth}-{breadth}',
+        'data': lhs,
+    }).store()
+    qb = QueryBuilder().append(orm.Dict, filters={
+        'attributes.data': {'contains': rhs},
+    }, project=[
+        'attributes.id'
+    ])
+    qb.all()
+    result = benchmark(qb.all)
+    assert len(result) == 1
+
+
+@pytest.mark.benchmark(group=GROUP_NAME)
+@pytest.mark.parametrize('num_entries', LARGE_TABLE_SIZE_RANGE)
+@pytest.mark.usefixtures('aiida_profile_clean')
+def test_large_table(benchmark, num_entries):
+    data = gen_json(2, 10)
+    rhs = extract_component(data)
+    assert 0 == len(QueryBuilder().append(orm.Dict).all())
+
+    for i in range(num_entries):
+        orm.Dict({
+            'id': f'N={num_entries}, i={i}',
+            'data': data,
+        }).store()
+    qb = QueryBuilder().append(orm.Dict, filters={
+        'attributes.data': {'contains': rhs},
+    }, project=[
+        'attributes.id'
+    ])
+    qb.all()
+    result = benchmark(qb.all)
+    assert len(result) == num_entries
+

From d293d188acb365a1e349487d1be7f2c7bf413b0b Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Wed, 20 Nov 2024 03:46:19 +0100
Subject: [PATCH 13/23] ignore benchmark results

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index a4fdd01ebc..d5719a1208 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,6 @@ pplot_out/
 
 # docker
 docker-bake.override.json
+
+# benchmark
+.benchmarks/
\ No newline at end of file

From 079cc32f5e28d801a5514d3f8aa6c985cc43660e Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Wed, 20 Nov 2024 05:17:55 +0100
Subject: [PATCH 14/23] remove requires_psql marks for sqlite tests

---
 tests/storage/sqlite/test_orm.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py
index 7a73b30eb8..6fb9dc077a 100644
--- a/tests/storage/sqlite/test_orm.py
+++ b/tests/storage/sqlite/test_orm.py
@@ -163,7 +163,6 @@ class TestJsonFilters:
         ids=json.dumps,
     )
     @pytest.mark.usefixtures('aiida_profile_clean')
-    @pytest.mark.requires_psql
     def test_json_filters_contains_arrays(self, data, filters, is_match):
         """Test QueryBuilder filter `contains` for JSON array fields"""
         profile = SqliteTempBackend.create_profile(debug=False)
@@ -277,7 +276,6 @@ def test_json_filters_contains_arrays(self, data, filters, is_match):
         ids=json.dumps,
     )
     @pytest.mark.usefixtures('aiida_profile_clean')
-    @pytest.mark.requires_psql
     def test_json_filters_contains_object(self, data, filters, is_match):
         """Test QueryBuilder filter `contains` for JSON object fields"""
         profile = SqliteTempBackend.create_profile(debug=False)

From 598f821e00109b517fb99f4126af7f9bc0549fc2 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Thu, 21 Nov 2024 19:18:45 +0100
Subject: [PATCH 15/23] temp: try iterative deque-based `_json_contains`

---
 src/aiida/storage/sqlite_zip/utils.py | 96 ++++++++++++++++++++-------
 tests/benchmark/test_json_contains.py |  1 -
 2 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index 304ef7fa08..2027685fd7 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -11,7 +11,8 @@
 import json
 import zipfile
 from pathlib import Path
-from typing import Any, AnyStr, Dict, Optional, Union
+from typing import Any, Dict, Optional, Union
+from collections import deque
 
 from sqlalchemy import event
 from sqlalchemy.future.engine import Engine, create_engine
@@ -48,30 +49,79 @@ def sqlite_case_sensitive_like(dbapi_connection, _):
     cursor.close()
 
 
-def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
-    if isinstance(lhs, dict) and isinstance(rhs, dict):
-        for key in rhs:
-            if key not in lhs or not _contains(lhs[key], rhs[key]):
-                return False
-        return True
-    elif isinstance(lhs, list) and isinstance(rhs, list):
-        for item in rhs:
-            if not any(_contains(element, item) for element in lhs):
-                return False
-        return True
-    else:
-        return lhs == rhs
-
-
-def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
-    try:
-        if isinstance(lhs, (str, bytes, bytearray)):
+# def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
+#     if isinstance(lhs, dict) and isinstance(rhs, dict):
+#         for key in rhs:
+#             if key not in lhs or not _contains(lhs[key], rhs[key]):
+#                 return False
+#         return True
+#     elif isinstance(lhs, list) and isinstance(rhs, list):
+#         for item in rhs:
+#             if not any(_contains(element, item) for element in lhs):
+#                 return False
+#         return True
+#     else:
+#         return lhs == rhs
+
+
+# def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
+#     try:
+#         if isinstance(lhs, (str, bytes, bytearray)):
+#             lhs = json.loads(lhs)
+#         if isinstance(rhs, (str, bytes, bytearray)):
+#             rhs = json.loads(rhs)
+#     except json.JSONDecodeError:
+#         return 0
+#     return int(_contains(lhs, rhs))
+
+def _json_contains(lhs: Union[str, bytes, bytearray, dict, list],
+                   rhs: Union[str, bytes, bytearray, dict, list]) -> int:
+    if isinstance(lhs, (str, bytes, bytearray)):
+        try:
             lhs = json.loads(lhs)
-        if isinstance(rhs, (str, bytes, bytearray)):
+        except json.JSONDecodeError:
+            return 0
+    if isinstance(rhs, (str, bytes, bytearray)):
+        try:
             rhs = json.loads(rhs)
-    except json.JSONDecodeError:
-        return 0
-    return int(_contains(lhs, rhs))
+        except json.JSONDecodeError:
+            return 0
+
+    stack = deque()
+    stack.append((lhs, rhs))
+    while stack.count() > 0:
+        l, r = stack.popleft()
+        if isinstance(l, dict):
+            if not isinstance(r, dict):
+                return 0
+            for key, value in r.items():
+                if key not in l: return 0
+                stack.append((l[key], value))
+        elif isinstance(l, list):
+            if not isinstance(r, list):
+                return 0
+            lp, lo = set(), []
+            for e in l:
+                if isinstance(e, (dict, list)):
+                    lo.append(e)
+                else:
+                    lp.add(e)
+            rp, ro = set(), []
+            for e in r:
+                if isinstance(e, (dict, list)):
+                    ro.append(e)
+                else:
+                    rp.add(e)
+            if not lp.issuperset(rp):
+                return 0
+            for le in lo:
+                for re in ro:
+                    stack.append((le, re))
+        else:
+            return int(l == r)
+    return 1
+
+
 
 
 def register_json_contains(dbapi_connection, _):
diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py
index e95bd68b76..4d6f0e2002 100644
--- a/tests/benchmark/test_json_contains.py
+++ b/tests/benchmark/test_json_contains.py
@@ -99,4 +99,3 @@ def test_large_table(benchmark, num_entries):
     qb.all()
     result = benchmark(qb.all)
     assert len(result) == num_entries
-

From 93ad037cb06eb82af9517f7b777ba28395c28ee3 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Sat, 23 Nov 2024 23:31:12 +0100
Subject: [PATCH 16/23] restore recursive `_contains`; split benchmark into deep/wide JSON

---
 src/aiida/storage/sqlite_zip/utils.py | 94 +++++++--------------------
 tests/benchmark/test_json_contains.py | 31 +++++++--
 2 files changed, 49 insertions(+), 76 deletions(-)

diff --git a/src/aiida/storage/sqlite_zip/utils.py b/src/aiida/storage/sqlite_zip/utils.py
index 2027685fd7..c2a2a0ace2 100644
--- a/src/aiida/storage/sqlite_zip/utils.py
+++ b/src/aiida/storage/sqlite_zip/utils.py
@@ -49,79 +49,31 @@ def sqlite_case_sensitive_like(dbapi_connection, _):
     cursor.close()
 
 
-# def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
-#     if isinstance(lhs, dict) and isinstance(rhs, dict):
-#         for key in rhs:
-#             if key not in lhs or not _contains(lhs[key], rhs[key]):
-#                 return False
-#         return True
-#     elif isinstance(lhs, list) and isinstance(rhs, list):
-#         for item in rhs:
-#             if not any(_contains(element, item) for element in lhs):
-#                 return False
-#         return True
-#     else:
-#         return lhs == rhs
-
-
-# def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
-#     try:
-#         if isinstance(lhs, (str, bytes, bytearray)):
-#             lhs = json.loads(lhs)
-#         if isinstance(rhs, (str, bytes, bytearray)):
-#             rhs = json.loads(rhs)
-#     except json.JSONDecodeError:
-#         return 0
-#     return int(_contains(lhs, rhs))
-
-def _json_contains(lhs: Union[str, bytes, bytearray, dict, list],
-                   rhs: Union[str, bytes, bytearray, dict, list]) -> int:
-    if isinstance(lhs, (str, bytes, bytearray)):
-        try:
-            lhs = json.loads(lhs)
-        except json.JSONDecodeError:
-            return 0
-    if isinstance(rhs, (str, bytes, bytearray)):
-        try:
-            rhs = json.loads(rhs)
-        except json.JSONDecodeError:
-            return 0
-
-    stack = deque()
-    stack.append((lhs, rhs))
-    while stack.count() > 0:
-        l, r = stack.popleft()
-        if isinstance(l, dict):
-            if not isinstance(r, dict):
-                return 0
-            for key, value in r.items():
-                if key not in l: return 0
-                stack.append((l[key], value))
-        elif isinstance(l, list):
-            if not isinstance(r, list):
-                return 0
-            lp, lo = set(), []
-            for e in l:
-                if isinstance(e, (dict, list)):
-                    lo.append(e)
-                else:
-                    lp.add(e)
-            rp, ro = set(), []
-            for e in r:
-                if isinstance(e, (dict, list)):
-                    ro.append(e)
-                else:
-                    rp.add(e)
-            if not lp.issuperset(rp):
-                return 0
-            for le in lo:
-                for re in ro:
-                    stack.append((le, re))
-        else:
-            return int(l == r)
-    return 1
+def _contains(lhs: Union[dict, list], rhs: Union[dict, list]):
+    if isinstance(lhs, dict) and isinstance(rhs, dict):
+        for key in rhs:
+            if key not in lhs or not _contains(lhs[key], rhs[key]):
+                return False
+        return True
+
+    elif isinstance(lhs, list) and isinstance(rhs, list):
+        for item in rhs:
+            if not any(_contains(e, item) for e in lhs):
+                return False
+        return True
+    else:
+        return lhs == rhs
 
 
+def _json_contains(lhs: Union[str, bytes, bytearray, dict, list], rhs: Union[str, bytes, bytearray, dict, list]):
+    try:
+        if isinstance(lhs, (str, bytes, bytearray)):
+            lhs = json.loads(lhs)
+        if isinstance(rhs, (str, bytes, bytearray)):
+            rhs = json.loads(rhs)
+    except json.JSONDecodeError:
+        return 0
+    return int(_contains(lhs, rhs))
 
 
 def register_json_contains(dbapi_connection, _):
diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py
index 4d6f0e2002..40afd515be 100644
--- a/tests/benchmark/test_json_contains.py
+++ b/tests/benchmark/test_json_contains.py
@@ -1,5 +1,3 @@
-import functools
-
 import pytest
 import random
 import string
@@ -56,10 +54,33 @@ def extract_component(data, p: float = -1):
 
 
 @pytest.mark.benchmark(group=GROUP_NAME)
-@pytest.mark.parametrize('depth', COMPLEX_JSON_DEPTH_RANGE)
-@pytest.mark.parametrize('breadth', COMPLEX_JSON_BREADTH_RANGE)
+@pytest.mark.parametrize('depth', [1, 2, 4, 8])
+@pytest.mark.parametrize('breadth', [1, 2, 4])
+@pytest.mark.usefixtures('aiida_profile_clean')
+def test_deep_json(benchmark, depth, breadth):
+    lhs = gen_json(depth, breadth)
+    rhs = extract_component(lhs, p=1./depth)
+    assert 0 == len(QueryBuilder().append(orm.Dict).all())
+
+    orm.Dict({
+        'id': f'{depth}-{breadth}',
+        'data': lhs,
+    }).store()
+    qb = QueryBuilder().append(orm.Dict, filters={
+        'attributes.data': {'contains': rhs},
+    }, project=[
+        'attributes.id'
+    ])
+    qb.all()
+    result = benchmark(qb.all)
+    assert len(result) == 1
+
+
+@pytest.mark.benchmark(group=GROUP_NAME)
+@pytest.mark.parametrize('depth', [2])
+@pytest.mark.parametrize('breadth', [1, 10, 100])
 @pytest.mark.usefixtures('aiida_profile_clean')
-def test_complex_json(benchmark, depth, breadth):
+def test_wide_json(benchmark, depth, breadth):
     lhs = gen_json(depth, breadth)
     rhs = extract_component(lhs, p=1./depth)
     assert 0 == len(QueryBuilder().append(orm.Dict).all())

From 9293c67b98c5f95b3a603d98cd70eae52f51ffea Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 23 Nov 2024 22:32:22 +0000
Subject: [PATCH 17/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .gitignore                            |  2 +-
 tests/benchmark/test_json_contains.py | 95 ++++++++++++++++-----------
 2 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/.gitignore b/.gitignore
index d5719a1208..975a88cd25 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,4 +45,4 @@ pplot_out/
 docker-bake.override.json
 
 # benchmark
-.benchmarks/
\ No newline at end of file
+.benchmarks/
diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py
index 40afd515be..87ecd3f3a2 100644
--- a/tests/benchmark/test_json_contains.py
+++ b/tests/benchmark/test_json_contains.py
@@ -1,32 +1,35 @@
-import pytest
 import random
 import string
 
+import pytest
 from aiida import orm
 from aiida.orm.querybuilder import QueryBuilder
 
 GROUP_NAME = 'json-contains'
 
 
-COMPLEX_JSON_DEPTH_RANGE=[2**i for i in range(4)]
-COMPLEX_JSON_BREADTH_RANGE=[2**i for i in range(4)]
-LARGE_TABLE_SIZE_RANGE=[2**i for i in range(1, 11)]
+COMPLEX_JSON_DEPTH_RANGE = [2**i for i in range(4)]
+COMPLEX_JSON_BREADTH_RANGE = [2**i for i in range(4)]
+LARGE_TABLE_SIZE_RANGE = [2**i for i in range(1, 11)]
 
 
 def gen_json(depth: int, breadth: int):
     def gen_str(n: int, with_digits: bool = True):
         population = string.ascii_letters
-        if with_digits: population += string.digits
+        if with_digits:
+            population += string.digits
         return ''.join(random.choices(population, k=n))
 
     if depth == 0:  # random primitive value
         # real numbers are not included as their equivalence is tricky
-        return random.choice([
-            random.randint(-114, 514),  # integers
-            gen_str(6),  # strings
-            random.choice([True, False]),  # booleans
-            None,  # nulls
-        ])
+        return random.choice(
+            [
+                random.randint(-114, 514),  # integers
+                gen_str(6),  # strings
+                random.choice([True, False]),  # booleans
+                None,  # nulls
+            ]
+        )
 
     else:
         gen_dict = random.choice([True, False])
@@ -59,18 +62,22 @@ def extract_component(data, p: float = -1):
 @pytest.mark.usefixtures('aiida_profile_clean')
 def test_deep_json(benchmark, depth, breadth):
     lhs = gen_json(depth, breadth)
-    rhs = extract_component(lhs, p=1./depth)
+    rhs = extract_component(lhs, p=1.0 / depth)
     assert 0 == len(QueryBuilder().append(orm.Dict).all())
 
-    orm.Dict({
-        'id': f'{depth}-{breadth}',
-        'data': lhs,
-    }).store()
-    qb = QueryBuilder().append(orm.Dict, filters={
-        'attributes.data': {'contains': rhs},
-    }, project=[
-        'attributes.id'
-    ])
+    orm.Dict(
+        {
+            'id': f'{depth}-{breadth}',
+            'data': lhs,
+        }
+    ).store()
+    qb = QueryBuilder().append(
+        orm.Dict,
+        filters={
+            'attributes.data': {'contains': rhs},
+        },
+        project=['attributes.id'],
+    )
     qb.all()
     result = benchmark(qb.all)
     assert len(result) == 1
@@ -82,18 +89,22 @@ def test_deep_json(benchmark, depth, breadth):
 @pytest.mark.usefixtures('aiida_profile_clean')
 def test_wide_json(benchmark, depth, breadth):
     lhs = gen_json(depth, breadth)
-    rhs = extract_component(lhs, p=1./depth)
+    rhs = extract_component(lhs, p=1.0 / depth)
     assert 0 == len(QueryBuilder().append(orm.Dict).all())
 
-    orm.Dict({
-        'id': f'{depth}-{breadth}',
-        'data': lhs,
-    }).store()
-    qb = QueryBuilder().append(orm.Dict, filters={
-        'attributes.data': {'contains': rhs},
-    }, project=[
-        'attributes.id'
-    ])
+    orm.Dict(
+        {
+            'id': f'{depth}-{breadth}',
+            'data': lhs,
+        }
+    ).store()
+    qb = QueryBuilder().append(
+        orm.Dict,
+        filters={
+            'attributes.data': {'contains': rhs},
+        },
+        project=['attributes.id'],
+    )
     qb.all()
     result = benchmark(qb.all)
     assert len(result) == 1
@@ -108,15 +119,19 @@ def test_large_table(benchmark, num_entries):
     assert 0 == len(QueryBuilder().append(orm.Dict).all())
 
     for i in range(num_entries):
-        orm.Dict({
-            'id': f'N={num_entries}, i={i}',
-            'data': data,
-        }).store()
-    qb = QueryBuilder().append(orm.Dict, filters={
-        'attributes.data': {'contains': rhs},
-    }, project=[
-        'attributes.id'
-    ])
+        orm.Dict(
+            {
+                'id': f'N={num_entries}, i={i}',
+                'data': data,
+            }
+        ).store()
+    qb = QueryBuilder().append(
+        orm.Dict,
+        filters={
+            'attributes.data': {'contains': rhs},
+        },
+        project=['attributes.id'],
+    )
     qb.all()
     result = benchmark(qb.all)
     assert len(result) == num_entries

From 2189a81ba63ea39664891f5640041f8b63e7d563 Mon Sep 17 00:00:00 2001
From: Zisen Liu <karl.liu.1024@gmail.com>
Date: Tue, 26 Nov 2024 11:55:03 +0100
Subject: [PATCH 18/23] migrate sqlite filter tests to orm

---
 .../storage/psql_dos/orm/querybuilder/main.py |  12 +-
 tests/orm/test_querybuilder.py                | 258 ++++++++++++-
 tests/storage/sqlite/test_orm.py              | 358 ------------------
 3 files changed, 247 insertions(+), 381 deletions(-)
 delete mode 100644 tests/storage/sqlite/test_orm.py

diff --git a/src/aiida/storage/psql_dos/orm/querybuilder/main.py b/src/aiida/storage/psql_dos/orm/querybuilder/main.py
index cf18134c0f..743b397760 100644
--- a/src/aiida/storage/psql_dos/orm/querybuilder/main.py
+++ b/src/aiida/storage/psql_dos/orm/querybuilder/main.py
@@ -625,7 +625,7 @@ def cast_according_to_type(path_in_json, value):
             elif isinstance(value, dict) or value is None:
                 type_filter = jsonb_typeof(path_in_json) == 'object'
                 casted_entity = path_in_json.astext.cast(JSONB)  # BOOLEANS?
-            elif isinstance(value, dict):
+            elif isinstance(value, list):
                 type_filter = jsonb_typeof(path_in_json) == 'array'
                 casted_entity = path_in_json.astext.cast(JSONB)  # BOOLEANS?
             elif isinstance(value, str):
@@ -661,10 +661,16 @@ def cast_according_to_type(path_in_json, value):
         elif operator == 'of_type':
             # http://www.postgresql.org/docs/9.5/static/functions-json.html
             #  Possible types are object, array, string, number, boolean, and null.
-            valid_types = ('object', 'array', 'string', 'number', 'boolean', 'null')
+            value_types = ('object', 'array', 'string', 'number', 'boolean')
+            null_types = ('null',)
+            valid_types = value_types + null_types
             if value not in valid_types:
                 raise ValueError(f'value {value} for of_type is not among valid types\n{valid_types}')
-            expr = jsonb_typeof(database_entity) == value
+            if value in value_types:
+                expr = jsonb_typeof(database_entity) == value
+            elif value in null_types:
+                tp = jsonb_typeof(database_entity)
+                expr = or_(tp == 'null', tp.is_(None))
         elif operator == 'like':
             type_filter, casted_entity = cast_according_to_type(database_entity, value)
             expr = case((type_filter, casted_entity.like(value)), else_=False)
diff --git a/tests/orm/test_querybuilder.py b/tests/orm/test_querybuilder.py
index 8797fe4e03..9cc780e4e9 100644
--- a/tests/orm/test_querybuilder.py
+++ b/tests/orm/test_querybuilder.py
@@ -1706,6 +1706,14 @@ def test_statistics_default_class(self, aiida_localhost):
 
 
 class TestJsonFilters:
+    @staticmethod
+    def assert_match(data, filters, is_match):
+        orm.Dict(data).store()
+        qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
+        assert qb.count() in {0, 1}
+        found = qb.count() == 1
+        assert found == is_match
+
     @pytest.mark.parametrize(
         'data,filters,is_match',
         (
@@ -1735,22 +1743,13 @@ class TestJsonFilters:
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
             ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
-            # TODO: these pass, but why? are these behaviors expected?
-            # non-exist `attr_key`s
-            ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
-            ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
         ),
         ids=json.dumps,
     )
     @pytest.mark.usefixtures('aiida_profile_clean')
-    @pytest.mark.requires_psql
     def test_json_filters_contains_arrays(self, data, filters, is_match):
         """Test QueryBuilder filter `contains` for JSON array fields"""
-        orm.Dict(data).store()
-        qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
-        assert qb.count() in {0, 1}
-        found = qb.count() == 1
-        assert found == is_match
+        self.assert_match(data, filters, is_match)
 
     @pytest.mark.parametrize(
         'data,filters,is_match',
@@ -1801,6 +1800,50 @@ def test_json_filters_contains_arrays(self, data, filters, is_match):
                 {'attributes.dict': {'contains': {}}},
                 True,
             ),
+            # nested dicts
+            (
+                {'dict': {'k1': {'k2': {'kx': 1, 'k3': 'secret'}, 'kxx': None}, 'kxxx': 'vxxx'}},
+                {'attributes.dict': {'contains': {'k1': {'k2': {'k3': 'secret'}}}}},
+                True,
+            ),
+            (
+                {
+                    'dict': {
+                        'k1': [
+                            0,
+                            1,
+                            {
+                                'k2': [
+                                    '0',
+                                    {
+                                        'kkk': 'vvv',
+                                        'k3': 'secret',
+                                    },
+                                    '2',
+                                ]
+                            },
+                            3,
+                        ],
+                        'kkk': 'vvv',
+                    }
+                },
+                {
+                    'attributes.dict': {
+                        'contains': {
+                            'k1': [
+                                {
+                                    'k2': [
+                                        {
+                                            'k3': 'secret',
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                },
+                True,
+            ),
             # doesn't contain non-exist entries
             (
                 {
@@ -1847,19 +1890,194 @@ def test_json_filters_contains_arrays(self, data, filters, is_match):
                 {'attributes.dict': {'!contains': {}}},
                 False,
             ),
-            # TODO: these pass, but why? are these behaviors expected?
-            # non-exist `attr_key`s
-            ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
-            ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
         ),
         ids=json.dumps,
     )
     @pytest.mark.usefixtures('aiida_profile_clean')
-    @pytest.mark.requires_psql
     def test_json_filters_contains_object(self, data, filters, is_match):
         """Test QueryBuilder filter `contains` for JSON object fields"""
-        orm.Dict(data).store()
-        qb = orm.QueryBuilder().append(orm.Dict, filters=filters)
-        assert qb.count() in {0, 1}
-        found = qb.count() == 1
-        assert found == is_match
+        self.assert_match(data, filters, is_match)
+
+    @pytest.mark.parametrize(
+        'data,filters,is_match',
+        (
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k1'}}, True),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k2'}}, True),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'k3'}}, True),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k1'}}, False),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k2'}}, False),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'k3'}}, False),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'has_key': 'non-exist'}}, False),
+            ({'dict': {'k1': 1, 'k2': '2', 'k3': None}}, {'attributes.dict': {'!has_key': 'non-exist'}}, True),
+            ({'dict': 0xFA15ED1C7}, {'attributes.dict': {'has_key': 'dict'}}, False),
+            ({'dict': 0xFA15ED1C7}, {'attributes.dict': {'!has_key': 'dict'}}, True),
+        ),
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    def test_json_filters_has_key(self, data, filters, is_match):
+        self.assert_match(data, filters, is_match)
+
+    @pytest.mark.parametrize(
+        'filters,matches',
+        (
+            # type match
+            ({'attributes.text': {'of_type': 'string'}}, 1),
+            ({'attributes.integer': {'of_type': 'number'}}, 1),
+            ({'attributes.float': {'of_type': 'number'}}, 1),
+            ({'attributes.true': {'of_type': 'boolean'}}, 1),
+            ({'attributes.false': {'of_type': 'boolean'}}, 1),
+            ({'attributes.null': {'of_type': 'null'}}, 2),
+            ({'attributes.list': {'of_type': 'array'}}, 1),
+            ({'attributes.dict': {'of_type': 'object'}}, 1),
+            # equality match
+            ({'attributes.text': {'==': 'abcXYZ'}}, 1),
+            ({'attributes.integer': {'==': 1}}, 1),
+            ({'attributes.float': {'==': 1.1}}, 1),
+            ({'attributes.true': {'==': True}}, 1),
+            ({'attributes.false': {'==': False}}, 1),
+            ({'attributes.list': {'==': [1, 2]}}, 1),
+            ({'attributes.list2': {'==': ['a', 'b']}}, 1),
+            ({'attributes.dict': {'==': {'key-1': 1, 'key-none': None}}}, 1),
+            # equality non-match
+            ({'attributes.text': {'==': 'lmn'}}, 0),
+            ({'attributes.integer': {'==': 2}}, 0),
+            ({'attributes.float': {'==': 2.2}}, 0),
+            ({'attributes.true': {'==': False}}, 0),
+            ({'attributes.false': {'==': True}}, 0),
+            ({'attributes.list': {'==': [1, 3]}}, 0),
+            # text pattern matching (SQL `like` / `ilike` wildcards)
+            ({'attributes.text': {'like': 'abcXYZ'}}, 1),
+            ({'attributes.text': {'like': 'abcxyz'}}, 0),
+            ({'attributes.text': {'ilike': 'abcxyz'}}, 1),
+            ({'attributes.text': {'like': 'abc%'}}, 1),
+            ({'attributes.text': {'like': 'abc_YZ'}}, 1),
+            (
+                {
+                    'attributes.text2': {
+                        'like': 'abc\\_XYZ'  # Literal match
+                    }
+                },
+                1,
+            ),
+            ({'attributes.text2': {'like': 'abc_XYZ'}}, 2),
+            # integer comparisons
+            ({'attributes.float': {'<': 1}}, 0),
+            ({'attributes.float': {'<': 2}}, 1),
+            ({'attributes.float': {'>': 2}}, 0),
+            ({'attributes.float': {'>': 0}}, 1),
+            ({'attributes.integer': {'<': 1}}, 0),
+            ({'attributes.integer': {'<': 2}}, 1),
+            ({'attributes.integer': {'>': 2}}, 0),
+            ({'attributes.integer': {'>': 0}}, 1),
+            # float comparisons
+            ({'attributes.float': {'<': 0.99}}, 0),
+            ({'attributes.float': {'<': 2.01}}, 1),
+            ({'attributes.float': {'>': 2.01}}, 0),
+            ({'attributes.float': {'>': 0.01}}, 1),
+            ({'attributes.integer': {'<': 0.99}}, 0),
+            ({'attributes.integer': {'<': 2.01}}, 1),
+            ({'attributes.integer': {'>': 2.01}}, 0),
+            ({'attributes.integer': {'>': 0.01}}, 1),
+            # array operators
+            ({'attributes.list': {'of_length': 0}}, 0),
+            ({'attributes.list': {'of_length': 2}}, 1),
+            ({'attributes.list': {'longer': 3}}, 0),
+            ({'attributes.list': {'longer': 1}}, 1),
+            ({'attributes.list': {'shorter': 1}}, 0),
+            ({'attributes.list': {'shorter': 3}}, 1),
+            # in operator
+            ({'attributes.text': {'in': ['x', 'y', 'z']}}, 0),
+            ({'attributes.text': {'in': ['x', 'y', 'abcXYZ']}}, 1),
+            ({'attributes.integer': {'in': [5, 6, 7]}}, 0),
+            ({'attributes.integer': {'in': [1, 2, 3]}}, 1),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    def test_json_filters(self, filters, matches):
+        """Test QueryBuilder filtering for JSON fields."""
+        orm.Dict(
+            {
+                'text': 'abcXYZ',
+                'text2': 'abc_XYZ',
+                'integer': 1,
+                'float': 1.1,
+                'true': True,
+                'false': False,
+                'null': None,
+                'list': [1, 2],
+                'list2': ['a', 'b'],
+                'dict': {
+                    'key-1': 1,
+                    'key-none': None,
+                },
+            },
+        ).store()
+        orm.Dict({'text2': 'abcxXYZ'}).store()
+
+        qbuilder = orm.QueryBuilder()
+        qbuilder.append(orm.Dict, filters=filters)
+        assert qbuilder.count() == matches
+
+    @pytest.mark.parametrize(
+        'filters,matches',
+        (
+            ({'label': {'like': 'abc_XYZ'}}, 2),
+            ({'label': {'like': 'abc\\_XYZ'}}, 1),
+            ({'label': {'like': 'abcxXYZ'}}, 1),
+            ({'label': {'like': 'abc%XYZ'}}, 2),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    def test_column_filters(self, filters, matches):
+        """Test filtering directly on values stored in database columns"""
+        dict1 = orm.Dict(
+            {
+                'text2': 'abc_XYZ',
+            }
+        ).store()
+        dict2 = orm.Dict({'text2': 'abcxXYZ'}).store()
+        dict1.label = 'abc_XYZ'
+        dict2.label = 'abcxXYZ'
+        qbuilder = orm.QueryBuilder()
+        qbuilder.append(orm.Dict, filters=filters)
+        assert qbuilder.count() == matches
+
+    @pytest.mark.parametrize(
+        'key,cast_type',
+        (
+            ('text', 't'),
+            ('integer', 'i'),
+            ('float', 'f'),
+        ),
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    def test_json_order_by(self, key, cast_type):
+        """Test QueryBuilder ordering by JSON field keys."""
+        dict1 = orm.Dict(
+            {
+                'text': 'b',
+                'integer': 2,
+                'float': 2.2,
+            }
+        ).store()
+        dict2 = orm.Dict(
+            {
+                'text': 'a',
+                'integer': 1,
+                'float': 1.1,
+            }
+        ).store()
+        dict3 = orm.Dict(
+            {
+                'text': 'c',
+                'integer': 3,
+                'float': 3.3,
+            }
+        ).store()
+        qbuilder = orm.QueryBuilder()
+        qbuilder.append(orm.Dict, tag='dict', project=['id']).order_by(
+            {'dict': {f'attributes.{key}': {'order': 'asc', 'cast': cast_type}}}
+        )
+        assert qbuilder.all(flat=True) == [dict2.pk, dict1.pk, dict3.pk]
diff --git a/tests/storage/sqlite/test_orm.py b/tests/storage/sqlite/test_orm.py
deleted file mode 100644
index 6fb9dc077a..0000000000
--- a/tests/storage/sqlite/test_orm.py
+++ /dev/null
@@ -1,358 +0,0 @@
-###########################################################################
-# Copyright (c), The AiiDA team. All rights reserved.                     #
-# This file is part of the AiiDA code.                                    #
-#                                                                         #
-# The code is hosted on GitHub at https://github.com/aiidateam/aiida-core #
-# For further information on the license, see the LICENSE.txt file        #
-# For further information please visit http://www.aiida.net               #
-###########################################################################
-"""Test for the ORM implementation."""
-
-import json
-
-import pytest
-from aiida.orm import Dict, QueryBuilder
-from aiida.storage.sqlite_temp import SqliteTempBackend
-
-
-@pytest.mark.parametrize(
-    'filters,matches',
-    (
-        # type match
-        ({'attributes.text': {'of_type': 'string'}}, 1),
-        ({'attributes.integer': {'of_type': 'number'}}, 1),
-        ({'attributes.float': {'of_type': 'number'}}, 1),
-        ({'attributes.true': {'of_type': 'boolean'}}, 1),
-        ({'attributes.false': {'of_type': 'boolean'}}, 1),
-        ({'attributes.null': {'of_type': 'null'}}, 3),
-        ({'attributes.list': {'of_type': 'array'}}, 1),
-        ({'attributes.dict': {'of_type': 'object'}}, 1),
-        # equality match
-        ({'attributes.text': {'==': 'abcXYZ'}}, 1),
-        ({'attributes.integer': {'==': 1}}, 1),
-        ({'attributes.float': {'==': 1.1}}, 1),
-        ({'attributes.true': {'==': True}}, 1),
-        ({'attributes.false': {'==': False}}, 1),
-        ({'attributes.list': {'==': [1, 2]}}, 1),
-        ({'attributes.list2': {'==': ['a', 'b']}}, 1),
-        ({'attributes.dict': {'==': {'key-1': 1, 'key-none': None}}}, 1),
-        # equality non-match
-        ({'attributes.text': {'==': 'lmn'}}, 0),
-        ({'attributes.integer': {'==': 2}}, 0),
-        ({'attributes.float': {'==': 2.2}}, 0),
-        ({'attributes.true': {'==': False}}, 0),
-        ({'attributes.false': {'==': True}}, 0),
-        ({'attributes.list': {'==': [1, 3]}}, 0),
-        # text regexes
-        ({'attributes.text': {'like': 'abcXYZ'}}, 1),
-        ({'attributes.text': {'like': 'abcxyz'}}, 0),
-        ({'attributes.text': {'ilike': 'abcxyz'}}, 1),
-        ({'attributes.text': {'like': 'abc%'}}, 1),
-        ({'attributes.text': {'like': 'abc_YZ'}}, 1),
-        (
-            {
-                'attributes.text2': {
-                    'like': 'abc\\_XYZ'  # Literal match
-                }
-            },
-            1,
-        ),
-        ({'attributes.text2': {'like': 'abc_XYZ'}}, 2),
-        # integer comparisons
-        ({'attributes.float': {'<': 1}}, 0),
-        ({'attributes.float': {'<': 2}}, 1),
-        ({'attributes.float': {'>': 2}}, 0),
-        ({'attributes.float': {'>': 0}}, 1),
-        ({'attributes.integer': {'<': 1}}, 0),
-        ({'attributes.integer': {'<': 2}}, 1),
-        ({'attributes.integer': {'>': 2}}, 0),
-        ({'attributes.integer': {'>': 0}}, 1),
-        # float comparisons
-        ({'attributes.float': {'<': 0.99}}, 0),
-        ({'attributes.float': {'<': 2.01}}, 1),
-        ({'attributes.float': {'>': 2.01}}, 0),
-        ({'attributes.float': {'>': 0.01}}, 1),
-        ({'attributes.integer': {'<': 0.99}}, 0),
-        ({'attributes.integer': {'<': 2.01}}, 1),
-        ({'attributes.integer': {'>': 2.01}}, 0),
-        ({'attributes.integer': {'>': 0.01}}, 1),
-        # array operators
-        ({'attributes.list': {'of_length': 0}}, 0),
-        ({'attributes.list': {'of_length': 2}}, 1),
-        ({'attributes.list': {'longer': 3}}, 0),
-        ({'attributes.list': {'longer': 1}}, 1),
-        ({'attributes.list': {'shorter': 1}}, 0),
-        ({'attributes.list': {'shorter': 3}}, 1),
-        # in operator
-        ({'attributes.text': {'in': ['x', 'y', 'z']}}, 0),
-        ({'attributes.text': {'in': ['x', 'y', 'abcXYZ']}}, 1),
-        ({'attributes.integer': {'in': [5, 6, 7]}}, 0),
-        ({'attributes.integer': {'in': [1, 2, 3]}}, 1),
-        # object operators
-        ({'attributes.dict': {'has_key': 'non-exist'}}, 0),
-        ({'attributes.dict': {'!has_key': 'non-exist'}}, 3),
-        ({'attributes.dict': {'has_key': 'key-1'}}, 1),
-        ({'attributes.dict': {'has_key': 'key-none'}}, 1),
-        ({'attributes.dict': {'!has_key': 'key-none'}}, 2),
-    ),
-    ids=json.dumps,
-)
-def test_qb_json_filters(filters, matches):
-    """Test QueryBuilder filtering for JSON fields."""
-    profile = SqliteTempBackend.create_profile(debug=False)
-    backend = SqliteTempBackend(profile)
-    Dict(
-        {
-            'text': 'abcXYZ',
-            'text2': 'abc_XYZ',
-            'integer': 1,
-            'float': 1.1,
-            'true': True,
-            'false': False,
-            'null': None,
-            'list': [1, 2],
-            'list2': ['a', 'b'],
-            'dict': {
-                'key-1': 1,
-                'key-none': None,
-            },
-        },
-        backend=backend,
-    ).store()
-    Dict({'text2': 'abcxXYZ'}, backend=backend).store()
-
-    # a false dict, added to test `has_key`'s behavior when key is not of json type
-    Dict({'dict': 0xFA15ED1C7}, backend=backend).store()
-
-    qbuilder = QueryBuilder(backend=backend)
-    qbuilder.append(Dict, filters=filters)
-    assert qbuilder.count() == matches
-
-
-class TestJsonFilters:
-    @pytest.mark.parametrize(
-        'data,filters,is_match',
-        (
-            # contains different types of element
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1]}}, True),
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': ['2']}}, True),
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [None]}}, True),
-            # contains multiple elements of various types
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [1, None]}}, True),
-            # contains non-exist elements
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': [114514]}}, False),
-            # contains empty set
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'contains': []}}, True),
-            ({'arr': []}, {'attributes.arr': {'contains': []}}, True),
-            # nested arrays
-            ({'arr': [[1, 0], [0, 2]]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),
-            ({'arr': [[2, 3], [0, 1], []]}, {'attributes.arr': {'contains': [[1, 0]]}}, True),  # order doesn't matter
-            ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[4]]}}, False),
-            # TODO: the test below is supposed to pass but currently doesn't
-            # ({'arr': [[2, 3], [1]]}, {'attributes.arr': {'contains': [[2]]}}, False),
-            # negations
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1]}}, False),
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': []}}, False),
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [114514]}}, True),
-            ({'arr': [1, '2', None]}, {'attributes.arr': {'!contains': [1, 114514]}}, True),
-            # TODO: these pass, but why? are these behaviors expected?
-            # non-exist `attr_key`s
-            ({'foo': []}, {'attributes.arr': {'contains': []}}, False),
-            # ({'foo': []}, {'attributes.arr': {'!contains': []}}, False),
-        ),
-        ids=json.dumps,
-    )
-    @pytest.mark.usefixtures('aiida_profile_clean')
-    def test_json_filters_contains_arrays(self, data, filters, is_match):
-        """Test QueryBuilder filter `contains` for JSON array fields"""
-        profile = SqliteTempBackend.create_profile(debug=False)
-        backend = SqliteTempBackend(profile)
-        Dict(data, backend=backend).store()
-        qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
-        assert qb.count() in {0, 1}
-        found = qb.count() == 1
-        assert found == is_match
-
-    @pytest.mark.parametrize(
-        'data,filters,is_match',
-        (
-            # contains different types of values
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'contains': {'k1': 1}}},
-                True,
-            ),
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'contains': {'k1': 1, 'k2': '2'}}},
-                True,
-            ),
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'contains': {'k3': None}}},
-                True,
-            ),
-            # contains empty set
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'contains': {}}},
-                True,
-            ),
-            # doesn't contain non-exist entries
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'contains': {'k1': 1, 'k': 'v'}}},
-                False,
-            ),
-            # negations
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'!contains': {'k1': 1}}},
-                False,
-            ),
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'!contains': {'k1': 1, 'k': 'v'}}},
-                True,
-            ),
-            (
-                {
-                    'dict': {
-                        'k1': 1,
-                        'k2': '2',
-                        'k3': None,
-                    }
-                },
-                {'attributes.dict': {'!contains': {}}},
-                False,
-            ),
-            # TODO: these pass, but why? are these behaviors expected?
-            # non-exist `attr_key`s
-            ({'map': {}}, {'attributes.dict': {'contains': {}}}, False),
-            # ({'map': {}}, {'attributes.dict': {'!contains': {}}}, False),
-        ),
-        ids=json.dumps,
-    )
-    @pytest.mark.usefixtures('aiida_profile_clean')
-    def test_json_filters_contains_object(self, data, filters, is_match):
-        """Test QueryBuilder filter `contains` for JSON object fields"""
-        profile = SqliteTempBackend.create_profile(debug=False)
-        backend = SqliteTempBackend(profile)
-        Dict(data, backend=backend).store()
-        qb = QueryBuilder(backend=backend).append(Dict, filters=filters)
-        assert qb.count() in {0, 1}
-        found = qb.count() == 1
-        assert found == is_match
-
-
-@pytest.mark.parametrize(
-    'filters,matches',
-    (
-        ({'label': {'like': 'abc_XYZ'}}, 2),
-        ({'label': {'like': 'abc\\_XYZ'}}, 1),
-        ({'label': {'like': 'abcxXYZ'}}, 1),
-        ({'label': {'like': 'abc%XYZ'}}, 2),
-    ),
-    ids=json.dumps,
-)
-def test_qb_column_filters(filters, matches):
-    """Test querying directly those stored in the columns"""
-    profile = SqliteTempBackend.create_profile(debug=False)
-    backend = SqliteTempBackend(profile)
-    dict1 = Dict(
-        {
-            'text2': 'abc_XYZ',
-        },
-        backend=backend,
-    ).store()
-    dict2 = Dict({'text2': 'abcxXYZ'}, backend=backend).store()
-    dict1.label = 'abc_XYZ'
-    dict2.label = 'abcxXYZ'
-    qbuilder = QueryBuilder(backend=backend)
-    qbuilder.append(Dict, filters=filters)
-    assert qbuilder.count() == matches
-
-
-@pytest.mark.parametrize(
-    'key,cast_type',
-    (
-        ('text', 't'),
-        ('integer', 'i'),
-        ('float', 'f'),
-    ),
-)
-def test_qb_json_order_by(key, cast_type):
-    """Test QueryBuilder ordering by JSON field keys."""
-    profile = SqliteTempBackend.create_profile(debug=False)
-    backend = SqliteTempBackend(profile)
-    dict1 = Dict(
-        {
-            'text': 'b',
-            'integer': 2,
-            'float': 2.2,
-        },
-        backend=backend,
-    ).store()
-    dict2 = Dict(
-        {
-            'text': 'a',
-            'integer': 1,
-            'float': 1.1,
-        },
-        backend=backend,
-    ).store()
-    dict3 = Dict(
-        {
-            'text': 'c',
-            'integer': 3,
-            'float': 3.3,
-        },
-        backend=backend,
-    ).store()
-    qbuilder = QueryBuilder(backend=backend)
-    qbuilder.append(Dict, tag='dict', project=['id']).order_by(
-        {'dict': {f'attributes.{key}': {'order': 'asc', 'cast': cast_type}}}
-    )
-    assert qbuilder.all(flat=True) == [dict2.pk, dict1.pk, dict3.pk]

From ffa0b111a0b48dc4d2af2dab2371e0f7c20ef71c Mon Sep 17 00:00:00 2001
From: Zisen Liu <karl.liu.1024@gmail.com>
Date: Tue, 26 Nov 2024 11:58:59 +0100
Subject: [PATCH 19/23] add comment on impl for psql of_type

---
 src/aiida/storage/psql_dos/orm/querybuilder/main.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/aiida/storage/psql_dos/orm/querybuilder/main.py b/src/aiida/storage/psql_dos/orm/querybuilder/main.py
index 743b397760..490114f5f5 100644
--- a/src/aiida/storage/psql_dos/orm/querybuilder/main.py
+++ b/src/aiida/storage/psql_dos/orm/querybuilder/main.py
@@ -669,6 +669,9 @@ def cast_according_to_type(path_in_json, value):
             if value in value_types:
                 expr = jsonb_typeof(database_entity) == value
             elif value in null_types:
+                # https://www.postgresql.org/docs/current/functions-json.html
+                # a stored JSON null has type 'null': json_typeof('null'::json) → null
+                # an SQL NULL input (e.g. absent path) gives NULL: json_typeof(NULL::json) IS NULL → t
                 tp = jsonb_typeof(database_entity)
                 expr = or_(tp == 'null', tp.is_(None))
         elif operator == 'like':

From c69948460c23a9adb287283331382a1c4b1beb9a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 5 Dec 2024 10:13:24 +0000
Subject: [PATCH 20/23] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/benchmark/test_json_contains.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/benchmark/test_json_contains.py b/tests/benchmark/test_json_contains.py
index 87ecd3f3a2..3ec2393b17 100644
--- a/tests/benchmark/test_json_contains.py
+++ b/tests/benchmark/test_json_contains.py
@@ -2,6 +2,7 @@
 import string
 
 import pytest
+
 from aiida import orm
 from aiida.orm.querybuilder import QueryBuilder
 

From af544e7389173018fd72280159bb89df66e24e73 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 10 Dec 2024 16:22:30 +0100
Subject: [PATCH 21/23] add tests for custom functions

---
 tests/storage/sqlite_zip/test_utils.py | 131 +++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 tests/storage/sqlite_zip/test_utils.py

diff --git a/tests/storage/sqlite_zip/test_utils.py b/tests/storage/sqlite_zip/test_utils.py
new file mode 100644
index 0000000000..5069b4612a
--- /dev/null
+++ b/tests/storage/sqlite_zip/test_utils.py
@@ -0,0 +1,131 @@
+import json
+
+import pytest
+
+from aiida.storage.sqlite_zip.utils import _contains, _json_contains
+
+
+class TestCustomFunction:
+    @pytest.mark.parametrize(
+        'lhs,rhs,is_match',
+        (
+            # contains different types of element
+            ([1, '2', None], [1], True),
+            ([1, '2', None], ['2'], True),
+            ([1, '2', None], [None], True),
+            # contains multiple elements of various types
+            ([1, '2', None], [1, None], True),
+            # contains non-existent elements
+            ([1, '2', None], [114514], False),
+            # contains empty set
+            ([1, '2', None], [], True),
+            ([], [], True),
+            # nested arrays
+            ([[1, 0], [0, 2]], [[1, 0]], True),
+            ([[2, 3], [0, 1], []], [[1, 0]], True),
+            ([[2, 3], [1]], [[4]], False),
+            ([[1, 0], [0, 2]], [[3]], False),
+            ([[1, 0], [0, 2]], [3], False),
+            ([[1, 0], [0, 2]], [[2]], True),
+            ([[1, 0], [0, 2]], [2], False),
+            ([[1, 0], [0, 2], 3], [[3]], False),
+            ([[1, 0], [0, 2], 3], [3], True),
+            # contains different types of values
+            (
+                {
+                    'k1': 1,
+                    'k2': '2',
+                    'k3': None,
+                },
+                {'k1': 1},
+                True,
+            ),
+            (
+                {
+                    'k1': 1,
+                    'k2': '2',
+                    'k3': None,
+                },
+                {'k1': 1, 'k2': '2'},
+                True,
+            ),
+            (
+                {
+                    'k1': 1,
+                    'k2': '2',
+                    'k3': None,
+                },
+                {'k3': None},
+                True,
+            ),
+            # contains empty set
+            (
+                {
+                    'k1': 1,
+                    'k2': '2',
+                    'k3': None,
+                },
+                {},
+                True,
+            ),
+            # nested dicts
+            (
+                {'k1': {'k2': {'kx': 1, 'k3': 'secret'}, 'kxx': None}, 'kxxx': 'vxxx'},
+                {'k1': {'k2': {'k3': 'secret'}}},
+                True,
+            ),
+            (
+                {
+                    'k1': [
+                        0,
+                        1,
+                        {
+                            'k2': [
+                                '0',
+                                {
+                                    'kkk': 'vvv',
+                                    'k3': 'secret',
+                                },
+                                '2',
+                            ]
+                        },
+                        3,
+                    ],
+                    'kkk': 'vvv',
+                },
+                {
+                    'k1': [
+                        {
+                            'k2': [
+                                {
+                                    'k3': 'secret',
+                                }
+                            ]
+                        }
+                    ]
+                },
+                True,
+            ),
+            # doesn't contain non-existent entries
+            (
+                {
+                    'k1': 1,
+                    'k2': '2',
+                    'k3': None,
+                },
+                {'k1': 1, 'k': 'v'},
+                False,
+            ),
+        ),
+        ids=json.dumps,
+    )
+    @pytest.mark.usefixtures('aiida_profile_clean')
+    def test_json_contains(self, lhs, rhs, is_match):
+        """Test custom functions `_contains` and `_json_contains` with raw and JSON-encoded arrays and objects"""
+        lhs_json = json.dumps(lhs)
+        rhs_json = json.dumps(rhs)
+        assert is_match == _contains(lhs, rhs)
+        assert is_match == _json_contains(lhs, rhs)
+        assert is_match == _json_contains(lhs_json, rhs)
+        assert is_match == _json_contains(lhs, rhs_json)
+        assert is_match == _json_contains(lhs_json, rhs_json)

From fbb7ee332fc921673f9c4dd89dd362babfa675a3 Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 10 Dec 2024 17:03:08 +0100
Subject: [PATCH 22/23] enable sqlite database backend testing in github
 actions

---
 .github/workflows/test-install.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test-install.yml b/.github/workflows/test-install.yml
index 7a0076fbbd..b8cf7d8e97 100644
--- a/.github/workflows/test-install.yml
+++ b/.github/workflows/test-install.yml
@@ -163,6 +163,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12']
+        database-backend: [psql, sqlite]
 
     services:
       postgres:
@@ -208,4 +209,4 @@ jobs:
       env:
         AIIDA_TEST_PROFILE: test_aiida
         AIIDA_WARN_v3: 1
-      run: pytest -n auto --db-backend psql tests -m 'not nightly' tests/
+      run: pytest -n auto --db-backend ${{ matrix.database-backend }} tests -m 'not nightly' tests/

From 57beea7c2e0de6c6cdd012e7c82d809a72650a7b Mon Sep 17 00:00:00 2001
From: Karl Liu <karl.liu.1024@gmail.com>
Date: Tue, 10 Dec 2024 17:19:38 +0100
Subject: [PATCH 23/23] add sqlite to coverage report workflow

---
 .github/workflows/ci-code.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci-code.yml b/.github/workflows/ci-code.yml
index 8346d9fdea..b08f6157b5 100644
--- a/.github/workflows/ci-code.yml
+++ b/.github/workflows/ci-code.yml
@@ -27,6 +27,7 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ['3.9', '3.12']
+        database-backend: [psql, sqlite]
 
     services:
       postgres:
@@ -73,7 +74,7 @@ jobs:
         AIIDA_WARN_v3: 1
       # Python 3.12 has a performance regression when running with code coverage
       # so run code coverage only for python 3.9.
-      run: uv run pytest -n auto --db-backend psql -m 'not nightly' tests/ ${{ matrix.python-version == '3.9' && '--cov aiida' || '' }}
+      run: uv run pytest -n auto --db-backend ${{ matrix.database-backend }} -m 'not nightly' tests/ ${{ matrix.python-version == '3.9' && '--cov aiida' || '' }}
 
     - name: Upload coverage report
       if: matrix.python-version == 3.9 && github.repository == 'aiidateam/aiida-core'