From 9432e9656101ebd2e2f97c59ff0943a41f355a8f Mon Sep 17 00:00:00 2001 From: Christoph Zwerschke Date: Tue, 6 Aug 2024 10:46:46 +0200 Subject: [PATCH] Support multi valued facets (#29) --- .pre-commit-config.yaml | 2 +- .readme_generation/description.md | 5 +- README.md | 9 +- config_schema.json | 4 +- lock/requirements-dev.txt | 215 ++++++++++-------- lock/requirements.txt | 13 +- openapi.yaml | 5 +- .../adapters/inbound/fastapi_/configure.py | 8 +- src/mass/adapters/outbound/utils.py | 60 ++++- src/mass/core/models.py | 4 +- tests/fixtures/test_config.yaml | 16 +- tests/fixtures/test_data/FilteringTests.json | 51 +++++ .../{DatasetEmbedded.json => NestedData.json} | 26 +-- tests/test_api.py | 26 ++- tests/test_consumer.py | 22 +- tests/test_filtering.py | 159 +++++++++++++ tests/test_logging.py | 4 +- tests/test_resources.py | 67 +++--- tests/test_sorting.py | 8 +- 19 files changed, 500 insertions(+), 204 deletions(-) create mode 100644 tests/fixtures/test_data/FilteringTests.json rename tests/fixtures/test_data/{DatasetEmbedded.json => NestedData.json} (76%) create mode 100644 tests/test_filtering.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5d79d76..ebaea52 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,7 +48,7 @@ repos: - id: no-commit-to-branch args: [--branch, dev, --branch, int, --branch, main] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.5 + rev: v0.5.6 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/.readme_generation/description.md b/.readme_generation/description.md index 8cd1d98..0bb99ae 100644 --- a/.readme_generation/description.md +++ b/.readme_generation/description.md @@ -15,7 +15,8 @@ occur in these embedded classes, too. Along with the hits, facet options are reported that can be used to filter down the hits by performing the same search query again but with specific facet selections being set. -The search endpoint supports pagination to deal with large hit lists. Facet options can -help avoid having to rely on this feature by filtering down the number of hits to a single page. +The search endpoint supports pagination to deal with a large number of search results. +Facet options can help avoid having to rely on this feature by filtering down the number +of hits to a single page. For more information see the OpenAPI spec linked below. diff --git a/README.md b/README.md index 992f5dd..cbdc143 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,9 @@ occur in these embedded classes, too. Along with the hits, facet options are reported that can be used to filter down the hits by performing the same search query again but with specific facet selections being set. -The search endpoint supports pagination to deal with large hit lists. Facet options can -help avoid having to rely on this feature by filtering down the number of hits to a single page. +The search endpoint supports pagination to deal with a large number of search results. +Facet options can help avoid having to rely on this feature by filtering down the number +of hits to a single page. For more information see the OpenAPI spec linked below. @@ -317,11 +318,11 @@ The service requires the following configuration parameters: - **`description`** *(string, required)*: A brief description of the resource type. - - **`facetable_fields`** *(array)*: A list of the facetable fields for the resource type (leave empty to not use faceting). Default: `[]`. + - **`facetable_fields`** *(array)*: A list of the facetable fields for the resource type (leave empty to not use faceting, use dotted notation for nested fields). Default: `[]`. - **Items**: Refer to *[#/$defs/FieldLabel](#%24defs/FieldLabel)*. - - **`selected_fields`** *(array)*: A list of the returned fields for the resource type (leave empty to return all). Default: `[]`. + - **`selected_fields`** *(array)*: A list of the returned fields for the resource type (leave empty to return all, use dotted notation for nested fields). Default: `[]`. - **Items**: Refer to *[#/$defs/FieldLabel](#%24defs/FieldLabel)*. diff --git a/config_schema.json b/config_schema.json index ce6fbfc..4acff5c 100644 --- a/config_schema.json +++ b/config_schema.json @@ -31,7 +31,7 @@ }, "facetable_fields": { "default": [], - "description": "A list of the facetable fields for the resource type (leave empty to not use faceting)", + "description": "A list of the facetable fields for the resource type (leave empty to not use faceting, use dotted notation for nested fields)", "items": { "$ref": "#/$defs/FieldLabel" }, @@ -40,7 +40,7 @@ }, "selected_fields": { "default": [], - "description": "A list of the returned fields for the resource type (leave empty to return all)", + "description": "A list of the returned fields for the resource type (leave empty to return all, use dotted notation for nested fields)", "items": { "$ref": "#/$defs/FieldLabel" }, diff --git a/lock/requirements-dev.txt b/lock/requirements-dev.txt index 22902da..a980690 100644 --- a/lock/requirements-dev.txt +++ b/lock/requirements-dev.txt @@ -46,9 +46,9 @@ async-timeout==4.0.3 \ --hash=sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f \ --hash=sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028 # via aiokafka -attrs==23.2.0 \ - --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ - --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +attrs==24.1.0 \ + --hash=sha256:377b47448cb61fea38533f671fba0d0f8a96fd58facd4dc518e3dac9dbea0905 \ + --hash=sha256:adbdec84af72d38be7628e353a09b6a6790d15cd71819f6e9d7b0faa8a125745 # via # jsonschema # referencing @@ -162,59 +162,79 @@ click==8.1.7 \ # -r lock/requirements-dev-template.in # typer # uvicorn -coverage==7.6.0 \ - --hash=sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382 \ - --hash=sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1 \ - --hash=sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac \ - --hash=sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee \ - --hash=sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166 \ - --hash=sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57 \ - --hash=sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c \ - --hash=sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b \ - --hash=sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51 \ - --hash=sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da \ - --hash=sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450 \ - --hash=sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2 \ - --hash=sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd \ - --hash=sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d \ - --hash=sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d \ - --hash=sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6 \ - --hash=sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca \ - --hash=sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169 \ - --hash=sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1 \ - --hash=sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713 \ - --hash=sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b \ - --hash=sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6 \ - --hash=sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c \ - --hash=sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605 \ - --hash=sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463 \ - --hash=sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b \ - --hash=sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6 \ - --hash=sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5 \ - --hash=sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63 \ - --hash=sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c \ - --hash=sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783 \ - --hash=sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44 \ - --hash=sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca \ - --hash=sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8 \ - --hash=sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d \ - --hash=sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390 \ - --hash=sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933 \ - --hash=sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67 \ - --hash=sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b \ - --hash=sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03 \ - --hash=sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b \ - --hash=sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791 \ - --hash=sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb \ - --hash=sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807 \ - --hash=sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6 \ - --hash=sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2 \ - --hash=sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428 \ - --hash=sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd \ - --hash=sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c \ - --hash=sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94 \ - --hash=sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8 \ - --hash=sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b +coverage==7.6.1 \ + --hash=sha256:06a737c882bd26d0d6ee7269b20b12f14a8704807a01056c80bb881a4b2ce6ca \ + --hash=sha256:07e2ca0ad381b91350c0ed49d52699b625aab2b44b65e1b4e02fa9df0e92ad2d \ + --hash=sha256:0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6 \ + --hash=sha256:0dbde0f4aa9a16fa4d754356a8f2e36296ff4d83994b2c9d8398aa32f222f989 \ + --hash=sha256:1125ca0e5fd475cbbba3bb67ae20bd2c23a98fac4e32412883f9bcbaa81c314c \ + --hash=sha256:13b0a73a0896988f053e4fbb7de6d93388e6dd292b0d87ee51d106f2c11b465b \ + --hash=sha256:166811d20dfea725e2e4baa71fffd6c968a958577848d2131f39b60043400223 \ + --hash=sha256:170d444ab405852903b7d04ea9ae9b98f98ab6d7e63e1115e82620807519797f \ + --hash=sha256:1f4aa8219db826ce6be7099d559f8ec311549bfc4046f7f9fe9b5cea5c581c56 \ + --hash=sha256:225667980479a17db1048cb2bf8bfb39b8e5be8f164b8f6628b64f78a72cf9d3 \ + --hash=sha256:260933720fdcd75340e7dbe9060655aff3af1f0c5d20f46b57f262ab6c86a5e8 \ + --hash=sha256:2bdb062ea438f22d99cba0d7829c2ef0af1d768d1e4a4f528087224c90b132cb \ + --hash=sha256:2c09f4ce52cb99dd7505cd0fc8e0e37c77b87f46bc9c1eb03fe3bc9991085388 \ + --hash=sha256:3115a95daa9bdba70aea750db7b96b37259a81a709223c8448fa97727d546fe0 \ + --hash=sha256:3e0cadcf6733c09154b461f1ca72d5416635e5e4ec4e536192180d34ec160f8a \ + --hash=sha256:3f1156e3e8f2872197af3840d8ad307a9dd18e615dc64d9ee41696f287c57ad8 \ + --hash=sha256:4421712dbfc5562150f7554f13dde997a2e932a6b5f352edcce948a815efee6f \ + --hash=sha256:44df346d5215a8c0e360307d46ffaabe0f5d3502c8a1cefd700b34baf31d411a \ + --hash=sha256:502753043567491d3ff6d08629270127e0c31d4184c4c8d98f92c26f65019962 \ + --hash=sha256:547f45fa1a93154bd82050a7f3cddbc1a7a4dd2a9bf5cb7d06f4ae29fe94eaf8 \ + --hash=sha256:5621a9175cf9d0b0c84c2ef2b12e9f5f5071357c4d2ea6ca1cf01814f45d2391 \ + --hash=sha256:609b06f178fe8e9f89ef676532760ec0b4deea15e9969bf754b37f7c40326dbc \ + --hash=sha256:645786266c8f18a931b65bfcefdbf6952dd0dea98feee39bd188607a9d307ed2 \ + --hash=sha256:6878ef48d4227aace338d88c48738a4258213cd7b74fd9a3d4d7582bb1d8a155 \ + --hash=sha256:6a89ecca80709d4076b95f89f308544ec8f7b4727e8a547913a35f16717856cb \ + --hash=sha256:6db04803b6c7291985a761004e9060b2bca08da6d04f26a7f2294b8623a0c1a0 \ + --hash=sha256:6e2cd258d7d927d09493c8df1ce9174ad01b381d4729a9d8d4e38670ca24774c \ + --hash=sha256:6e81d7a3e58882450ec4186ca59a3f20a5d4440f25b1cff6f0902ad890e6748a \ + --hash=sha256:702855feff378050ae4f741045e19a32d57d19f3e0676d589df0575008ea5004 \ + --hash=sha256:78b260de9790fd81e69401c2dc8b17da47c8038176a79092a89cb2b7d945d060 \ + --hash=sha256:7bb65125fcbef8d989fa1dd0e8a060999497629ca5b0efbca209588a73356232 \ + --hash=sha256:7dea0889685db8550f839fa202744652e87c60015029ce3f60e006f8c4462c93 \ + --hash=sha256:8284cf8c0dd272a247bc154eb6c95548722dce90d098c17a883ed36e67cdb129 \ + --hash=sha256:877abb17e6339d96bf08e7a622d05095e72b71f8afd8a9fefc82cf30ed944163 \ + --hash=sha256:8929543a7192c13d177b770008bc4e8119f2e1f881d563fc6b6305d2d0ebe9de \ + --hash=sha256:8ae539519c4c040c5ffd0632784e21b2f03fc1340752af711f33e5be83a9d6c6 \ + --hash=sha256:8f59d57baca39b32db42b83b2a7ba6f47ad9c394ec2076b084c3f029b7afca23 \ + --hash=sha256:9054a0754de38d9dbd01a46621636689124d666bad1936d76c0341f7d71bf569 \ + --hash=sha256:953510dfb7b12ab69d20135a0662397f077c59b1e6379a768e97c59d852ee51d \ + --hash=sha256:95cae0efeb032af8458fc27d191f85d1717b1d4e49f7cb226cf526ff28179778 \ + --hash=sha256:9bc572be474cafb617672c43fe989d6e48d3c83af02ce8de73fff1c6bb3c198d \ + --hash=sha256:9c56863d44bd1c4fe2abb8a4d6f5371d197f1ac0ebdee542f07f35895fc07f36 \ + --hash=sha256:9e0b2df163b8ed01d515807af24f63de04bebcecbd6c3bfeff88385789fdf75a \ + --hash=sha256:a09ece4a69cf399510c8ab25e0950d9cf2b42f7b3cb0374f95d2e2ff594478a6 \ + --hash=sha256:a1ac0ae2b8bd743b88ed0502544847c3053d7171a3cff9228af618a068ed9c34 \ + --hash=sha256:a318d68e92e80af8b00fa99609796fdbcdfef3629c77c6283566c6f02c6d6704 \ + --hash=sha256:a4acd025ecc06185ba2b801f2de85546e0b8ac787cf9d3b06e7e2a69f925b106 \ + --hash=sha256:a6d3adcf24b624a7b778533480e32434a39ad8fa30c315208f6d3e5542aeb6e9 \ + --hash=sha256:a78d169acd38300060b28d600344a803628c3fd585c912cacc9ea8790fe96862 \ + --hash=sha256:a95324a9de9650a729239daea117df21f4b9868ce32e63f8b650ebe6cef5595b \ + --hash=sha256:abd5fd0db5f4dc9289408aaf34908072f805ff7792632250dcb36dc591d24255 \ + --hash=sha256:b06079abebbc0e89e6163b8e8f0e16270124c154dc6e4a47b413dd538859af16 \ + --hash=sha256:b43c03669dc4618ec25270b06ecd3ee4fa94c7f9b3c14bae6571ca00ef98b0d3 \ + --hash=sha256:b48f312cca9621272ae49008c7f613337c53fadca647d6384cc129d2996d1133 \ + --hash=sha256:b5d7b556859dd85f3a541db6a4e0167b86e7273e1cdc973e5b175166bb634fdb \ + --hash=sha256:b9f222de8cded79c49bf184bdbc06630d4c58eec9459b939b4a690c82ed05657 \ + --hash=sha256:c3c02d12f837d9683e5ab2f3d9844dc57655b92c74e286c262e0fc54213c216d \ + --hash=sha256:c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca \ + --hash=sha256:cf4b19715bccd7ee27b6b120e7e9dd56037b9c0681dcc1adc9ba9db3d417fa36 \ + --hash=sha256:d0c212c49b6c10e6951362f7c6df3329f04c2b1c28499563d4035d964ab8e08c \ + --hash=sha256:d3296782ca4eab572a1a4eca686d8bfb00226300dcefdf43faa25b5242ab8a3e \ + --hash=sha256:d85f5e9a5f8b73e2350097c3756ef7e785f55bd71205defa0bfdaf96c31616ff \ + --hash=sha256:da511e6ad4f7323ee5702e6633085fb76c2f893aaf8ce4c51a0ba4fc07580ea7 \ + --hash=sha256:e05882b70b87a18d937ca6768ff33cc3f72847cbc4de4491c8e73880766718e5 \ + --hash=sha256:e61c0abb4c85b095a784ef23fdd4aede7a2628478e7baba7c5e3deba61070a02 \ + --hash=sha256:e6a08c0be454c3b3beb105c0596ebdc2371fab6bb90c0c0297f4e58fd7e1012c \ + --hash=sha256:e9a6e0eb86070e8ccaedfbd9d38fec54864f3125ab95419970575b42af7541df \ + --hash=sha256:ed37bd3c3b063412f7620464a9ac1314d33100329f39799255fb8d3027da50d3 \ + --hash=sha256:f1adfc8ac319e1a348af294106bc6a8458a0f1633cc62a1446aebc30c5fa186a \ + --hash=sha256:f5796e664fe802da4f57a168c85359a8fbf3eab5e55cd4e4569fbacecc903959 \ + --hash=sha256:fc5a77d0c516700ebad189b587de289a20a78324bc54baee03dd486f0855d234 \ + --hash=sha256:fd21f6ae3f08b41004dfb433fa895d858f3f5979e7762d052b12aef444e29afc # via pytest-cov distlib==0.3.8 \ --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ @@ -240,9 +260,9 @@ fastapi==0.111.1 \ --hash=sha256:4f51cfa25d72f9fbc3280832e84b32494cf186f50158d364a8765aabf22587bf \ --hash=sha256:ddd1ac34cb1f76c2e2d7f8545a4bcb5463bce4834e81abf0b189e0c359ab2413 # via ghga-service-commons -fastapi-cli==0.0.4 \ - --hash=sha256:a2552f3a7ae64058cdbb530be6fa6dbfc975dc165e4fa66d224c3d396e25e809 \ - --hash=sha256:e2e9ffaffc1f7767f488d6da34b6f5a377751c996f397902eb6abb99a67bde32 +fastapi-cli==0.0.5 \ + --hash=sha256:d30e1239c6f46fcb95e606f02cdda59a1e2fa778a54b64686b3ff27f6211ff9f \ + --hash=sha256:e94d847524648c748a5350673546bbf9bcaeb086b33c24f2e82e021436866a46 # via fastapi filelock==3.15.4 \ --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ @@ -856,25 +876,25 @@ rpds-py==0.19.1 \ # via # jsonschema # referencing -ruff==0.5.5 \ - --hash=sha256:00817603822a3e42b80f7c3298c8269e09f889ee94640cd1fc7f9329788d7bf8 \ - --hash=sha256:187a60f555e9f865a2ff2c6984b9afeffa7158ba6e1eab56cb830404c942b0f3 \ - --hash=sha256:3191317d967af701f1b73a31ed5788795936e423b7acce82a2b63e26eb3e89d6 \ - --hash=sha256:3687d002f911e8a5faf977e619a034d159a8373514a587249cc00f211c67a091 \ - --hash=sha256:4ad25dd9c5faac95c8e9efb13e15803cd8bbf7f4600645a60ffe17c73f60779b \ - --hash=sha256:50f36d77f52d4c9c2f1361ccbfbd09099a1b2ea5d2b2222c586ab08885cf3445 \ - --hash=sha256:605d589ec35d1da9213a9d4d7e7a9c761d90bba78fc8790d1c5e65026c1b9eaf \ - --hash=sha256:696f18463b47a94575db635ebb4c178188645636f05e934fdf361b74edf1bb2d \ - --hash=sha256:a09b43e02f76ac0145f86a08e045e2ea452066f7ba064fd6b0cdccb486f7c3e7 \ - --hash=sha256:ac9dc814e510436e30d0ba535f435a7f3dc97f895f844f5b3f347ec8c228a523 \ - --hash=sha256:af9bdf6c389b5add40d89b201425b531e0a5cceb3cfdcc69f04d3d531c6be74f \ - --hash=sha256:cab904683bf9e2ecbbe9ff235bfe056f0eba754d0168ad5407832928d579e7ab \ - --hash=sha256:cc5516bdb4858d972fbc31d246bdb390eab8df1a26e2353be2dbc0c2d7f5421a \ - --hash=sha256:cfd7de17cef6ab559e9f5ab859f0d3296393bc78f69030967ca4d87a541b97a0 \ - --hash=sha256:d0b856cb19c60cd40198be5d8d4b556228e3dcd545b4f423d1ad812bfdca5884 \ - --hash=sha256:d40a8533ed545390ef8315b8e25c4bb85739b90bd0f3fe1280a29ae364cc55d8 \ - --hash=sha256:f70737c157d7edf749bcb952d13854e8f745cec695a01bdc6e29c29c288fc36e \ - --hash=sha256:fe26fc46fa8c6e0ae3f47ddccfbb136253c831c3289bba044befe68f467bfb16 +ruff==0.5.6 \ + --hash=sha256:07c9e3c2a8e1fe377dd460371c3462671a728c981c3205a5217291422209f642 \ + --hash=sha256:111a99cdb02f69ddb2571e2756e017a1496c2c3a2aeefe7b988ddab38b416d36 \ + --hash=sha256:1f77c1c3aa0669fb230b06fb24ffa3e879391a3ba3f15e3d633a752da5a3e670 \ + --hash=sha256:4d394940f61f7720ad371ddedf14722ee1d6250fd8d020f5ea5a86e7be217daf \ + --hash=sha256:563a7ae61ad284187d3071d9041c08019975693ff655438d8d4be26e492760bd \ + --hash=sha256:57c6c0dd997b31b536bff49b9eee5ed3194d60605a4427f735eeb1f9c1b8d264 \ + --hash=sha256:80521b88d26a45e871f31e4b88938fd87db7011bb961d8afd2664982dfc3641a \ + --hash=sha256:94fe60869bfbf0521e04fd62b74cbca21cbc5beb67cbb75ab33fe8c174f54414 \ + --hash=sha256:a0ef5930799a05522985b9cec8290b185952f3fcd86c1772c3bdbd732667fdcd \ + --hash=sha256:b652dc14f6ef5d1552821e006f747802cc32d98d5509349e168f6bf0ee9f8f42 \ + --hash=sha256:c476acb43c3c51e3c614a2e878ee1589655fa02dab19fe2db0423a06d6a5b1b6 \ + --hash=sha256:c94e084ba3eaa80c2172918c2ca2eb2230c3f15925f4ed8b6297260c6ef179ad \ + --hash=sha256:d7fe7dccb1a89dc66785d7aa0ac283b2269712d8ed19c63af908fdccca5ccc1a \ + --hash=sha256:d9bc8f328a9f1309ae80e4d392836e7dbc77303b38ed4a7112699e63d3b066ab \ + --hash=sha256:e2ff8003f5252fd68425fd53d27c1f08b201d7ed714bb31a55c9ac1d4c13e2eb \ + --hash=sha256:e395daba77a79f6dc0d07311f94cc0560375ca20c06f354c7c99af3bf4560c5d \ + --hash=sha256:e6a584c1de6f8591c2570e171cc7ce482bb983d49c70ddf014393cd39e9dfaed \ + --hash=sha256:f908148c93c02873210a52cad75a6eda856b2cbb72250370ce3afef6fb99b1ed # via -r lock/requirements-dev-template.in setuptools==72.1.0 \ --hash=sha256:5a03e1860cf56bb6ef48ce186b0e557fdba433237481a9a625176c2831be15d1 \ @@ -948,31 +968,32 @@ urllib3==2.2.2 \ # docker # requests # testcontainers -uv==0.2.32 \ - --hash=sha256:045f90cbc4df9117e845234fd73029ff0696701e0d831e1ac2a7f17ecc2d09c4 \ - --hash=sha256:089f5d082d745ad40ed8c7bff9d551db942fd13ca85e7dc1c6ef47d67365b4c0 \ - --hash=sha256:127f3009abf342b3f189eeddd3719db0ff9c0aa43f3c5faeb76d5a963de3c23c \ - --hash=sha256:195b5eade2b0866ab1a84accc2c3ddb2b73d7902f7274c49016afd08c28ddf3e \ - --hash=sha256:37d6b4a23792e4b9bd148d0696a678a14eb987e7d027ec94533ed053e9515192 \ - --hash=sha256:389e0e32718fbbadf9b710fb0e2a27b539a7879466c32669f95f1172940c76fc \ - --hash=sha256:6b173f71252a504b7d62e84262ed29a46db77de3689faf34c499d2463e59c6dc \ - --hash=sha256:71bb1f302d5ee1741f99fddbde3b2d62ea3838260c34b112a3468451cac73b1b \ - --hash=sha256:7621b7cac2b41fbba42735c34bb914ef340e30116e3524daea333e870e6f5f1c \ - --hash=sha256:7dfbd0eda969c1cb1d1dd601172e667f4724ef7de0d72bc1d6cb79d8a56a4660 \ - --hash=sha256:862ed5fe4eebe782e00b9ba6f220c82d710ad2c765e7db5550ccad162301c81e \ - --hash=sha256:907049dfa1341ad36928fedc3df2187bc0f419a743023b5ee0e3a21c14bd191e \ - --hash=sha256:90e4889b26a4624c42f460e284dcd20400d0ef577058d346b6a5da2f3ab4db4f \ - --hash=sha256:a0379a57603731927c8ce74ee4a4c7876765fc86c5c28f7c0f8ea019b61e4838 \ - --hash=sha256:a168943ead36a262b883971d8e1d9aa2a11842f5d997a1fd1a14d50cb49aeb20 \ - --hash=sha256:dbec58b12b9e96871c947a255a51c015387fb90c403b956bc723f489a11c6320 \ - --hash=sha256:e86fabae3553c02ed715b2ccdfb19ed5f5e98470cac33e5078cfb4911b33d7fd \ - --hash=sha256:fa45c91f3955f8229c3a817a064e85d88cd1f1944e1161ddd7acb913f4e547dc +uv==0.2.33 \ + --hash=sha256:02ed3b62049ea1f40404d33a02a69d3808f3b0e001e5565938804ca76beafbc4 \ + --hash=sha256:181ccdb22058465c6690dca22e506fec234dcae5bcbe6389fd5330971910250e \ + --hash=sha256:2fe685e73f198b2630e08e89ece0d858d58646a038a6d9cb2b06126dcca856d1 \ + --hash=sha256:37924a3b502117fd74b1ddf08e9288b397da7895dd8cad46005422eefffe6e88 \ + --hash=sha256:42b65bbf78b5186a40ea4423fab030fb01c9354432a7c0a3b5db67a3f4e246c5 \ + --hash=sha256:48cfdb8efd237eb00086b8f0d0dc7281e517fd8afb55f698538087379bf45a8d \ + --hash=sha256:676231a93001db051ecf98cb380f2d48d3f6b95add66ff4546073e30911a737a \ + --hash=sha256:714351e10f27e41052897e26cd4acfe66e35250903fdc20f762d29461cf3ec4a \ + --hash=sha256:73031edf35195289f02f6f1a603c512b57c8f921cb62fd442dbb63fd2a77c801 \ + --hash=sha256:744eb9743e4b850af5de9f3c727d84a60a763ae0f4f5183dcdfa8a065879694d \ + --hash=sha256:86f6237102deedbb17201804eb821833c5bad3f551f16f2695ae2b85e9f066de \ + --hash=sha256:8eba96cbff1bc492c270e143235b39cfbe6dddebd842228ea14124d6b7d944e8 \ + --hash=sha256:90b74796ce75594e63345c8e090fbac832a8f6db876691ae2b57b0b8d6011559 \ + --hash=sha256:93c45d07ab440c03f2796540d646c34e58b4707feebfb9f70ded1306830408b0 \ + --hash=sha256:ace6cb8383203fdfeaf8dbbc1ecb3bb945e040ca10558e233b63c84af82f6636 \ + --hash=sha256:dbe497a1a16be9569d42cf4a7562e14bb3c3d9b33cc65e59095f1c3f8ab983df \ + --hash=sha256:ede51de6795f9571b182c104d6078690c3a10b3fbe6fcf414b2e38c8d394e575 \ + --hash=sha256:fb6f282ac92fbc05e82fa3a93e6515ad5b044e8c845ba16d815b5889799eebd1 # via -r lock/requirements-dev-template.in uvicorn==0.29.0 \ --hash=sha256:2c2aac7ff4f4365c206fd773a39bf4ebd1047c238f8b8268ad996829323473de \ --hash=sha256:6a69214c0b6a087462412670b3ef21224fa48cae0e452b5883e8e8bdfdd11dd0 # via # fastapi + # fastapi-cli # ghga-service-commons uvloop==0.19.0 \ --hash=sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd \ diff --git a/lock/requirements.txt b/lock/requirements.txt index dd0d6df..0cff02f 100644 --- a/lock/requirements.txt +++ b/lock/requirements.txt @@ -53,9 +53,9 @@ async-timeout==4.0.3 \ # via # -c lock/requirements-dev.txt # aiokafka -attrs==23.2.0 \ - --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ - --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +attrs==24.1.0 \ + --hash=sha256:377b47448cb61fea38533f671fba0d0f8a96fd58facd4dc518e3dac9dbea0905 \ + --hash=sha256:adbdec84af72d38be7628e353a09b6a6790d15cd71819f6e9d7b0faa8a125745 # via # -c lock/requirements-dev.txt # jsonschema @@ -94,9 +94,9 @@ fastapi==0.111.1 \ # via # -c lock/requirements-dev.txt # ghga-service-commons -fastapi-cli==0.0.4 \ - --hash=sha256:a2552f3a7ae64058cdbb530be6fa6dbfc975dc165e4fa66d224c3d396e25e809 \ - --hash=sha256:e2e9ffaffc1f7767f488d6da34b6f5a377751c996f397902eb6abb99a67bde32 +fastapi-cli==0.0.5 \ + --hash=sha256:d30e1239c6f46fcb95e606f02cdda59a1e2fa778a54b64686b3ff27f6211ff9f \ + --hash=sha256:e94d847524648c748a5350673546bbf9bcaeb086b33c24f2e82e021436866a46 # via # -c lock/requirements-dev.txt # fastapi @@ -694,6 +694,7 @@ uvicorn==0.29.0 \ # via # -c lock/requirements-dev.txt # fastapi + # fastapi-cli # ghga-service-commons uvloop==0.19.0 \ --hash=sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd \ diff --git a/openapi.yaml b/openapi.yaml index dd29c48..2b7ed50 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -128,7 +128,7 @@ components: facetable_fields: default: [] description: A list of the facetable fields for the resource type (leave - empty to not use faceting) + empty to not use faceting, use dotted notation for nested fields) items: $ref: '#/components/schemas/FieldLabel' title: Facetable Fields @@ -136,7 +136,7 @@ components: selected_fields: default: [] description: A list of the returned fields for the resource type (leave - empty to return all) + empty to return all, use dotted notation for nested fields) items: $ref: '#/components/schemas/FieldLabel' title: Selected Fields @@ -177,6 +177,7 @@ components: info: contact: email: contact@ghga.de + name: German Human Genome Phenome Archive (GHGA) license: name: Apache 2.0 summary: A service for searching metadata artifacts and filtering results. diff --git a/src/mass/adapters/inbound/fastapi_/configure.py b/src/mass/adapters/inbound/fastapi_/configure.py index c6edbe9..53c390e 100644 --- a/src/mass/adapters/inbound/fastapi_/configure.py +++ b/src/mass/adapters/inbound/fastapi_/configure.py @@ -26,7 +26,13 @@ def get_configured_app(*, config: Config) -> FastAPI: """Create and configure a REST API application.""" summary = metadata["Summary"] - author = metadata["Author"] + author = metadata.get("Author") + email = metadata["Author-email"] + if not author and email.endswith(">"): + # author is contained in Author-email + author, email = email.rsplit("<", 1) + author = author.strip().strip('"') + email = email[:-1] email = metadata["Author-email"] license = metadata["License"] title, summary = summary.split(" - ", 1) diff --git a/src/mass/adapters/outbound/utils.py b/src/mass/adapters/outbound/utils.py index 3bede27..3f86d26 100644 --- a/src/mass/adapters/outbound/utils.py +++ b/src/mass/adapters/outbound/utils.py @@ -30,6 +30,11 @@ } +def name_from_key(key: str) -> str: + """Auto generate a suitable name from a key""" + return key.title().replace("_", " ") + + def pipeline_match_text_search(*, query: str) -> JsonObject: """Build text search segment of aggregation pipeline""" text_search = {"$text": {"$search": query}} @@ -45,18 +50,37 @@ def args_for_getfield(*, root_object_name: str, field_name: str) -> tuple[str, s specified_field = pieces[-1] prefix += "." + ".".join(pieces[:-1]) - return (prefix, specified_field) + return prefix, specified_field def pipeline_match_filters_stage(*, filters: list[models.Filter]) -> JsonObject: """Build segment of pipeline to apply search filters""" - segment: dict[str, dict[str, list[str]]] = defaultdict(lambda: {"$in": []}) + filter_values = defaultdict(list) for item in filters: - filter_key = "content." + str(item.key) - filter_value = item.value - segment[filter_key]["$in"].append(filter_value) - - return {"$match": segment} + filter_values[item.key].append(item.value) + segment = [] + for key, values in filter_values.items(): + if key != "id_": + key = "content." + key + segment.append( + { + "$or": [ + { + "$and": [ + {key: {"$not": {"$type": "array"}}}, + {key: {"$in": values}}, + ] + }, + { + "$and": [ + {key: {"$type": "array"}}, + {key: {"$elemMatch": {"$in": values}}}, + ] + }, + ] + } + ) + return {"$match": {"$and": segment}} def pipeline_facet_sort_and_paginate( @@ -74,8 +98,16 @@ def pipeline_facet_sort_and_paginate( prefix, specified_field = args_for_getfield( root_object_name="content", field_name=facet.key ) - - segment[facet.name] = [ + name = facet.name + if not name: + name = name_from_key(facet.key) + segment[name] = [ + { + "$unwind": { + "path": f"{prefix}.{specified_field}", + "preserveNullAndEmptyArrays": True, + } + }, { "$group": { "_id": {"$getField": {"field": specified_field, "input": prefix}}, @@ -116,8 +148,16 @@ def pipeline_project(*, facet_fields: list[models.FieldLabel]) -> JsonObject: # add a segment for each facet to summarize the options for facet in facet_fields: + key = facet.key + name = facet.name + if not name: + name = name_from_key(key) segment["facets"].append( - {"key": facet.key, "name": facet.name, "options": f"${facet.name}"} + { + "key": key, + "name": name, + "options": f"${name}", + } ) return {"$project": segment} diff --git a/src/mass/core/models.py b/src/mass/core/models.py index 5a64d83..333436e 100644 --- a/src/mass/core/models.py +++ b/src/mass/core/models.py @@ -55,12 +55,12 @@ class SearchableClass(BaseModel): facetable_fields: list[FieldLabel] = Field( [], description="A list of the facetable fields for the resource type" - " (leave empty to not use faceting)", + " (leave empty to not use faceting, use dotted notation for nested fields)", ) selected_fields: list[FieldLabel] = Field( [], description="A list of the returned fields for the resource type" - " (leave empty to return all)", + " (leave empty to return all, use dotted notation for nested fields)", ) diff --git a/tests/fixtures/test_config.yaml b/tests/fixtures/test_config.yaml index ab8fc53..1f1a8ee 100644 --- a/tests/fixtures/test_config.yaml +++ b/tests/fixtures/test_config.yaml @@ -17,21 +17,21 @@ db_connection_str: mongodb://localhost:27017 db_name: metadata-store searchable_classes: - DatasetEmbedded: + NestedData: description: Dataset with embedded references. facetable_fields: - key: category name: Category - - key: field1 + - key: city name: Field 1 - - key: "has_object.type" + - key: "object.type" name: Object Type selected_fields: - key: id_ name: ID - key: type name: Location Type - - key: "has_object.type" + - key: "object.type" name: Object Type EmptyCollection: description: An empty collection to test the index creation. @@ -53,6 +53,14 @@ searchable_classes: - key: data name: Data selected_fields: [] + FilteringTests: + description: Data for testing filtering on using single and multi-valued fields. + facetable_fields: + - key: species + - key: eats + name: Food + selected_fields: + - key: name resource_change_event_topic: searchable_resources resource_deletion_event_type: searchable_resource_deleted resource_upsertion_event_type: searchable_resource_upserted diff --git a/tests/fixtures/test_data/FilteringTests.json b/tests/fixtures/test_data/FilteringTests.json new file mode 100644 index 0000000..209f6c0 --- /dev/null +++ b/tests/fixtures/test_data/FilteringTests.json @@ -0,0 +1,51 @@ +{ + "items": [ + { + "eats": [ + "bananas" + ], + "id_": "1", + "name": "Jack", + "species": "monkey" + }, + { + "eats": [ + "dog food", + "treats" + ], + "id_": "2", + "name": "Bruiser", + "species": "dog" + }, + { + "eats": [ + "spaghetti", + "meatballs" + ], + "id_": "3", + "name": "Lady", + "species": "dog" + }, + { + "eats": [ + "fish", + "lasagna", + "meatballs", + "spaghetti", + "treats" + ], + "id_": "4", + "name": "Garfield", + "species": "cat" + }, + { + "eats": [ + "fish", + "shrimp" + ], + "id_": "5", + "name": "Flipper", + "species": "dolphin" + } + ] +} diff --git a/tests/fixtures/test_data/DatasetEmbedded.json b/tests/fixtures/test_data/NestedData.json similarity index 76% rename from tests/fixtures/test_data/DatasetEmbedded.json rename to tests/fixtures/test_data/NestedData.json index 09a31a6..78ad1cf 100644 --- a/tests/fixtures/test_data/DatasetEmbedded.json +++ b/tests/fixtures/test_data/NestedData.json @@ -2,12 +2,13 @@ "items": [ { "category": "hotel", - "field1": "Miami", - "has_object": { + "city": "Miami", + "id_": "1HotelAlpha-id", + "object": { "id_": "HotelAlphaObject", "type": "piano" }, - "has_rooms": [ + "rooms": [ { "id_": "HotelAlphaLarge", "type": "large room" @@ -17,31 +18,30 @@ "type": "poolside room" } ], - "id_": "1HotelAlpha-id", "type": "resort" }, { "category": "hotel", - "field1": "Denver", - "has_object": { + "city": "Denver", + "id_": "2HotelBeta-id", + "object": { "id_": "HotelBetaObject", "type": "kitchen" }, - "id_": "2HotelBeta-id", "type": "luxury" }, { - "category": "zoo", - "field1": "Amsterdam", - "has_animal": { + "animal": { "id_": "ZooAnimal", "type": "giraffe" }, - "has_object": { + "category": "zoo", + "city": "Amsterdam", + "id_": "3zoo-id", + "object": { "id_": "zoo-object", "type": "concessions stand" - }, - "id_": "3zoo-id" + } } ] } diff --git a/tests/test_api.py b/tests/test_api.py index 98c532f..16525a8 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -28,6 +28,8 @@ pytestmark = pytest.mark.asyncio() +CLASS_NAME = "NestedData" + def compare( *, @@ -45,7 +47,7 @@ def compare( assert results.facets == facets else: config = get_config() - dataset_embedded_class = config.searchable_classes["DatasetEmbedded"] + dataset_embedded_class = config.searchable_classes[CLASS_NAME] assert dataset_embedded_class is not None configured_facets = dataset_embedded_class.facetable_fields assert len(results.facets) == len(configured_facets) @@ -80,16 +82,16 @@ async def test_malformed_document( resource = models.Resource( id_="added-resource", content={ - "has_object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, - "field1": 42, # expected to be a string + "object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, + "city": 42, # expected to be a string "category": "test object", }, ) await joint_fixture.query_handler.load_resource( - resource=resource, class_name="DatasetEmbedded" + resource=resource, class_name=CLASS_NAME ) - params: QueryParams = {"class_name": "DatasetEmbedded"} + params: QueryParams = {"class_name": CLASS_NAME} with caplog.at_level(logging.WARNING): with pytest.raises( @@ -108,7 +110,7 @@ async def test_malformed_document( async def test_search(joint_fixture: JointFixture): """Basic query to pull back all documents for class name""" - params: QueryParams = {"class_name": "DatasetEmbedded"} + params: QueryParams = {"class_name": CLASS_NAME} results = await joint_fixture.call_search_endpoint(params) compare(results=results, count=3, hit_length=3) @@ -116,14 +118,14 @@ async def test_search(joint_fixture: JointFixture): async def test_search_with_limit(joint_fixture: JointFixture): """Make sure we get a count of 3 but only 1 hit""" - params: QueryParams = {"class_name": "DatasetEmbedded", "limit": 1} + params: QueryParams = {"class_name": CLASS_NAME, "limit": 1} results = await joint_fixture.call_search_endpoint(params) hit = { "id_": "1HotelAlpha-id", "content": { "type": "resort", - "has_object": {"type": "piano"}, + "object": {"type": "piano"}, }, } hits = [models.Resource(**hit)] # type: ignore[arg-type] @@ -132,7 +134,7 @@ async def test_search_with_limit(joint_fixture: JointFixture): async def test_search_keywords(joint_fixture: JointFixture): """Make sure the query string is passed through intact""" - params: QueryParams = {"class_name": "DatasetEmbedded", "query": "hotel"} + params: QueryParams = {"class_name": CLASS_NAME, "query": "hotel"} results = await joint_fixture.call_search_endpoint(params) compare(results=results, count=2, hit_length=2) @@ -141,8 +143,8 @@ async def test_search_keywords(joint_fixture: JointFixture): async def test_search_filters(joint_fixture: JointFixture): """Make sure filters work""" params: QueryParams = { - "class_name": "DatasetEmbedded", - "filter_by": ["has_object.type"], + "class_name": CLASS_NAME, + "filter_by": ["object.type"], "value": ["piano"], } @@ -162,7 +164,7 @@ async def test_auto_recreation_of_indexes( joint_fixture: JointFixture, caplog: pytest.LogCaptureFixture ): """Make sure the indexes are recreated on the fly when they were deleted""" - params: QueryParams = {"class_name": "DatasetEmbedded", "query": "hotel"} + params: QueryParams = {"class_name": CLASS_NAME, "query": "hotel"} # should not give a warning when indexes are present with caplog.at_level(logging.WARNING): diff --git a/tests/test_consumer.py b/tests/test_consumer.py index 8be1c21..2ebed8c 100644 --- a/tests/test_consumer.py +++ b/tests/test_consumer.py @@ -23,6 +23,8 @@ pytestmark = pytest.mark.asyncio() +CLASS_NAME = "NestedData" + @pytest.mark.parametrize( "resource_id,is_insert", @@ -37,14 +39,14 @@ async def test_resource_upsert( """Try upserting with no pre-existing resource with matching ID (i.e. insert)""" # get all the documents in the collection results_all = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert results_all.count > 0 # define content of resource content: dict = { - "has_object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, - "field1": "something", + "object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, + "city": "something", "category": "test object", } @@ -54,7 +56,7 @@ async def test_resource_upsert( # put together event payload payload = event_schemas.SearchableResource( accession=resource_id, - class_name="DatasetEmbedded", + class_name=CLASS_NAME, content=content, ).model_dump() @@ -71,7 +73,7 @@ async def test_resource_upsert( # verify that the resource was added updated_resources = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) if is_insert: assert updated_resources.count - results_all.count == 1 @@ -80,9 +82,9 @@ async def test_resource_upsert( # remove unselected fields content = resource.content # type: ignore - del content["field1"] + del content["city"] del content["category"] - del content["has_object"]["id"] + del content["object"]["id"] assert resource in updated_resources.hits assert resource not in results_all.hits @@ -92,7 +94,7 @@ async def test_resource_delete(joint_fixture: JointFixture): """Test resource deletion via event consumption""" # get all the documents in the collection targeted_initial_results = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query='"1HotelAlpha-id"', filters=[], ) @@ -100,7 +102,7 @@ async def test_resource_delete(joint_fixture: JointFixture): assert targeted_initial_results.hits[0].id_ == "1HotelAlpha-id" resource_info = event_schemas.SearchableResourceInfo( - accession="1HotelAlpha-id", class_name="DatasetEmbedded" + accession="1HotelAlpha-id", class_name=CLASS_NAME ) await joint_fixture.kafka.publish_event( @@ -115,7 +117,7 @@ async def test_resource_delete(joint_fixture: JointFixture): # get all the documents in the collection results_post_delete = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query='"1HotelAlpha-id"', filters=[] + class_name=CLASS_NAME, query='"1HotelAlpha-id"', filters=[] ) assert results_post_delete.count == 0 diff --git a/tests/test_filtering.py b/tests/test_filtering.py new file mode 100644 index 0000000..bc2791b --- /dev/null +++ b/tests/test_filtering.py @@ -0,0 +1,159 @@ +# Copyright 2021 - 2024 Universität Tübingen, DKFZ, EMBL, and Universität zu Köln +# for the German Human Genome-Phenome Archive (GHGA) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests concerning the filtering functionality""" + +import pytest + +from tests.fixtures.joint import JointFixture, QueryParams + +CLASS_NAME = "FilteringTests" + + +@pytest.mark.asyncio +async def test_facets(joint_fixture: JointFixture): + """Test that the facets are returned properly""" + params: QueryParams = {"class_name": CLASS_NAME} + + results = await joint_fixture.call_search_endpoint(params) + + facets = results.facets + assert len(facets) == 2 + + facet = facets[0] + assert facet.key == "species" + assert facet.name == "Species" + options = {option.value: option.count for option in facet.options} + assert options == {"cat": 1, "dog": 2, "dolphin": 1, "monkey": 1} + + facet = facets[1] + assert facet.key == "eats" + assert facet.name == "Food" + options = {option.value: option.count for option in facet.options} + assert options == { + "bananas": 1, + "dog food": 1, + "fish": 2, + "lasagna": 1, + "meatballs": 2, + "shrimp": 1, + "spaghetti": 2, + "treats": 2, + } + + +@pytest.mark.parametrize( + "species,names", + [("mouse", []), ("cat", ["Garfield"]), ("dog", ["Bruiser", "Lady"])], + ids=[0, 1, 2], +) +@pytest.mark.asyncio +async def test_single_valued_with_with_single_filter( + species: str, names: list[str], joint_fixture: JointFixture +): + """Test that we can filter a single-valued field using a single value""" + params: QueryParams = { + "class_name": CLASS_NAME, + "filter_by": "species", + "value": species, + } + + results = await joint_fixture.call_search_endpoint(params) + + # Check that the expected names are returned + returned_names = [resource.content["name"] for resource in results.hits] + assert returned_names == names + + # Check that the facet only contains the filtered values + facets = results.facets + assert len(facets) == 2 + facet = facets[0] + assert facet.key == "species" + assert facet.name == "Species" + options = facet.options + if names: + assert len(options) == 1 + option = options[0] + assert option.count == len(names) + assert option.value == species + else: + assert not options + + +@pytest.mark.parametrize( + "food,names", + [("broccoli", []), ("bananas", ["Jack"]), ("fish", ["Garfield", "Flipper"])], + ids=[0, 1, 2], +) +@pytest.mark.asyncio +async def test_multi_valued_with_with_single_filter( + food: str, names: list[str], joint_fixture: JointFixture +): + """Test that we can filter a multi-valued field using a single value""" + params: QueryParams = { + "class_name": CLASS_NAME, + "filter_by": "eats", + "value": food, + } + + results = await joint_fixture.call_search_endpoint(params) + + # Check that the expected names are returned + returned_names = [resource.content["name"] for resource in results.hits] + assert returned_names == names + + # Check that the facet only contains the filtered values + facets = results.facets + assert len(facets) == 2 + facet = facets[1] + assert facet.key == "eats" + assert facet.name == "Food" + options = facet.options + if names: + values = {option.value: option.count for option in options} + if food == "fish": + # should get everything that Garfield or Flipper eat + assert values == { + "fish": 2, + "lasagna": 1, + "meatballs": 1, + "shrimp": 1, + "spaghetti": 1, + "treats": 1, + } + else: + assert values == {food: 1} + else: + assert not options + + +@pytest.mark.asyncio +async def test_multiple_filters(joint_fixture: JointFixture): + """Test the combination of multiple filters. + + Check that we use AND for different fields, but OR for the same fields. + """ + # Query cats, dogs or monkeys that eat fish or bananas + params: QueryParams = { + "class_name": CLASS_NAME, + "filter_by": ["species", "species", "species", "eats", "eats"], + "value": ["cat", "dog", "monkey", "fish", "bananas"], + } + + results = await joint_fixture.call_search_endpoint(params) + + # Only Jack and Garfield fulfill these conditions + returned_names = [resource.content["name"] for resource in results.hits] + assert returned_names == ["Jack", "Garfield"] diff --git a/tests/test_logging.py b/tests/test_logging.py index 57845d7..a6031b1 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -39,7 +39,7 @@ accession="1HotelAlpha-id", class_name=BAD_CLASS_NAME, content={} ) BAD_ACCESSION_DELETE = SearchableResourceInfo( - accession=BAD_ACCESSION, class_name="DatasetEmbedded" + accession=BAD_ACCESSION, class_name="NestedData" ) UPSERT_EVENT = "upsert" DELETE_EVENT = "delete" @@ -111,7 +111,7 @@ async def test_event_sub_logging( """ # get all the documents in the collection all_results = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name="NestedData", query="", filters=[], ) diff --git a/tests/test_resources.py b/tests/test_resources.py index eb44b80..ad83234 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -24,11 +24,14 @@ pytestmark = pytest.mark.asyncio() +CLASS_NAME = "NestedData" + + async def test_basic_query(joint_fixture: JointFixture): """Make sure we can pull back the documents as expected""" # pull back all 3 test documents results = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert results.count == 3 @@ -37,7 +40,7 @@ async def test_basic_query(joint_fixture: JointFixture): async def test_text_search(joint_fixture: JointFixture): """Test basic text search""" results_text = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="poolside", filters=[] + class_name=CLASS_NAME, query="poolside", filters=[] ) assert results_text.count == 1 @@ -47,20 +50,20 @@ async def test_text_search(joint_fixture: JointFixture): async def test_filters_work(joint_fixture: JointFixture): """Test a query with filters selected but no query string""" results_filtered = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query="", - filters=[models.Filter(key="field1", value="Amsterdam")], + filters=[models.Filter(key="city", value="Amsterdam")], ) assert results_filtered.count == 1 assert results_filtered.hits[0].id_ == "3zoo-id" results_multi_filter = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query="", filters=[ models.Filter(key="category", value="hotel"), - models.Filter(key="has_object.type", value="piano"), + models.Filter(key="object.type", value="piano"), ], ) @@ -71,14 +74,14 @@ async def test_filters_work(joint_fixture: JointFixture): async def test_facets_returned(joint_fixture: JointFixture): """Verify that facet fields are returned correctly""" results_faceted = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query="", filters=[models.Filter(key="category", value="hotel")], ) config = get_config() facets: list[models.FieldLabel] = config.searchable_classes[ - "DatasetEmbedded" + "NestedData" ].facetable_fields facet_key_to_name = {x.key: x.name for x in facets} @@ -89,7 +92,7 @@ async def test_facets_returned(joint_fixture: JointFixture): hotel_options = [x for x in facet.options if x.value == "hotel"] assert len(hotel_options) == 1 assert hotel_options[0].count == 2 - elif facet.key == "field1": + elif facet.key == "city": miami_options = [x for x in facet.options if x.value == "Miami"] assert len(miami_options) == 1 assert miami_options[0].count == 1 @@ -112,7 +115,7 @@ async def test_facets_returned(joint_fixture: JointFixture): async def test_limit_parameter(joint_fixture: JointFixture): """Test that the limit parameter works""" results_limited = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[], limit=2 + class_name=CLASS_NAME, query="", filters=[], limit=2 ) assert len(results_limited.hits) == 2 @@ -120,7 +123,7 @@ async def test_limit_parameter(joint_fixture: JointFixture): async def test_skip_parameter(joint_fixture: JointFixture): """Test that the skip parameter works""" results_skip = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[], skip=1 + class_name=CLASS_NAME, query="", filters=[], skip=1 ) assert len(results_skip.hits) == 2 assert [x.id_ for x in results_skip.hits] == ["2HotelBeta-id", "3zoo-id"] @@ -129,7 +132,7 @@ async def test_skip_parameter(joint_fixture: JointFixture): async def test_all_parameters(joint_fixture: JointFixture): """Sanity check - make sure it all works together""" results_all = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query="hotel", filters=[models.Filter(key="category", value="hotel")], skip=1, @@ -144,12 +147,12 @@ async def test_resource_load(joint_fixture: JointFixture): """Test the load function in the query handler""" # get all the documents in the collection results_all = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) content: dict = { - "has_object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, - "field1": "something", + "object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, + "city": "something", "category": "test object", } @@ -157,17 +160,17 @@ async def test_resource_load(joint_fixture: JointFixture): resource = models.Resource(id_="added-resource", content=content) await joint_fixture.query_handler.load_resource( - resource=resource, class_name="DatasetEmbedded" + resource=resource, class_name=CLASS_NAME ) # make sure the new resource is added to the collection results_after_load = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert results_after_load.count - results_all.count == 1 target_search = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", + class_name=CLASS_NAME, query="added-resource", filters=[], skip=0, @@ -179,9 +182,9 @@ async def test_resource_load(joint_fixture: JointFixture): # remove unselected fields content = resource.content # type: ignore - del content["field1"] + del content["city"] del content["category"] - del content["has_object"]["id"] + del content["object"]["id"] assert validated_resource.content == content @@ -192,8 +195,8 @@ async def test_loading_non_configured_resource(joint_fixture: JointFixture): resource = models.Resource( id_="added-resource", content={ - "has_object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, - "field1": "something", + "object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, + "city": "something", "category": "test object", }, ) @@ -210,19 +213,19 @@ async def test_error_from_malformed_resource(joint_fixture: JointFixture): resource = models.Resource( id_="added-resource", content={ - "has_object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, - "field3": "something", # expects field1 to exist + "object": {"type": "added-resource-object", "id": "98u44-f4jo4"}, + "field3": "something", # expects city to exist "category": "test object", }, ) await joint_fixture.query_handler.load_resource( - resource=resource, class_name="DatasetEmbedded" + resource=resource, class_name=CLASS_NAME ) with pytest.raises(joint_fixture.query_handler.ValidationError): await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) @@ -240,17 +243,17 @@ async def test_resource_deletion(joint_fixture: JointFixture): Verify that the targeted resource is deleted and nothing else. """ all_resources = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert all_resources.count > 1 await joint_fixture.query_handler.delete_resource( - resource_id="1HotelAlpha-id", class_name="DatasetEmbedded" + resource_id="1HotelAlpha-id", class_name=CLASS_NAME ) # see if deletion occurred, and make sure only one item was deleted results_after_deletion = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert all_resources.count - results_after_deletion.count == 1 @@ -262,7 +265,7 @@ async def test_resource_deletion(joint_fixture: JointFixture): async def test_resource_deletion_failure(joint_fixture: JointFixture): """Test for correct error when failing to delete a resource""" all_resources = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert all_resources.count > 0 @@ -270,12 +273,12 @@ async def test_resource_deletion_failure(joint_fixture: JointFixture): # try to delete a resource that doesn't exist with pytest.raises(joint_fixture.query_handler.ResourceNotFoundError): await joint_fixture.query_handler.delete_resource( - resource_id="not-here", class_name="DatasetEmbedded" + resource_id="not-here", class_name=CLASS_NAME ) # verify that nothing was actually deleted all_resources_again = await joint_fixture.query_handler.handle_query( - class_name="DatasetEmbedded", query="", filters=[] + class_name=CLASS_NAME, query="", filters=[] ) assert all_resources_again.count == all_resources.count diff --git a/tests/test_sorting.py b/tests/test_sorting.py index 97c4970..e3977ab 100644 --- a/tests/test_sorting.py +++ b/tests/test_sorting.py @@ -243,13 +243,13 @@ async def test_sort_with_superfluous_sort(joint_fixture: JointFixture): @pytest.mark.parametrize("reverse", [False, True], ids=["normal", "reversed"]) -@pytest.mark.parametrize("field", ["type", "has_object.type"]) +@pytest.mark.parametrize("field", ["type", "object.type"]) @pytest.mark.asyncio async def test_sort_with_one_of_the_selected_fields( joint_fixture: JointFixture, reverse: bool, field: str ): """Test sorting when fields are selected and one of them is used for sorting.""" - class_name = "DatasetEmbedded" + class_name = "NestedData" selected = joint_fixture.config.searchable_classes[class_name].selected_fields assert selected # this resource has selected fields assert any(f.key == field for f in selected) # field is selected @@ -267,13 +267,13 @@ async def test_sort_with_one_of_the_selected_fields( @pytest.mark.parametrize("reverse", [False, True], ids=["normal", "reversed"]) -@pytest.mark.parametrize("field", ["category", "field1"]) +@pytest.mark.parametrize("field", ["category", "city"]) @pytest.mark.asyncio async def test_sort_with_one_of_the_unselected_fields( joint_fixture: JointFixture, reverse: bool, field: str ): """Test sorting when fields are selected but sorted by an unselected field.""" - class_name = "DatasetEmbedded" + class_name = "NestedData" selected = joint_fixture.config.searchable_classes[class_name].selected_fields assert selected # this resource has selected fields assert not any(f.key == field for f in selected) # field is unselected