From 2a31733fe97e27192701c23e5f8c813c034e423f Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Thu, 24 Jun 2021 14:43:37 -0400 Subject: [PATCH 1/6] NOREF Improve default sorting In order to provide a default sorting order for all searches the API is assigning a default UUID sort. However this overrides the ElasticSearch scoring sorting which is the desired behavior. This adds an explicit `_score` sorting to ensure that patrons see the most relevant search results. --- api/elastic.py | 12 +++++++++--- tests/unit/test_api_es.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/api/elastic.py b/api/elastic.py index f925155ccd..ac3697700d 100644 --- a/api/elastic.py +++ b/api/elastic.py @@ -99,6 +99,8 @@ def executeSearchQuery(self, params, page, perPage): else: res = self.query[startPos:endPos].execute() + for r in res.hits: + print(r.meta.score, r.title) if not searchFromStr: try: @@ -139,9 +141,10 @@ def executeDeepQuery(self, startPos, perPage): iteration += 1 def setPageResultCache(self, cacheKey, sort): + print(sort) self.redis.set( '{}/queryPaging/{}'.format(self.environment, cacheKey), - '|'.join(sort), + '|'.join([str(s) for s in sort]), ex=60*60*24 ) @@ -190,7 +193,7 @@ def languageQuery(self, workTotals): def titleQuery(cls, titleText): return Q('bool', should=[ - Q('query_string', query=titleText, fields=['title', 'alt_titles'], default_operator='and'), + Q('query_string', query=titleText, fields=['title^3', 'alt_titles'], default_operator='and'), Q('nested', path='editions', query=Q('query_string', query=titleText, fields=['editions.title'], default_operator='and')) ] ) @@ -199,7 +202,7 @@ def titleQuery(cls, titleText): def authorQuery(cls, authorText): workAgentQuery = Q('bool', must=[ - Q('query_string', query=authorText, fields=['agents.name'], default_operator='and'), + Q('query_string', query=authorText, fields=['agents.name^2'], default_operator='and'), Q('terms', agents__roles=cls.ROLE_ALLOWLIST) ] ) @@ -276,6 +279,9 @@ def addSortClause(self, sortParams, reverse=False): sortValues.append(self.dateSort) + if len(sortValues) < 1: + sortValues.append({'_score': 'desc'}) + sortValues.append({'uuid': 'asc' if reverse is False else 'desc'}) self.query = self.query.sort(*sortValues) diff --git a/tests/unit/test_api_es.py b/tests/unit/test_api_es.py index a5a4b2ec75..dda1dba798 100644 --- a/tests/unit/test_api_es.py +++ b/tests/unit/test_api_es.py @@ -439,7 +439,7 @@ def test_titleQuery(self): testQuery = testQueryES.to_dict() assert testQuery['bool']['should'][0]['query_string']['query'] == 'testTitle' - assert testQuery['bool']['should'][0]['query_string']['fields'] == ['title', 'alt_titles'] + assert testQuery['bool']['should'][0]['query_string']['fields'] == ['title^3', 'alt_titles'] assert testQuery['bool']['should'][1]['nested']['path'] == 'editions' assert testQuery['bool']['should'][1]['nested']['query']['query_string']['query'] == 'testTitle' assert testQuery['bool']['should'][1]['nested']['query']['query_string']['fields'] == ['editions.title'] @@ -451,7 +451,7 @@ def test_authorQuery(self): assert testQuery['bool']['should'][0]['nested']['path'] == 'agents' assert testQuery['bool']['should'][0]['nested']['query']['bool']['must'][0]['query_string']['query'] == 'testAuthor' - assert testQuery['bool']['should'][0]['nested']['query']['bool']['must'][0]['query_string']['fields'] == ['agents.name'] + assert testQuery['bool']['should'][0]['nested']['query']['bool']['must'][0]['query_string']['fields'] == ['agents.name^2'] assert testQuery['bool']['should'][0]['nested']['query']['bool']['must'][1]['terms']['agents.roles'] == ElasticClient.ROLE_ALLOWLIST assert testQuery['bool']['should'][1]['nested']['path'] == 'editions.agents' assert testQuery['bool']['should'][1]['nested']['query']['bool']['must'][0]['query_string']['query'] == 'testAuthor' @@ -586,7 +586,7 @@ def test_addSortClause_root_sort_only(self, testInstance, mocker): testInstance.addSortClause([]) assert testInstance.query == 'sortQuery' - mockQuery.sort.assert_called_once_with({'uuid': 'asc'}) + mockQuery.sort.assert_called_once_with({'_score': 'desc'}, {'uuid': 'asc'}) def test_addSortClause_reverse_true(self, testInstance, mocker): mockQuery = mocker.MagicMock() From fe6242b57770aea3cfe4372c11fc89450d7750ef Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Thu, 24 Jun 2021 14:45:31 -0400 Subject: [PATCH 2/6] NOREF Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a642d2574c..130d92a5d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## unreleased -- v0.6.6 +### Fixed +- Default sorting options + ## 2021-06-24 -- v0.6.5 ### Fixed - Handling of empty result sets from ElasticSearch From 0e3ad0257348cff1e7410adebb5f6709aa05fad9 Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Tue, 29 Jun 2021 11:11:47 -0400 Subject: [PATCH 3/6] NOREF Update Utils endpoint with Redis client When the redis client was added to the API process it needed to be passed wherever the ElasticSearch client is instantiated. Because it does not currently interact with the `utils/languages` endpoint it was omitted from this method, which caused an error. This adds it as well as updates the relevant unit test to verify this. --- CHANGELOG.md | 1 + api/blueprints/drbUtils.py | 4 ++-- tests/unit/test_api_utils_blueprint.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 130d92a5d0..093322f689 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased -- v0.6.6 ### Fixed - Default sorting options +- Added Redis client to `utils/languages` endpoint to resolve error ## 2021-06-24 -- v0.6.5 ### Fixed diff --git a/api/blueprints/drbUtils.py b/api/blueprints/drbUtils.py index 594b5a31df..7904f9a594 100644 --- a/api/blueprints/drbUtils.py +++ b/api/blueprints/drbUtils.py @@ -1,4 +1,4 @@ -from flask import Blueprint, jsonify, current_app, request +from flask import Blueprint, current_app, request from ..db import DBClient from ..elastic import ElasticClient @@ -12,7 +12,7 @@ @utils.route('/languages', methods=['GET']) def languageCounts(): - esClient = ElasticClient() + esClient = ElasticClient(current_app.config['REDIS_CLIENT']) reqParams = APIUtils.normalizeQueryParams(request.args) workCounts = reqParams.get('totals', ['true'])[0].lower() != 'false' diff --git a/tests/unit/test_api_utils_blueprint.py b/tests/unit/test_api_utils_blueprint.py index 9edd6d4e81..ca2f92d935 100644 --- a/tests/unit/test_api_utils_blueprint.py +++ b/tests/unit/test_api_utils_blueprint.py @@ -19,6 +19,7 @@ def mockUtils(self, mocker): def testApp(self): flaskApp = Flask('test') flaskApp.config['DB_CLIENT'] = 'testDBClient' + flaskApp.config['REDIS_CLIENT'] = 'testRedisClient' return flaskApp @@ -38,7 +39,7 @@ def test_languageCounts(self, mockUtils, testApp, mocker): testAPIResponse = languageCounts() assert testAPIResponse == 'languageListResponse' - mockESClient.assert_called_once() + mockESClient.assert_called_once_with('testRedisClient') mockUtils['normalizeQueryParams'].assert_called_once() mockES.languageQuery.assert_called_once_with(True) From 959e421da5c8b3ccdb4328bb8e828060962caea1 Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Tue, 29 Jun 2021 11:16:21 -0400 Subject: [PATCH 4/6] NOREF Remove Testing print statements --- api/elastic.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/api/elastic.py b/api/elastic.py index ac3697700d..3a14b8713a 100644 --- a/api/elastic.py +++ b/api/elastic.py @@ -99,9 +99,6 @@ def executeSearchQuery(self, params, page, perPage): else: res = self.query[startPos:endPos].execute() - for r in res.hits: - print(r.meta.score, r.title) - if not searchFromStr: try: lastSort = list(res.hits[-1].meta.sort) @@ -141,7 +138,6 @@ def executeDeepQuery(self, startPos, perPage): iteration += 1 def setPageResultCache(self, cacheKey, sort): - print(sort) self.redis.set( '{}/queryPaging/{}'.format(self.environment, cacheKey), '|'.join([str(s) for s in sort]), From 486f3b4fca3eda45fc74dfceebdb3446402f9986 Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Tue, 29 Jun 2021 12:52:54 -0400 Subject: [PATCH 5/6] NOREF Add new ES endpoint --- task-definition.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/task-definition.json b/task-definition.json index 4adf4dc164..8fd436e208 100644 --- a/task-definition.json +++ b/task-definition.json @@ -26,7 +26,7 @@ }, { "name": "ELASTICSEARCH_HOST", - "value": "https://vpc-sfr-search-api-production-tom4zg45o45pfomgtasgskvx4e.us-east-1.es.amazonaws.com" + "value": "https://vpc-drb-search-production-5ptfzwisshcadbbaph35dqilva.us-east-1.es.amazonaws.com" }, { "name": "ELASTICSEARCH_INDEX", From 616510eb19a527006ebbf207f61c5def8c18321b Mon Sep 17 00:00:00 2001 From: Michael Benowitz Date: Wed, 30 Jun 2021 10:29:55 -0400 Subject: [PATCH 6/6] NOREF Escape forward slash in queries There was a single character not being escaped that is a Lucene special character - forward slash. This caused a very small subset of queries to fail and this adds it to the escaped character regex to prevent this from happening in the future. --- api/elastic.py | 2 +- tests/unit/test_api_es.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/elastic.py b/api/elastic.py index 3a14b8713a..f35ce0f9c5 100644 --- a/api/elastic.py +++ b/api/elastic.py @@ -170,7 +170,7 @@ def makeDictHashable(cls, object): @staticmethod def escapeSearchQuery(query): - return re.sub(r'[\+\-\&\|\!\(\)\[\]\{\}\^\~\?\:\\]{1}', '\\\\\g<0>', query) + return re.sub(r'[\+\-\&\|\!\(\)\[\]\{\}\^\~\?\:\\\/]{1}', '\\\\\g<0>', query) def languageQuery(self, workTotals): search = self.createSearch() diff --git a/tests/unit/test_api_es.py b/tests/unit/test_api_es.py index dda1dba798..b7e2d3ac61 100644 --- a/tests/unit/test_api_es.py +++ b/tests/unit/test_api_es.py @@ -429,7 +429,7 @@ def test_makeDictHashable(self, mocker): ) def test_escapeSearchQuery_changed(self): - assert ElasticClient.escapeSearchQuery('[test]+a:thing!') == '\[test\]\+a\:thing\!' + assert ElasticClient.escapeSearchQuery('/[test]+a:thing!') == '\/\[test\]\+a\:thing\!' def test_escapeSearchQuery_unchanged(self): assert ElasticClient.escapeSearchQuery('a simple query') == 'a simple query'