From 9426197cb9a699990cfd17ca00b11d1fa0684317 Mon Sep 17 00:00:00 2001 From: keenangraham Date: Fri, 10 Sep 2021 16:00:36 -0700 Subject: [PATCH] SNO-211-increase-batch-size-in-scroll-api (#11) --- src/snosearch/defaults.py | 2 + src/snosearch/mixins.py | 3 +- src/snosearch/queries.py | 16 +-- src/snosearch/tests/test_searches_queries.py | 107 +++++++++++++++++-- 4 files changed, 115 insertions(+), 13 deletions(-) diff --git a/src/snosearch/defaults.py b/src/snosearch/defaults.py index 55e90dc..5b625e1 100644 --- a/src/snosearch/defaults.py +++ b/src/snosearch/defaults.py @@ -86,6 +86,8 @@ MAX_ES_RESULTS_WINDOW = 9999 +DEFAULT_SCAN_SIZE = 1000 + DEFAULT_FRAMES = [ EMBEDDED_FRAME, OBJECT_FRAME, diff --git a/src/snosearch/mixins.py b/src/snosearch/mixins.py index d6bdf02..dfbe7da 100644 --- a/src/snosearch/mixins.py +++ b/src/snosearch/mixins.py @@ -210,7 +210,8 @@ def _limit_generator(self, generator, limit): yield r def _scan(self): - results = self.results._search.scan() + size = self.query_builder._get_scan_size() + results = self.results._search.params(size=size).scan() if not self.query_builder._limit_is_all(): results = self._limit_generator( results, diff --git a/src/snosearch/queries.py b/src/snosearch/queries.py index d363594..a87ce7c 100644 --- a/src/snosearch/queries.py +++ b/src/snosearch/queries.py @@ -23,6 +23,7 @@ from .defaults import BASE_SEARCH_FIELDS from .defaults import DEFAULT_COLUMNS from .defaults import DEFAULT_FRAMES +from .defaults import DEFAULT_SCAN_SIZE from .defaults import DEFAULT_SORT from .defaults import DEFAULT_SORT_OPTIONS from .defaults import INTERNAL_AUDIT_FACETS @@ -413,6 +414,12 @@ def _get_from_value_as_int(self): def _get_default_limit(self): return [(LIMIT_KEY, 25)] + def _get_max_result_window(self): + return self.kwargs.get('max_result_window', MAX_ES_RESULTS_WINDOW) + + def _get_scan_size(self): + return self.kwargs.get('scan_size', DEFAULT_SCAN_SIZE) + @assert_one_or_none_returned(error_message='Invalid to specify multiple limit parameters:') def _get_limit(self): return self.params_parser.get_limit() or self._get_default_limit() @@ -440,7 +447,7 @@ def _limit_is_all(self): def _limit_is_over_maximum_window(self): limit = self._get_limit_value_as_int() if limit: - return limit > MAX_ES_RESULTS_WINDOW + return limit > self._get_max_result_window() return False def _should_scan_over_results(self): @@ -469,11 +476,8 @@ def _should_search_over_all_indices(self): return any(conditions) def _get_bounded_limit_value_or_default(self): - default_limit = self.params_parser.get_one_value( - params=self._get_default_limit() - ) if self._should_scan_over_results(): - return default_limit + return 0 return self._get_limit_value_as_int() @assert_one_or_none_returned(error_message='Invalid to specify multiple mode parameters:') @@ -939,7 +943,7 @@ def add_source(self): def add_slice(self): ''' - If limit=all or limit > MAX_ES_RESULTS_WINDOW we return + If limit=all or limit > max result window we return default slice for the aggregations/total and scan over results in response mixin to_graph method. ''' diff --git a/src/snosearch/tests/test_searches_queries.py b/src/snosearch/tests/test_searches_queries.py index 9aa9aa6..ca3ac1c 100644 --- a/src/snosearch/tests/test_searches_queries.py +++ b/src/snosearch/tests/test_searches_queries.py @@ -1291,6 +1291,43 @@ def test_searches_queries_abstract_query_factory_get_default_limit(params_parser assert default_limit == [('limit', 25)] +@pytest.mark.parametrize( + 'params_parser', + integrations, + indirect=True +) +def test_searches_queries_abstract_query_factory_get_max_result_window(params_parser): + from snosearch.queries import AbstractQueryFactory + aq = AbstractQueryFactory(params_parser) + max_result_window = aq._get_max_result_window() + assert max_result_window == 9999 + aq = AbstractQueryFactory( + params_parser, + max_result_window=99999, + ) + max_result_window = aq._get_max_result_window() + assert max_result_window == 99999 + + + +@pytest.mark.parametrize( + 'params_parser', + integrations, + indirect=True +) +def test_searches_queries_abstract_query_factory_get_scan_size(params_parser): + from snosearch.queries import AbstractQueryFactory + aq = AbstractQueryFactory(params_parser) + scan_size = aq._get_scan_size() + assert scan_size == 1000 + aq = AbstractQueryFactory( + params_parser, + scan_size=200000, + ) + scan_size = aq._get_scan_size() + assert scan_size == 200000 + + @pytest.mark.parametrize( 'params_parser, dummy_request', [ @@ -1417,6 +1454,18 @@ def test_searches_queries_abstract_query_factory_limit_is_over_maximum_window(pa 'type=TestingSearchSchema&status=released' '&limit=10000&field=@id&mode=picker&mode=chair&field=accession' ) + params_parser = ParamsParser( + dummy_request + ) + aq = AbstractQueryFactory( + params_parser, + max_result_window=10000, + ) + assert not aq._limit_is_over_maximum_window() + dummy_request.environ['QUERY_STRING'] = ( + 'type=TestingSearchSchema&status=released' + '&limit=100000&field=@id&mode=picker&mode=chair&field=accession' + ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) assert aq._limit_is_over_maximum_window() @@ -1427,6 +1476,26 @@ def test_searches_queries_abstract_query_factory_limit_is_over_maximum_window(pa params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) assert not aq._limit_is_over_maximum_window() + dummy_request.environ['QUERY_STRING'] = ( + 'type=TestingSearchSchema&status=released' + '&limit=9&field=@id&mode=picker&mode=chair&field=accession' + ) + params_parser = ParamsParser(dummy_request) + aq = AbstractQueryFactory( + params_parser, + max_result_window=10, + ) + assert not aq._limit_is_over_maximum_window() + dummy_request.environ['QUERY_STRING'] = ( + 'type=TestingSearchSchema&status=released' + '&limit=11&field=@id&mode=picker&mode=chair&field=accession' + ) + params_parser = ParamsParser(dummy_request) + aq = AbstractQueryFactory( + params_parser, + max_result_window=10, + ) + assert aq._limit_is_over_maximum_window() @pytest.mark.parametrize( @@ -1515,12 +1584,28 @@ def test_searches_queries_abstract_query_factory_get_bounded_limit_value_or_defa assert limit == 10 dummy_request.environ['QUERY_STRING'] = ( 'type=TestingSearchSchema&status=released' - '&limit=all&field=@id&mode=picker&mode=chair&field=accession' + '&limit=25&field=@id&mode=picker&mode=chair&field=accession' ) params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) limit = aq._get_bounded_limit_value_or_default() assert limit == 25 + dummy_request.environ['QUERY_STRING'] = ( + 'type=TestingSearchSchema&status=released' + '&limit=all&field=@id&mode=picker&mode=chair&field=accession' + ) + params_parser = ParamsParser(dummy_request) + aq = AbstractQueryFactory(params_parser) + limit = aq._get_bounded_limit_value_or_default() + assert limit == 0 + dummy_request.environ['QUERY_STRING'] = ( + 'type=TestingSearchSchema&status=released' + '&limit=100000&field=@id&mode=picker&mode=chair&field=accession' + ) + params_parser = ParamsParser(dummy_request) + aq = AbstractQueryFactory(params_parser) + limit = aq._get_bounded_limit_value_or_default() + assert limit == 0 @pytest.mark.parametrize( @@ -4094,7 +4179,7 @@ def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_ params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 0, 'query': {'match_all': {}}} dummy_request.environ['QUERY_STRING'] = ( 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=3000' ) @@ -4115,7 +4200,17 @@ def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_ params_parser = ParamsParser(dummy_request) aq = AbstractQueryFactory(params_parser) aq.add_slice() - assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}} + assert aq.search.to_dict() == {'from': 0, 'size': 0, 'query': {'match_all': {}}} + dummy_request.environ['QUERY_STRING'] = ( + 'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=100000' + ) + params_parser = ParamsParser(dummy_request) + aq = AbstractQueryFactory( + params_parser, + max_result_window=200000, + ) + aq.add_slice() + assert aq.search.to_dict() == {'from': 0, 'size': 100000, 'query': {'match_all': {}}} @pytest.mark.parametrize( @@ -4999,7 +5094,7 @@ def test_searches_queries_basic_report_query_factory_with_facets_add_slice(dummy brqf.add_slice() q = brqf.search.to_dict() assert q['from'] == 25 - assert q['size'] == 25 + assert q['size'] == 0 dummy_request.environ['QUERY_STRING'] = ( 'type=TestingSearchSchema&status=released' '&from=25&field=@id&field=accession&mode=picker' @@ -5032,14 +5127,14 @@ def test_searches_queries_basic_report_query_factory_with_facets_add_slice(dummy assert q['size'] == 9999 dummy_request.environ['QUERY_STRING'] = ( 'type=TestingSearchSchema&status=released' - '&limit=10000&field=@id&field=accession&mode=picker' + '&limit=100000&field=@id&field=accession&mode=picker' ) params_parser = ParamsParser(dummy_request) brqf = BasicReportQueryFactoryWithFacets(params_parser) brqf.add_slice() q = brqf.search.to_dict() assert q['from'] == 0 - assert q['size'] == 25 + assert q['size'] == 0 @pytest.mark.parametrize(