Skip to content

Commit

Permalink
SNO-211-increase-batch-size-in-scroll-api (#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
keenangraham authored Sep 10, 2021
1 parent 382e934 commit 9426197
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 13 deletions.
2 changes: 2 additions & 0 deletions src/snosearch/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@

MAX_ES_RESULTS_WINDOW = 9999

DEFAULT_SCAN_SIZE = 1000

DEFAULT_FRAMES = [
EMBEDDED_FRAME,
OBJECT_FRAME,
Expand Down
3 changes: 2 additions & 1 deletion src/snosearch/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ def _limit_generator(self, generator, limit):
yield r

def _scan(self):
results = self.results._search.scan()
size = self.query_builder._get_scan_size()
results = self.results._search.params(size=size).scan()
if not self.query_builder._limit_is_all():
results = self._limit_generator(
results,
Expand Down
16 changes: 10 additions & 6 deletions src/snosearch/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .defaults import BASE_SEARCH_FIELDS
from .defaults import DEFAULT_COLUMNS
from .defaults import DEFAULT_FRAMES
from .defaults import DEFAULT_SCAN_SIZE
from .defaults import DEFAULT_SORT
from .defaults import DEFAULT_SORT_OPTIONS
from .defaults import INTERNAL_AUDIT_FACETS
Expand Down Expand Up @@ -413,6 +414,12 @@ def _get_from_value_as_int(self):
def _get_default_limit(self):
    '''
    Return the fallback limit as a single-item list holding one
    (key, value) pair: the limit key paired with 25.
    '''
    default_pair = (LIMIT_KEY, 25)
    return [default_pair]

def _get_max_result_window(self):
    '''
    Return the maximum result window for this query factory.

    The value is taken from the optional ``max_result_window`` entry in
    ``self.kwargs``; when that entry is absent MAX_ES_RESULTS_WINDOW
    is returned instead.
    '''
    options = self.kwargs
    return options.get('max_result_window', MAX_ES_RESULTS_WINDOW)

def _get_scan_size(self):
    '''
    Return the size to use when scanning over results.

    The value is taken from the optional ``scan_size`` entry in
    ``self.kwargs``; when that entry is absent DEFAULT_SCAN_SIZE
    is returned instead.
    '''
    options = self.kwargs
    return options.get('scan_size', DEFAULT_SCAN_SIZE)

@assert_one_or_none_returned(error_message='Invalid to specify multiple limit parameters:')
def _get_limit(self):
    '''
    Return the limit parameters reported by the params parser, or the
    default limit when the parser returns nothing truthy.
    '''
    requested_limit = self.params_parser.get_limit()
    if requested_limit:
        return requested_limit
    return self._get_default_limit()
Expand Down Expand Up @@ -440,7 +447,7 @@ def _limit_is_all(self):
def _limit_is_over_maximum_window(self):
limit = self._get_limit_value_as_int()
if limit:
return limit > MAX_ES_RESULTS_WINDOW
return limit > self._get_max_result_window()
return False

def _should_scan_over_results(self):
Expand Down Expand Up @@ -469,11 +476,8 @@ def _should_search_over_all_indices(self):
return any(conditions)

def _get_bounded_limit_value_or_default(self):
default_limit = self.params_parser.get_one_value(
params=self._get_default_limit()
)
if self._should_scan_over_results():
return default_limit
return 0
return self._get_limit_value_as_int()

@assert_one_or_none_returned(error_message='Invalid to specify multiple mode parameters:')
Expand Down Expand Up @@ -939,7 +943,7 @@ def add_source(self):

def add_slice(self):
'''
If limit=all or limit > MAX_ES_RESULTS_WINDOW we return
If limit=all or limit > max result window we return
default slice for the aggregations/total and scan over results
in response mixin to_graph method.
'''
Expand Down
107 changes: 101 additions & 6 deletions src/snosearch/tests/test_searches_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1291,6 +1291,43 @@ def test_searches_queries_abstract_query_factory_get_default_limit(params_parser
assert default_limit == [('limit', 25)]


@pytest.mark.parametrize(
    'params_parser',
    integrations,
    indirect=True
)
def test_searches_queries_abstract_query_factory_get_max_result_window(params_parser):
    '''
    _get_max_result_window returns 9999 when no override is passed and
    returns the max_result_window keyword argument when one is given.
    '''
    from snosearch.queries import AbstractQueryFactory
    # No keyword override: the default window applies.
    default_factory = AbstractQueryFactory(params_parser)
    assert default_factory._get_max_result_window() == 9999
    # Explicit keyword override takes precedence over the default.
    overridden_factory = AbstractQueryFactory(
        params_parser,
        max_result_window=99999,
    )
    assert overridden_factory._get_max_result_window() == 99999



@pytest.mark.parametrize(
    'params_parser',
    integrations,
    indirect=True
)
def test_searches_queries_abstract_query_factory_get_scan_size(params_parser):
    '''
    _get_scan_size returns 1000 when no override is passed and returns
    the scan_size keyword argument when one is given.
    '''
    from snosearch.queries import AbstractQueryFactory
    # No keyword override: the default scan size applies.
    default_factory = AbstractQueryFactory(params_parser)
    assert default_factory._get_scan_size() == 1000
    # Explicit keyword override takes precedence over the default.
    overridden_factory = AbstractQueryFactory(
        params_parser,
        scan_size=200000,
    )
    assert overridden_factory._get_scan_size() == 200000


@pytest.mark.parametrize(
'params_parser, dummy_request',
[
Expand Down Expand Up @@ -1417,6 +1454,18 @@ def test_searches_queries_abstract_query_factory_limit_is_over_maximum_window(pa
'type=TestingSearchSchema&status=released'
'&limit=10000&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(
dummy_request
)
aq = AbstractQueryFactory(
params_parser,
max_result_window=10000,
)
assert not aq._limit_is_over_maximum_window()
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=100000&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
assert aq._limit_is_over_maximum_window()
Expand All @@ -1427,6 +1476,26 @@ def test_searches_queries_abstract_query_factory_limit_is_over_maximum_window(pa
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
assert not aq._limit_is_over_maximum_window()
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=9&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(
params_parser,
max_result_window=10,
)
assert not aq._limit_is_over_maximum_window()
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=11&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(
params_parser,
max_result_window=10,
)
assert aq._limit_is_over_maximum_window()


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1515,12 +1584,28 @@ def test_searches_queries_abstract_query_factory_get_bounded_limit_value_or_defa
assert limit == 10
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=all&field=@id&mode=picker&mode=chair&field=accession'
'&limit=25&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
limit = aq._get_bounded_limit_value_or_default()
assert limit == 25
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=all&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
limit = aq._get_bounded_limit_value_or_default()
assert limit == 0
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=100000&field=@id&mode=picker&mode=chair&field=accession'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
limit = aq._get_bounded_limit_value_or_default()
assert limit == 0


@pytest.mark.parametrize(
Expand Down Expand Up @@ -4094,7 +4179,7 @@ def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
aq.add_slice()
assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}}
assert aq.search.to_dict() == {'from': 0, 'size': 0, 'query': {'match_all': {}}}
dummy_request.environ['QUERY_STRING'] = (
'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=3000'
)
Expand All @@ -4115,7 +4200,17 @@ def test_searches_queries_abstract_query_factory_add_slice(params_parser, dummy_
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(params_parser)
aq.add_slice()
assert aq.search.to_dict() == {'from': 0, 'size': 25, 'query': {'match_all': {}}}
assert aq.search.to_dict() == {'from': 0, 'size': 0, 'query': {'match_all': {}}}
dummy_request.environ['QUERY_STRING'] = (
'searchTerm=chip-seq&type=TestingSearchSchema&frame=object&limit=100000'
)
params_parser = ParamsParser(dummy_request)
aq = AbstractQueryFactory(
params_parser,
max_result_window=200000,
)
aq.add_slice()
assert aq.search.to_dict() == {'from': 0, 'size': 100000, 'query': {'match_all': {}}}


@pytest.mark.parametrize(
Expand Down Expand Up @@ -4999,7 +5094,7 @@ def test_searches_queries_basic_report_query_factory_with_facets_add_slice(dummy
brqf.add_slice()
q = brqf.search.to_dict()
assert q['from'] == 25
assert q['size'] == 25
assert q['size'] == 0
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&from=25&field=@id&field=accession&mode=picker'
Expand Down Expand Up @@ -5032,14 +5127,14 @@ def test_searches_queries_basic_report_query_factory_with_facets_add_slice(dummy
assert q['size'] == 9999
dummy_request.environ['QUERY_STRING'] = (
'type=TestingSearchSchema&status=released'
'&limit=10000&field=@id&field=accession&mode=picker'
'&limit=100000&field=@id&field=accession&mode=picker'
)
params_parser = ParamsParser(dummy_request)
brqf = BasicReportQueryFactoryWithFacets(params_parser)
brqf.add_slice()
q = brqf.search.to_dict()
assert q['from'] == 0
assert q['size'] == 25
assert q['size'] == 0


@pytest.mark.parametrize(
Expand Down

0 comments on commit 9426197

Please sign in to comment.