Skip to content

Commit

Permalink
feat(consortium-search): Increase limit for batch request IDs count f…
Browse files Browse the repository at this point in the history
…or searching items/holdings in consortium

* feat(consortium-search): Increase limit for batch request IDs count for searching items/holdings in consortium

Closes: MSEARCH-785
  • Loading branch information
mukhiddin-yusuf authored Jul 3, 2024
1 parent f67eed0 commit fe3b144
Show file tree
Hide file tree
Showing 18 changed files with 329 additions and 116 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Implement Indexing of Institutions from Kafka ([MSEARCH-771](https://issues.folio.org/browse/MSEARCH-771))
* Implement Indexing of Libraries from Kafka ([MSEARCH-769](https://issues.folio.org/browse/MSEARCH-769))
* Return Unified List of Inventory Campuses in a Consortium ([MSEARCH-773](https://issues.folio.org/browse/MSEARCH-773))
* Increase batch IDs limit for search consolidated items/holdings in consortium ([MSEARCH-785](https://folio-org.atlassian.net/browse/MSEARCH-785))
* Return Unified List of Inventory Libraries in a Consortium ([MSEARCH-772](https://issues.folio.org/browse/MSEARCH-772))
* Index, search Instance place of publication field ([MSEARCH-755](https://folio-org.atlassian.net/browse/MSEARCH-755))

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ and [Cross-cluster replication](https://docs.aws.amazon.com/opensearch-service/l
| MAX_BROWSE_REQUEST_OFFSET | 500 | The maximum elasticsearch query offset for additional requests on browse around |
| SYSTEM_USER_ENABLED | true | Defines if system user must be created at service tenant initialization or used for egress service requests |
| REINDEX_LOCATION_BATCH_SIZE | 1_000 | Defines number of locations to retrieve per inventory http request on locations reindex process |
| MAX_SEARCH_BATCH_REQUEST_IDS_COUNT | 20_000 | Defines maximum batch request IDs count for searching consolidated items/holdings in consortium |

The module uses system user to communicate with other modules from Kafka consumers.
For production deployments you MUST specify the password for this system user via env variable:
Expand Down
5 changes: 5 additions & 0 deletions descriptors/ModuleDescriptor-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,11 @@
"name": "REINDEX_LOCATION_BATCH_SIZE",
"value": "1000",
"description": "Defines number of locations to retrieve per inventory http request on locations reindex process"
},
{
"name": "MAX_SEARCH_BATCH_REQUEST_IDS_COUNT,",
"value": "20_000",
"description": "Defines maximum batch request IDs count for searching consolidated items/holdings in consortium"
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ public class SearchConfigurationProperties {
*/
private long maxBrowseRequestOffset = 500L;

/**
* Provides the maximum number of IDs for performing search with batch requests.
*/
private long maxSearchBatchRequestIdsCount = 20_000L;

/**
* Provides the size parameter for querying consortium records (holdings or items).
*/
private int searchConsortiumRecordsPageSize = 5_000;

/**
* Provides map with global features configuration. Can be overwritten by tenant configuration.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import static org.folio.search.utils.SearchUtils.INSTANCE_HOLDING_FIELD_NAME;
import static org.folio.search.utils.SearchUtils.INSTANCE_ITEM_FIELD_NAME;

import java.util.Set;
import java.util.HashSet;
import java.util.UUID;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.folio.search.domain.dto.BatchIdsDto;
Expand Down Expand Up @@ -156,10 +155,12 @@ public ResponseEntity<ConsortiumItem> getConsortiumItem(UUID id, String tenantHe
public ResponseEntity<ConsortiumHoldingCollection> fetchConsortiumBatchHoldings(String tenantHeader,
BatchIdsDto batchIdsDto) {
var tenant = verifyAndGetTenant(tenantHeader);
var holdingIds = batchIdsDto.getIds().stream().map(UUID::toString).collect(Collectors.toSet());
var searchRequest = idsCqlRequest(tenant, INSTANCE_HOLDING_FIELD_NAME, holdingIds);
if (batchIdsDto.getIds().isEmpty()) {
return ResponseEntity
.ok(new ConsortiumHoldingCollection());
}

var result = searchService.fetchConsortiumBatchHoldings(searchRequest, holdingIds);
var result = searchService.fetchConsortiumBatchHoldings(tenant, new HashSet<>(batchIdsDto.getIds()));
return ResponseEntity.ok(result);
}

Expand All @@ -172,10 +173,8 @@ public ResponseEntity<ConsortiumItemCollection> fetchConsortiumBatchItems(String
}

var tenant = verifyAndGetTenant(tenantHeader);
var itemIds = batchIdsDto.getIds().stream().map(UUID::toString).collect(Collectors.toSet());
var searchRequest = idsCqlRequest(tenant, INSTANCE_ITEM_FIELD_NAME, itemIds);

var result = searchService.fetchConsortiumBatchItems(searchRequest, itemIds);
var result = searchService.fetchConsortiumBatchItems(tenant, new HashSet<>(batchIdsDto.getIds()));
return ResponseEntity.ok(result);
}

Expand All @@ -191,13 +190,4 @@ private CqlSearchRequest<Instance> idCqlRequest(String tenant, String fieldName,
var query = fieldName + ".id=" + id;
return CqlSearchRequest.of(Instance.class, tenant, query, 1, 0, true, false, true);
}

private CqlSearchRequest<Instance> idsCqlRequest(String tenant, String fieldName, Set<String> ids) {
var query = ids.stream()
.map((fieldName + ".id=%s")::formatted)
.collect(Collectors.joining(" or "));

return CqlSearchRequest.of(Instance.class, tenant, query, ids.size(), 0, true, false, true);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public class CqlSearchRequest<T> implements ResourceRequest {
/**
* Creates {@link CqlSearchRequest} object for given variables.
*
* @param resourceClass - resource class
* @param resourceClass - resource class
* @param tenantId - tenant id
* @param query - CQL query
* @param limit - search result records limit
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/folio/search/service/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
@RequiredArgsConstructor
public class SearchService {

public static final int DEFAULT_MAX_SEARCH_RESULT_WINDOW = 10_000;

private final SearchRepository searchRepository;
private final SearchFieldProvider searchFieldProvider;
private final CqlSearchQueryConverter cqlSearchQueryConverter;
Expand All @@ -47,7 +49,7 @@ public class SearchService {
public <T> SearchResult<T> search(CqlSearchRequest<T> request) {
log.debug("search:: by [query: {}, resource: {}]", request.getQuery(), request.getResource());

if (request.getOffset() + request.getLimit() > 10_000L) {
if (request.getOffset() + request.getLimit() > DEFAULT_MAX_SEARCH_RESULT_WINDOW) {
var validationException = new RequestValidationException("The sum of limit and offset should not exceed 10000.",
"offset + limit", String.valueOf(request.getOffset() + request.getLimit()));
log.warn(validationException.getMessage());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,40 @@
package org.folio.search.service.consortium;

import static org.apache.commons.collections4.CollectionUtils.isEmpty;

import static org.folio.search.service.SearchService.DEFAULT_MAX_SEARCH_RESULT_WINDOW;
import static org.folio.search.utils.SearchUtils.INSTANCE_HOLDING_FIELD_NAME;
import static org.folio.search.utils.SearchUtils.INSTANCE_ITEM_FIELD_NAME;
import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.termsQuery;
import static org.opensearch.search.sort.SortBuilders.fieldSort;
import static org.opensearch.search.sort.SortOrder.ASC;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.folio.search.configuration.properties.SearchConfigurationProperties;
import org.folio.search.converter.ConsortiumHoldingMapper;
import org.folio.search.converter.ConsortiumItemMapper;
import org.folio.search.domain.dto.ConsortiumHolding;
import org.folio.search.domain.dto.ConsortiumHoldingCollection;
import org.folio.search.domain.dto.ConsortiumItem;
import org.folio.search.domain.dto.ConsortiumItemCollection;
import org.folio.search.domain.dto.Instance;
import org.folio.search.exception.RequestValidationException;
import org.folio.search.model.SearchResult;
import org.folio.search.model.service.CqlSearchRequest;
import org.folio.search.repository.SearchRepository;
import org.folio.search.service.SearchService;
import org.folio.search.service.converter.ElasticsearchDocumentConverter;
import org.folio.spring.FolioExecutionContext;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.search.builder.SearchSourceBuilder;
import org.springframework.stereotype.Service;

/**
Expand All @@ -28,6 +47,9 @@
public class ConsortiumInstanceSearchService {

private final SearchService searchService;
private final SearchRepository searchRepository;
private final ElasticsearchDocumentConverter documentConverter;
private final SearchConfigurationProperties properties;

public ConsortiumHolding getConsortiumHolding(String id, CqlSearchRequest<Instance> searchRequest) {
var result = searchService.search(searchRequest);
Expand Down Expand Up @@ -67,34 +89,105 @@ public ConsortiumItem getConsortiumItem(String id, CqlSearchRequest<Instance> se
return ConsortiumItemMapper.toConsortiumItem(instance.getId(), item);
}

public ConsortiumHoldingCollection fetchConsortiumBatchHoldings(CqlSearchRequest<Instance> searchRequest,
Set<String> ids) {
var result = searchService.search(searchRequest);
var consortiumHoldings = result.getRecords().stream()
.flatMap(instance ->
instance.getHoldings().stream()
.filter(holding -> ids.contains(holding.getId()))
.map(holding -> ConsortiumHoldingMapper.toConsortiumHolding(instance.getId(), holding))
)
.toList();
public ConsortiumHoldingCollection fetchConsortiumBatchHoldings(String tenant, Set<UUID> holdingIds) {
validateIdsCount(holdingIds.size());

var ids = holdingIds.stream().map(UUID::toString).collect(Collectors.toSet());
var targetField = INSTANCE_HOLDING_FIELD_NAME + ".id";

var searchRecords = getConsortiumBatchResults(tenant, ids, targetField, this::mapToConsortiumHolding);

if (searchRecords.isEmpty()) {
return new ConsortiumHoldingCollection();
}

return new ConsortiumHoldingCollection()
.holdings(consortiumHoldings)
.totalRecords(consortiumHoldings.size());
.holdings(searchRecords.stream().map(SearchResult::getRecords).flatMap(List::stream).toList())
.totalRecords(searchRecords.iterator().next().getTotalRecords());
}

public ConsortiumItemCollection fetchConsortiumBatchItems(CqlSearchRequest<Instance> searchRequest,
Set<String> ids) {
var result = searchService.search(searchRequest);
var consortiumItems = result.getRecords().stream()
.flatMap(instance ->
instance.getItems().stream()
.filter(item -> ids.contains(item.getId()))
.map(item -> ConsortiumItemMapper.toConsortiumItem(instance.getId(), item))
)
.toList();
public ConsortiumItemCollection fetchConsortiumBatchItems(String tenant, Set<UUID> itemIds) {
validateIdsCount(itemIds.size());

var ids = itemIds.stream().map(UUID::toString).collect(Collectors.toSet());
var targetField = INSTANCE_ITEM_FIELD_NAME + ".id";

var searchRecords = getConsortiumBatchResults(tenant, ids, targetField, this::mapToConsortiumItem);

if (searchRecords.isEmpty()) {
return new ConsortiumItemCollection();
}

return new ConsortiumItemCollection()
.items(consortiumItems)
.totalRecords(consortiumItems.size());
.items(searchRecords.stream().map(SearchResult::getRecords).flatMap(List::stream).toList())
.totalRecords(searchRecords.iterator().next().getTotalRecords());
}

private <T> List<SearchResult<T>> getConsortiumBatchResults(String tenant, Set<String> ids, String targetField,
BiFunction<Instance, Set<String>, List<T>> recordMapper) {
var request = CqlSearchRequest.of(Instance.class, tenant, "", 0, 0, true, false, true);
var termsQuery = termsQuery(targetField, ids);

if (ids.size() < DEFAULT_MAX_SEARCH_RESULT_WINDOW) {
var searchSourceBuilder = queryBuilder(termsQuery, ids.size());
var response = searchRepository.search(request, searchSourceBuilder);
var searchResult = documentConverter.convertToSearchResult(response, request.getResourceClass(),
(hits, item) -> recordMapper.apply(item, ids));
var records = searchResult.getRecords().stream()
.flatMap(List::stream)
.toList();
return List.of(SearchResult.of(records.size(), records));
}

var searchSourceBuilder = queryBuilder(termsQuery, properties.getSearchConsortiumRecordsPageSize())
.sort(fieldSort(targetField).order(ASC))
.searchAfter(new Object[]{""});
var response = searchRepository.search(request, searchSourceBuilder);
List<SearchResult<T>> searchRecords = new ArrayList<>();

while (response.getHits() != null && response.getHits().getHits().length > 0) {
var searchResult = documentConverter.convertToSearchResult(response, request.getResourceClass(),
(hits, item) -> recordMapper.apply(item, ids));
var records = searchResult.getRecords().stream()
.flatMap(List::stream)
.toList();
searchRecords.add(SearchResult.of(records.size(), records));
var searchAfterValue = response.getHits()
.getAt(response.getHits().getHits().length - 1).getSortValues()[0];
searchSourceBuilder.searchAfter(new Object[]{searchAfterValue});
response = searchRepository.search(request, searchSourceBuilder);
}

return searchRecords;
}

private List<ConsortiumHolding> mapToConsortiumHolding(Instance instance, Set<String> ids) {
return instance.getHoldings().stream()
.filter(holding -> ids.contains(holding.getId()))
.map(holding -> ConsortiumHoldingMapper.toConsortiumHolding(instance.getId(), holding))
.toList();
}

private List<ConsortiumItem> mapToConsortiumItem(Instance instance, Set<String> ids) {
return instance.getItems().stream()
.filter(item -> ids.contains(item.getId()))
.map(holding -> ConsortiumItemMapper.toConsortiumItem(instance.getId(), holding))
.toList();
}

private SearchSourceBuilder queryBuilder(QueryBuilder filterQuery, int size) {
return new SearchSourceBuilder()
.query(boolQuery().filter(filterQuery))
.size(size)
.from(0)
.trackTotalHits(true);
}

private void validateIdsCount(long count) {
var idsLimit = properties.getMaxSearchBatchRequestIdsCount();
if (count > idsLimit) {
throw new RequestValidationException("IDs array size exceeds the maximum allowed limit %s".formatted(idsLimit),
"size", Long.toString(count));
}
}
}
2 changes: 2 additions & 0 deletions src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ folio:
initial-languages: ${INITIAL_LANGUAGES:eng}
max-supported-languages: ${MAX_SUPPORTED_LANGUAGES:5}
max-browse-request-offset: ${MAX_BROWSE_REQUEST_OFFSET:500}
max-search-batch-request-ids-count: ${MAX_SEARCH_BATCH_REQUEST_IDS_COUNT:20000}
search-consortium-records-page-size: ${SEARCH_CONSORTIUM_RECORDS_PAGE_SIZE:5000}
search-features:
search-all-fields: ${SEARCH_BY_ALL_FIELDS_ENABLED:false}
browse-cn-intermediate-values: ${BROWSE_CN_INTERMEDIATE_VALUES_ENABLED:true}
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/swagger.api/parameters/batchIdsDto.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ properties:
ids:
description: Entity IDs
type: array
maxItems: 1000
items:
type: string
format: uuid
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.folio.search.domain.dto.BrowseOptionType;
import org.folio.search.domain.dto.Classification;
Expand Down Expand Up @@ -164,7 +163,7 @@ private static Instance instance(List<Object> data) {
.map(pair -> new Classification()
.classificationNumber(String.valueOf(pair.getFirst()))
.classificationTypeId(String.valueOf(pair.getSecond())))
.collect(Collectors.toList()))
.toList())
.staffSuppress(false)
.discoverySuppress(false)
.holdings(emptyList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.folio.search.domain.dto.BrowseConfig;
import org.folio.search.domain.dto.BrowseOptionType;
Expand Down Expand Up @@ -215,7 +214,7 @@ private static Instance instance(List<Object> data) {
.map(pair -> new Classification()
.classificationNumber(String.valueOf(pair.getFirst()))
.classificationTypeId(String.valueOf(pair.getSecond())))
.collect(Collectors.toList()))
.toList())
.staffSuppress(false)
.discoverySuppress(false)
.holdings(emptyList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.folio.search.domain.dto.Facet;
import org.folio.search.domain.dto.FacetResult;
Expand Down Expand Up @@ -111,7 +110,7 @@ private static Instance instance(List<Object> data) {
} else {
return new Subject().value(String.valueOf(val));
}
}).collect(Collectors.toList()))
}).toList())
.staffSuppress(false)
.discoverySuppress(false)
.holdings(emptyList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.folio.search.domain.dto.Instance;
import org.folio.search.domain.dto.Subject;
Expand Down Expand Up @@ -275,7 +274,7 @@ private static Instance instance(List<Object> data) {
} else {
return new Subject().value(String.valueOf(val));
}
}).collect(Collectors.toList()))
}).toList())
.staffSuppress(false)
.discoverySuppress(false)
.holdings(emptyList());
Expand Down
Loading

0 comments on commit fe3b144

Please sign in to comment.