Skip to content

Commit

Permalink
feat(indexing): simplify indexing for instance child documents
Browse files Browse the repository at this point in the history
+ cleanup indexes

Closes: MSEARCH-851
  • Loading branch information
psmagin committed Oct 15, 2024
1 parent c06850a commit d6594a1
Show file tree
Hide file tree
Showing 32 changed files with 266 additions and 219 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ mvn install

See that it says "BUILD SUCCESS" near the end.

By default the integration tests run against an OpenSearch server.
By default, the integration tests run against an OpenSearch server.
To run them against an Elasticsearch server, use:

```shell
Expand Down Expand Up @@ -370,7 +370,7 @@ update or sync the aggregated data of those specific Instances, so that the aggr

The Upload step queries the aggregated data for the specific Instance resource type and then runs the indexing by populating the data into the resource's OpenSearch index.

We can execute both Merge and Upload steps with so called _full reindex_ API:
We can execute both the Merge and Upload steps with the so-called _full reindex_ API:
```http
POST /search/index/instance-records/reindex/full
```
Expand Down Expand Up @@ -643,7 +643,7 @@ does not produce any values, so the following search options will return an empt
| `modeOfIssuanceId` | term | `modeOfIssuanceId=="123"` | Matches instances that have `123` mode of issuance |
| `natureOfContentTermIds` | term | `natureOfContentTermIds=="123"` | Matches instances that have `123` nature of content |
| `publisher` | full-text | `publisher all "Publisher of Ukraine"` | Matches instances that have `Publisher of Ukraine` publisher |
| `publication.place` | full-text | `publication.place all "Ukraine"` | Matches instances that have `Ukraine` in publication place |
| `publication.place` | full-text | `publication.place all "Ukraine"` | Matches instances that have `Ukraine` in publication place |
| `instanceTags` | term | `instanceTags=="important"` | Matches instances that have `important` tag |
| `classifications.classificationNumber` | term | `classifications.classificationNumber=="cl1"` | Matches instances that have `cl1` classification number |
| `classifications.classificationTypeId` | term | `classifications.classificationTypeId=="123"` | Matches instances that have classification type id `123` |
Expand Down Expand Up @@ -948,7 +948,7 @@ Send a POST request to create a Job
}
```

It is possible to check job status by jobs Id.
It is possible to check job status by job ID.

`GET /search/resources/jobs/{jobId}`

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package org.folio.search.model.index;

import java.util.List;
import lombok.Builder;
import lombok.Data;

/**
 * Aggregated instance data attached to a browse resource (classification, contributor
 * or subject) through its {@code instances} collection.
 *
 * <p>The fields mirror the keys that the repository queries emit with
 * {@code json_build_object}: {@code count}, {@code typeId}, {@code shared} and
 * {@code tenantId}.
 *
 * <p>NOTE(review): this listing is a commit diff rendered without +/- markers, so removed
 * and added fields appear side by side and {@code typeId} is declared twice. Presumably
 * only the {@code count} / {@code List<String> typeId} variant remains after the change;
 * confirm against the committed file.
 */
@Data
@Builder
public class InstanceSubResource {
// NOTE(review): presumably the pre-change field, superseded by `count` - confirm.
private String instanceId;
// NOTE(review): presumably the pre-change declaration, superseded by the
// List<String> typeId below - confirm.
private String typeId;
// Value of the 'tenantId' key: tenant_id of the grouped instance link rows.
private String tenantId;
// Value of the 'shared' key: shared flag of the grouped instance link rows.
private Boolean shared;
// Value of the 'count' key: number of instances in one (tenant_id, shared) group;
// the browse services sum it to produce total records.
private int count;
// Value of the 'typeId' key: distinct non-empty type ids aggregated by the contributor
// query; the classification and subject queries do not emit this key.
private List<String> typeId;
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,7 @@ private static String getBrowseField(BrowseConfig config) {
/**
 * Computes the total number of instance records behind a classification browse item.
 *
 * <p>The classification's instance sub-resources are first narrowed by
 * {@code consortiumSearchHelper.filterSubResourcesForConsortium}; the per-sub-resource
 * counts are then summed with identity 0, so an empty set yields 0.
 *
 * @param ctx browse context passed to the consortium filter
 * @param classificationResource resource whose {@code instances} are counted
 * @return sum of the counts of the filtered sub-resources
 */
private Integer getTotalRecords(BrowseContext ctx, ClassificationResource classificationResource) {
return consortiumSearchHelper.filterSubResourcesForConsortium(ctx, classificationResource,
ClassificationResource::instances).stream()
// NOTE(review): the next three steps look like the pre-change counting (one per
// distinct instance id) shown by the diff view; presumably replaced by the
// getCount mapping that follows - confirm against the committed file.
.map(InstanceSubResource::getInstanceId)
.distinct()
.map(e -> 1)
.map(InstanceSubResource::getCount)
.reduce(0, Integer::sum);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.folio.search.service.browse;

import static java.util.Objects.nonNull;
import static org.folio.search.utils.SearchUtils.AUTHORITY_ID_FIELD;
import static org.folio.search.utils.SearchUtils.MISSING_LAST_PROP;
import static org.opensearch.index.query.QueryBuilders.boolQuery;
Expand All @@ -11,6 +10,8 @@
import static org.opensearch.search.sort.SortOrder.ASC;
import static org.opensearch.search.sort.SortOrder.DESC;

import java.util.List;
import java.util.Objects;
import java.util.Set;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
Expand Down Expand Up @@ -91,7 +92,8 @@ protected BrowseResult<InstanceContributorBrowseItem> mapToBrowseResult(BrowseCo
ContributorResource::instances);
var typeIds = filteredInstanceResources.stream()
.map(InstanceSubResource::getTypeId)
.filter(typeId -> nonNull(typeId) && !typeId.equals("null"))
.filter(Objects::nonNull)
.flatMap(List::stream)
.distinct()
.sorted()
.toList();
Expand All @@ -113,10 +115,7 @@ protected String getValueForBrowsing(InstanceContributorBrowseItem browseItem) {

/**
 * Computes the total number of instance records for a contributor browse item.
 *
 * <p>Sums {@code count} over the given sub-resources with identity 0, so an empty set
 * yields 0. The caller passes an already filtered set; no filtering happens here
 * beyond what the lines below show.
 *
 * @param filteredInstanceResources sub-resources to count
 * @return sum of the counts of the given sub-resources
 */
private Integer getTotalRecords(Set<InstanceSubResource> filteredInstanceResources) {
return filteredInstanceResources.stream()
// NOTE(review): the next four steps look like the pre-change counting (one per
// distinct, non-"null" instance id) shown by the diff view; presumably replaced by
// the getCount mapping that follows - confirm against the committed file.
.map(InstanceSubResource::getInstanceId)
.filter(instanceId -> nonNull(instanceId) && !instanceId.equals("null"))
.distinct()
.map(e -> 1)
.map(InstanceSubResource::getCount)
.reduce(0, Integer::sum);
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.folio.search.service.browse;

import static java.util.Objects.nonNull;
import static org.folio.search.utils.SearchUtils.AUTHORITY_ID_FIELD;
import static org.folio.search.utils.SearchUtils.MISSING_LAST_PROP;
import static org.opensearch.index.query.QueryBuilders.boolQuery;
Expand Down Expand Up @@ -105,10 +104,7 @@ protected String getValueForBrowsing(SubjectBrowseItem browseItem) {
/**
 * Computes the total number of instance records behind a subject browse item.
 *
 * <p>The subject's instance sub-resources are first narrowed by
 * {@code consortiumSearchHelper.filterSubResourcesForConsortium}; the per-sub-resource
 * counts are then summed with identity 0, so an empty set yields 0.
 *
 * @param context browse context passed to the consortium filter
 * @param subjectResource resource whose {@code instances} are counted
 * @return sum of the counts of the filtered sub-resources
 */
private Integer getTotalRecords(BrowseContext context, SubjectResource subjectResource) {
return consortiumSearchHelper.filterSubResourcesForConsortium(context, subjectResource,
SubjectResource::instances).stream()
// NOTE(review): the next four steps look like the pre-change counting (one per
// distinct, non-"null" instance id) shown by the diff view; presumably replaced by
// the getCount mapping that follows - confirm against the committed file.
.map(InstanceSubResource::getInstanceId)
.filter(instanceId -> nonNull(instanceId) && !instanceId.equals("null"))
.distinct()
.map(e -> 1)
.map(InstanceSubResource::getCount)
.reduce(0, Integer::sum);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,45 @@
public class ClassificationRepository extends UploadRangeRepository {

/**
 * SQL template that aggregates classification rows together with their instance links.
 *
 * <p>Format arguments: {@code %1$s} is the schema name, {@code %2$s} the filter applied
 * inside the sub-select on {@code instance_classification}, and {@code %3$s} the filter
 * applied to the joined {@code classification} rows.
 *
 * <p>The sub-select groups link rows by (classification_id, tenant_id, shared) and counts
 * the rows of each group, so every element of the {@code instances} JSON array carries
 * {@code count}, {@code shared} and {@code tenantId} rather than a single instance id.
 *
 * <p>NOTE(review): this diff view shows the pre-change single-level query followed by the
 * nested query inside one text block; presumably only the nested form remains after the
 * change - confirm against the committed file.
 */
private static final String SELECT_QUERY = """
SELECT c.id as id, c.number as number, c.type_id as type_id, json_agg(json_build_object(
'instanceId', ic.instance_id,
'shared', ic.shared,
'tenantId', ic.tenant_id
)) AS instances
FROM %1$s.instance_classification ic
JOIN %1$s.classification c ON c.id = ic.classification_id
WHERE %2$s
GROUP BY c.id;
SELECT
c.id,
c.number,
c.type_id,
json_agg(
json_build_object(
'count', sub.instance_count,
'shared', sub.shared,
'tenantId', sub.tenant_id
)
) AS instances
FROM
(
SELECT
ins.classification_id,
ins.tenant_id,
ins.shared,
COUNT(1) AS instance_count
FROM
%1$s.instance_classification ins
WHERE
%2$s
GROUP BY
ins.classification_id,
ins.tenant_id,
ins.shared
) sub
JOIN
%1$s.classification c ON c.id = sub.classification_id
WHERE
%3$s
GROUP BY
c.id;
""";

// NOTE(review): the two combined clauses below (alias "ic") look like the pre-change
// constants; presumably replaced by the split clauses that follow - confirm.
private static final String ID_RANGE_WHERE_CLAUSE = "ic.classification_id >= ? AND ic.classification_id <= ? "
+ "AND c.id >= ? AND c.id <= ?";
private static final String IDS_WHERE_CLAUSE = "ic.classification_id IN (%1$s) AND c.id IN (%1$s)";
// Split filters: the *_INS_* clauses target the sub-select alias "ins" (SELECT_QUERY %2$s),
// the *_CLAS_* clauses target the outer alias "c" (SELECT_QUERY %3$s). Each range clause
// has two '?' parameters; each IDS clause takes the id placeholder list as %1$s.
private static final String ID_RANGE_INS_WHERE_CLAUSE = "ins.classification_id >= ? AND ins.classification_id <= ?";
private static final String ID_RANGE_CLAS_WHERE_CLAUSE = "c.id >= ? AND c.id <= ?";
private static final String IDS_INS_WHERE_CLAUSE = "ins.classification_id IN (%1$s)";
private static final String IDS_CLAS_WHERE_CLAUSE = "c.id IN (%1$s)";

protected ClassificationRepository(JdbcTemplate jdbcTemplate,
JsonConverter jsonConverter,
Expand Down Expand Up @@ -68,7 +93,8 @@ public List<InstanceClassificationEntityAgg> fetchByIds(List<String> ids) {
return Collections.emptyList();
}
var sql = SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
IDS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())));
IDS_INS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())),
IDS_CLAS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())));
return jdbcTemplate.query(sql, instanceClassificationAggRowMapper(), ListUtils.union(ids, ids).toArray());
}

Expand All @@ -80,7 +106,9 @@ public List<Map<String, Object>> fetchByIdRange(String lower, String upper) {

/**
 * Builds the id-range variant of {@code SELECT_QUERY}.
 *
 * <p>The schema name fills {@code %1$s}, the range filter on the instance link rows fills
 * {@code %2$s} and the range filter on classification ids fills {@code %3$s}. The
 * resulting SQL contains four {@code ?} parameters (two per range clause); they are bound
 * by the caller, which is not visible in this hunk.
 *
 * @return SQL text with the schema and range filters substituted
 */
@Override
protected String getFetchBySql() {
// NOTE(review): the single-clause return below looks like the pre-change statement shown
// by the diff view; presumably only the three-argument form remains - confirm.
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context), ID_RANGE_WHERE_CLAUSE);
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
ID_RANGE_INS_WHERE_CLAUSE,
ID_RANGE_CLAS_WHERE_CLAUSE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,48 @@
public class ContributorRepository extends UploadRangeRepository {

/**
 * SQL template that aggregates contributor rows together with their instance links.
 *
 * <p>Format arguments: {@code %1$s} is the schema name, {@code %2$s} the filter applied
 * inside the sub-select on {@code instance_contributor}, and {@code %3$s} the filter
 * applied to the joined {@code contributor} rows.
 *
 * <p>The sub-select groups link rows by (contributor_id, tenant_id, shared). For each
 * group it collects the distinct non-empty type ids and counts distinct instance ids, so
 * every element of the {@code instances} JSON array carries {@code count},
 * {@code typeId} (an array), {@code shared} and {@code tenantId}.
 *
 * <p>NOTE(review): this diff view shows the pre-change single-level query followed by the
 * nested query inside one text block; presumably only the nested form remains after the
 * change - confirm against the committed file.
 */
public static final String SELECT_QUERY = """
SELECT c.id, c.name, c.name_type_id, c.authority_id, json_agg(json_build_object(
'instanceId', ic.instance_id,
'typeId', NULLIF(ic.type_id, ''),
'shared', ic.shared,
'tenantId', ic.tenant_id
)) AS instances
FROM %1$s.instance_contributor ic
JOIN %1$s.contributor c ON c.id = ic.contributor_id
WHERE %2$s
GROUP BY c.id;
SELECT
c.id,
c.name,
c.name_type_id,
c.authority_id,
json_agg(
json_build_object(
'count', sub.instance_count,
'typeId', sub.type_ids,
'shared', sub.shared,
'tenantId', sub.tenant_id
)
) AS instances
FROM
(
SELECT
ins.contributor_id,
ins.tenant_id,
ins.shared,
array_agg(DISTINCT ins.type_id) FILTER (WHERE ins.type_id <> '') as type_ids,
COUNT(DISTINCT ins.instance_id) AS instance_count
FROM
%1$s.instance_contributor ins
WHERE
%2$s
GROUP BY
ins.contributor_id,
ins.tenant_id,
ins.shared
) sub
JOIN
%1$s.contributor c ON c.id = sub.contributor_id
WHERE
%3$s
GROUP BY
c.id;
""";

// NOTE(review): the two combined clauses below (alias "ic") look like the pre-change
// constants; presumably replaced by the split clauses that follow - confirm.
private static final String ID_RANGE_WHERE_CLAUSE = "ic.contributor_id >= ? AND ic.contributor_id <= ? "
+ "AND c.id >= ? AND c.id <= ?";
private static final String IDS_WHERE_CLAUSE = "ic.contributor_id IN (%1$s) AND c.id IN (%1$s)";
// Split filters: the *_INS_* clauses target the sub-select alias "ins" (SELECT_QUERY %2$s),
// the *_CONTR_* clauses target the outer alias "c" (SELECT_QUERY %3$s). Each range clause
// has two '?' parameters; each IDS clause takes the id placeholder list as %1$s.
private static final String ID_RANGE_INS_WHERE_CLAUSE = "ins.contributor_id >= ? AND ins.contributor_id <= ?";
private static final String ID_RANGE_CONTR_WHERE_CLAUSE = "c.id >= ? AND c.id <= ?";
private static final String IDS_INS_WHERE_CLAUSE = "ins.contributor_id IN (%1$s)";
private static final String IDS_CONTR_WHERE_CLAUSE = "c.id IN (%1$s)";

protected ContributorRepository(JdbcTemplate jdbcTemplate, JsonConverter jsonConverter,
FolioExecutionContext context,
Expand Down Expand Up @@ -68,7 +95,9 @@ public List<InstanceContributorEntityAgg> fetchByIds(List<String> ids) {
return Collections.emptyList();
}
var sql = SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
IDS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())));
IDS_INS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())),
IDS_CONTR_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size()))
);
return jdbcTemplate.query(sql, instanceAggRowMapper(), ListUtils.union(ids, ids).toArray());
}

Expand All @@ -80,7 +109,8 @@ public List<Map<String, Object>> fetchByIdRange(String lower, String upper) {

/**
 * Builds the id-range variant of {@code SELECT_QUERY}.
 *
 * <p>The schema name fills {@code %1$s}, the range filter on the instance link rows fills
 * {@code %2$s} and the range filter on contributor ids fills {@code %3$s}. The resulting
 * SQL contains four {@code ?} parameters (two per range clause); they are bound by the
 * caller, which is not visible in this hunk.
 *
 * @return SQL text with the schema and range filters substituted
 */
@Override
protected String getFetchBySql() {
// NOTE(review): the single-clause return below looks like the pre-change statement shown
// by the diff view; presumably only the three-argument form remains - confirm.
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context), ID_RANGE_WHERE_CLAUSE);
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
ID_RANGE_INS_WHERE_CLAUSE, ID_RANGE_CONTR_WHERE_CLAUSE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,47 @@
public class SubjectRepository extends UploadRangeRepository {

/**
 * SQL template that aggregates subject rows together with their instance links.
 *
 * <p>Format arguments: {@code %1$s} is the schema name, {@code %2$s} the filter applied
 * inside the sub-select on {@code instance_subject}, and {@code %3$s} the filter applied
 * to the joined {@code subject} rows.
 *
 * <p>The sub-select groups link rows by (subject_id, tenant_id, shared) and counts the
 * rows of each group, so every element of the {@code instances} JSON array carries
 * {@code count}, {@code shared} and {@code tenantId} rather than a single instance id.
 *
 * <p>NOTE(review): this diff view shows the pre-change single-level query followed by the
 * nested query inside one text block; presumably only the nested form remains after the
 * change - confirm against the committed file.
 */
public static final String SELECT_QUERY = """
SELECT s.id, s.value, s.authority_id, s.source_id, s.type_id, json_agg(json_build_object(
'instanceId', ins.instance_id,
'shared', ins.shared,
'tenantId', ins.tenant_id
)) AS instances
FROM %1$s.instance_subject ins
JOIN %1$s.subject s ON s.id = ins.subject_id
WHERE %2$s
GROUP BY s.id;
SELECT
s.id,
s.value,
s.authority_id,
s.source_id,
s.type_id,
json_agg(
json_build_object(
'count', sub.instance_count,
'shared', sub.shared,
'tenantId', sub.tenant_id
)
) AS instances
FROM
(
SELECT
ins.subject_id,
ins.tenant_id,
ins.shared,
COUNT(1) AS instance_count
FROM
%1$s.instance_subject ins
WHERE
%2$s
GROUP BY
ins.subject_id,
ins.tenant_id,
ins.shared
) sub
JOIN
%1$s.subject s ON s.id = sub.subject_id
WHERE
%3$s
GROUP BY
s.id;
""";

// NOTE(review): the two combined clauses below look like the pre-change constants;
// presumably replaced by the split clauses that follow - confirm.
private static final String ID_RANGE_WHERE_CLAUSE = "ins.subject_id >= ? AND ins.subject_id <= ? "
+ "AND s.id >= ? AND s.id <= ?";
private static final String IDS_WHERE_CLAUSE = "ins.subject_id IN (%1$s) AND s.id IN (%1$s)";
// Split filters: the *_INS_* clauses target the sub-select alias "ins" (SELECT_QUERY %2$s),
// the remaining two target the outer alias "s" (SELECT_QUERY %3$s). Each range clause has
// two '?' parameters; each IDS clause takes the id placeholder list as %1$s.
// NOTE(review): the outer-clause names use both "SUBJ" and "SUB" - consider aligning them.
private static final String ID_RANGE_INS_WHERE_CLAUSE = "ins.subject_id >= ? AND ins.subject_id <= ?";
private static final String ID_RANGE_SUBJ_WHERE_CLAUSE = "s.id >= ? AND s.id <= ?";
private static final String IDS_INS_WHERE_CLAUSE = "ins.subject_id IN (%1$s)";
private static final String IDS_SUB_WHERE_CLAUSE = "s.id IN (%1$s)";


protected SubjectRepository(JdbcTemplate jdbcTemplate,
Expand Down Expand Up @@ -69,7 +96,8 @@ public List<InstanceSubjectEntityAgg> fetchByIds(List<String> ids) {
return Collections.emptyList();
}
var sql = SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
IDS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())));
IDS_INS_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())),
IDS_SUB_WHERE_CLAUSE.formatted(getParamPlaceholder(ids.size())));
return jdbcTemplate.query(sql, instanceAggRowMapper(), ListUtils.union(ids, ids).toArray());
}

Expand All @@ -83,7 +111,9 @@ public List<Map<String, Object>> fetchByIdRange(String lower, String upper) {

/**
 * Builds the id-range variant of {@code SELECT_QUERY}.
 *
 * <p>The schema name fills {@code %1$s}, the range filter on the instance link rows fills
 * {@code %2$s} and the range filter on subject ids fills {@code %3$s}. The resulting SQL
 * contains four {@code ?} parameters (two per range clause); they are bound by the
 * caller, which is not visible in this hunk.
 *
 * @return SQL text with the schema and range filters substituted
 */
@Override
protected String getFetchBySql() {
// NOTE(review): the single-clause return below looks like the pre-change statement shown
// by the diff view; presumably only the three-argument form remains - confirm.
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context), ID_RANGE_WHERE_CLAUSE);
return SELECT_QUERY.formatted(JdbcUtils.getSchemaName(context),
ID_RANGE_INS_WHERE_CLAUSE,
ID_RANGE_SUBJ_WHERE_CLAUSE);
}

@Override
Expand Down
6 changes: 6 additions & 0 deletions src/main/resources/elasticsearch/index-field-types.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"normalizer": "keyword_uppercase"
}
},
"keyword_icu": {
"mapping": {
"type": "keyword",
"normalizer": "keyword_icu"
}
},
"keyword_lowercase": {
"mapping": {
"type": "keyword",
Expand Down
8 changes: 8 additions & 0 deletions src/main/resources/elasticsearch/index/contributor.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
"icu_folding"
],
"type": "custom"
},
"keyword_icu": {
"filter": [
"lowercase",
"trim",
"icu_folding"
],
"type": "custom"
}
},
"tokenizers": {}
Expand Down
Loading

0 comments on commit d6594a1

Please sign in to comment.