From 069674e7ee0b5d39166f9ad77c8491d83aea1dcf Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 18 Dec 2023 16:52:17 +1300 Subject: [PATCH 01/33] [TLC-674] Duplicate Detection service and submission step Service, submission step, integration tests --- .../DuplicateDetectionServiceImpl.java | 285 ++++++++++++++++++ .../factory/ContentServiceFactory.java | 8 + .../factory/ContentServiceFactoryImpl.java | 9 + .../service/DuplicateDetectionService.java | 75 +++++ .../content/virtual/PotentialDuplicate.java | 174 +++++++++++ .../SolrServiceIndexItemSignaturePlugin.java | 93 ++++++ .../PotentialDuplicateConverter.java | 69 +++++ .../org/dspace/app/rest/model/ItemRest.java | 5 + .../rest/model/PotentialDuplicateRest.java | 196 ++++++++++++ .../hateoas/PotentialDuplicateResource.java | 22 ++ .../model/step/DataDuplicateDetection.java | 46 +++ .../ItemDuplicatesLinkRepository.java | 115 +++++++ .../app/rest/submit/SubmissionService.java | 54 ++++ .../submit/step/DuplicateDetectionStep.java | 113 +++++++ dspace/config/dspace.cfg | 1 + dspace/config/item-submission.xml | 7 + dspace/config/modules/duplicate-detection.cfg | 28 ++ dspace/config/spring/api/core-services.xml | 1 + dspace/config/spring/api/discovery.xml | 3 + dspace/solr/search/conf/schema.xml | 22 ++ 20 files changed, 1326 insertions(+) create mode 100644 dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java create mode 100644 dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java create mode 100644 dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java create mode 100644 dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java create mode 100644 dspace/config/modules/duplicate-detection.cfg diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java new file mode 100644 index 000000000000..82c528fc71e0 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -0,0 +1,285 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content; + +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.lang3.StringUtils; +import org.apache.velocity.exception.ResourceNotFoundException; +import org.dspace.authorize.AuthorizeException; +import org.dspace.authorize.service.AuthorizeService; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.MetadataFieldService; +import org.dspace.content.service.MetadataValueService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.discovery.DiscoverQuery; +import org.dspace.discovery.DiscoverResult; +import org.dspace.discovery.IndexableObject; +import org.dspace.discovery.SearchService; +import org.dspace.discovery.SearchServiceException; +import org.dspace.discovery.SearchUtils; +import org.dspace.discovery.indexobject.IndexableItem; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import org.dspace.discovery.indexobject.IndexableWorkspaceItem; +import org.dspace.eperson.service.GroupService; +import org.dspace.services.ConfigurationService; +import org.dspace.versioning.VersionHistory; +import org.dspace.versioning.service.VersionHistoryService; +import org.dspace.workflow.WorkflowItem; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Default implementation of DuplicateDetectionService. + * Duplicate Detection Service handles get, search and validation operations for duplicate detection. + * + * @author Kim Shepherd + */ +public class DuplicateDetectionServiceImpl implements DuplicateDetectionService { + + @Autowired + ConfigurationService configurationService; + @Autowired + VersionHistoryService versionHistoryService; + @Autowired + AuthorizeService authorizeService; + @Autowired + GroupService groupService; + @Autowired + MetadataFieldService metadataFieldService; + @Autowired + MetadataValueService metadataValueService; + + /** + * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that + * are identified as potential duplicates of the given item + * + * @param context DSpace context + * @param item Item to check + * @return List of potential duplicates (empty if none found) + * @throws SearchServiceException if an error occurs performing the discovery search + */ + @Override + public List getPotentialDuplicates(Context context, Item item) + throws SearchServiceException { + // Instantiate a new list of potential duplicates + List potentialDuplicates = new LinkedList<>(); + + // Immediately return an empty if this feature is not configured + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + return potentialDuplicates; + } + + // Search duplicates of this item and get discovery search result + DiscoverResult discoverResult = searchDuplicates(context, item); + + // If the search result is valid, iterate results and validate / transform + if (discoverResult != null) { + for (IndexableObject result : discoverResult.getIndexableObjects()) { + if (result != null) { + try { + // Validate this result and check permissions to read the item + Optional potentialDuplicateOptional = + validateDuplicateResult(context, result, item); + if (potentialDuplicateOptional.isPresent()) { + // Add the potential duplicate to the list + potentialDuplicates.add(potentialDuplicateOptional.get()); + } + } catch (SQLException e) { + log.error("SQL Error obtaining duplicate result: " + e.getMessage()); + } catch (AuthorizeException e) { + log.error("Authorize Error obtaining duplicate result: " + e.getMessage()); + } + } + } + } + + // Return the list of potential duplicates + return potentialDuplicates; + } + + + + /** + * Validate an indexable object (returned by discovery search) to ensure it is permissible, readable and valid + * and can be added to a list of results. + * An Optional is returned, if it is empty then it was invalid or did not pass validation. + * + * @param context The DSpace context + * @param indexableObject The discovery search result + * @param original The original item (to compare IDs, submitters, etc) + * @return An Optional potential duplicate + * @throws SQLException + * @throws AuthorizeException + */ + @Override + public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, Item original) + throws SQLException, + AuthorizeException { + + Item resultItem = null; + PotentialDuplicate potentialDuplicate = null; + WorkspaceItem workspaceItem = null; + WorkflowItem workflowItem = null; + + // Inspect the indexable object, and extract the DSpace item depending on + // what submission / archived state it is in + if (indexableObject instanceof IndexableWorkspaceItem) { + workspaceItem = ((IndexableWorkspaceItem) indexableObject).getIndexedObject(); + // Only process workspace items that belong to the submitter + if (workspaceItem != null && workspaceItem.getSubmitter() != null + && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { + resultItem = workspaceItem.getItem(); + } + } else if (indexableObject instanceof IndexableWorkflowItem) { + workflowItem = ((IndexableWorkflowItem) indexableObject).getIndexedObject(); + if (workflowItem != null) { + resultItem = workflowItem.getItem(); + } + } else if (indexableObject instanceof IndexableItem) { + resultItem = ((IndexableItem) indexableObject).getIndexedObject(); + } + + // Result item must not be null, a template item, or actually identical to the original + if (resultItem == null) { + log.error("skipping null item in duplicate search results"); + return Optional.empty(); + } else if (resultItem.getTemplateItemOf() != null) { + log.error("skipping template item in duplicate search results"); + return Optional.empty(); + } else if (resultItem.getID().equals(original.getID())) { + log.warn("skipping a duplicate search result for the original item"); + return Optional.empty(); + } + + // If our item and the duplicate candidate share the same versionHistory, they are two different + // versions of the same item. + VersionHistory versionHistory = versionHistoryService.findByItem(context, original); + VersionHistory candiateVersionHistory = versionHistoryService.findByItem(context, resultItem); + // if the versionHistory is null, either versioning is switched off or the item doesn't have + // multiple versions + if (versionHistory != null && versionHistory.equals(candiateVersionHistory)) + { + log.warn("skipping item that is just another version of this item"); + return Optional.empty(); + } + + // Construct new potential duplicate object + potentialDuplicate = new PotentialDuplicate(resultItem); + + // Get configured list of metadata fields to copy + List fields = new ArrayList<>(Arrays.asList( + configurationService.getArrayProperty("duplicate.preview.metadata.field", new String[]{}))); + + // Get item metadata and if it's configured for mapping, copy it across to the potential duplicate object + List metadata = resultItem.getCachedMetadata(); + + // Prepare a map of metadata to set on the potential duplicate object + for (MetadataValue metadatum : metadata) { + String fieldName = metadatum.getMetadataField().toString('.'); + if (fields.contains(fieldName)) { + potentialDuplicate.getMetadataValueList().add(metadatum); + } + } + + // Only if the current user is also the submitter of the item will we add this information + if (workspaceItem != null && workspaceItem.getSubmitter() != null + && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { + potentialDuplicate.setWorkspaceItemId(workspaceItem.getID()); + return Optional.of(potentialDuplicate); + } + + // More authorisation checks + if (authorizeService.isAdmin(context, resultItem)) { + // Admins can always read, return immediately + return Optional.of(potentialDuplicate); + } + else if (workflowItem != null) { + Collection c = workflowItem.getCollection(); + if (groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep1(context)) || + groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep2(context)) || + groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep3(context))) { + // Current user is a member of one of the workflow role groups + potentialDuplicate.setWorkflowItemId(workflowItem.getID()); + return Optional.of(potentialDuplicate); + } + } else if (resultItem.isArchived() && !resultItem.isWithdrawn() && resultItem.isDiscoverable()) { + // Not a workspace or workflow item, but is it archived, not withdrawn, and discoverable? + // Is it readable by the current user? + if (authorizeService.authorizeActionBoolean(context, resultItem, Constants.READ)) { + return Optional.of(potentialDuplicate); + } + } + + // By default, return an empty result + return Optional.empty(); + } + + /** + * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) + * and a single-term "signature" constructed out of the item title + * + * @param context DSpace context + * @param item The item to check + * @return DiscoverResult as a result of performing search. Null if invalid. + * + * @throws SearchServiceException if an error was encountered during the discovery search itself. + */ + @Override + public DiscoverResult searchDuplicates(Context context, Item item) throws SearchServiceException { + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("No such item: " + item); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + // Construct signature object + String signature = item.getName(); + if (StringUtils.isNotBlank(signature)) { + if (configurationService.getBooleanProperty("duplicate.signature.normalise.lowercase")) { + signature = signature.toLowerCase(context.getCurrentLocale()); + } + if (configurationService.getBooleanProperty("duplicate.signature.normalise.whitespace")) { + signature = signature.replaceAll("\\s+", ""); + } + + // Get search service + SearchService searchService = SearchUtils.getSearchService(); + + // Construct discovery query based on signature + DiscoverQuery discoverQuery = new DiscoverQuery(); + discoverQuery.setQuery("(" + configurationService.getProperty("duplicate.signature.field", + "item_signature") + ":" + signature + "~" + + configurationService.getIntProperty("duplicate.signature.distance", 0) + ")"); + // Add filter queries for the resource type + discoverQuery.addFilterQueries("(search.resourcetype:Item OR " + + "search.resourcetype:WorkspaceItem OR " + + "search.resourcetype:WorkflowItem)"); + // Skip this item itself so it isn't a false positive + discoverQuery.addFilterQueries("-search.resourceid:" + item.getID()); + + // Perform search and populate list with results, update total count integer + return searchService.search(context, discoverQuery); + } else { + log.warn("empty item signature, ignoring for duplicate search"); + } + + // Return null by default + return null; + + } +} diff --git a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java index 0b06b34038e1..3a897081f07c 100644 --- a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java +++ b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java @@ -20,6 +20,7 @@ import org.dspace.content.service.CommunityService; import org.dspace.content.service.DSpaceObjectLegacySupportService; import org.dspace.content.service.DSpaceObjectService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.EntityService; import org.dspace.content.service.EntityTypeService; import org.dspace.content.service.InProgressSubmissionService; @@ -113,6 +114,13 @@ public InProgressSubmissionService getInProgressSubmissionService(InProgressSubm } } + /** + * Return the implementation of the DuplicateDetectionService interface + * + * @return the DuplicateDetectionService + */ + public abstract DuplicateDetectionService getDuplicateDetectionService(); + public DSpaceObjectService getDSpaceObjectService(T dso) { return getDSpaceObjectService(dso.getType()); } diff --git a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java index e970f0bdab12..4b9d5918b173 100644 --- a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java @@ -10,6 +10,7 @@ import java.util.List; import org.dspace.content.DSpaceObject; +import org.dspace.content.DuplicateDetectionServiceImpl; import org.dspace.content.RelationshipMetadataService; import org.dspace.content.service.BitstreamFormatService; import org.dspace.content.service.BitstreamService; @@ -18,6 +19,7 @@ import org.dspace.content.service.CommunityService; import org.dspace.content.service.DSpaceObjectLegacySupportService; import org.dspace.content.service.DSpaceObjectService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.EntityService; import org.dspace.content.service.EntityTypeService; import org.dspace.content.service.InstallItemService; @@ -81,6 +83,8 @@ public class ContentServiceFactoryImpl extends ContentServiceFactory { private EntityTypeService entityTypeService; @Autowired(required = true) private EntityService entityService; + @Autowired(required = true) + private DuplicateDetectionService duplicateDetectionService; @Override public List> getDSpaceObjectServices() { @@ -181,4 +185,9 @@ public EntityService getEntityService() { public RelationshipMetadataService getRelationshipMetadataService() { return relationshipMetadataService; } + + @Override + public DuplicateDetectionService getDuplicateDetectionService() { + return duplicateDetectionService; + } } diff --git a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java new file mode 100644 index 000000000000..27797a4ef953 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java @@ -0,0 +1,75 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content.service; + +import java.sql.SQLException; +import java.util.List; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Item; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Context; +import org.dspace.discovery.DiscoverResult; +import org.dspace.discovery.IndexableObject; +import org.dspace.discovery.SearchServiceException; + +/** + * Duplicate Detection Service handles get, search and validation operations for duplicate detection. + * @see org.dspace.content.DuplicateDetectionServiceImpl for implementation details + * + * @author Kim Shepherd + */ +public interface DuplicateDetectionService { + + /** + * Logger + */ + Logger log = LogManager.getLogger(DuplicateDetectionService.class); + + /** + * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that + * are identified as potential duplicates of the given item + * + * @param context DSpace context + * @param item Item to check + * @return List of potential duplicates (empty if none found) + * @throws SearchServiceException if an error occurs performing the discovery search + */ + List getPotentialDuplicates(Context context, Item item) + throws SearchServiceException; + + /** + * Validate an indexable object (returned by discovery search) to ensure it is permissible, readable and valid + * and can be added to a list of results. + * An Optional is returned, if it is empty then it was invalid or did not pass validation. + * + * @param context The DSpace context + * @param indexableObject The discovery search result + * @param original The original item (to compare IDs, submitters, etc) + * @return An Optional potential duplicate + * @throws SQLException + * @throws AuthorizeException + */ + public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, + Item original) throws SQLException, AuthorizeException; + + /** + * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) + * and a single-term "signature" constructed out of the item title + * + * @param context DSpace context + * @param item The item to check + * @return DiscoverResult as a result of performing search. Null if invalid. + * + * @throws SearchServiceException if an error was encountered during the discovery search itself. + */ + DiscoverResult searchDuplicates(Context context, Item item) throws SearchServiceException; +} diff --git a/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java new file mode 100644 index 000000000000..ee21b2fdaa74 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java @@ -0,0 +1,174 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content.virtual; + +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import org.dspace.content.Item; +import org.dspace.content.MetadataValue; + +/** + * Model of potential duplicate item. Provides as little data as possible, but enough to be useful + * about the context / state of the duplicate, and metadata for preview purposes. + * + * @author Kim Shepherd + */ +public class PotentialDuplicate { + /** + * Title of duplicate object + */ + private String title; + /** + * UUID of duplicate object + */ + private UUID uuid; + /** + * Owning collection name (title) for duplicate item + */ + private String owningCollectionName; + /** + * Workspace item ID, if the duplicate is a workspace item + */ + private Integer workspaceItemId; + /** + * Workflow item ID, if the duplicate is a workflow item + */ + private Integer workflowItemId; + + /** + * List of configured metadata values copied across from the duplicate item + */ + private List metadataValueList; + + /** + * Default constructor + */ + public PotentialDuplicate() { + this.metadataValueList = new LinkedList<>(); + } + + /** + * Constructor that accepts an item and sets some values accordingly + * @param item the potential duplicate item + */ + public PotentialDuplicate(Item item) { + // Throw error if item is null + if (item == null) { + throw new NullPointerException("Null item passed to potential duplicate constructor"); + } + // Instantiate metadata value list + this.metadataValueList = new LinkedList<>(); + // Set title + this.title = item.getName(); + // Set UUID + this.uuid = item.getID(); + // Set owning collection name + if (item.getOwningCollection() != null) { + this.owningCollectionName = item.getOwningCollection().getName(); + } + } + + /** + * Get UUID of duplicate item + * @return UUID of duplicate item + */ + public UUID getUuid() { + return uuid; + } + + /** + * Set UUID of duplicate item + * @param uuid UUID of duplicate item + */ + public void setUuid(UUID uuid) { + this.uuid = uuid; + } + + /** + * Get title of duplicate item + * @return title of duplicate item + */ + public String getTitle() { + return title; + } + + /** + * Set title of duplicate item + * @param title of duplicate item + */ + public void setTitle(String title) { + this.title = title; + } + + /** + * Get owning collection name (title) of duplicate item + * @return owning collection name (title) of duplicate item + */ + public String getOwningCollectionName() { + return owningCollectionName; + } + + /** + * Set owning collection name (title) of duplicate item + * @param owningCollectionName owning collection name (title) of duplicate item + */ + public void setOwningCollectionName(String owningCollectionName) { + this.owningCollectionName = owningCollectionName; + } + + /** + * Get workspace ID for duplicate item, if any + * @return workspace item ID or null + */ + public Integer getWorkspaceItemId() { + return workspaceItemId; + } + + /** + * Set workspace ID for duplicate item + * @param workspaceItemId workspace item ID + */ + public void setWorkspaceItemId(Integer workspaceItemId) { + this.workspaceItemId = workspaceItemId; + } + + /** + * Get workflow ID for duplicate item, if anh + * @return workflow item ID or null + */ + public Integer getWorkflowItemId() { + return workflowItemId; + } + + /** + * Set workflow ID for duplicate item + * @param workflowItemId workspace item ID + */ + public void setWorkflowItemId(Integer workflowItemId) { + this.workflowItemId = workflowItemId; + } + + /** + * Get metadata (sorted, field->value list) for duplicate item + * @return (sorted, field->value list) for duplicate item + */ + public List getMetadataValueList() { + return metadataValueList; + } + + /** + * Set metadata (sorted, field->value list) for duplicate item + * @param metadataValueList MetadataRest list of values mapped to field keys + */ + public void setMetadataValueList(List metadataValueList) { + this.metadataValueList = metadataValueList; + } + +} diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java new file mode 100644 index 000000000000..ef61ec0e2e5e --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java @@ -0,0 +1,93 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.discovery; + +import org.apache.logging.log4j.Logger; +import org.apache.solr.common.SolrInputDocument; +import org.dspace.content.Item; +import org.dspace.content.WorkspaceItem; +import org.dspace.core.Context; +import org.dspace.discovery.indexobject.IndexableItem; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import org.dspace.discovery.indexobject.IndexableWorkspaceItem; +import org.dspace.services.ConfigurationService; +import org.dspace.workflow.WorkflowItem; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Indexes item "signatures" used for duplicate detection + * + * @author Kim Shepherd + */ +public class SolrServiceIndexItemSignaturePlugin implements SolrServiceIndexPlugin { + + @Autowired + ConfigurationService configurationService; + + private static final Logger log = org.apache.logging.log4j.LogManager + .getLogger(SolrServiceIndexItemSignaturePlugin.class); + + /** + * Index the normalised name of the item to a _signature field + * + * @param context DSpace context + * @param idxObj the indexable item + * @param document the Solr document + */ + @Override + public void additionalIndex(Context context, IndexableObject idxObj, SolrInputDocument document) { + // Immediately return if this feature is not configured + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + return; + } + // Otherwise, continue with item indexing. Handle items, workflow items, and workspace items + if (idxObj instanceof IndexableItem) { + indexItemSignature(context, ((IndexableItem) idxObj).getIndexedObject(), document); + } else if (idxObj instanceof IndexableWorkspaceItem) { + WorkspaceItem workspaceItem = ((IndexableWorkspaceItem) idxObj).getIndexedObject(); + if (workspaceItem != null) { + Item item = workspaceItem.getItem(); + if (item != null) { + indexItemSignature(context, item, document); + } + } + } else if (idxObj instanceof IndexableWorkflowItem) { + WorkflowItem workflowItem = ((IndexableWorkflowItem) idxObj).getIndexedObject(); + if (workflowItem != null) { + Item item = workflowItem.getItem(); + if (item != null) { + indexItemSignature(context, item, document); + } + } + } + } + + /** + * Add the actual signature field to the given solr doc + * + * @param context DSpace context + * @param item DSpace item + * @param document Solr document + */ + private void indexItemSignature(Context context, Item item, SolrInputDocument document) { + if (item != null) { + // Construct signature object + String signature = item.getName(); + if (signature != null) { + if (configurationService.getBooleanProperty("duplicate.signature.normalise.lowercase")) { + signature = signature.toLowerCase(context.getCurrentLocale()); + } + if (configurationService.getBooleanProperty("duplicate.signature.normalise.whitespace")) { + signature = signature.replaceAll("\\s+", ""); + } + // Add the field to the document + document.addField("item_signature", signature); + } + } + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java new file mode 100644 index 000000000000..566c92e56665 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java @@ -0,0 +1,69 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + *

+ * http://www.dspace.org/license/ + */ + +package org.dspace.app.rest.converter; + +import org.dspace.app.rest.model.MetadataValueList; +import org.dspace.app.rest.model.PotentialDuplicateRest; +import org.dspace.app.rest.projection.Projection; +import org.dspace.content.virtual.PotentialDuplicate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Lazy; +import org.springframework.stereotype.Component; + +/** + * Convert DSpace PotentialDuplicate object to a PotentialDuplicateRest REST resource + * for use in REST results. + * + * @author Kim Shepherd + */ +@Component +public class PotentialDuplicateConverter implements DSpaceConverter { + @Lazy + @Autowired + private ConverterService converter; + + /** + * Convert a PotentialDuplicate model object into its equivalent REST resource, applying + * a given projection. + * @see PotentialDuplicate + * @see PotentialDuplicateRest + * + * @param modelObject a PotentialDuplicate object + * @param projection current projection + * @return a converted PotentialDuplicateRest REST object + */ + @Override + public PotentialDuplicateRest convert(PotentialDuplicate modelObject, Projection projection) { + if (modelObject == null) { + return null; + } + // Instantiate new REST model object + PotentialDuplicateRest rest = new PotentialDuplicateRest(); + // Set or otherwise transform things here, then return + rest.setUuid(modelObject.getUuid()); + rest.setTitle(modelObject.getTitle()); + rest.setOwningCollectionName(modelObject.getOwningCollectionName()); + rest.setWorkflowItemId(modelObject.getWorkflowItemId()); + rest.setWorkspaceItemId(modelObject.getWorkspaceItemId()); + rest.setMetadata(converter.toRest(new MetadataValueList(modelObject.getMetadataValueList()), projection)); + + // Return converted object + return rest; + } + + /** + * For what DSpace API model class does this converter convert? + * @return Class of model objects represented. + */ + @Override + public Class getModelClass() { + return PotentialDuplicate.class; + } + +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java index b2f540c0ac4a..cafbbaaa9184 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java @@ -52,6 +52,10 @@ @LinkRest( name = ItemRest.THUMBNAIL, method = "getThumbnail" + ), + @LinkRest( + name = ItemRest.DUPLICATES, + method = "getDuplicates" ) }) public class ItemRest extends DSpaceObjectRest { @@ -68,6 +72,7 @@ public class ItemRest extends DSpaceObjectRest { public static final String VERSION = "version"; public static final String TEMPLATE_ITEM_OF = "templateItemOf"; public static final String THUMBNAIL = "thumbnail"; + public static final String DUPLICATES = "duplicates"; private boolean inArchive = false; private boolean discoverable = false; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java new file mode 100644 index 000000000000..1b356ffa01f9 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java @@ -0,0 +1,196 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +import java.util.UUID; + +/** + * REST Model defining a Potential Duplicate for serialisation to JSON + * This is used in lists of potential duplicates for submission section data and item link / embeds. + * + * @author Kim Shepherd + */ +public class PotentialDuplicateRest extends RestAddressableModel { + + /** + * Type of REST resource + */ + private static final String TYPE = "DUPLICATE"; + /** + * Plural type of REST resource + */ + private static final String TYPE_PLURAL = "DUPLICATES"; + /** + * Title of duplicate object + */ + private String title; + /** + * UUID of duplicate object + */ + private UUID uuid; + /** + * Owning collection name (title) for duplicate item + */ + private String owningCollectionName; + /** + * Workspace item ID, if the duplicate is a workspace item + */ + private Integer workspaceItemId; + /** + * Workflow item ID, if the duplicate is a workflow item + */ + private Integer workflowItemId; + /** + * List of configured metadata copied across from the duplicate item + */ + private MetadataRest metadata; + + /** + * Default constructor + */ + public PotentialDuplicateRest() { + } + + /** + * Get UUID of duplicate item + * @return UUID of duplicate item + */ + public UUID getUuid() { + return uuid; + } + + /** + * Set UUID of duplicate item + * @param uuid UUID of duplicate item + */ + public void setUuid(UUID uuid) { + this.uuid = uuid; + } + + /** + * Get title of duplicate item + * @return title of duplicate item + */ + public String getTitle() { + return title; + } + + /** + * Set title of duplicate item + * @param title of duplicate item + */ + public void setTitle(String title) { + this.title = title; + } + + /** + * Get owning collection name (title) of duplicate item + * @return owning collection name (title) of duplicate item + */ + public String getOwningCollectionName() { + return owningCollectionName; + } + + /** + * Set owning collection name (title) of duplicate item + * @param owningCollectionName owning collection name (title) of duplicate item + */ + public void setOwningCollectionName(String owningCollectionName) { + this.owningCollectionName = owningCollectionName; + } + + /** + * Get metadata (sorted, field->value list) for duplicate item + * @return (sorted, field->value list) for duplicate item + */ + public MetadataRest getMetadata() { + return metadata; + } + + /** + * Set metadata (sorted, field->value list) for duplicate item + * @param metadata MetadataRest list of values mapped to field keys + */ + public void setMetadata(MetadataRest metadata) { + this.metadata = metadata; + } + + /** + * Get workspace ID for duplicate item, if any + * @return workspace item ID or null + */ + public Integer getWorkspaceItemId() { + return workspaceItemId; + } + + /** + * Set workspace ID for duplicate item + * @param workspaceItemId workspace item ID + */ + public void setWorkspaceItemId(Integer workspaceItemId) { + this.workspaceItemId = workspaceItemId; + } + + /** + * Get workflow ID for duplicate item, if anh + * @return workflow item ID or null + */ + public Integer getWorkflowItemId() { + return workflowItemId; + } + + /** + * Set workflow ID for duplicate item + * @param workflowItemId workspace item ID + */ + public void setWorkflowItemId(Integer workflowItemId) { + this.workflowItemId = workflowItemId; + } + + /** + * Get REST resource type name + * @return REST resource type (see static final string) + */ + @Override + public String getType() { + return TYPE; + } + + /** + * Get REST resource type plural name + * @return REST resource type plural name (see static final string) + */ + @Override + public String getTypePlural() { + return TYPE_PLURAL; + } + + /** + * Get REST resource category. + * Not implemented as this model is intended for use only as an ItemLink repository and submission section data, + * it is actually a simple RestModel but has to 'implement' RestAddressableModel to serialize correctly + * + * @return null (not implemented) + */ + @Override + public String getCategory() { + return null; + } + + /** + * Get REST controller for this model. + * Not implemented as this model is intended for use only as an ItemLink repository and submission section data, + * it is actually a simple RestModel but has to 'implement' RestAddressableModel to serialize correctly + * + * @return null (not implemented) + */ + @Override + public Class getController() { + return null; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java new file mode 100644 index 000000000000..85b417d30210 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java @@ -0,0 +1,22 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + *

+ * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.hateoas; + +import org.dspace.app.rest.model.PotentialDuplicateRest; + +/** + * + * Wrap PotentialDuplicatesRest REST resource in a very simple HALResource class + * + * @author Kim Shepherd + */ +public class PotentialDuplicateResource extends HALResource { + public PotentialDuplicateResource(PotentialDuplicateRest data) { + super(data); + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java new file mode 100644 index 000000000000..9506e9676e03 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java @@ -0,0 +1,46 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.step; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonUnwrapped; +import org.dspace.app.rest.model.PotentialDuplicateRest; + +/** + * Section data model for potential duplicate items detected during submission + * + * @author Kim Shepherd + */ +public class DataDuplicateDetection implements SectionData { + public DataDuplicateDetection() { + } + + /** + * A list of potential duplicate items found by DuplicateDetectionService, in their REST model form + */ + @JsonUnwrapped + private List potentialDuplicates; + + /** + * Return the list of detected potential duplicates in REST model form + * @return list of potential duplicate REST models + */ + public List getPotentialDuplicates() { + return potentialDuplicates; + } + + /** + * Set list of potential duplicates. + * @see org.dspace.app.rest.converter.PotentialDuplicateConverter + * @param potentialDuplicates list of potential duplicates + */ + public void setPotentialDuplicates(List potentialDuplicates) { + this.potentialDuplicates = potentialDuplicates; + } +} \ No newline at end of file diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java new file mode 100644 index 000000000000..1187269ae1fe --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java @@ -0,0 +1,115 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.repository; + +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; + +import java.sql.SQLException; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.model.ItemRest; +import org.dspace.app.rest.model.PotentialDuplicateRest; +import org.dspace.app.rest.projection.Projection; +import org.dspace.app.rest.submit.SubmissionService; +import org.dspace.content.Item; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Context; +import org.dspace.discovery.SearchServiceException; +import org.dspace.services.ConfigurationService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.stereotype.Component; + +/** + * Item link repository to allow a list of potential duplicates to be searched and returned + * for the parent item. + * Used by, e.g. workflow pooled/claimed task page and previews, to show reviewers about potential duplicates + * @see DuplicateDetectionService + * + * @author Kim Shepherd + */ +@Component(ItemRest.CATEGORY + "." + ItemRest.NAME + "." + ItemRest.DUPLICATES) +public class ItemDuplicatesLinkRepository extends AbstractDSpaceRestRepository + implements LinkRestRepository { + + private static final Logger log = LogManager.getLogger(ItemDuplicatesLinkRepository.class); + + @Autowired + ItemService itemService; + @Autowired + ConfigurationService configurationService; + @Autowired + SubmissionService submissionService; + @Autowired + DuplicateDetectionService duplicateDetectionService; + + /** + * Get a list of potential duplicates based on the current item's "signature" (e.g. title) + * + * @param request + * @param itemId + * @param optionalPageable + * @param projection + * @return + */ + @PreAuthorize("hasPermission(#itemId, 'ITEM', 'READ')") + public Page getDuplicates(@Nullable HttpServletRequest request, + UUID itemId, + @Nullable Pageable optionalPageable, + Projection projection) { + + + // Instantiate object to represent this item + Item item; + // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list + List potentialDuplicates = new LinkedList<>(); + // Instantiate total count + int total = 0; + // Obtain context + Context context = obtainContext(); + // Get pagination + Pageable pageable = utils.getPageable(optionalPageable); + + // Try to get item based on UUID parameter + try { + item = itemService.find(context, itemId); + } catch (SQLException e) { + throw new ResourceNotFoundException(e.getMessage()); + } + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("No such item: " + itemId); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + try { + // Search for the list of potential duplicates + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); + } catch (SearchServiceException e) { + // If the search fails, log an error and return an empty list rather than throwing a fatal error + log.error("Search service error retrieving duplicates: {}", e.getMessage()); + } + + // Return the list of items along with pagination info and max results + return converter.toRestPage(potentialDuplicates, pageable, total, projection); + } + +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java index 93d3867d9205..0a3b2e859eee 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.UUID; import javax.servlet.http.HttpServletRequest; @@ -26,9 +27,11 @@ import org.dspace.app.rest.model.CheckSumRest; import org.dspace.app.rest.model.ErrorRest; import org.dspace.app.rest.model.MetadataValueRest; +import org.dspace.app.rest.model.PotentialDuplicateRest; import org.dspace.app.rest.model.WorkspaceItemRest; import org.dspace.app.rest.model.patch.Operation; import org.dspace.app.rest.model.step.DataCCLicense; +import org.dspace.app.rest.model.step.DataDuplicateDetection; import org.dspace.app.rest.model.step.DataUpload; import org.dspace.app.rest.model.step.UploadBitstreamRest; import org.dspace.app.rest.projection.Projection; @@ -47,11 +50,14 @@ import org.dspace.content.MetadataValue; import org.dspace.content.WorkspaceItem; import org.dspace.content.service.CollectionService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.ItemService; import org.dspace.content.service.WorkspaceItemService; +import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.Utils; +import org.dspace.discovery.SearchServiceException; import org.dspace.license.service.CreativeCommonsService; import org.dspace.services.ConfigurationService; import org.dspace.services.RequestService; @@ -64,6 +70,7 @@ import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Lazy; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; import org.springframework.data.rest.webmvc.json.patch.PatchException; import org.springframework.jdbc.datasource.init.UncategorizedScriptException; import org.springframework.stereotype.Component; @@ -101,6 +108,8 @@ public class SubmissionService { @Autowired private org.dspace.app.rest.utils.Utils utils; private SubmissionConfigService submissionConfigService; + @Autowired + private DuplicateDetectionService duplicateDetectionService; public SubmissionService() throws SubmissionConfigReaderException { submissionConfigService = SubmissionServiceFactory.getInstance().getSubmissionConfigService(); @@ -313,6 +322,51 @@ public DataCCLicense getDataCCLicense(InProgressSubmission obj) return result; } + /** + * Prepare section data containing a list of potential duplicates, for use in submission steps. + * This method belongs in SubmissionService and not DuplicateDetectionService because it depends on + * the DataDuplicateDetection class which only appears in the REST project. + * + * @param context DSpace context + * @param obj The in-progress submission object + * @return A DataDuplicateDetection object which implements SectionData for direct use in + * a submission step (see DuplicateDetectionStep) + * @throws SearchServiceException if an error is encountered during Discovery search + */ + public DataDuplicateDetection getDataDuplicateDetection(Context context, InProgressSubmission obj) + throws SearchServiceException { + // Test for a valid object or throw a not found exception + if (obj == null) { + throw new ResourceNotFoundException("Duplicate data step could not find valid in-progress submission obj"); + } + // Initialise an empty section data object + DataDuplicateDetection data = new DataDuplicateDetection(); + + // Get the item for this submission object, throw a not found exception if null + Item item = obj.getItem(); + if (item == null) { + throw new ResourceNotFoundException("Duplicate data step could not find valid item for the" + + " current in-progress submission obj id=" + obj.getID()); + } + // Initialise empty list of PotentialDuplicateRest objects for use in the section data object + List potentialDuplicateRestList = new LinkedList<>(); + + // Get discovery search result for a duplicate detection search based on this item and populate + // the list of REST objects + List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); + for (PotentialDuplicate potentialDuplicate : potentialDuplicates) { + // Convert and add the potential duplicate to the list + potentialDuplicateRestList.add(converter.toRest( + potentialDuplicate, utils.obtainProjection())); + } + + // Set the final duplicates list of the section data object + data.setPotentialDuplicates(potentialDuplicateRestList); + + // Return section data + return data; + } + /** * Utility method used by the {@link WorkspaceItemRestRepository} and * {@link WorkflowItemRestRepository} to deal with the upload in an inprogress diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java new file mode 100644 index 000000000000..3dde379d24d3 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java @@ -0,0 +1,113 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.submit.step; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.model.patch.Operation; +import org.dspace.app.rest.model.step.DataDuplicateDetection; +import org.dspace.app.rest.submit.AbstractProcessingStep; +import org.dspace.app.rest.submit.SubmissionService; +import org.dspace.app.rest.utils.ContextUtil; +import org.dspace.app.util.SubmissionStepConfig; +import org.dspace.content.InProgressSubmission; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.core.Context; +import org.dspace.handle.service.HandleService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.services.model.Request; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Submission processing step to detect potential duplicates of this item and list them so that + * the submitter can choose to cancel or continue with their submission + * + * @author Kim Shepherd + */ +public class DuplicateDetectionStep extends AbstractProcessingStep { + + private static final Logger log = LogManager.getLogger(DuplicateDetectionStep.class); + + @Autowired(required = true) + protected HandleService handleService; + @Autowired(required = true) + protected ContentServiceFactory contentServiceFactory; + + /** + * Override DataProcessing.getData, return a list of potential duplicates + * + * @param submissionService The submission service + * @param obj The workspace or workflow item + * @param config The submission step configuration + * @return A simple DataIdentifiers bean containing doi, handle and list of other identifiers + */ + @Override + public DataDuplicateDetection getData(SubmissionService submissionService, InProgressSubmission obj, + SubmissionStepConfig config) throws Exception { + // Validate in progress submission object and wrapped item + if (obj == null) { + throw new IllegalArgumentException("Null in-progress wrapper object"); + } + if (obj.getItem() == null) { + throw new IllegalArgumentException("Null in-progress item"); + } + // Immediately return an empty if this feature is not configured + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + throw new IllegalStateException("Duplicate detection feature is not enabled. " + + "See dspace/config/modules/duplicate-detection.cfg"); + } + // Validate context + Context context = getContext(); + if (context == null) { + throw new ServletException("Null context"); + } + + // Return the constructed data section + return submissionService.getDataDuplicateDetection(context, obj); + } + + /** + * Utility method to get DSpace context from the HTTP request + * @return DSpace context + */ + private Context getContext() { + Context context; + Request currentRequest = DSpaceServicesFactory.getInstance().getRequestService().getCurrentRequest(); + if (currentRequest != null) { + HttpServletRequest request = currentRequest.getHttpServletRequest(); + context = ContextUtil.obtainContext(request); + } else { + context = new Context(); + } + + return context; + } + + /** + * This step is currently just for displaying identifiers and does not take additional patch operations + * @param context + * the DSpace context + * @param currentRequest + * the http request + * @param source + * the in progress submission + * @param op + * the json patch operation + * @param stepConf + * @throws Exception + */ + @Override + public void doPatchProcessing(Context context, HttpServletRequest currentRequest, InProgressSubmission source, + Operation op, SubmissionStepConfig stepConf) throws Exception { + log.warn("Not implemented"); + } + +} diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index e860d59d1127..98fa28dea664 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1647,6 +1647,7 @@ include = ${module_dir}/clamav.cfg include = ${module_dir}/curate.cfg include = ${module_dir}/discovery.cfg include = ${module_dir}/doi-curation.cfg +include = ${module_dir}/duplicate-detection.cfg include = ${module_dir}/google-analytics.cfg include = ${module_dir}/healthcheck.cfg include = ${module_dir}/identifiers.cfg diff --git a/dspace/config/item-submission.xml b/dspace/config/item-submission.xml index 6bf9c3a15616..6028a7f40114 100644 --- a/dspace/config/item-submission.xml +++ b/dspace/config/item-submission.xml @@ -247,6 +247,13 @@ coarnotify + + + submit.progressbar.duplicates + org.dspace.app.rest.submit.step.DuplicateDetectionStep + duplicates + + diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg new file mode 100644 index 000000000000..aea165324749 --- /dev/null +++ b/dspace/config/modules/duplicate-detection.cfg @@ -0,0 +1,28 @@ +### +# Duplicate detection settings +## + +# Enable this feature. Default: false +duplicate.enable = true + +## +# Normalisation rules. If these are changed, a full index-discovery re-index should be performed to force +# stored signatures to be updated. +## +# Should the signature be normalised for case? Default: true +duplicate.signature.normalise.lowercase = true +# Should the signature be normalised for whitespace? Default: true +# (highly recommended - if this is *not* used, some other placeholder needs to be used to force the value +# to be treated as a single term by Lucene) +duplicate.signature.normalise.whitespace = true + +# Levenshtein edit distance. Default:1 (eg. Test will match Txst but not Txxt) +duplicate.signature.distance = 1 +# Solr field used for storing the item signature +duplicate.signature.field = item_signature + +## Metadata to populate in the potential duplicate +duplicate.preview.metadata.field = dc.title +duplicate.preview.metadata.field = dc.date.issued +duplicate.preview.metadata.field = dc.type +duplicate.preview.metadata.field = dspace.entity.type \ No newline at end of file diff --git a/dspace/config/spring/api/core-services.xml b/dspace/config/spring/api/core-services.xml index 20b5bc74736d..835b4ea0eed7 100644 --- a/dspace/config/spring/api/core-services.xml +++ b/dspace/config/spring/api/core-services.xml @@ -55,6 +55,7 @@ + diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index 15ae4f45bf28..ce9b4a9d370f 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -44,6 +44,9 @@ + + + diff --git a/dspace/solr/search/conf/schema.xml b/dspace/solr/search/conf/schema.xml index df21afbc6426..09540dccca93 100644 --- a/dspace/solr/search/conf/schema.xml +++ b/dspace/solr/search/conf/schema.xml @@ -223,6 +223,25 @@ + + + + + + + + + + + + + + + + @@ -326,6 +345,9 @@ + + + From 3e307cd5820b8cf87125da9f60da78f89411c094 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 17 Jan 2024 17:29:42 +1300 Subject: [PATCH 02/33] [TLC-674] Duplicate detection integration tests --- .../service/DuplicateDetectionService.java | 5 +- .../content/virtual/PotentialDuplicate.java | 2 + .../dspaceFolder/config/item-submission.xml | 14 ++ .../test/data/dspaceFolder/config/local.cfg | 2 + .../content/DuplicateDetectionTest.java | 194 ++++++++++++++++++ .../virtual/PotentialDuplicateTest.java | 12 ++ 6 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java create mode 100644 dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java diff --git a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java index 27797a4ef953..92e41bf36d42 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java @@ -14,6 +14,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.dspace.authorize.AuthorizeException; +import org.dspace.content.DuplicateDetectionServiceImpl; import org.dspace.content.Item; import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Context; @@ -23,7 +24,7 @@ /** * Duplicate Detection Service handles get, search and validation operations for duplicate detection. - * @see org.dspace.content.DuplicateDetectionServiceImpl for implementation details + * @see DuplicateDetectionServiceImpl for implementation details * * @author Kim Shepherd */ @@ -58,7 +59,7 @@ List getPotentialDuplicates(Context context, Item item) * @throws SQLException * @throws AuthorizeException */ - public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, + Optional validateDuplicateResult(Context context, IndexableObject indexableObject, Item original) throws SQLException, AuthorizeException; /** diff --git a/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java index ee21b2fdaa74..6c193bb28506 100644 --- a/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java +++ b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java @@ -17,6 +17,8 @@ /** * Model of potential duplicate item. Provides as little data as possible, but enough to be useful * about the context / state of the duplicate, and metadata for preview purposes. + * This class lives in the virtual package because it is not stored, addressable data, it's a stub / preview + * based on an items' search result and metadata. * * @author Kim Shepherd */ diff --git a/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml b/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml index 0212e5efcca1..3a305d0ccdb9 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml +++ b/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml @@ -27,6 +27,7 @@ + @@ -180,6 +181,13 @@ submission + + + submit.progressbar.duplicates + org.dspace.app.rest.submit.step.DuplicateDetectionStep + duplicates + + submit.progressbar.coarnotify org.dspace.app.rest.submit.step.NotifyStep @@ -281,6 +289,12 @@ + + + + + + diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 3dc4e398c11b..9e7050bfdd5a 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -175,6 +175,8 @@ authority.controlled.dspace.object.owner = true webui.browse.link.1 = author:dc.contributor.* webui.browse.link.2 = subject:dc.subject.* +# Enable duplicate detection for tests +duplicate.enable = true ########################################### # LDN CONFIGURATIONS # diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java new file mode 100644 index 000000000000..fec36224fcdc --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -0,0 +1,194 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content; + +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.lang.StringUtils; +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.discovery.SearchService; +import org.dspace.discovery.SearchUtils; +import org.dspace.eperson.factory.EPersonServiceFactory; +import org.dspace.eperson.service.GroupService; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.junit.Before; +import org.junit.Test; +import scala.concurrent.impl.FutureConvertersImpl; + +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertNull; +import static org.hamcrest.MatcherAssert.assertThat; + +/** + * + * Integration tests for the duplicate detection service + * + * @author Kim Shepherd + */ +public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase { + + private GroupService groupService = EPersonServiceFactory.getInstance().getGroupService(); + private SearchService searchService = SearchUtils.getSearchService(); + private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance().getDuplicateDetectionService(); + private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + private Collection col; + private Item item1; + private Item item2; + private Item item3; + private final String item1IssueDate = "2011-10-17"; + private final String item1Subject = "ExtraEntry 1"; + private final String item1Title = "Public item I"; + private final String item1Author = "Smith, Donald"; + + @Before + public void setUp() throws Exception { + super.setUp(); + // Temporarily enable duplicate detection and set signature distance to 1 + configurationService.setProperty("duplicate.enable", true); + configurationService.setProperty("duplicate.signature.distance", 1); + configurationService.setProperty("duplicate.signature.normalise.lowercase", true); + configurationService.setProperty("duplicate.signature.normalise.whitespace", true); + configurationService.setProperty("duplicate.signature.field", "item_signature"); + configurationService.setProperty("duplicate.preview.metadata.field", + new String[]{"dc.date.issued", "dc.subject"}); + + context.turnOffAuthorisationSystem(); + + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + col = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection").build(); + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + item1 = ItemBuilder.createItem(context, col) + .withTitle(item1Title) // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + item2 = ItemBuilder.createItem(context, col) + .withTitle("Public item II") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + item3 = ItemBuilder.createItem(context, col) + .withTitle("Public item III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .build(); + + context.setDispatcher("noindex"); + } + + /** + * Test instantiation of simple potential duplicate object + */ + @Test + public void testPotentialDuplicateInstantatation() { + PotentialDuplicate potentialDuplicate = new PotentialDuplicate(); + // The constructor should instantiate a new list for metadata + assertEquals("Metadata value list size should be 0", + 0, potentialDuplicate.getMetadataValueList().size()); + // Other properties should not be set + assertNull("Title should be null", potentialDuplicate.getTitle()); + //StringUtils.getLevenshteinDistance() + } + + /** + * Test instantiation of simple potential duplicate object given an item as a constructor argument + */ + @Test + public void testPotentialDuplicateInstantiationWithItem() { + PotentialDuplicate potentialDuplicate = new PotentialDuplicate(item1); + // We should have title, uuid, owning collection name set and metadata value list instantiated to empty + assertEquals("UUID should match item1 uuid", item1.getID(), potentialDuplicate.getUuid()); + assertEquals("Title should match item1 title", item1Title, potentialDuplicate.getTitle()); + assertEquals("Owning collection should match item1 owning collection", + item1.getOwningCollection().getName(), potentialDuplicate.getOwningCollectionName()); + assertEquals("Metadata value list size should be 0", + 0, potentialDuplicate.getMetadataValueList().size()); + } + + /** + * Test that a search for getPotentialDuplicates returns the expected results, populated with the expected + * preview values and metadata. This is the core method used by the duplicate item link repository and + * detect duplicates submission step. + * + * @throws Exception + */ + @Test + public void testSearchDuplicates() throws Exception { + + // Get potential duplicates of item 1: + // Expected: Public item II should appear as it has the configured levenshtein distance of 1 + List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item1); + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item1 should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be Public item II (one distance from public item I) + assertEquals("Item II should be be the detected duplicate", + item2.getID(), potentialDuplicates.get(0).getUuid()); + + // Get potential duplicates of item2: + // Expected: BOTH other items should appear as they are both 1 distance away from "Public item II" + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item2); + + // Sort by title + potentialDuplicates.sort(Comparator.comparing(PotentialDuplicate::getTitle)); + + // Make sure result list is size 1 + size = 2; + assertEquals("Potential duplicates of item2 should have size " + size, + size, potentialDuplicates.size()); + + // The result list should contain both item1 and item3 in the expected order + assertEquals("item1 should be the first detected duplicate", + item1.getID(), potentialDuplicates.get(0).getUuid()); + assertEquals("item3 should be be the second detected duplicate", + item3.getID(), potentialDuplicates.get(1).getUuid()); + + // Check metadata is populated as per configuration, using item1 (first in results) + // Check for date + Optional foundDate = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.').equals("dc.date.issued")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should be an issue date found", foundDate.isPresent()); + assertEquals("item1 issue date should match the duplicate obj metadata issue date", + item1IssueDate, foundDate.get()); + // Check for subject + Optional foundSubject = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.').equals("dc.subject")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should be a subject found", foundSubject.isPresent()); + assertEquals("item1 subject should match the duplicate obj metadata subject", + item1Subject, foundSubject.get()); + + // Check for author, which was NOT configured to be copied + Optional foundAuthor = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.') + .equals("dc.contributor.author")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should NOT be an author found", foundAuthor.isEmpty()); + + } + +} diff --git a/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java b/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java new file mode 100644 index 000000000000..2a727abcae6b --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java @@ -0,0 +1,12 @@ +package org.dspace.content.virtual; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class PotentialDuplicateTest { + + + +} From 554338b29d50cc6770e2cda731afde8562abab80 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Thu, 18 Jan 2024 20:25:59 +1300 Subject: [PATCH 03/33] [TLC-674] broken IT (multi items created?!) --- .../DuplicateDetectionServiceImpl.java | 20 +- .../app/rest/DuplicateDetectionRestIT.java | 359 ++++++++++++++++++ 2 files changed, 371 insertions(+), 8 deletions(-) create mode 100644 dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index 82c528fc71e0..73da67ebebbb 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -142,12 +142,15 @@ public Optional validateDuplicateResult(Context context, Ind && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { resultItem = workspaceItem.getItem(); } - } else if (indexableObject instanceof IndexableWorkflowItem) { + } + if (indexableObject instanceof IndexableWorkflowItem) { + log.info("ITS A WORKFLOW ITEM ITS A WORKFLOW ITEM"); workflowItem = ((IndexableWorkflowItem) indexableObject).getIndexedObject(); if (workflowItem != null) { resultItem = workflowItem.getItem(); } - } else if (indexableObject instanceof IndexableItem) { + } + if (indexableObject instanceof IndexableItem) { resultItem = ((IndexableItem) indexableObject).getIndexedObject(); } @@ -201,16 +204,14 @@ public Optional validateDuplicateResult(Context context, Ind } // More authorisation checks - if (authorizeService.isAdmin(context, resultItem)) { - // Admins can always read, return immediately - return Optional.of(potentialDuplicate); - } - else if (workflowItem != null) { + if (workflowItem != null) { Collection c = workflowItem.getCollection(); + log.info("INSPECITG WORKFLOW ITEM " + workflowItem.getItem().getName() + " WITH EPERSON " + context.getCurrentUser().getName()); if (groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep1(context)) || groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep2(context)) || groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep3(context))) { // Current user is a member of one of the workflow role groups + log.info("WORKFLOW REVIEWER CAN SEE " + workflowItem.getID()); potentialDuplicate.setWorkflowItemId(workflowItem.getID()); return Optional.of(potentialDuplicate); } @@ -220,9 +221,12 @@ else if (workflowItem != null) { if (authorizeService.authorizeActionBoolean(context, resultItem, Constants.READ)) { return Optional.of(potentialDuplicate); } + } else if (authorizeService.isAdmin(context, resultItem)) { + // Admins can always read, return immediately + return Optional.of(potentialDuplicate); } - // By default, return an empty result + // By default, return an empty result return Optional.empty(); } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java new file mode 100644 index 000000000000..5b41a6ffc019 --- /dev/null +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -0,0 +1,359 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import javax.ws.rs.core.MediaType; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.dspace.app.rest.model.patch.Operation; +import org.dspace.app.rest.model.patch.ReplaceOperation; +import org.dspace.app.rest.test.AbstractControllerIntegrationTest; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.builder.WorkflowItemBuilder; +import org.dspace.builder.WorkspaceItemBuilder; +import org.dspace.content.Collection; +import org.dspace.content.DSpaceObject; +import org.dspace.content.Item; +import org.dspace.content.WorkspaceItem; +import org.dspace.content.service.CollectionService; +import org.dspace.content.service.ItemService; +import org.dspace.core.I18nUtil; +import org.dspace.discovery.IndexingService; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import org.dspace.eperson.EPerson; +import org.dspace.eperson.factory.EPersonServiceFactory; +import org.dspace.eperson.service.EPersonService; +import org.dspace.handle.service.HandleService; +import org.dspace.services.ConfigurationService; +import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +/** + * Test item link and section data REST endpoints for duplicate detection. + * @see DuplicateDetectionTest (dspace-api) for lower level integration tests. + * + * @author Kim Shepherd + */ +public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest { + + @Autowired + ConfigurationService configurationService; + @Autowired + ItemService itemService; + @Autowired + IndexingService indexingService; + @Autowired + CollectionService collectionService; + @Autowired + HandleService handleService; + + private Collection col; + private Item item1; + private Item item2; + private Item item3; + private final String item1IssueDate = "2011-10-17"; + private final String item1Subject = "ExtraEntry 1"; + private final String item1Title = "Public item I"; + private final String item1Author = "Smith, Donald"; + private final String item2Subject = "ExtraEntry 2"; + private final String item2IssueDate = "2012-10-17"; + private EPerson anotherEPerson; + + @Override + public void setUp() throws Exception { + super.setUp(); + + // Temporarily enable duplicate detection and set signature distance to 1 + configurationService.setProperty("duplicate.enable", true); + configurationService.setProperty("duplicate.signature.distance", 1); + configurationService.setProperty("duplicate.signature.normalise.lowercase", true); + configurationService.setProperty("duplicate.signature.normalise.whitespace", true); + configurationService.setProperty("duplicate.signature.field", "item_signature"); + configurationService.setProperty("duplicate.preview.metadata.field", + new String[]{"dc.date.issued", "dc.subject"}); + + context.turnOffAuthorisationSystem(); + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + DSpaceObject dso = handleService.resolveToObject(context, "123456789/test-duplicate-detection"); + if (dso == null) { + col = CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") + .withWorkflowGroup("reviewer", admin) + .withName("Collection").build(); + } else { + col = (Collection) dso; + } + EPersonService ePersonService = EPersonServiceFactory.getInstance().getEPersonService(); + anotherEPerson = ePersonService.findByEmail(context, "test-another-user@email.com"); + if (anotherEPerson == null) { + anotherEPerson = ePersonService.create(context); + anotherEPerson.setFirstName(context, "first"); + anotherEPerson.setLastName(context, "last"); + anotherEPerson.setEmail("test-another-user@email.com"); + anotherEPerson.setCanLogIn(true); + anotherEPerson.setLanguage(context, I18nUtil.getDefaultLocale().getLanguage()); + ePersonService.setPassword(anotherEPerson, password); + // actually save the eperson to unit testing DB + ePersonService.update(context, anotherEPerson); + } + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + item1 = ItemBuilder.createItem(context, col) + .withTitle(item1Title) // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + item2 = ItemBuilder.createItem(context, col) + .withTitle("Public item II") + .withIssueDate(item2IssueDate) + .withAuthor("Smith, Donald X.") + .withSubject(item2Subject) + .build(); + item3 = ItemBuilder.createItem(context, col) + .withTitle("Public item III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .build(); + + + context.dispatchEvents(); + context.restoreAuthSystemState(); + //context.setDispatcher("noindex"); + + } + + @Test + public void itemsContainDuplicatesLinkTest() throws Exception { + String token = getAuthToken(admin.getEmail(), password); + + getClient(token).perform(get("/api/core/items/" + item1.getID())) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + .andExpect(jsonPath("$._links.duplicates").exists()); + } + + @Test + public void searchDuplicatesByLinkTest() throws Exception { + String token = getAuthToken(admin.getEmail(), password); + + getClient(token).perform(get("/api/core/items/" + item1.getID() + "/duplicates")) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) + // UUID of only array member matches item2 ID + .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(item2.getID().toString())) + // First item has subject and issue date metadata populated as expected + .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.subject'][0].value") + .value(item2Subject)) + .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.date.issued'][0].value") + .value(item2IssueDate)) + // Does NOT have other metadata e.g. author, title + .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.contributor.author']").doesNotExist()) + .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.title']").doesNotExist()); + } + + /** + * Duplicates should be accessible via section data. Data should update as item signature (title) is changed. + * + * @throws Exception + */ + @Test + public void submissionSectionDataTest() throws Exception { + // Test publication + context.turnOffAuthorisationSystem(); + // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items + // metadata for the rest, as it is not relevant. + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Public item X") + .withSubject(item2Subject) + .withIssueDate(item2IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + String submitterToken = getAuthToken(eperson.getEmail(), password); + context.restoreAuthSystemState(); + + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data of size 1 + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(1))) + // The UUID of the first duplicate matches item 1 (which is 1 distance from this new title) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].uuid") + .value(item1.getID().toString())) + // Metadata for subject and issue date is populated as expected + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].metadata['dc.subject'][0].value") + .value(item1Subject)) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].metadata['dc.date.issued'][0].value") + .value(item1IssueDate)) + // Metadata for other metadata fields has not been copied across, as expected + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.contributor.author']").doesNotExist()) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.title']").doesNotExist()); + + // Try to add ISBN (type bound to book and book chapter) - this should not work and instead we'll get + // no JSON path for that field, because this item has no type yet + List updateOperations = new ArrayList(); + Map value = new HashMap(); + value.put("value", "Public item II"); + updateOperations.add(new ReplaceOperation("/sections/traditionalpageone/dc.title/0", value)); + String patchBody = getPatchContent(updateOperations); + getClient(submitterToken).perform(patch("/api/submission/workspaceitems/" + workspaceItem.getID()) + .content(patchBody) + .contentType(MediaType.APPLICATION_JSON_PATCH_JSON)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.errors").doesNotExist()); + + // Now there should be 3 results + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data (even if empty) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(3))); + + // Now, change the title to something completely different + updateOperations = new ArrayList<>(); + value.put("value", "Research article"); + updateOperations.add(new ReplaceOperation("/sections/traditionalpageone/dc.title/0", value)); + patchBody = getPatchContent(updateOperations); + getClient(submitterToken).perform(patch("/api/submission/workspaceitems/" + workspaceItem.getID()) + .content(patchBody) + .contentType(MediaType.APPLICATION_JSON_PATCH_JSON)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.errors").doesNotExist()); + + // Now there should be NO results + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data (even if empty) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(0))); + } + + /** + * If there is a potential duplicate that is also in submission (workspace item), it will + * ONLY be shown if the current user is the submitter / item owner. + * + * @throws Exception + */ + @Test + public void submissionSectionWorkspaceItemVisibilityTest() throws Exception { + // Test publication + context.turnOffAuthorisationSystem(); + // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items + // metadata for the rest, as it is not relevant. + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject(item1Subject) + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject(item2Subject) + .withIssueDate(item2IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem3 = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject("asdf") + .withIssueDate("2000-01-01") + .withAuthor("asdfasf") + .withSubmitter(admin) + .build(); + String submitterToken = getAuthToken(eperson.getEmail(), password); + + context.restoreAuthSystemState(); + + // Even though there are 3 items with the same title, this 'eperson' user should only see 1 duplicate + // as workspaceItem3 is owned by a different submitter, and self-references are skipped + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data of size 1 + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(1))) + // The UUID of the first duplicate matches item 1 (which is 1 distance from this new title) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].uuid") + .value(workspaceItem2.getItem().getID().toString())); + } + + /** + * If there is a potential duplicate that is also in workflow, it will + * ONLY be shown if the current user is in a worflow group for step 1, 2, or 3, or is an admin, or otherwise + * has READ permission + * + * @throws Exception + */ + @Test + public void submissionSectionWorkflowItemVisibilityTest() throws Exception { + String reviewerToken = getAuthToken(admin.getEmail(), password); + context.turnOffAuthorisationSystem(); + + Item testItem = ItemBuilder.createItem(context, col) + .withTitle("Totally unique?") // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items + // metadata for the rest, as it is not relevant. + XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, col) + .withTitle("Totally unique!") + .withIssueDate("2017-10-17") + .build(); + + //indexingService.indexContent(context, new IndexableWorkflowItem(workflowItem1)); + + context.dispatchEvents(); + context.restoreAuthSystemState(); + context.setCurrentUser(admin); + + + // The reviewer should be able to see the workflow item as a potential duplicate of the test item + getClient(reviewerToken).perform(get("/api/core/items/" + testItem.getID() + "/duplicates")) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) + // UUID of only array member matches the new workflow item ID + .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(workflowItem1.getItem().getID().toString())); + + // Another random user will NOT see this + getClient(getAuthToken(anotherEPerson.getEmail(), password)) + .perform(get("/api/core/items/" + testItem.getID() + "/duplicates")) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(0))); + } +} From 4e3e68fe56461a0d3c60707f8e49cc756529c4e1 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 17 Jan 2024 17:29:42 +1300 Subject: [PATCH 04/33] [TLC-674] Duplicate detection integration tests One workflow REST IT test still failing even though the same test in dspace-api service passes... --- .../DuplicateDetectionServiceImpl.java | 16 +- .../content/DuplicateDetectionTest.java | 57 +++++- .../app/rest/DuplicateDetectionRestIT.java | 192 ++++++++++++------ 3 files changed, 205 insertions(+), 60 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index 73da67ebebbb..a18e52a7618c 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -21,6 +21,7 @@ import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.MetadataFieldService; import org.dspace.content.service.MetadataValueService; +import org.dspace.content.service.WorkspaceItemService; import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Constants; import org.dspace.core.Context; @@ -38,6 +39,8 @@ import org.dspace.versioning.VersionHistory; import org.dspace.versioning.service.VersionHistoryService; import org.dspace.workflow.WorkflowItem; +import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; +import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; import org.springframework.beans.factory.annotation.Autowired; /** @@ -60,6 +63,10 @@ public class DuplicateDetectionServiceImpl implements DuplicateDetectionService MetadataFieldService metadataFieldService; @Autowired MetadataValueService metadataValueService; + @Autowired + XmlWorkflowItemService workflowItemService; + @Autowired + WorkspaceItemService workspaceItemService; /** * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that @@ -137,6 +144,7 @@ public Optional validateDuplicateResult(Context context, Ind // what submission / archived state it is in if (indexableObject instanceof IndexableWorkspaceItem) { workspaceItem = ((IndexableWorkspaceItem) indexableObject).getIndexedObject(); + log.info("ITS A WORKSPACE ITEM ITS A WORKSPACE ITEM " + workspaceItem.getItem().getName()); // Only process workspace items that belong to the submitter if (workspaceItem != null && workspaceItem.getSubmitter() != null && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { @@ -144,14 +152,18 @@ public Optional validateDuplicateResult(Context context, Ind } } if (indexableObject instanceof IndexableWorkflowItem) { - log.info("ITS A WORKFLOW ITEM ITS A WORKFLOW ITEM"); workflowItem = ((IndexableWorkflowItem) indexableObject).getIndexedObject(); + log.info("ITS A WORKFLOW ITEM ITS A WORKFLOW ITEM " + workflowItem.getItem().getName()); if (workflowItem != null) { resultItem = workflowItem.getItem(); } } if (indexableObject instanceof IndexableItem) { resultItem = ((IndexableItem) indexableObject).getIndexedObject(); + log.info("NORMAL ITEM FOUND " + resultItem.getName()); + // Attempt resolution of workflow or workspace items, tested later + workflowItem = workflowItemService.findByItem(context, resultItem); + workspaceItem = workspaceItemService.findByItem(context, resultItem); } // Result item must not be null, a template item, or actually identical to the original @@ -224,6 +236,8 @@ public Optional validateDuplicateResult(Context context, Ind } else if (authorizeService.isAdmin(context, resultItem)) { // Admins can always read, return immediately return Optional.of(potentialDuplicate); + } else { + log.error("No valid permission to return this item"); } // By default, return an empty result diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index fec36224fcdc..f341320a9a1f 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -16,19 +16,31 @@ import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; import org.dspace.builder.ItemBuilder; +import org.dspace.builder.WorkflowItemBuilder; +import org.dspace.builder.WorkspaceItemBuilder; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.discovery.IndexingService; import org.dspace.discovery.SearchService; import org.dspace.discovery.SearchUtils; +import org.dspace.discovery.indexobject.IndexableItem; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; import org.dspace.eperson.factory.EPersonServiceFactory; import org.dspace.eperson.service.GroupService; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.workflow.factory.WorkflowServiceFactory; +import org.dspace.xmlworkflow.factory.XmlWorkflowFactory; +import org.dspace.xmlworkflow.factory.XmlWorkflowServiceFactory; +import org.dspace.xmlworkflow.service.XmlWorkflowService; +import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; import org.junit.Before; import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; import scala.concurrent.impl.FutureConvertersImpl; +import static java.lang.Thread.sleep; import static junit.framework.TestCase.assertEquals; import static junit.framework.TestCase.assertNull; import static org.hamcrest.MatcherAssert.assertThat; @@ -45,7 +57,10 @@ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase private SearchService searchService = SearchUtils.getSearchService(); private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance().getDuplicateDetectionService(); private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + private XmlWorkflowService workflowService = XmlWorkflowServiceFactory.getInstance().getXmlWorkflowService(); + private IndexingService indexingService = DSpaceServicesFactory.getInstance().getServiceManager().getServiceByName("org.dspace.discovery.IndexingService", org.dspace.discovery.IndexingService.class); private Collection col; + private Collection workflowCol; private Item item1; private Item item2; private Item item3; @@ -54,6 +69,7 @@ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase private final String item1Title = "Public item I"; private final String item1Author = "Smith, Donald"; + @Before public void setUp() throws Exception { super.setUp(); @@ -70,6 +86,8 @@ public void setUp() throws Exception { parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); col = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection").build(); + workflowCol = CollectionBuilder.createCollection(context, parentCommunity).withName("Workflow Collection") + .withWorkflowGroup("reviewer", admin).build(); // Ingest three example items with slightly different titles // item2 is 1 edit distance from item1 and item3 @@ -93,7 +111,7 @@ public void setUp() throws Exception { .withSubject("ExtraEntry 3") .build(); - context.setDispatcher("noindex"); + //context.setDispatcher("noindex"); } /** @@ -191,4 +209,41 @@ public void testSearchDuplicates() throws Exception { } + @Test + public void testSearchDuplicatesInWorkflow() throws Exception { + // Get potential duplicates of item 1: + // Expected: Public item II should appear as it has the configured levenshtein distance of 1 + context.turnOffAuthorisationSystem(); + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) + .build(); + XmlWorkflowItem workflowItem1 = workflowService.start(context, workspaceItem); + XmlWorkflowItem workflowItem2 = workflowService.start(context, workspaceItem2); + + // Force reindex of these workflow items, as test framework appears not to do this by default? + indexingService.reIndexContent(context, new IndexableItem(workflowItem1.getItem())); + indexingService.reIndexContent(context, new IndexableItem(workflowItem2.getItem())); + + context.restoreAuthSystemState(); + + sleep(3600); + + context.setCurrentUser(admin); + List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, workflowItem1.getItem()); + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item1 should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be workflow item 2 + assertEquals("Workflow item 2 should be be the detected duplicate", + workflowItem2.getItem().getID(), potentialDuplicates.get(0).getUuid()); + } + } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 5b41a6ffc019..f29d5d490157 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -13,34 +13,40 @@ import java.util.List; import java.util.Map; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.dspace.app.rest.model.patch.Operation; import org.dspace.app.rest.model.patch.ReplaceOperation; import org.dspace.app.rest.test.AbstractControllerIntegrationTest; +import org.dspace.authorize.service.AuthorizeService; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; import org.dspace.builder.ItemBuilder; import org.dspace.builder.WorkflowItemBuilder; import org.dspace.builder.WorkspaceItemBuilder; import org.dspace.content.Collection; -import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.WorkspaceItem; import org.dspace.content.service.CollectionService; import org.dspace.content.service.ItemService; +import org.dspace.content.service.WorkspaceItemService; import org.dspace.core.I18nUtil; import org.dspace.discovery.IndexingService; -import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import org.dspace.discovery.indexobject.IndexableItem; import org.dspace.eperson.EPerson; import org.dspace.eperson.factory.EPersonServiceFactory; import org.dspace.eperson.service.EPersonService; import org.dspace.handle.service.HandleService; +import org.dspace.identifier.service.IdentifierService; import org.dspace.services.ConfigurationService; +import org.dspace.xmlworkflow.service.XmlWorkflowService; import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; +import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; import org.hamcrest.Matchers; -import org.junit.After; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; +import static java.lang.Thread.sleep; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; @@ -65,11 +71,19 @@ public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest CollectionService collectionService; @Autowired HandleService handleService; + @Autowired + WorkspaceItemService workspaceItemService; + @Autowired + XmlWorkflowItemService workflowItemService; + @Autowired + IdentifierService identifierService; + @Autowired + AuthorizeService authorizeService; + @Autowired + XmlWorkflowService workflowService; private Collection col; - private Item item1; - private Item item2; - private Item item3; + private Collection simpleCol; private final String item1IssueDate = "2011-10-17"; private final String item1Subject = "ExtraEntry 1"; private final String item1Title = "Public item I"; @@ -78,6 +92,8 @@ public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest private final String item2IssueDate = "2012-10-17"; private EPerson anotherEPerson; + private static Logger log = LogManager.getLogger(); + @Override public void setUp() throws Exception { super.setUp(); @@ -93,14 +109,16 @@ public void setUp() throws Exception { context.turnOffAuthorisationSystem(); parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); - DSpaceObject dso = handleService.resolveToObject(context, "123456789/test-duplicate-detection"); - if (dso == null) { - col = CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") - .withWorkflowGroup("reviewer", admin) - .withName("Collection").build(); - } else { - col = (Collection) dso; - } + + col = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection") + .withWorkflowGroup(1, admin) + .build(); + simpleCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection without Workflow") + .build(); + eperson.setFirstName(context, "first"); + eperson.setLastName(context, "last"); EPersonService ePersonService = EPersonServiceFactory.getInstance().getEPersonService(); anotherEPerson = ePersonService.findByEmail(context, "test-another-user@email.com"); if (anotherEPerson == null) { @@ -114,49 +132,71 @@ public void setUp() throws Exception { // actually save the eperson to unit testing DB ePersonService.update(context, anotherEPerson); } + //context.setDispatcher("noindex"); + context.restoreAuthSystemState(); + } + + @Test + public void itemsContainDuplicatesLinkTest() throws Exception { + String token = getAuthToken(admin.getEmail(), password); + log.error("EPERSON FULL NAME IS " + eperson.getFullName()); + + context.turnOffAuthorisationSystem(); + WorkspaceItem workspaceItem1 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle(item1Title) + .withSubject(item1Subject) + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + XmlWorkflowItem wfi1 = workflowService.start(context, workspaceItem1); + Item item1 = wfi1.getItem(); + context.restoreAuthSystemState(); + + getClient(token).perform(get("/api/core/items/" + item1.getID())) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + .andExpect(jsonPath("$._links.duplicates").exists()); + } + + @Test + public void searchDuplicatesByLinkTest() throws Exception { + String token = getAuthToken(admin.getEmail(), password); + + context.turnOffAuthorisationSystem(); // Ingest three example items with slightly different titles // item2 is 1 edit distance from item1 and item3 // item1 and item3 are 2 edit distance from each other - item1 = ItemBuilder.createItem(context, col) - .withTitle(item1Title) // Public item I + WorkspaceItem workspaceItem1 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle(item1Title) + .withSubject(item1Subject) .withIssueDate(item1IssueDate) .withAuthor(item1Author) - .withSubject(item1Subject) + .withSubmitter(eperson) .build(); - item2 = ItemBuilder.createItem(context, col) + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) .withTitle("Public item II") .withIssueDate(item2IssueDate) .withAuthor("Smith, Donald X.") .withSubject(item2Subject) + .withSubmitter(eperson) .build(); - item3 = ItemBuilder.createItem(context, col) + WorkspaceItem workspaceItem3 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle(item1Title) .withTitle("Public item III") .withIssueDate("2013-10-17") .withAuthor("Smith, Donald Y.") .withSubject("ExtraEntry 3") + .withSubmitter(eperson) .build(); + log.error("EPERSON FULL NAME IS " + eperson.getFullName()); + XmlWorkflowItem wfi1 = workflowService.start(context, workspaceItem1); + XmlWorkflowItem wfi2 = workflowService.start(context, workspaceItem2); + Item item1 = wfi1.getItem(); + Item item2 = wfi2.getItem(); - - context.dispatchEvents(); context.restoreAuthSystemState(); - //context.setDispatcher("noindex"); - - } - - @Test - public void itemsContainDuplicatesLinkTest() throws Exception { - String token = getAuthToken(admin.getEmail(), password); - - getClient(token).perform(get("/api/core/items/" + item1.getID())) - .andExpect(status().isOk()) - .andExpect(content().contentType(contentType)) - .andExpect(jsonPath("$._links.duplicates").exists()); - } - - @Test - public void searchDuplicatesByLinkTest() throws Exception { - String token = getAuthToken(admin.getEmail(), password); getClient(token).perform(get("/api/core/items/" + item1.getID() + "/duplicates")) .andExpect(status().isOk()) @@ -184,9 +224,35 @@ public void searchDuplicatesByLinkTest() throws Exception { public void submissionSectionDataTest() throws Exception { // Test publication context.turnOffAuthorisationSystem(); + + Collection workspaceCollection = + CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") + .withName("Test Collection Workspace").build(); + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + Item item1 = ItemBuilder.createItem(context, col) + .withTitle(item1Title) // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + Item item2 = ItemBuilder.createItem(context, col) + .withTitle("Public item II") + .withIssueDate(item2IssueDate) + .withAuthor("Smith, Donald X.") + .withSubject(item2Subject) + .build(); + Item item3 = ItemBuilder.createItem(context, col) + .withTitle("Public item III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .build(); // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items // metadata for the rest, as it is not relevant. - WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, col) + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workspaceCollection) .withTitle("Public item X") .withSubject(item2Subject) .withIssueDate(item2IssueDate) @@ -267,6 +333,11 @@ public void submissionSectionDataTest() throws Exception { public void submissionSectionWorkspaceItemVisibilityTest() throws Exception { // Test publication context.turnOffAuthorisationSystem(); + // Create a new collection with handle that maps to teh test-duplicate-detection submission config + col = CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") + .withName("Test Collection with Duplicate Detection") + .withWorkflowGroup(1, admin) + .build(); // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items // metadata for the rest, as it is not relevant. WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, col) @@ -316,44 +387,49 @@ public void submissionSectionWorkspaceItemVisibilityTest() throws Exception { */ @Test public void submissionSectionWorkflowItemVisibilityTest() throws Exception { - String reviewerToken = getAuthToken(admin.getEmail(), password); + context.turnOffAuthorisationSystem(); + // Create a new collection with handle that maps to teh test-duplicate-detection submission config + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + Collection workflowCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection with Duplicate Detection") + .withWorkflowGroup("reviewer", admin) + .build(); - Item testItem = ItemBuilder.createItem(context, col) - .withTitle("Totally unique?") // Public item I - .withIssueDate(item1IssueDate) - .withAuthor(item1Author) - .withSubject(item1Subject) + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) .build(); - // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items - // metadata for the rest, as it is not relevant. - XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, col) - .withTitle("Totally unique!") - .withIssueDate("2017-10-17") + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) .build(); - - //indexingService.indexContent(context, new IndexableWorkflowItem(workflowItem1)); - - context.dispatchEvents(); + XmlWorkflowItem workflowItem1 = workflowService.start(context, workspaceItem); + XmlWorkflowItem workflowItem2 = workflowService.start(context, workspaceItem2); + indexingService.reIndexContent(context, new IndexableItem(workflowItem1.getItem())); + indexingService.reIndexContent(context, new IndexableItem(workflowItem2.getItem())); context.restoreAuthSystemState(); - context.setCurrentUser(admin); + sleep(3600); + context.setCurrentUser(admin); + String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/core/items/" + testItem.getID() + "/duplicates")) + getClient(reviewerToken).perform(get("/api/core/items/" + workflowItem1.getItem().getID() + "/duplicates")) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) // UUID of only array member matches the new workflow item ID - .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(workflowItem1.getItem().getID().toString())); + .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(workflowItem2.getItem().getID().toString())); // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) - .perform(get("/api/core/items/" + testItem.getID() + "/duplicates")) + .perform(get("/api/core/items/" + workflowItem1.getItem().getID() + "/duplicates")) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(0))); } + } From 878ab75d735badb2d2a39bfb2d2e2ae0f5fd72cb Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 23 Jan 2024 14:20:14 +1300 Subject: [PATCH 05/33] [TLC-674] IT and checkstyle fixes --- .../DuplicateDetectionServiceImpl.java | 14 ++---- .../factory/ContentServiceFactoryImpl.java | 1 - .../content/DuplicateDetectionTest.java | 48 +++++-------------- .../ItemDuplicatesLinkRepository.java | 5 +- 4 files changed, 19 insertions(+), 49 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index a18e52a7618c..8ae734189ca7 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -39,7 +39,6 @@ import org.dspace.versioning.VersionHistory; import org.dspace.versioning.service.VersionHistoryService; import org.dspace.workflow.WorkflowItem; -import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; import org.springframework.beans.factory.annotation.Autowired; @@ -131,7 +130,8 @@ public List getPotentialDuplicates(Context context, Item ite * @throws AuthorizeException */ @Override - public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, Item original) + public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, + Item original) throws SQLException, AuthorizeException { @@ -144,7 +144,6 @@ public Optional validateDuplicateResult(Context context, Ind // what submission / archived state it is in if (indexableObject instanceof IndexableWorkspaceItem) { workspaceItem = ((IndexableWorkspaceItem) indexableObject).getIndexedObject(); - log.info("ITS A WORKSPACE ITEM ITS A WORKSPACE ITEM " + workspaceItem.getItem().getName()); // Only process workspace items that belong to the submitter if (workspaceItem != null && workspaceItem.getSubmitter() != null && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { @@ -153,14 +152,12 @@ public Optional validateDuplicateResult(Context context, Ind } if (indexableObject instanceof IndexableWorkflowItem) { workflowItem = ((IndexableWorkflowItem) indexableObject).getIndexedObject(); - log.info("ITS A WORKFLOW ITEM ITS A WORKFLOW ITEM " + workflowItem.getItem().getName()); if (workflowItem != null) { resultItem = workflowItem.getItem(); } } if (indexableObject instanceof IndexableItem) { resultItem = ((IndexableItem) indexableObject).getIndexedObject(); - log.info("NORMAL ITEM FOUND " + resultItem.getName()); // Attempt resolution of workflow or workspace items, tested later workflowItem = workflowItemService.findByItem(context, resultItem); workspaceItem = workspaceItemService.findByItem(context, resultItem); @@ -184,8 +181,7 @@ public Optional validateDuplicateResult(Context context, Ind VersionHistory candiateVersionHistory = versionHistoryService.findByItem(context, resultItem); // if the versionHistory is null, either versioning is switched off or the item doesn't have // multiple versions - if (versionHistory != null && versionHistory.equals(candiateVersionHistory)) - { + if (versionHistory != null && versionHistory.equals(candiateVersionHistory)) { log.warn("skipping item that is just another version of this item"); return Optional.empty(); } @@ -218,12 +214,10 @@ public Optional validateDuplicateResult(Context context, Ind // More authorisation checks if (workflowItem != null) { Collection c = workflowItem.getCollection(); - log.info("INSPECITG WORKFLOW ITEM " + workflowItem.getItem().getName() + " WITH EPERSON " + context.getCurrentUser().getName()); if (groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep1(context)) || groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep2(context)) || groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep3(context))) { // Current user is a member of one of the workflow role groups - log.info("WORKFLOW REVIEWER CAN SEE " + workflowItem.getID()); potentialDuplicate.setWorkflowItemId(workflowItem.getID()); return Optional.of(potentialDuplicate); } @@ -286,7 +280,7 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search // Add filter queries for the resource type discoverQuery.addFilterQueries("(search.resourcetype:Item OR " + "search.resourcetype:WorkspaceItem OR " + - "search.resourcetype:WorkflowItem)"); + "search.resourcetype:XmlWorkflowItem OR search.resourcetype:WorkflowItem)"); // Skip this item itself so it isn't a false positive discoverQuery.addFilterQueries("-search.resourceid:" + item.getID()); diff --git a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java index 4b9d5918b173..3c3c2bf162bb 100644 --- a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java @@ -10,7 +10,6 @@ import java.util.List; import org.dspace.content.DSpaceObject; -import org.dspace.content.DuplicateDetectionServiceImpl; import org.dspace.content.RelationshipMetadataService; import org.dspace.content.service.BitstreamFormatService; import org.dspace.content.service.BitstreamService; diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index f341320a9a1f..86e27328a6ed 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -11,36 +11,22 @@ import java.util.List; import java.util.Optional; -import org.apache.commons.lang.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.dspace.AbstractIntegrationTestWithDatabase; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; import org.dspace.builder.ItemBuilder; import org.dspace.builder.WorkflowItemBuilder; -import org.dspace.builder.WorkspaceItemBuilder; import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.virtual.PotentialDuplicate; -import org.dspace.discovery.IndexingService; -import org.dspace.discovery.SearchService; -import org.dspace.discovery.SearchUtils; -import org.dspace.discovery.indexobject.IndexableItem; -import org.dspace.discovery.indexobject.IndexableWorkflowItem; -import org.dspace.eperson.factory.EPersonServiceFactory; -import org.dspace.eperson.service.GroupService; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; -import org.dspace.workflow.factory.WorkflowServiceFactory; -import org.dspace.xmlworkflow.factory.XmlWorkflowFactory; -import org.dspace.xmlworkflow.factory.XmlWorkflowServiceFactory; -import org.dspace.xmlworkflow.service.XmlWorkflowService; import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; import org.junit.Before; import org.junit.Test; -import org.springframework.beans.factory.annotation.Autowired; -import scala.concurrent.impl.FutureConvertersImpl; -import static java.lang.Thread.sleep; import static junit.framework.TestCase.assertEquals; import static junit.framework.TestCase.assertNull; import static org.hamcrest.MatcherAssert.assertThat; @@ -52,13 +38,8 @@ * @author Kim Shepherd */ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase { - - private GroupService groupService = EPersonServiceFactory.getInstance().getGroupService(); - private SearchService searchService = SearchUtils.getSearchService(); private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance().getDuplicateDetectionService(); private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); - private XmlWorkflowService workflowService = XmlWorkflowServiceFactory.getInstance().getXmlWorkflowService(); - private IndexingService indexingService = DSpaceServicesFactory.getInstance().getServiceManager().getServiceByName("org.dspace.discovery.IndexingService", org.dspace.discovery.IndexingService.class); private Collection col; private Collection workflowCol; private Item item1; @@ -69,6 +50,7 @@ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase private final String item1Title = "Public item I"; private final String item1Author = "Smith, Donald"; + private static final Logger log = LogManager.getLogger(); @Before public void setUp() throws Exception { @@ -83,11 +65,14 @@ public void setUp() throws Exception { new String[]{"dc.date.issued", "dc.subject"}); context.turnOffAuthorisationSystem(); + context.setDispatcher("default"); parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); col = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection").build(); - workflowCol = CollectionBuilder.createCollection(context, parentCommunity).withName("Workflow Collection") - .withWorkflowGroup("reviewer", admin).build(); + workflowCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Workflow Collection") + .withWorkflowGroup("reviewer", admin) + .build(); // Ingest three example items with slightly different titles // item2 is 1 edit distance from item1 and item3 @@ -111,7 +96,7 @@ public void setUp() throws Exception { .withSubject("ExtraEntry 3") .build(); - //context.setDispatcher("noindex"); + } /** @@ -214,25 +199,18 @@ public void testSearchDuplicatesInWorkflow() throws Exception { // Get potential duplicates of item 1: // Expected: Public item II should appear as it has the configured levenshtein distance of 1 context.turnOffAuthorisationSystem(); - WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + //context.setDispatcher("default"); + XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) .withTitle("Unique title") .withSubmitter(eperson) .build(); - WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + XmlWorkflowItem workflowItem2 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) .withTitle("Unique title") .withSubmitter(eperson) .build(); - XmlWorkflowItem workflowItem1 = workflowService.start(context, workspaceItem); - XmlWorkflowItem workflowItem2 = workflowService.start(context, workspaceItem2); - - // Force reindex of these workflow items, as test framework appears not to do this by default? - indexingService.reIndexContent(context, new IndexableItem(workflowItem1.getItem())); - indexingService.reIndexContent(context, new IndexableItem(workflowItem2.getItem())); + //indexingService.commit(); context.restoreAuthSystemState(); - - sleep(3600); - context.setCurrentUser(admin); List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, workflowItem1.getItem()); diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java index 1187269ae1fe..93a5422e9a4b 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java @@ -7,13 +7,12 @@ */ package org.dspace.app.rest.repository; -import javax.annotation.Nullable; -import javax.servlet.http.HttpServletRequest; - import java.sql.SQLException; import java.util.LinkedList; import java.util.List; import java.util.UUID; +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; From 26b3e9ad13dcf76b3ad525339c9bfadb249349cd Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 23 Jan 2024 18:47:38 +1300 Subject: [PATCH 06/33] [TLC-674] IT and checkstyle fixes --- .../dspace/app/rest/DuplicateDetectionRestIT.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index f29d5d490157..645c0e258f2e 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -32,7 +32,6 @@ import org.dspace.content.service.WorkspaceItemService; import org.dspace.core.I18nUtil; import org.dspace.discovery.IndexingService; -import org.dspace.discovery.indexobject.IndexableItem; import org.dspace.eperson.EPerson; import org.dspace.eperson.factory.EPersonServiceFactory; import org.dspace.eperson.service.EPersonService; @@ -46,7 +45,6 @@ import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; -import static java.lang.Thread.sleep; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; @@ -396,20 +394,15 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { .withWorkflowGroup("reviewer", admin) .build(); - WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) .withTitle("Unique title") - .withSubmitter(eperson) + .withSubmitter(anotherEPerson) .build(); - WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, workflowCol) + XmlWorkflowItem workflowItem2 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) .withTitle("Unique title") .withSubmitter(eperson) .build(); - XmlWorkflowItem workflowItem1 = workflowService.start(context, workspaceItem); - XmlWorkflowItem workflowItem2 = workflowService.start(context, workspaceItem2); - indexingService.reIndexContent(context, new IndexableItem(workflowItem1.getItem())); - indexingService.reIndexContent(context, new IndexableItem(workflowItem2.getItem())); context.restoreAuthSystemState(); - sleep(3600); context.setCurrentUser(admin); String reviewerToken = getAuthToken(admin.getEmail(), password); From 485e716e00c8b417c790d3f4e5697bfa94df5394 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 23 Jan 2024 19:08:17 +1300 Subject: [PATCH 07/33] [TLC-674] Escape solr reserved characters (and new test coverage) --- .../DuplicateDetectionServiceImpl.java | 3 ++ .../content/DuplicateDetectionTest.java | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index 8ae734189ca7..7efbe680cb3b 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -272,6 +272,9 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search // Get search service SearchService searchService = SearchUtils.getSearchService(); + // Escape reserved solr characters + signature = searchService.escapeQueryChars(signature); + // Construct discovery query based on signature DiscoverQuery discoverQuery = new DiscoverQuery(); discoverQuery.setQuery("(" + configurationService.getProperty("duplicate.signature.field", diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index 86e27328a6ed..35b0b35d3d69 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -7,6 +7,7 @@ */ package org.dspace.content; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.Optional; @@ -21,6 +22,7 @@ import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.discovery.SearchServiceException; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; @@ -30,6 +32,7 @@ import static junit.framework.TestCase.assertEquals; import static junit.framework.TestCase.assertNull; import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.fail; /** * @@ -194,6 +197,47 @@ public void testSearchDuplicates() throws Exception { } + /** + * Test that a search for getPotentialDuplicates properly escapes Solr reserved characters + * e.g. + - && | | ! ( ) { } [ ] ^ " ~ * ? : \ + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { + + Item item4 = ItemBuilder.createItem(context, col) + .withTitle("Testing: An Important Development Step") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + Item item5 = ItemBuilder.createItem(context, col) + .withTitle("Testing an important development step") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 4 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item4); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters should NOT result in search exception (" + e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item1 should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 5 + assertEquals("Item 5 should be be the detected duplicate", + item5.getID(), potentialDuplicates.get(0).getUuid()); + + } + @Test public void testSearchDuplicatesInWorkflow() throws Exception { // Get potential duplicates of item 1: From bcbf33afb0646fa5ea018eadf2656ec2a9be4151 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 23 Jan 2024 19:20:30 +1300 Subject: [PATCH 08/33] [TLC-674] Remove unused test class --- .../content/virtual/PotentialDuplicateTest.java | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java diff --git a/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java b/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java deleted file mode 100644 index 2a727abcae6b..000000000000 --- a/dspace-api/src/test/java/org/dspace/content/virtual/PotentialDuplicateTest.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.dspace.content.virtual; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.junit.MockitoJUnitRunner; - -@RunWith(MockitoJUnitRunner.class) -public class PotentialDuplicateTest { - - - -} From c43c82c1674ad20cdae46e9aedfcbac80ef0f98c Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 23 Jan 2024 19:36:50 +1300 Subject: [PATCH 09/33] [TLC-674] Checkstyle fixes for ITs --- .../content/DuplicateDetectionTest.java | 19 ++++++++++-------- .../app/rest/DuplicateDetectionRestIT.java | 20 ++++++++++--------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index 35b0b35d3d69..128936edc8fc 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -7,6 +7,11 @@ */ package org.dspace.content; +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertNull; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.fail; + import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -29,11 +34,6 @@ import org.junit.Before; import org.junit.Test; -import static junit.framework.TestCase.assertEquals; -import static junit.framework.TestCase.assertNull; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.fail; - /** * * Integration tests for the duplicate detection service @@ -41,7 +41,8 @@ * @author Kim Shepherd */ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase { - private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance().getDuplicateDetectionService(); + private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance() + .getDuplicateDetectionService(); private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); private Collection col; private Collection workflowCol; @@ -224,7 +225,8 @@ public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { try { potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item4); } catch (SearchServiceException e) { - fail("Duplicate search with special characters should NOT result in search exception (" + e.getMessage() + ")"); + fail("Duplicate search with special characters should NOT result in search exception (" + + e.getMessage() + ")"); } // Make sure result list is size 1 @@ -256,7 +258,8 @@ public void testSearchDuplicatesInWorkflow() throws Exception { //indexingService.commit(); context.restoreAuthSystemState(); context.setCurrentUser(admin); - List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, workflowItem1.getItem()); + List potentialDuplicates = + duplicateDetectionService.getPotentialDuplicates(context, workflowItem1.getItem()); // Make sure result list is size 1 int size = 1; diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 645c0e258f2e..2de08c87c206 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -7,11 +7,17 @@ */ package org.dspace.app.rest; -import javax.ws.rs.core.MediaType; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import javax.ws.rs.core.MediaType; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -45,12 +51,6 @@ import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; -import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; -import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; - /** * Test item link and section data REST endpoints for duplicate detection. * @see DuplicateDetectionTest (dspace-api) for lower level integration tests. @@ -408,13 +408,15 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/core/items/" + workflowItem1.getItem().getID() + "/duplicates")) + getClient(reviewerToken).perform(get("/api/core/items/" + workflowItem1.getItem().getID() + + "/duplicates")) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) // UUID of only array member matches the new workflow item ID - .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(workflowItem2.getItem().getID().toString())); + .andExpect(jsonPath("$._embedded.duplicates[0].uuid") + .value(workflowItem2.getItem().getID().toString())); // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) From 4515ded85f9687a79846df649925d998784239fc Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 24 Jan 2024 11:57:35 +1300 Subject: [PATCH 10/33] Fix out of date licence headers on PotentialDuplicateConverter/Resource --- .../dspace/app/rest/converter/PotentialDuplicateConverter.java | 3 +-- .../app/rest/model/hateoas/PotentialDuplicateResource.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java index 566c92e56665..8d2814b611e5 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java @@ -2,10 +2,9 @@ * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at - *

+ * * http://www.dspace.org/license/ */ - package org.dspace.app.rest.converter; import org.dspace.app.rest.model.MetadataValueList; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java index 85b417d30210..e753126133b4 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java @@ -2,7 +2,7 @@ * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at - *

+ * * http://www.dspace.org/license/ */ package org.dspace.app.rest.model.hateoas; From eca9c624b0a2b51ca6d44b64e4b6823e5539a5e2 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 24 Jan 2024 15:09:01 +1300 Subject: [PATCH 11/33] [TLC-674] Update SubmissionDefinitionsControllerIT total count --- .../SubmissionDefinitionsControllerIT.java | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java index 3b8c87ce5f2c..b42082f83db9 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java @@ -32,6 +32,11 @@ */ public class SubmissionDefinitionsControllerIT extends AbstractControllerIntegrationTest { + // The total number of expected submission definitions is referred to in multiple tests and assertions as + // is the last page (totalDefinitions - 1) + // This integer should be maintained along with any changes to item-submissions.xml + private static final int totalDefinitions = 11; + @Test public void findAll() throws Exception { //When we call the root endpoint as anonymous user @@ -258,10 +263,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=1"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(0))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -284,10 +289,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=1"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(1))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -310,10 +315,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=2"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(2))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -336,10 +341,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=3"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(3))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -362,10 +367,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=4"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(4))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -388,10 +393,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=5"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(5))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") From 69d069d2e5c09f99b6177cfffd39e27598907060 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 30 Jan 2024 07:44:04 +1300 Subject: [PATCH 12/33] [TLC-674] Update ItemMatcher embeds with duplicates link --- .../src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java index 64905f90ea2d..7cbe73a00551 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java @@ -52,6 +52,7 @@ public static Matcher matchFullEmbeds() { return matchEmbeds( "accessStatus", "bundles[]", + "duplicates", "identifiers", "mappedCollections[]", "owningCollection", @@ -69,6 +70,7 @@ public static Matcher matchLinks(UUID uuid) { return HalMatcher.matchLinks(REST_SERVER_URL + "core/items/" + uuid, "accessStatus", "bundles", + "duplicates", "identifiers", "mappedCollections", "owningCollection", From e7424fb0a00b192b901a4dbb1fdd3e37d964f2d8 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 30 Jan 2024 08:13:43 +1300 Subject: [PATCH 13/33] [TLC-674] Disable feature by default --- dspace/config/modules/duplicate-detection.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg index aea165324749..191072515500 100644 --- a/dspace/config/modules/duplicate-detection.cfg +++ b/dspace/config/modules/duplicate-detection.cfg @@ -3,7 +3,7 @@ ## # Enable this feature. Default: false -duplicate.enable = true +#duplicate.enable = true ## # Normalisation rules. If these are changed, a full index-discovery re-index should be performed to force @@ -16,8 +16,8 @@ duplicate.signature.normalise.lowercase = true # to be treated as a single term by Lucene) duplicate.signature.normalise.whitespace = true -# Levenshtein edit distance. Default:1 (eg. Test will match Txst but not Txxt) -duplicate.signature.distance = 1 +# Levenshtein edit distance. Default:0 (eg. Test will match Txst but not Txxt) +duplicate.signature.distance = 0 # Solr field used for storing the item signature duplicate.signature.field = item_signature From 8a16c338bf9488943a20e0a429ea3f31703a7fdc Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 30 Jan 2024 08:25:08 +1300 Subject: [PATCH 14/33] [TLC-674] Long title and exact match duplicate tests --- .../content/DuplicateDetectionTest.java | 105 +++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index 128936edc8fc..375697534046 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -176,7 +176,8 @@ public void testSearchDuplicates() throws Exception { // Check metadata is populated as per configuration, using item1 (first in results) // Check for date Optional foundDate = potentialDuplicates.get(0).getMetadataValueList().stream() - .filter(metadataValue -> metadataValue.getMetadataField().toString('.').equals("dc.date.issued")) + .filter(metadataValue -> metadataValue.getMetadataField().toString('.') + .equals("dc.date.issued")) .map(MetadataValue::getValue).findFirst(); assertThat("There should be an issue date found", foundDate.isPresent()); assertEquals("item1 issue date should match the duplicate obj metadata issue date", @@ -231,7 +232,7 @@ public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { // Make sure result list is size 1 int size = 1; - assertEquals("Potential duplicates of item1 should have size " + size, + assertEquals("Potential duplicates of item4 (special characters) should have size " + size, size, potentialDuplicates.size()); // The only member should be item 5 @@ -240,6 +241,104 @@ public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { } + /** + * Test that a search for a very long title which also contains reserved characters + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithVeryLongTitle() throws Exception { + + Item item6 = ItemBuilder.createItem(context, col) + .withTitle("Testing: This title is over 200 characters long and should behave just the same as a " + + "shorter title, with or without reserved characters. This integration test will prove that " + + "long titles are detected as potential duplicates.") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + // This item is the same as above, just missing a comma from the title. + Item item7 = ItemBuilder.createItem(context, col) + .withTitle("Testing: This title is over 200 characters long and should behave just the same as a " + + "shorter title with or without reserved characters. This integration test will prove that " + + "long titles are detected as potential duplicates.") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 4 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item6); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters (long title) should NOT result in search exception (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item6 (long title) should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 5 + assertEquals("Item 7's long title should match Item 6 as a potential duplicate", + item7.getID(), potentialDuplicates.get(0).getUuid()); + + } + + /** + * Test that a search for a very long title which also contains reserved characters + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesExactMatch() throws Exception { + + // Set distance to 0 manually + configurationService.setProperty("duplicate.signature.distance", 0); + + Item item8 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + // This item is the same as above + Item item9 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry") + .build(); + // This item has one character different, greater than the edit distance + Item item10 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match.") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry") + .build(); + + // Get potential duplicates of item 4 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item8); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters (long title) should NOT result in search exception (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 - we do NOT expect item 10 to appear + int size = 1; + assertEquals("ONLY one exact match should be found (item 9) " + size, + size, potentialDuplicates.size()); + + // The only member should be item 9 + assertEquals("Item 9 should match Item 8 as a potential duplicate", + item9.getID(), potentialDuplicates.get(0).getUuid()); + + } + @Test public void testSearchDuplicatesInWorkflow() throws Exception { // Get potential duplicates of item 1: @@ -271,4 +370,6 @@ public void testSearchDuplicatesInWorkflow() throws Exception { workflowItem2.getItem().getID(), potentialDuplicates.get(0).getUuid()); } + + } From 7794b9ff825daaf5370bb8526b70f2adad823be5 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Thu, 1 Feb 2024 11:30:30 +1300 Subject: [PATCH 15/33] [TLC-674] Refactor duplicates from item link to searchBy --- .../rest/repository/ItemRestRepository.java | 59 +++++++++++++++++ .../app/rest/DuplicateDetectionRestIT.java | 63 +++++++------------ .../dspace/app/rest/matcher/ItemMatcher.java | 2 - 3 files changed, 83 insertions(+), 41 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java index a1659c58d38c..b5926d1defe3 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java @@ -11,6 +11,7 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Objects; import java.util.UUID; @@ -22,12 +23,15 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.Parameter; +import org.dspace.app.rest.SearchRestMethod; import org.dspace.app.rest.converter.MetadataConverter; import org.dspace.app.rest.exception.DSpaceBadRequestException; import org.dspace.app.rest.exception.RepositoryMethodNotImplementedException; import org.dspace.app.rest.exception.UnprocessableEntityException; import org.dspace.app.rest.model.BundleRest; import org.dspace.app.rest.model.ItemRest; +import org.dspace.app.rest.model.PotentialDuplicateRest; import org.dspace.app.rest.model.patch.Patch; import org.dspace.app.rest.repository.handler.service.UriListHandlerService; import org.dspace.authorize.AuthorizeException; @@ -39,12 +43,15 @@ import org.dspace.content.WorkspaceItem; import org.dspace.content.service.BundleService; import org.dspace.content.service.CollectionService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.InstallItemService; import org.dspace.content.service.ItemService; import org.dspace.content.service.RelationshipService; import org.dspace.content.service.RelationshipTypeService; import org.dspace.content.service.WorkspaceItemService; +import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Context; +import org.dspace.discovery.SearchServiceException; import org.dspace.util.UUIDUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; @@ -95,6 +102,9 @@ public class ItemRestRepository extends DSpaceObjectRestRepository stringList) Item item = uriListHandlerService.handle(context, req, stringList, Item.class); return converter.toRest(item, utils.obtainProjection()); } + + /** + * Search method to find and return a paged list of duplicates, if the current user has authorization + * + * @param itemUuid the UUID of the item for which we are searching duplicates + * @param optionalPageable pagination options + * @return pagination potential duplicate results + */ + @PreAuthorize("hasPermission(#itemUuid, 'ITEM', 'READ')") + @SearchRestMethod(name = "findDuplicates") + public Page findDuplicates(@Parameter(value = "uuid", required = true) UUID itemUuid, + Pageable optionalPageable) { + // Instantiate object to represent this item + Item item; + // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list + List potentialDuplicates = new LinkedList<>(); + // Instantiate total count + int total = 0; + // Obtain context + Context context = obtainContext(); + // Get pagination + Pageable pageable = utils.getPageable(optionalPageable); + + // Try to get item based on UUID parameter + try { + item = itemService.find(context, itemUuid); + } catch (SQLException e) { + throw new ResourceNotFoundException(e.getMessage()); + } + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("No such item: " + itemUuid); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + try { + // Search for the list of potential duplicates + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); + } catch (SearchServiceException e) { + // If the search fails, log an error and return an empty list rather than throwing a fatal error + log.error("Search service error retrieving duplicates: {}", e.getMessage()); + } + + // Return the list of items along with pagination info and max results + return converter.toRestPage(potentialDuplicates, pageable, total, utils.obtainProjection()); + } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 2de08c87c206..5a6c50b7fb9a 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -135,30 +135,7 @@ public void setUp() throws Exception { } @Test - public void itemsContainDuplicatesLinkTest() throws Exception { - String token = getAuthToken(admin.getEmail(), password); - log.error("EPERSON FULL NAME IS " + eperson.getFullName()); - - context.turnOffAuthorisationSystem(); - WorkspaceItem workspaceItem1 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) - .withTitle(item1Title) - .withSubject(item1Subject) - .withIssueDate(item1IssueDate) - .withAuthor(item1Author) - .withSubmitter(eperson) - .build(); - XmlWorkflowItem wfi1 = workflowService.start(context, workspaceItem1); - Item item1 = wfi1.getItem(); - context.restoreAuthSystemState(); - - getClient(token).perform(get("/api/core/items/" + item1.getID())) - .andExpect(status().isOk()) - .andExpect(content().contentType(contentType)) - .andExpect(jsonPath("$._links.duplicates").exists()); - } - - @Test - public void searchDuplicatesByLinkTest() throws Exception { + public void searchDuplicatesBySearchMethodTest() throws Exception { String token = getAuthToken(admin.getEmail(), password); context.turnOffAuthorisationSystem(); @@ -196,21 +173,26 @@ public void searchDuplicatesByLinkTest() throws Exception { context.restoreAuthSystemState(); - getClient(token).perform(get("/api/core/items/" + item1.getID() + "/duplicates")) + getClient(token).perform(get("/api/core/items/search/findDuplicates?uuid=" + item1.getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array - .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources", Matchers.hasSize(1))) // UUID of only array member matches item2 ID - .andExpect(jsonPath("$._embedded.duplicates[0].uuid").value(item2.getID().toString())) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0].uuid") + .value(item2.getID().toString())) // First item has subject and issue date metadata populated as expected - .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.subject'][0].value") + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.subject'][0].value") .value(item2Subject)) - .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.date.issued'][0].value") + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.date.issued'][0].value") .value(item2IssueDate)) // Does NOT have other metadata e.g. author, title - .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.contributor.author']").doesNotExist()) - .andExpect(jsonPath("$._embedded.duplicates[0].metadata['dc.title']").doesNotExist()); + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.contributor.author']").doesNotExist()) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.title']").doesNotExist()); } /** @@ -270,9 +252,11 @@ public void submissionSectionDataTest() throws Exception { .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].uuid") .value(item1.getID().toString())) // Metadata for subject and issue date is populated as expected - .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].metadata['dc.subject'][0].value") + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.subject'][0].value") .value(item1Subject)) - .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].metadata['dc.date.issued'][0].value") + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.date.issued'][0].value") .value(item1IssueDate)) // Metadata for other metadata fields has not been copied across, as expected .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + @@ -408,23 +392,24 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/core/items/" + workflowItem1.getItem().getID() - + "/duplicates")) + getClient(reviewerToken).perform(get("/api/core/items/search/findDuplicates?uuid=" + + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array - .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(1))) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources", Matchers.hasSize(1))) // UUID of only array member matches the new workflow item ID - .andExpect(jsonPath("$._embedded.duplicates[0].uuid") + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0].uuid") .value(workflowItem2.getItem().getID().toString())); // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) - .perform(get("/api/core/items/" + workflowItem1.getItem().getID() + "/duplicates")) + .perform(get("/api/core/items/search/findDuplicates?uuid=" + + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array - .andExpect(jsonPath("$._embedded.duplicates", Matchers.hasSize(0))); + .andExpect(jsonPath("$._embedded.potentialDuplicateResources").doesNotExist()); } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java index 7cbe73a00551..64905f90ea2d 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/ItemMatcher.java @@ -52,7 +52,6 @@ public static Matcher matchFullEmbeds() { return matchEmbeds( "accessStatus", "bundles[]", - "duplicates", "identifiers", "mappedCollections[]", "owningCollection", @@ -70,7 +69,6 @@ public static Matcher matchLinks(UUID uuid) { return HalMatcher.matchLinks(REST_SERVER_URL + "core/items/" + uuid, "accessStatus", "bundles", - "duplicates", "identifiers", "mappedCollections", "owningCollection", From 88555566bf3e37ac5b453401cc15d1401aa7daec Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Thu, 1 Feb 2024 12:12:17 +1300 Subject: [PATCH 16/33] [TLC-674] Remove old ItemDuplicatesLinkRepository and update item model --- .../org/dspace/app/rest/model/ItemRest.java | 5 - .../ItemDuplicatesLinkRepository.java | 114 ------------------ 2 files changed, 119 deletions(-) delete mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java index cafbbaaa9184..b2f540c0ac4a 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/ItemRest.java @@ -52,10 +52,6 @@ @LinkRest( name = ItemRest.THUMBNAIL, method = "getThumbnail" - ), - @LinkRest( - name = ItemRest.DUPLICATES, - method = "getDuplicates" ) }) public class ItemRest extends DSpaceObjectRest { @@ -72,7 +68,6 @@ public class ItemRest extends DSpaceObjectRest { public static final String VERSION = "version"; public static final String TEMPLATE_ITEM_OF = "templateItemOf"; public static final String THUMBNAIL = "thumbnail"; - public static final String DUPLICATES = "duplicates"; private boolean inArchive = false; private boolean discoverable = false; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java deleted file mode 100644 index 93a5422e9a4b..000000000000 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemDuplicatesLinkRepository.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.app.rest.repository; - -import java.sql.SQLException; -import java.util.LinkedList; -import java.util.List; -import java.util.UUID; -import javax.annotation.Nullable; -import javax.servlet.http.HttpServletRequest; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.dspace.app.rest.model.ItemRest; -import org.dspace.app.rest.model.PotentialDuplicateRest; -import org.dspace.app.rest.projection.Projection; -import org.dspace.app.rest.submit.SubmissionService; -import org.dspace.content.Item; -import org.dspace.content.service.DuplicateDetectionService; -import org.dspace.content.service.ItemService; -import org.dspace.content.virtual.PotentialDuplicate; -import org.dspace.core.Context; -import org.dspace.discovery.SearchServiceException; -import org.dspace.services.ConfigurationService; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.data.domain.Page; -import org.springframework.data.domain.Pageable; -import org.springframework.data.rest.webmvc.ResourceNotFoundException; -import org.springframework.security.access.prepost.PreAuthorize; -import org.springframework.stereotype.Component; - -/** - * Item link repository to allow a list of potential duplicates to be searched and returned - * for the parent item. - * Used by, e.g. workflow pooled/claimed task page and previews, to show reviewers about potential duplicates - * @see DuplicateDetectionService - * - * @author Kim Shepherd - */ -@Component(ItemRest.CATEGORY + "." + ItemRest.NAME + "." + ItemRest.DUPLICATES) -public class ItemDuplicatesLinkRepository extends AbstractDSpaceRestRepository - implements LinkRestRepository { - - private static final Logger log = LogManager.getLogger(ItemDuplicatesLinkRepository.class); - - @Autowired - ItemService itemService; - @Autowired - ConfigurationService configurationService; - @Autowired - SubmissionService submissionService; - @Autowired - DuplicateDetectionService duplicateDetectionService; - - /** - * Get a list of potential duplicates based on the current item's "signature" (e.g. title) - * - * @param request - * @param itemId - * @param optionalPageable - * @param projection - * @return - */ - @PreAuthorize("hasPermission(#itemId, 'ITEM', 'READ')") - public Page getDuplicates(@Nullable HttpServletRequest request, - UUID itemId, - @Nullable Pageable optionalPageable, - Projection projection) { - - - // Instantiate object to represent this item - Item item; - // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list - List potentialDuplicates = new LinkedList<>(); - // Instantiate total count - int total = 0; - // Obtain context - Context context = obtainContext(); - // Get pagination - Pageable pageable = utils.getPageable(optionalPageable); - - // Try to get item based on UUID parameter - try { - item = itemService.find(context, itemId); - } catch (SQLException e) { - throw new ResourceNotFoundException(e.getMessage()); - } - - // If the item is null or otherwise invalid (template, etc) then throw an appropriate error - if (item == null) { - throw new ResourceNotFoundException("No such item: " + itemId); - } - if (item.getTemplateItemOf() != null) { - throw new IllegalArgumentException("Cannot get duplicates for template item"); - } - - try { - // Search for the list of potential duplicates - potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); - } catch (SearchServiceException e) { - // If the search fails, log an error and return an empty list rather than throwing a fatal error - log.error("Search service error retrieving duplicates: {}", e.getMessage()); - } - - // Return the list of items along with pagination info and max results - return converter.toRestPage(potentialDuplicates, pageable, total, projection); - } - -} From f4b6379d3692962400af9bbe8eef75a358e9d5a0 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Thu, 8 Feb 2024 10:24:09 +1300 Subject: [PATCH 17/33] Remove test user from dupe test cases Improve duplicate-detection.cfg --- .../app/rest/DuplicateDetectionRestIT.java | 30 +++++++++++++------ dspace/config/modules/duplicate-detection.cfg | 2 ++ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 5a6c50b7fb9a..f5a8477f66b8 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -39,7 +39,6 @@ import org.dspace.core.I18nUtil; import org.dspace.discovery.IndexingService; import org.dspace.eperson.EPerson; -import org.dspace.eperson.factory.EPersonServiceFactory; import org.dspace.eperson.service.EPersonService; import org.dspace.handle.service.HandleService; import org.dspace.identifier.service.IdentifierService; @@ -79,6 +78,8 @@ public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest AuthorizeService authorizeService; @Autowired XmlWorkflowService workflowService; + @Autowired + EPersonService ePersonService; private Collection col; private Collection simpleCol; @@ -117,7 +118,6 @@ public void setUp() throws Exception { .build(); eperson.setFirstName(context, "first"); eperson.setLastName(context, "last"); - EPersonService ePersonService = EPersonServiceFactory.getInstance().getEPersonService(); anotherEPerson = ePersonService.findByEmail(context, "test-another-user@email.com"); if (anotherEPerson == null) { anotherEPerson = ePersonService.create(context); @@ -213,19 +213,19 @@ public void submissionSectionDataTest() throws Exception { // item2 is 1 edit distance from item1 and item3 // item1 and item3 are 2 edit distance from each other Item item1 = ItemBuilder.createItem(context, col) - .withTitle(item1Title) // Public item I + .withTitle("Submission section test I") // Public item I .withIssueDate(item1IssueDate) .withAuthor(item1Author) .withSubject(item1Subject) .build(); Item item2 = ItemBuilder.createItem(context, col) - .withTitle("Public item II") + .withTitle("Submission section test II") .withIssueDate(item2IssueDate) .withAuthor("Smith, Donald X.") .withSubject(item2Subject) .build(); Item item3 = ItemBuilder.createItem(context, col) - .withTitle("Public item III") + .withTitle("Submission section test III") .withIssueDate("2013-10-17") .withAuthor("Smith, Donald Y.") .withSubject("ExtraEntry 3") @@ -233,7 +233,7 @@ public void submissionSectionDataTest() throws Exception { // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items // metadata for the rest, as it is not relevant. WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workspaceCollection) - .withTitle("Public item X") + .withTitle("Submission section test 1") .withSubject(item2Subject) .withIssueDate(item2IssueDate) .withAuthor(item1Author) @@ -264,11 +264,9 @@ public void submissionSectionDataTest() throws Exception { .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + ".metadata['dc.title']").doesNotExist()); - // Try to add ISBN (type bound to book and book chapter) - this should not work and instead we'll get - // no JSON path for that field, because this item has no type yet List updateOperations = new ArrayList(); Map value = new HashMap(); - value.put("value", "Public item II"); + value.put("value", "Submission section test II"); updateOperations.add(new ReplaceOperation("/sections/traditionalpageone/dc.title/0", value)); String patchBody = getPatchContent(updateOperations); getClient(submitterToken).perform(patch("/api/submission/workspaceitems/" + workspaceItem.getID()) @@ -412,4 +410,18 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { .andExpect(jsonPath("$._embedded.potentialDuplicateResources").doesNotExist()); } + @Override + public void destroy() throws Exception { + if (anotherEPerson != null) { + try { + context.turnOffAuthorisationSystem(); + ePersonService.delete(context, anotherEPerson); + context.restoreAuthSystemState(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + super.destroy(); + } + } diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg index 191072515500..b2c26b7a4728 100644 --- a/dspace/config/modules/duplicate-detection.cfg +++ b/dspace/config/modules/duplicate-detection.cfg @@ -17,6 +17,8 @@ duplicate.signature.normalise.lowercase = true duplicate.signature.normalise.whitespace = true # Levenshtein edit distance. Default:0 (eg. Test will match Txst but not Txxt) +# Valid distances are 0, 1, 2 as per Solr documentation. Note that this distance is applied *after* normalisation +# rules above, so capitalisation and whitespace will not count as 'edits' if you have the above rules enabled. duplicate.signature.distance = 0 # Solr field used for storing the item signature duplicate.signature.field = item_signature From 9fc5b71ae3766220852748a3e4d2559d67038860 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Fri, 9 Feb 2024 11:17:13 +1300 Subject: [PATCH 18/33] Detect dupe: null item ref in ex message --- .../java/org/dspace/content/DuplicateDetectionServiceImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index 7efbe680cb3b..7d568d1808f1 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -253,7 +253,7 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search // If the item is null or otherwise invalid (template, etc) then throw an appropriate error if (item == null) { - throw new ResourceNotFoundException("No such item: " + item); + throw new ResourceNotFoundException("Duplicate search error: item is null"); } if (item.getTemplateItemOf() != null) { throw new IllegalArgumentException("Cannot get duplicates for template item"); From 8c4839db51a81d617ab165cfd20a157913255e25 Mon Sep 17 00:00:00 2001 From: Pascal-Nicolas Becker Date: Wed, 14 Feb 2024 00:09:56 +0100 Subject: [PATCH 19/33] Fix SubmissionDefinitionsControllerIT --- .../SubmissionDefinitionsControllerIT.java | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java index b42082f83db9..a6f3999b94ac 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java @@ -419,10 +419,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=5"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(5))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -445,10 +445,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=6"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(6))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -471,10 +471,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=7"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(7))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -497,10 +497,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=8"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(8))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -520,10 +520,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=9"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(9))); } From 68d0382959d5a4cbe1d89fce57ad0af4f662d1e4 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Tue, 20 Feb 2024 09:41:58 +1300 Subject: [PATCH 20/33] [TLC-674] Comment out duplicate step from default submission cfg --- dspace/config/item-submission.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dspace/config/item-submission.xml b/dspace/config/item-submission.xml index 6028a7f40114..6147c61cbafd 100644 --- a/dspace/config/item-submission.xml +++ b/dspace/config/item-submission.xml @@ -281,6 +281,10 @@ + + From abf54f5ae3004a37c65f2864aeef4cc5d627b813 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 26 Feb 2024 10:22:09 +1300 Subject: [PATCH 21/33] [TLC-674] Revert changes to search schema.xml --- dspace/solr/search/conf/schema.xml | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/dspace/solr/search/conf/schema.xml b/dspace/solr/search/conf/schema.xml index 09540dccca93..df21afbc6426 100644 --- a/dspace/solr/search/conf/schema.xml +++ b/dspace/solr/search/conf/schema.xml @@ -223,25 +223,6 @@ - - - - - - - - - - - - - - - - @@ -345,9 +326,6 @@ - - - From 1350f87ca13631e275b4d100d9bef2776f1a066e Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 26 Feb 2024 10:24:04 +1300 Subject: [PATCH 22/33] [TLC-674] Replace all references of "signature" with "comparison value" To avoid any confusion to DSpace-CRIS work. Also allow multiple fields in comparison value building. --- .../DuplicateDetectionServiceImpl.java | 56 ++++++++++++++----- .../service/DuplicateDetectionService.java | 2 +- ... => SolrServiceIndexComparisonPlugin.java} | 35 ++++++------ .../content/DuplicateDetectionTest.java | 13 +++-- dspace/config/modules/duplicate-detection.cfg | 21 ++++--- dspace/config/spring/api/discovery.xml | 4 +- 6 files changed, 82 insertions(+), 49 deletions(-) rename dspace-api/src/main/java/org/dspace/discovery/{SolrServiceIndexItemSignaturePlugin.java => SolrServiceIndexComparisonPlugin.java} (66%) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index 7d568d1808f1..ecd0f7d8499c 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -8,6 +8,7 @@ package org.dspace.content; import java.sql.SQLException; +import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; @@ -16,9 +17,11 @@ import org.apache.commons.lang3.StringUtils; import org.apache.velocity.exception.ResourceNotFoundException; +import org.dspace.app.itemupdate.MetadataUtilities; import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.service.AuthorizeService; import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; import org.dspace.content.service.MetadataFieldService; import org.dspace.content.service.MetadataValueService; import org.dspace.content.service.WorkspaceItemService; @@ -66,6 +69,8 @@ public class DuplicateDetectionServiceImpl implements DuplicateDetectionService XmlWorkflowItemService workflowItemService; @Autowired WorkspaceItemService workspaceItemService; + @Autowired + ItemService itemService; /** * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that @@ -240,7 +245,7 @@ public Optional validateDuplicateResult(Context context, Ind /** * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) - * and a single-term "signature" constructed out of the item title + * and a single-term "comparison value" constructed out of the item title * * @param context DSpace context * @param item The item to check @@ -259,27 +264,50 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search throw new IllegalArgumentException("Cannot get duplicates for template item"); } - // Construct signature object - String signature = item.getName(); - if (StringUtils.isNotBlank(signature)) { - if (configurationService.getBooleanProperty("duplicate.signature.normalise.lowercase")) { - signature = signature.toLowerCase(context.getCurrentLocale()); + // Get configured fields to use for comparison values + String[] comparisonFields = configurationService.getArrayProperty("duplication.comparison.metadata.field", + new String[]{"dc.title"}); + // Get all values, in order, for these fields + StringBuilder comparisonValueBuilder = new StringBuilder(); + for (String field : comparisonFields) { + try { + String[] fieldParts = MetadataUtilities.parseCompoundForm(field); + for (MetadataValue metadataValue : itemService.getMetadata(item, + fieldParts[0], fieldParts[1], (fieldParts.length > 2 ? fieldParts[3] : null), Item.ANY)) { + // Add each found value to the string builder (null values interpreted as empty) + if (metadataValue != null) { + comparisonValueBuilder.append(metadataValue.getValue()); + } + } + } catch (ParseException e) { + // Log error and continue processing + log.error("Error parsing configured field for deduplication comparison: {}", field); + } + } + + // Build comparison value + String comparisonValue = comparisonValueBuilder.toString(); + + // Construct query + if (StringUtils.isNotBlank(comparisonValue)) { + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { + comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); } - if (configurationService.getBooleanProperty("duplicate.signature.normalise.whitespace")) { - signature = signature.replaceAll("\\s+", ""); + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { + comparisonValue = comparisonValue.replaceAll("\\s+", ""); } // Get search service SearchService searchService = SearchUtils.getSearchService(); // Escape reserved solr characters - signature = searchService.escapeQueryChars(signature); + comparisonValue = searchService.escapeQueryChars(comparisonValue); - // Construct discovery query based on signature + // Construct discovery query based on comparison value DiscoverQuery discoverQuery = new DiscoverQuery(); - discoverQuery.setQuery("(" + configurationService.getProperty("duplicate.signature.field", - "item_signature") + ":" + signature + "~" + - configurationService.getIntProperty("duplicate.signature.distance", 0) + ")"); + discoverQuery.setQuery("(" + configurationService.getProperty("duplicate.comparison.solr.field", + "deduplication_keyword") + ":" + comparisonValue + "~" + + configurationService.getIntProperty("duplicate.comparison.distance", 0) + ")"); // Add filter queries for the resource type discoverQuery.addFilterQueries("(search.resourcetype:Item OR " + "search.resourcetype:WorkspaceItem OR " + @@ -290,7 +318,7 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search // Perform search and populate list with results, update total count integer return searchService.search(context, discoverQuery); } else { - log.warn("empty item signature, ignoring for duplicate search"); + log.warn("empty item comparison value, ignoring for duplicate search"); } // Return null by default diff --git a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java index 92e41bf36d42..3bece023dcc6 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java @@ -64,7 +64,7 @@ Optional validateDuplicateResult(Context context, IndexableO /** * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) - * and a single-term "signature" constructed out of the item title + * and a single-term "comparison value" constructed out of the item title * * @param context DSpace context * @param item The item to check diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java similarity index 66% rename from dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java rename to dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java index ef61ec0e2e5e..c44d0384b960 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexItemSignaturePlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java @@ -20,20 +20,20 @@ import org.springframework.beans.factory.annotation.Autowired; /** - * Indexes item "signatures" used for duplicate detection + * Indexes special normalised values used for comparing items, to be used in e.g. basic duplicate detection * * @author Kim Shepherd */ -public class SolrServiceIndexItemSignaturePlugin implements SolrServiceIndexPlugin { +public class SolrServiceIndexComparisonPlugin implements SolrServiceIndexPlugin { @Autowired ConfigurationService configurationService; private static final Logger log = org.apache.logging.log4j.LogManager - .getLogger(SolrServiceIndexItemSignaturePlugin.class); + .getLogger(SolrServiceIndexComparisonPlugin.class); /** - * Index the normalised name of the item to a _signature field + * Index the normalised name of the item to a solr field * * @param context DSpace context * @param idxObj the indexable item @@ -47,13 +47,13 @@ public void additionalIndex(Context context, IndexableObject idxObj, SolrInputDo } // Otherwise, continue with item indexing. Handle items, workflow items, and workspace items if (idxObj instanceof IndexableItem) { - indexItemSignature(context, ((IndexableItem) idxObj).getIndexedObject(), document); + indexItemComparisonValue(context, ((IndexableItem) idxObj).getIndexedObject(), document); } else if (idxObj instanceof IndexableWorkspaceItem) { WorkspaceItem workspaceItem = ((IndexableWorkspaceItem) idxObj).getIndexedObject(); if (workspaceItem != null) { Item item = workspaceItem.getItem(); if (item != null) { - indexItemSignature(context, item, document); + indexItemComparisonValue(context, item, document); } } } else if (idxObj instanceof IndexableWorkflowItem) { @@ -61,32 +61,33 @@ public void additionalIndex(Context context, IndexableObject idxObj, SolrInputDo if (workflowItem != null) { Item item = workflowItem.getItem(); if (item != null) { - indexItemSignature(context, item, document); + indexItemComparisonValue(context, item, document); } } } } /** - * Add the actual signature field to the given solr doc + * Add the actual comparison value field to the given solr doc * * @param context DSpace context * @param item DSpace item * @param document Solr document */ - private void indexItemSignature(Context context, Item item, SolrInputDocument document) { + private void indexItemComparisonValue(Context context, Item item, SolrInputDocument document) { if (item != null) { - // Construct signature object - String signature = item.getName(); - if (signature != null) { - if (configurationService.getBooleanProperty("duplicate.signature.normalise.lowercase")) { - signature = signature.toLowerCase(context.getCurrentLocale()); + // Construct comparisonValue object + String comparisonValue = item.getName(); + if (comparisonValue != null) { + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { + comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); } - if (configurationService.getBooleanProperty("duplicate.signature.normalise.whitespace")) { - signature = signature.replaceAll("\\s+", ""); + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { + comparisonValue = comparisonValue.replaceAll("\\s+", ""); } // Add the field to the document - document.addField("item_signature", signature); + document.addField(configurationService.getProperty("duplicate.comparison.solr.field", + "deduplication_keyword"), comparisonValue); } } } diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index 375697534046..30f4dedd9ebd 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -59,12 +59,13 @@ public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase @Before public void setUp() throws Exception { super.setUp(); - // Temporarily enable duplicate detection and set signature distance to 1 + // Temporarily enable duplicate detection and set comparison distance to 1 configurationService.setProperty("duplicate.enable", true); - configurationService.setProperty("duplicate.signature.distance", 1); - configurationService.setProperty("duplicate.signature.normalise.lowercase", true); - configurationService.setProperty("duplicate.signature.normalise.whitespace", true); - configurationService.setProperty("duplicate.signature.field", "item_signature"); + configurationService.setProperty("duplicate.comparison.distance", 1); + configurationService.setProperty("duplicate.comparison.normalise.lowercase", true); + configurationService.setProperty("duplicate.comparison.normalise.whitespace", true); + configurationService.setProperty("duplicate.comparison.solr.field", "deduplication_keyword"); + configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); configurationService.setProperty("duplicate.preview.metadata.field", new String[]{"dc.date.issued", "dc.subject"}); @@ -296,7 +297,7 @@ public void testSearchDuplicatesWithVeryLongTitle() throws Exception { public void testSearchDuplicatesExactMatch() throws Exception { // Set distance to 0 manually - configurationService.setProperty("duplicate.signature.distance", 0); + configurationService.setProperty("duplicate.comparison.distance", 0); Item item8 = ItemBuilder.createItem(context, col) .withTitle("This integration test will prove that the edit distance of 0 results in an exact match") diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg index b2c26b7a4728..3eb6c3ab8857 100644 --- a/dspace/config/modules/duplicate-detection.cfg +++ b/dspace/config/modules/duplicate-detection.cfg @@ -3,25 +3,28 @@ ## # Enable this feature. Default: false -#duplicate.enable = true +duplicate.enable = true ## # Normalisation rules. If these are changed, a full index-discovery re-index should be performed to force -# stored signatures to be updated. +# stored comparison values to be updated. ## -# Should the signature be normalised for case? Default: true -duplicate.signature.normalise.lowercase = true -# Should the signature be normalised for whitespace? Default: true +# Should the comparison query/index value be normalised for case? Default: true +duplicate.comparison.normalise.lowercase = true +# Should the comparison query/index value be normalised for whitespace? Default: true # (highly recommended - if this is *not* used, some other placeholder needs to be used to force the value # to be treated as a single term by Lucene) -duplicate.signature.normalise.whitespace = true +duplicate.comparison.normalise.whitespace = true # Levenshtein edit distance. Default:0 (eg. Test will match Txst but not Txxt) # Valid distances are 0, 1, 2 as per Solr documentation. Note that this distance is applied *after* normalisation # rules above, so capitalisation and whitespace will not count as 'edits' if you have the above rules enabled. -duplicate.signature.distance = 0 -# Solr field used for storing the item signature -duplicate.signature.field = item_signature +duplicate.comparison.distance = 0 +# DSpace metadata field(s) to use. They will be concatenated before normalisation. +# Repeat the configuration property for multiple fields. +duplicate.comparison.metadata.field = dc.title +# Solr field used for storing the indexed comparison string +duplicate.comparison.solr.field = deduplication_keyword ## Metadata to populate in the potential duplicate duplicate.preview.metadata.field = dc.title diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index ce9b4a9d370f..08e015355834 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -44,8 +44,8 @@ - - + + From 0a3713de0ef2b6636f0ef408aff7381aeafa15d9 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 26 Feb 2024 11:36:58 +1300 Subject: [PATCH 23/33] [TLC-674] Consolidate shared 'build comparison value' code, ensure multivalues pass --- .../DuplicateDetectionServiceImpl.java | 93 +++++++++++++------ .../service/DuplicateDetectionService.java | 9 ++ .../SolrServiceIndexComparisonPlugin.java | 19 ++-- .../content/DuplicateDetectionTest.java | 54 +++++++++++ .../app/rest/DuplicateDetectionRestIT.java | 13 +-- dspace/config/modules/duplicate-detection.cfg | 4 +- 6 files changed, 146 insertions(+), 46 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index ecd0f7d8499c..e2db2d0b72d9 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -7,6 +7,9 @@ */ package org.dspace.content; +import static java.util.Comparator.comparing; +import static java.util.Comparator.naturalOrder; + import java.sql.SQLException; import java.text.ParseException; import java.util.ArrayList; @@ -264,39 +267,11 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search throw new IllegalArgumentException("Cannot get duplicates for template item"); } - // Get configured fields to use for comparison values - String[] comparisonFields = configurationService.getArrayProperty("duplication.comparison.metadata.field", - new String[]{"dc.title"}); - // Get all values, in order, for these fields - StringBuilder comparisonValueBuilder = new StringBuilder(); - for (String field : comparisonFields) { - try { - String[] fieldParts = MetadataUtilities.parseCompoundForm(field); - for (MetadataValue metadataValue : itemService.getMetadata(item, - fieldParts[0], fieldParts[1], (fieldParts.length > 2 ? fieldParts[3] : null), Item.ANY)) { - // Add each found value to the string builder (null values interpreted as empty) - if (metadataValue != null) { - comparisonValueBuilder.append(metadataValue.getValue()); - } - } - } catch (ParseException e) { - // Log error and continue processing - log.error("Error parsing configured field for deduplication comparison: {}", field); - } - } - - // Build comparison value - String comparisonValue = comparisonValueBuilder.toString(); + // Build normalised comparison value + String comparisonValue = buildComparisonValue(context, item); // Construct query if (StringUtils.isNotBlank(comparisonValue)) { - if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { - comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); - } - if (configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { - comparisonValue = comparisonValue.replaceAll("\\s+", ""); - } - // Get search service SearchService searchService = SearchUtils.getSearchService(); @@ -325,4 +300,62 @@ public DiscoverResult searchDuplicates(Context context, Item item) throws Search return null; } + + /** + * Build a comparison value string made up of values of configured fields, used when indexing and querying + * items for deduplication + * @param context DSpace context + * @param item The DSpace item + * @return a constructed, normalised string + */ + @Override + public String buildComparisonValue(Context context, Item item) { + // Get configured fields to use for comparison values + String[] comparisonFields = configurationService.getArrayProperty("duplicate.comparison.metadata.field", + new String[]{"dc.title"}); + // Get all values, in order, for these fields + StringBuilder comparisonValueBuilder = new StringBuilder(); + String comparisonValue = null; + for (String field : comparisonFields) { + try { + // Get field components + String[] fieldParts = MetadataUtilities.parseCompoundForm(field); + // Get all values of this field + List metadataValues = itemService.getMetadata(item, + fieldParts[0], fieldParts[1], (fieldParts.length > 2 ? fieldParts[2] : null), Item.ANY); + // Sort metadata values by text value, so their 'position' in db doesn't matter for dedupe purposes + metadataValues.sort(comparing(MetadataValue::getValue, naturalOrder())); + for (MetadataValue metadataValue : metadataValues) { + // Add each found value to the string builder (null values interpreted as empty) + if (metadataValue != null) { + comparisonValueBuilder.append(metadataValue.getValue()); + } + } + } catch (ParseException e) { + // Log error and continue processing + log.error("Error parsing configured field for deduplication comparison: item={}, field={}", + item.getID(), field); + } catch (NullPointerException e) { + log.error("Null pointer encountered, probably during metadata value sort, when deduping:" + + "item={}, field={}", item.getID(), field); + } + } + + // Build string + comparisonValue = comparisonValueBuilder.toString(); + + // Normalise according to configuration + if (!StringUtils.isBlank(comparisonValue)) { + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { + comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); + } + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { + comparisonValue = comparisonValue.replaceAll("\\s+", ""); + } + } + + // Return comparison value + return comparisonValue; + } + } diff --git a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java index 3bece023dcc6..1f0d3495b1d6 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java @@ -73,4 +73,13 @@ Optional validateDuplicateResult(Context context, IndexableO * @throws SearchServiceException if an error was encountered during the discovery search itself. */ DiscoverResult searchDuplicates(Context context, Item item) throws SearchServiceException; + + /** + * Build a comparison value string made up of values of configured fields, used when indexing and querying + * items for deduplication + * @param context DSpace context + * @param item The DSpace item + * @return a constructed, normalised string + */ + String buildComparisonValue(Context context, Item item); } diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java index c44d0384b960..001d1c51a4dd 100644 --- a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java @@ -9,8 +9,11 @@ import org.apache.logging.log4j.Logger; import org.apache.solr.common.SolrInputDocument; +import org.apache.tika.utils.StringUtils; import org.dspace.content.Item; import org.dspace.content.WorkspaceItem; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; import org.dspace.core.Context; import org.dspace.discovery.indexobject.IndexableItem; import org.dspace.discovery.indexobject.IndexableWorkflowItem; @@ -28,6 +31,10 @@ public class SolrServiceIndexComparisonPlugin implements SolrServiceIndexPlugin @Autowired ConfigurationService configurationService; + @Autowired + ItemService itemService; + @Autowired + DuplicateDetectionService duplicateDetectionService; private static final Logger log = org.apache.logging.log4j.LogManager .getLogger(SolrServiceIndexComparisonPlugin.class); @@ -76,15 +83,9 @@ public void additionalIndex(Context context, IndexableObject idxObj, SolrInputDo */ private void indexItemComparisonValue(Context context, Item item, SolrInputDocument document) { if (item != null) { - // Construct comparisonValue object - String comparisonValue = item.getName(); - if (comparisonValue != null) { - if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { - comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); - } - if (configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { - comparisonValue = comparisonValue.replaceAll("\\s+", ""); - } + // Build normalised comparison value and add to the document + String comparisonValue = duplicateDetectionService.buildComparisonValue(context, item); + if (!StringUtils.isBlank(comparisonValue)) { // Add the field to the document document.addField(configurationService.getProperty("duplicate.comparison.solr.field", "deduplication_keyword"), comparisonValue); diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java index 30f4dedd9ebd..0b6c909f03e8 100644 --- a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -209,6 +209,8 @@ public void testSearchDuplicates() throws Exception { @Test public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { + + Item item4 = ItemBuilder.createItem(context, col) .withTitle("Testing: An Important Development Step") .withIssueDate(item1IssueDate) @@ -242,6 +244,8 @@ public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { } + //configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); + /** * Test that a search for a very long title which also contains reserved characters * @@ -371,6 +375,56 @@ public void testSearchDuplicatesInWorkflow() throws Exception { workflowItem2.getItem().getID(), potentialDuplicates.get(0).getUuid()); } + /** + * Test that a search for getPotentialDuplicates with multiple fields configured as comparison value + * gives the expected results + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithMultipleFields() throws Exception { + // Set configure to use both title and author fields + configurationService.setProperty("duplicate.comparison.metadata.field", + new String[]{"dc.title", "dc.contributor.author"}); + + Item item10 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate(item1IssueDate) + .withAuthor("Surname, F.") + .withSubject(item1Subject) + .build(); + Item item11 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate("2012-10-17") + .withAuthor("Surname, F.") + .withSubject("ExtraEntry 2") + .build(); + + Item item12 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate("2012-10-17") + .withAuthor("Lastname, First.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 10 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item10); + } catch (SearchServiceException e) { + fail("Duplicate search with title and author (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item10 (title + author) should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 11 since item 12 has a different author (but hte same title + assertEquals("Item 11 should be be the detected duplicate", + item11.getID(), potentialDuplicates.get(0).getUuid()); + } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index f5a8477f66b8..79d0baddd44b 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -97,12 +97,13 @@ public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest public void setUp() throws Exception { super.setUp(); - // Temporarily enable duplicate detection and set signature distance to 1 + // Temporarily enable duplicate detection and set comparison value distance to 1 configurationService.setProperty("duplicate.enable", true); - configurationService.setProperty("duplicate.signature.distance", 1); - configurationService.setProperty("duplicate.signature.normalise.lowercase", true); - configurationService.setProperty("duplicate.signature.normalise.whitespace", true); - configurationService.setProperty("duplicate.signature.field", "item_signature"); + configurationService.setProperty("duplicate.comparison.distance", 1); + configurationService.setProperty("duplicate.comparison.normalise.lowercase", true); + configurationService.setProperty("duplicate.comparison.normalise.whitespace", true); + configurationService.setProperty("duplicate.comparison.solr.field", "deduplication_keyword"); + configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); configurationService.setProperty("duplicate.preview.metadata.field", new String[]{"dc.date.issued", "dc.subject"}); @@ -196,7 +197,7 @@ public void searchDuplicatesBySearchMethodTest() throws Exception { } /** - * Duplicates should be accessible via section data. Data should update as item signature (title) is changed. + * Duplicates should be accessible via section data. Data should update as comparison value (title) is changed. * * @throws Exception */ diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg index 3eb6c3ab8857..665790384e07 100644 --- a/dspace/config/modules/duplicate-detection.cfg +++ b/dspace/config/modules/duplicate-detection.cfg @@ -3,7 +3,7 @@ ## # Enable this feature. Default: false -duplicate.enable = true +#duplicate.enable = true ## # Normalisation rules. If these are changed, a full index-discovery re-index should be performed to force @@ -23,6 +23,8 @@ duplicate.comparison.distance = 0 # DSpace metadata field(s) to use. They will be concatenated before normalisation. # Repeat the configuration property for multiple fields. duplicate.comparison.metadata.field = dc.title +#duplicate.comparison.metadata.field = dc.contributor.author + # Solr field used for storing the indexed comparison string duplicate.comparison.solr.field = deduplication_keyword From bdf608a0f0efabfb021318be0c1108017cbfbac5 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 26 Feb 2024 12:01:26 +1300 Subject: [PATCH 24/33] [TLC-674] Refactor REST controller for Basic Duplicate Detection --- .../DuplicateDetectionRestController.java | 134 ++++++++++++++++++ .../rest/model/PotentialDuplicateRest.java | 2 + .../org/dspace/app/rest/model/RestModel.java | 1 + .../app/rest/DuplicateDetectionRestIT.java | 6 +- 4 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java new file mode 100644 index 000000000000..f21773b90647 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java @@ -0,0 +1,134 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import java.sql.SQLException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.converter.ConverterService; +import org.dspace.app.rest.model.PotentialDuplicateRest; +import org.dspace.app.rest.model.hateoas.PotentialDuplicateResource; +import org.dspace.app.rest.utils.ContextUtil; +import org.dspace.app.rest.utils.Utils; +import org.dspace.content.Item; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Context; +import org.dspace.discovery.SearchServiceException; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; +import org.springframework.data.web.PagedResourcesAssembler; +import org.springframework.hateoas.Link; +import org.springframework.hateoas.PagedModel; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +/** + * The controller for the api/duplicates endpoint, which handles requests for finding + * potential duplicates + * + * @author Kim Shepherd + */ +@RestController +@RequestMapping("/api/" + PotentialDuplicateRest.CATEGORY) +public class DuplicateDetectionRestController implements InitializingBean { + + private static final Logger log = LogManager.getLogger(DuplicateDetectionRestController.class); + + @Autowired + protected Utils utils; + @Autowired + private DiscoverableEndpointsService discoverableEndpointsService; + @Autowired + private ItemService itemService; + @Autowired + private DuplicateDetectionService duplicateDetectionService; + @Autowired + private ConverterService converter; + + @Override + public void afterPropertiesSet() throws Exception { + discoverableEndpointsService + .register(this, Arrays.asList(Link.of("/api/" + PotentialDuplicateRest.CATEGORY, + PotentialDuplicateRest.CATEGORY))); + } + + /** + * Return a paged list of potential duplicate matches for the given item ID. This may be an item wrapped in + * an in-progress item wrapper like workspace or workflow, as long as the current user has READ access to this item. + * Results from the service search method will only contain matches that lead to items which are readable by + * the current user. + * + * @param uuid The item UUID to search + * @param page Pagination options + * @param assembler The paged resources assembler to construct the paged model + * @return Paged list of potential duplicates + * @throws Exception + */ + @RequestMapping(method = RequestMethod.GET, value = "/search") + @PreAuthorize("hasPermission(#uuid, 'ITEM', 'READ')") + public PagedModel searchPotentialDuplicates( + @RequestParam(name = "uuid", required = true) UUID uuid, Pageable page, PagedResourcesAssembler assembler) { + + // Instantiate object to represent this item + Item item; + // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list + List potentialDuplicates = new LinkedList<>(); + // Instantiate total count + int total = 0; + // Obtain context + Context context = ContextUtil.obtainCurrentRequestContext(); + // Get pagination + Pageable pageable = utils.getPageable(page); + + // Try to get item based on UUID parameter + try { + item = itemService.find(context, uuid); + } catch (SQLException e) { + throw new ResourceNotFoundException(e.getMessage()); + } + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("No such item: " + uuid); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + try { + // Search for the list of potential duplicates + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); + } catch (SearchServiceException e) { + // If the search fails, log an error and return an empty list rather than throwing a fatal error + log.error("Search service error retrieving duplicates: {}", e.getMessage()); + } + + // Construct rest and resource pages + Page restPage = converter.toRestPage(potentialDuplicates, pageable, total, + utils.obtainProjection()); + Page resourcePage = restPage.map(potentialDuplicateRest -> + new PotentialDuplicateResource(potentialDuplicateRest)); + + // Return the list of items along with pagination info and max results, assembled as PagedModel + return assembler.toModel(resourcePage); + + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java index 1b356ffa01f9..6ae002771526 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java @@ -17,6 +17,8 @@ */ public class PotentialDuplicateRest extends RestAddressableModel { + public static final String CATEGORY = RestModel.DUPLICATES; + /** * Type of REST resource */ diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java index b575ddb59815..72aae3b25a7e 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java @@ -24,6 +24,7 @@ public interface RestModel extends Serializable { public static final String CORE = "core"; public static final String EPERSON = "eperson"; public static final String DISCOVER = "discover"; + public static final String DUPLICATES = "duplicates"; public static final String CONFIGURATION = "config"; public static final String INTEGRATION = "integration"; public static final String STATISTICS = "statistics"; diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 79d0baddd44b..5f480e50ed9a 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -174,7 +174,7 @@ public void searchDuplicatesBySearchMethodTest() throws Exception { context.restoreAuthSystemState(); - getClient(token).perform(get("/api/core/items/search/findDuplicates?uuid=" + item1.getID())) + getClient(token).perform(get("/api/duplicates/search?uuid=" + item1.getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array @@ -391,7 +391,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/core/items/search/findDuplicates?uuid=" + getClient(reviewerToken).perform(get("/api/duplicates/search?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) @@ -403,7 +403,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) - .perform(get("/api/core/items/search/findDuplicates?uuid=" + .perform(get("/api/duplicates/search?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) From 15012790c4a97b9669531893ab15380114088270 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 10:33:18 +1300 Subject: [PATCH 25/33] [TLC-674] Duplicate fixes per review feedback --- .../DuplicateDetectionServiceImpl.java | 9 ++-- .../DuplicateDetectionRestController.java | 2 + .../rest/repository/ItemRestRepository.java | 54 ------------------- .../app/rest/DuplicateDetectionRestIT.java | 6 +-- dspace/config/item-submission.xml | 3 +- 5 files changed, 10 insertions(+), 64 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java index e2db2d0b72d9..9f52b7b63ac3 100644 --- a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -173,13 +173,13 @@ public Optional validateDuplicateResult(Context context, Ind // Result item must not be null, a template item, or actually identical to the original if (resultItem == null) { - log.error("skipping null item in duplicate search results"); + log.warn("skipping null item in duplicate search results"); return Optional.empty(); } else if (resultItem.getTemplateItemOf() != null) { - log.error("skipping template item in duplicate search results"); + log.info("skipping template item in duplicate search results, item={}", resultItem.getID()); return Optional.empty(); } else if (resultItem.getID().equals(original.getID())) { - log.warn("skipping a duplicate search result for the original item"); + log.info("skipping a duplicate search result for the original item", resultItem.getID()); return Optional.empty(); } @@ -239,7 +239,8 @@ public Optional validateDuplicateResult(Context context, Ind // Admins can always read, return immediately return Optional.of(potentialDuplicate); } else { - log.error("No valid permission to return this item"); + log.info("Potential duplicate result is not readable by the current user, skipping item={}", + potentialDuplicate.getUuid()); } // By default, return an empty result diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java index f21773b90647..beab98ca3be5 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java @@ -28,6 +28,7 @@ import org.dspace.discovery.SearchServiceException; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.rest.webmvc.ResourceNotFoundException; @@ -47,6 +48,7 @@ * @author Kim Shepherd */ @RestController +@ConditionalOnProperty("duplicate.enable") @RequestMapping("/api/" + PotentialDuplicateRest.CATEGORY) public class DuplicateDetectionRestController implements InitializingBean { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java index b5926d1defe3..6ca4ae0d75a9 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java @@ -11,7 +11,6 @@ import java.sql.SQLException; import java.util.ArrayList; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Objects; import java.util.UUID; @@ -23,15 +22,12 @@ import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.dspace.app.rest.Parameter; -import org.dspace.app.rest.SearchRestMethod; import org.dspace.app.rest.converter.MetadataConverter; import org.dspace.app.rest.exception.DSpaceBadRequestException; import org.dspace.app.rest.exception.RepositoryMethodNotImplementedException; import org.dspace.app.rest.exception.UnprocessableEntityException; import org.dspace.app.rest.model.BundleRest; import org.dspace.app.rest.model.ItemRest; -import org.dspace.app.rest.model.PotentialDuplicateRest; import org.dspace.app.rest.model.patch.Patch; import org.dspace.app.rest.repository.handler.service.UriListHandlerService; import org.dspace.authorize.AuthorizeException; @@ -49,9 +45,7 @@ import org.dspace.content.service.RelationshipService; import org.dspace.content.service.RelationshipTypeService; import org.dspace.content.service.WorkspaceItemService; -import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Context; -import org.dspace.discovery.SearchServiceException; import org.dspace.util.UUIDUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.data.domain.Page; @@ -376,52 +370,4 @@ protected ItemRest createAndReturn(Context context, List stringList) return converter.toRest(item, utils.obtainProjection()); } - /** - * Search method to find and return a paged list of duplicates, if the current user has authorization - * - * @param itemUuid the UUID of the item for which we are searching duplicates - * @param optionalPageable pagination options - * @return pagination potential duplicate results - */ - @PreAuthorize("hasPermission(#itemUuid, 'ITEM', 'READ')") - @SearchRestMethod(name = "findDuplicates") - public Page findDuplicates(@Parameter(value = "uuid", required = true) UUID itemUuid, - Pageable optionalPageable) { - // Instantiate object to represent this item - Item item; - // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list - List potentialDuplicates = new LinkedList<>(); - // Instantiate total count - int total = 0; - // Obtain context - Context context = obtainContext(); - // Get pagination - Pageable pageable = utils.getPageable(optionalPageable); - - // Try to get item based on UUID parameter - try { - item = itemService.find(context, itemUuid); - } catch (SQLException e) { - throw new ResourceNotFoundException(e.getMessage()); - } - - // If the item is null or otherwise invalid (template, etc) then throw an appropriate error - if (item == null) { - throw new ResourceNotFoundException("No such item: " + itemUuid); - } - if (item.getTemplateItemOf() != null) { - throw new IllegalArgumentException("Cannot get duplicates for template item"); - } - - try { - // Search for the list of potential duplicates - potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); - } catch (SearchServiceException e) { - // If the search fails, log an error and return an empty list rather than throwing a fatal error - log.error("Search service error retrieving duplicates: {}", e.getMessage()); - } - - // Return the list of items along with pagination info and max results - return converter.toRestPage(potentialDuplicates, pageable, total, utils.obtainProjection()); - } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 5f480e50ed9a..245e286b30a6 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -19,8 +19,6 @@ import java.util.Map; import javax.ws.rs.core.MediaType; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.dspace.app.rest.model.patch.Operation; import org.dspace.app.rest.model.patch.ReplaceOperation; import org.dspace.app.rest.test.AbstractControllerIntegrationTest; @@ -91,8 +89,6 @@ public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest private final String item2IssueDate = "2012-10-17"; private EPerson anotherEPerson; - private static Logger log = LogManager.getLogger(); - @Override public void setUp() throws Exception { super.setUp(); @@ -166,7 +162,7 @@ public void searchDuplicatesBySearchMethodTest() throws Exception { .withSubject("ExtraEntry 3") .withSubmitter(eperson) .build(); - log.error("EPERSON FULL NAME IS " + eperson.getFullName()); + XmlWorkflowItem wfi1 = workflowService.start(context, workspaceItem1); XmlWorkflowItem wfi2 = workflowService.start(context, workspaceItem2); Item item1 = wfi1.getItem(); diff --git a/dspace/config/item-submission.xml b/dspace/config/item-submission.xml index 6147c61cbafd..86ce154d6e30 100644 --- a/dspace/config/item-submission.xml +++ b/dspace/config/item-submission.xml @@ -284,7 +284,7 @@ - + @@ -294,6 +294,7 @@ + From ac590d73d918cced202c1a651c28b287eb45246b Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 10:48:45 +1300 Subject: [PATCH 26/33] [TLC-674] Duplicate IT fixes per review feedback --- .../app/rest/DuplicateDetectionRestIT.java | 37 +++++-------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 245e286b30a6..69de90339014 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -25,6 +25,7 @@ import org.dspace.authorize.service.AuthorizeService; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.EPersonBuilder; import org.dspace.builder.ItemBuilder; import org.dspace.builder.WorkflowItemBuilder; import org.dspace.builder.WorkspaceItemBuilder; @@ -115,19 +116,15 @@ public void setUp() throws Exception { .build(); eperson.setFirstName(context, "first"); eperson.setLastName(context, "last"); - anotherEPerson = ePersonService.findByEmail(context, "test-another-user@email.com"); - if (anotherEPerson == null) { - anotherEPerson = ePersonService.create(context); - anotherEPerson.setFirstName(context, "first"); - anotherEPerson.setLastName(context, "last"); - anotherEPerson.setEmail("test-another-user@email.com"); - anotherEPerson.setCanLogIn(true); - anotherEPerson.setLanguage(context, I18nUtil.getDefaultLocale().getLanguage()); - ePersonService.setPassword(anotherEPerson, password); - // actually save the eperson to unit testing DB - ePersonService.update(context, anotherEPerson); - } - //context.setDispatcher("noindex"); + + anotherEPerson = EPersonBuilder.createEPerson(context) + .withEmail("test-another-user@email.com") + .withNameInMetadata("first", "last") + .withCanLogin(true) + .withLanguage(I18nUtil.getDefaultLocale().getLanguage()) + .withPassword(password) + .build(); + context.restoreAuthSystemState(); } @@ -407,18 +404,4 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { .andExpect(jsonPath("$._embedded.potentialDuplicateResources").doesNotExist()); } - @Override - public void destroy() throws Exception { - if (anotherEPerson != null) { - try { - context.turnOffAuthorisationSystem(); - ePersonService.delete(context, anotherEPerson); - context.restoreAuthSystemState(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - super.destroy(); - } - } From 666581b17b0da6c59c8a42c59d15e8e206cc97c8 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 11:06:44 +1300 Subject: [PATCH 27/33] [TLC-674] Duplicate IT fixes per review feedback --- .../dspace/app/rest/DuplicateDetectionRestController.java | 7 ++++--- .../org/dspace/app/rest/model/PotentialDuplicateRest.java | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java index beab98ca3be5..ec5d238dd475 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java @@ -49,7 +49,7 @@ */ @RestController @ConditionalOnProperty("duplicate.enable") -@RequestMapping("/api/" + PotentialDuplicateRest.CATEGORY) +@RequestMapping("/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME) public class DuplicateDetectionRestController implements InitializingBean { private static final Logger log = LogManager.getLogger(DuplicateDetectionRestController.class); @@ -68,8 +68,9 @@ public class DuplicateDetectionRestController implements InitializingBean { @Override public void afterPropertiesSet() throws Exception { discoverableEndpointsService - .register(this, Arrays.asList(Link.of("/api/" + PotentialDuplicateRest.CATEGORY, - PotentialDuplicateRest.CATEGORY))); + .register(this, Arrays.asList(Link.of( + "/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME, + PotentialDuplicateRest.NAME))); } /** diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java index 6ae002771526..9d706a8d5791 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java @@ -17,7 +17,8 @@ */ public class PotentialDuplicateRest extends RestAddressableModel { - public static final String CATEGORY = RestModel.DUPLICATES; + public static final String CATEGORY = RestModel.SUBMISSION; + public static final String NAME = RestModel.DUPLICATES; /** * Type of REST resource From c28b9e8c9975a9444b3b895296492fc32020de89 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 14:11:26 +1300 Subject: [PATCH 28/33] [TLC-674] Include duplicate.enable in REST config --- dspace/config/modules/rest.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index ef4f985f0d78..3bb620510e59 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -58,3 +58,4 @@ rest.properties.exposed = ldn.enabled rest.properties.exposed = ldn.notify.inbox rest.properties.exposed = handle.canonical.prefix rest.properties.exposed = contentreport.enable +rest.properties.exposed = duplicate.enable From 54d617da91d67352fcedffbf1a3abeba367fa778 Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 14:22:20 +1300 Subject: [PATCH 29/33] [TLC-674] Disable duplicates section by default --- dspace/config/item-submission.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dspace/config/item-submission.xml b/dspace/config/item-submission.xml index 86ce154d6e30..dc3599016a01 100644 --- a/dspace/config/item-submission.xml +++ b/dspace/config/item-submission.xml @@ -294,7 +294,7 @@ - + From ecb0ce60e085084588893cc3ede64bf7f62f4c5f Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Wed, 28 Feb 2024 15:13:20 +1300 Subject: [PATCH 30/33] [TLC-674] Update DuplicateDetectionRestIT for new API path --- .../java/org/dspace/app/rest/DuplicateDetectionRestIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index 69de90339014..adc3237a735d 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -167,7 +167,7 @@ public void searchDuplicatesBySearchMethodTest() throws Exception { context.restoreAuthSystemState(); - getClient(token).perform(get("/api/duplicates/search?uuid=" + item1.getID())) + getClient(token).perform(get("/api/submission/duplicates/search?uuid=" + item1.getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array @@ -384,7 +384,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/duplicates/search?uuid=" + getClient(reviewerToken).perform(get("/api/submission/duplicates/search?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) @@ -396,7 +396,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) - .perform(get("/api/duplicates/search?uuid=" + .perform(get("/api/submission/duplicates/search?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) From 2168f664029777e202dff22b80414f1cc60c0ffe Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Thu, 29 Feb 2024 10:11:45 +1300 Subject: [PATCH 31/33] [TLC-674] Duplicate detection tidy comments/services --- .../dspace/app/rest/repository/ItemRestRepository.java | 4 ---- dspace/config/item-submission.xml | 10 +++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java index 6ca4ae0d75a9..b0f3a8c17d96 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java @@ -39,7 +39,6 @@ import org.dspace.content.WorkspaceItem; import org.dspace.content.service.BundleService; import org.dspace.content.service.CollectionService; -import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.InstallItemService; import org.dspace.content.service.ItemService; import org.dspace.content.service.RelationshipService; @@ -96,9 +95,6 @@ public class ItemRestRepository extends DSpaceObjectRestRepository - - @@ -294,7 +290,11 @@ - + + + From 456182a5714058c248df7881f2fda3bec1c028cc Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 4 Mar 2024 11:38:20 +1300 Subject: [PATCH 32/33] [TLC-674] Refactor duplicate detection controller to REST repository --- .../DuplicateRestRepository.java} | 129 ++++++++++++------ 1 file changed, 89 insertions(+), 40 deletions(-) rename dspace-server-webapp/src/main/java/org/dspace/app/rest/{DuplicateDetectionRestController.java => repository/DuplicateRestRepository.java} (51%) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java similarity index 51% rename from dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java rename to dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java index ec5d238dd475..8730211a5eba 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/DuplicateDetectionRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java @@ -5,7 +5,7 @@ * * http://www.dspace.org/license/ */ -package org.dspace.app.rest; +package org.dspace.app.rest.repository; import java.sql.SQLException; import java.util.Arrays; @@ -15,11 +15,12 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.dspace.app.rest.converter.ConverterService; +import org.dspace.app.rest.DiscoverableEndpointsService; +import org.dspace.app.rest.Parameter; +import org.dspace.app.rest.SearchRestMethod; +import org.dspace.app.rest.exception.RepositoryMethodNotImplementedException; import org.dspace.app.rest.model.PotentialDuplicateRest; -import org.dspace.app.rest.model.hateoas.PotentialDuplicateResource; import org.dspace.app.rest.utils.ContextUtil; -import org.dspace.app.rest.utils.Utils; import org.dspace.content.Item; import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.ItemService; @@ -32,45 +33,90 @@ import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.rest.webmvc.ResourceNotFoundException; -import org.springframework.data.web.PagedResourcesAssembler; import org.springframework.hateoas.Link; -import org.springframework.hateoas.PagedModel; import org.springframework.security.access.prepost.PreAuthorize; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestMethod; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.bind.annotation.RestController; +import org.springframework.stereotype.Component; /** - * The controller for the api/duplicates endpoint, which handles requests for finding - * potential duplicates + * The REST repository for the api/submission/duplicates endpoint, which handles requests for finding + * potential duplicates of a given item (archived or in-progress). + * + * Find one and find all are not implemented as actual REST methods because a duplicate is the result + * of comparing an item with other indexed items, not an object that can be referenced by some kind of ID, but + * we must at least implement the Java methods here in order to extend DSpaceRestRepository and implement + * SearchRestMethods. * * @author Kim Shepherd */ -@RestController @ConditionalOnProperty("duplicate.enable") -@RequestMapping("/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME) -public class DuplicateDetectionRestController implements InitializingBean { - - private static final Logger log = LogManager.getLogger(DuplicateDetectionRestController.class); +@Component(PotentialDuplicateRest.CATEGORY + "." + PotentialDuplicateRest.NAME) +public class DuplicateRestRepository extends DSpaceRestRepository + implements InitializingBean { + /** + * Discoverable endpoints service + */ @Autowired - protected Utils utils; - @Autowired - private DiscoverableEndpointsService discoverableEndpointsService; - @Autowired - private ItemService itemService; + DiscoverableEndpointsService discoverableEndpointsService; + + /** + * Duplicate detection service + */ @Autowired - private DuplicateDetectionService duplicateDetectionService; + DuplicateDetectionService duplicateDetectionService; + + /** + * Item service + */ @Autowired - private ConverterService converter; + ItemService itemService; + + /** + * Logger + */ + private final static Logger log = LogManager.getLogger(); + /** + * Register this repository endpoint as /api/submission/duplicates + * @throws Exception + */ @Override public void afterPropertiesSet() throws Exception { discoverableEndpointsService - .register(this, Arrays.asList(Link.of( - "/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME, - PotentialDuplicateRest.NAME))); + .register(this, Arrays.asList(Link.of( + "/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME + "/search", + PotentialDuplicateRest.NAME + "-search"))); + } + + /** + * This REST method is NOT IMPLEMENTED - it does not make sense in duplicate detection, in which the only + * real addressable objects involved are Items. + * + * @param context + * the dspace context + * @param name + * the rest object id + * @return not implemented + * @throws RepositoryMethodNotImplementedException + */ + @PreAuthorize("permitAll()") + @Override + public PotentialDuplicateRest findOne(Context context, String name) { + throw new RepositoryMethodNotImplementedException("Duplicate detection endpoint only implements searchBy", ""); + } + + /** + * This REST method is NOT IMPLEMENTED - it does not make sense in duplicate detection, where there can be no "all" + * + * @param context + * the dspace context + * @return not implemented + * @throws RepositoryMethodNotImplementedException + */ + @PreAuthorize("permitAll()") + @Override + public Page findAll(Context context, Pageable pageable) { + throw new RepositoryMethodNotImplementedException("Duplicate detection endpoint only implements searchBy", ""); } /** @@ -80,16 +126,14 @@ public void afterPropertiesSet() throws Exception { * the current user. * * @param uuid The item UUID to search - * @param page Pagination options - * @param assembler The paged resources assembler to construct the paged model + * @param pageable Pagination options * @return Paged list of potential duplicates * @throws Exception */ - @RequestMapping(method = RequestMethod.GET, value = "/search") @PreAuthorize("hasPermission(#uuid, 'ITEM', 'READ')") - public PagedModel searchPotentialDuplicates( - @RequestParam(name = "uuid", required = true) UUID uuid, Pageable page, PagedResourcesAssembler assembler) { - + @SearchRestMethod(name = "findByItem") + public Page findByItem(@Parameter(value = "uuid", required = true) UUID uuid, + Pageable pageable) { // Instantiate object to represent this item Item item; // Instantiate list of potential duplicates which we will convert and return as paged ItemRest list @@ -98,8 +142,6 @@ public PagedModel searchPotentialDuplicates( int total = 0; // Obtain context Context context = ContextUtil.obtainCurrentRequestContext(); - // Get pagination - Pageable pageable = utils.getPageable(page); // Try to get item based on UUID parameter try { @@ -124,14 +166,21 @@ public PagedModel searchPotentialDuplicates( log.error("Search service error retrieving duplicates: {}", e.getMessage()); } - // Construct rest and resource pages + // Construct rest pages and return Page restPage = converter.toRestPage(potentialDuplicates, pageable, total, utils.obtainProjection()); - Page resourcePage = restPage.map(potentialDuplicateRest -> - new PotentialDuplicateResource(potentialDuplicateRest)); - // Return the list of items along with pagination info and max results, assembled as PagedModel - return assembler.toModel(resourcePage); + return restPage; } + + /** + * Return the domain class for potential duplicate objects + * @return PotentialDuplicateRest.class + */ + @Override + public Class getDomainClass() { + return PotentialDuplicateRest.class; + } + } From 9a5427ea7be225fce874ff6da19c1cc253e70a5f Mon Sep 17 00:00:00 2001 From: Kim Shepherd Date: Mon, 4 Mar 2024 13:26:16 +1300 Subject: [PATCH 33/33] [TLC-674] Update duplicate IT, handle feature disable more gracefully Instead of throwing illegal state, simply return an empty data section if the feature is not enabled and the section is requested --- .../dspace/app/rest/submit/step/DuplicateDetectionStep.java | 4 ++-- .../java/org/dspace/app/rest/DuplicateDetectionRestIT.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java index 3dde379d24d3..d7ecefa78289 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java @@ -61,8 +61,8 @@ public DataDuplicateDetection getData(SubmissionService submissionService, InPro } // Immediately return an empty if this feature is not configured if (!configurationService.getBooleanProperty("duplicate.enable", false)) { - throw new IllegalStateException("Duplicate detection feature is not enabled. " + - "See dspace/config/modules/duplicate-detection.cfg"); + log.debug("Duplicate detection is not enabled, returning empty section"); + return new DataDuplicateDetection(); } // Validate context Context context = getContext(); diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java index adc3237a735d..aa0ebb520185 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -167,7 +167,7 @@ public void searchDuplicatesBySearchMethodTest() throws Exception { context.restoreAuthSystemState(); - getClient(token).perform(get("/api/submission/duplicates/search?uuid=" + item1.getID())) + getClient(token).perform(get("/api/submission/duplicates/search/findByItem?uuid=" + item1.getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) // Valid duplicates array @@ -384,7 +384,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { String reviewerToken = getAuthToken(admin.getEmail(), password); // The reviewer should be able to see the workflow item as a potential duplicate of the test item - getClient(reviewerToken).perform(get("/api/submission/duplicates/search?uuid=" + getClient(reviewerToken).perform(get("/api/submission/duplicates/search/findByItem?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType)) @@ -396,7 +396,7 @@ public void submissionSectionWorkflowItemVisibilityTest() throws Exception { // Another random user will NOT see this getClient(getAuthToken(anotherEPerson.getEmail(), password)) - .perform(get("/api/submission/duplicates/search?uuid=" + .perform(get("/api/submission/duplicates/search/findByItem?uuid=" + workflowItem1.getItem().getID())) .andExpect(status().isOk()) .andExpect(content().contentType(contentType))