diff --git a/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java new file mode 100644 index 000000000000..9f52b7b63ac3 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/DuplicateDetectionServiceImpl.java @@ -0,0 +1,362 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content; + +import static java.util.Comparator.comparing; +import static java.util.Comparator.naturalOrder; + +import java.sql.SQLException; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; + +import org.apache.commons.lang3.StringUtils; +import org.apache.velocity.exception.ResourceNotFoundException; +import org.dspace.app.itemupdate.MetadataUtilities; +import org.dspace.authorize.AuthorizeException; +import org.dspace.authorize.service.AuthorizeService; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; +import org.dspace.content.service.MetadataFieldService; +import org.dspace.content.service.MetadataValueService; +import org.dspace.content.service.WorkspaceItemService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Constants; +import org.dspace.core.Context; +import org.dspace.discovery.DiscoverQuery; +import org.dspace.discovery.DiscoverResult; +import org.dspace.discovery.IndexableObject; +import org.dspace.discovery.SearchService; +import org.dspace.discovery.SearchServiceException; +import org.dspace.discovery.SearchUtils; +import org.dspace.discovery.indexobject.IndexableItem; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import 
org.dspace.discovery.indexobject.IndexableWorkspaceItem; +import org.dspace.eperson.service.GroupService; +import org.dspace.services.ConfigurationService; +import org.dspace.versioning.VersionHistory; +import org.dspace.versioning.service.VersionHistoryService; +import org.dspace.workflow.WorkflowItem; +import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Default implementation of DuplicateDetectionService. + * Duplicate Detection Service handles get, search and validation operations for duplicate detection. + * + * @author Kim Shepherd + */ +public class DuplicateDetectionServiceImpl implements DuplicateDetectionService { + + @Autowired + ConfigurationService configurationService; + @Autowired + VersionHistoryService versionHistoryService; + @Autowired + AuthorizeService authorizeService; + @Autowired + GroupService groupService; + @Autowired + MetadataFieldService metadataFieldService; + @Autowired + MetadataValueService metadataValueService; + @Autowired + XmlWorkflowItemService workflowItemService; + @Autowired + WorkspaceItemService workspaceItemService; + @Autowired + ItemService itemService; + + /** + * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that + * are identified as potential duplicates of the given item + * + * @param context DSpace context + * @param item Item to check + * @return List of potential duplicates (empty if none found) + * @throws SearchServiceException if an error occurs performing the discovery search + */ + @Override + public List getPotentialDuplicates(Context context, Item item) + throws SearchServiceException { + // Instantiate a new list of potential duplicates + List potentialDuplicates = new LinkedList<>(); + + // Immediately return an empty if this feature is not configured + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + return 
potentialDuplicates; + } + + // Search duplicates of this item and get discovery search result + DiscoverResult discoverResult = searchDuplicates(context, item); + + // If the search result is valid, iterate results and validate / transform + if (discoverResult != null) { + for (IndexableObject result : discoverResult.getIndexableObjects()) { + if (result != null) { + try { + // Validate this result and check permissions to read the item + Optional potentialDuplicateOptional = + validateDuplicateResult(context, result, item); + if (potentialDuplicateOptional.isPresent()) { + // Add the potential duplicate to the list + potentialDuplicates.add(potentialDuplicateOptional.get()); + } + } catch (SQLException e) { + log.error("SQL Error obtaining duplicate result: " + e.getMessage()); + } catch (AuthorizeException e) { + log.error("Authorize Error obtaining duplicate result: " + e.getMessage()); + } + } + } + } + + // Return the list of potential duplicates + return potentialDuplicates; + } + + + + /** + * Validate an indexable object (returned by discovery search) to ensure it is permissible, readable and valid + * and can be added to a list of results. + * An Optional is returned, if it is empty then it was invalid or did not pass validation. 
+ * + * @param context The DSpace context + * @param indexableObject The discovery search result + * @param original The original item (to compare IDs, submitters, etc) + * @return An Optional potential duplicate + * @throws SQLException + * @throws AuthorizeException + */ + @Override + public Optional validateDuplicateResult(Context context, IndexableObject indexableObject, + Item original) + throws SQLException, + AuthorizeException { + + Item resultItem = null; + PotentialDuplicate potentialDuplicate = null; + WorkspaceItem workspaceItem = null; + WorkflowItem workflowItem = null; + + // Inspect the indexable object, and extract the DSpace item depending on + // what submission / archived state it is in + if (indexableObject instanceof IndexableWorkspaceItem) { + workspaceItem = ((IndexableWorkspaceItem) indexableObject).getIndexedObject(); + // Only process workspace items that belong to the submitter + if (workspaceItem != null && workspaceItem.getSubmitter() != null + && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { + resultItem = workspaceItem.getItem(); + } + } + if (indexableObject instanceof IndexableWorkflowItem) { + workflowItem = ((IndexableWorkflowItem) indexableObject).getIndexedObject(); + if (workflowItem != null) { + resultItem = workflowItem.getItem(); + } + } + if (indexableObject instanceof IndexableItem) { + resultItem = ((IndexableItem) indexableObject).getIndexedObject(); + // Attempt resolution of workflow or workspace items, tested later + workflowItem = workflowItemService.findByItem(context, resultItem); + workspaceItem = workspaceItemService.findByItem(context, resultItem); + } + + // Result item must not be null, a template item, or actually identical to the original + if (resultItem == null) { + log.warn("skipping null item in duplicate search results"); + return Optional.empty(); + } else if (resultItem.getTemplateItemOf() != null) { + log.info("skipping template item in duplicate search results, item={}", 
resultItem.getID()); + return Optional.empty(); + } else if (resultItem.getID().equals(original.getID())) { + log.info("skipping a duplicate search result for the original item", resultItem.getID()); + return Optional.empty(); + } + + // If our item and the duplicate candidate share the same versionHistory, they are two different + // versions of the same item. + VersionHistory versionHistory = versionHistoryService.findByItem(context, original); + VersionHistory candiateVersionHistory = versionHistoryService.findByItem(context, resultItem); + // if the versionHistory is null, either versioning is switched off or the item doesn't have + // multiple versions + if (versionHistory != null && versionHistory.equals(candiateVersionHistory)) { + log.warn("skipping item that is just another version of this item"); + return Optional.empty(); + } + + // Construct new potential duplicate object + potentialDuplicate = new PotentialDuplicate(resultItem); + + // Get configured list of metadata fields to copy + List fields = new ArrayList<>(Arrays.asList( + configurationService.getArrayProperty("duplicate.preview.metadata.field", new String[]{}))); + + // Get item metadata and if it's configured for mapping, copy it across to the potential duplicate object + List metadata = resultItem.getCachedMetadata(); + + // Prepare a map of metadata to set on the potential duplicate object + for (MetadataValue metadatum : metadata) { + String fieldName = metadatum.getMetadataField().toString('.'); + if (fields.contains(fieldName)) { + potentialDuplicate.getMetadataValueList().add(metadatum); + } + } + + // Only if the current user is also the submitter of the item will we add this information + if (workspaceItem != null && workspaceItem.getSubmitter() != null + && workspaceItem.getSubmitter().equals(context.getCurrentUser())) { + potentialDuplicate.setWorkspaceItemId(workspaceItem.getID()); + return Optional.of(potentialDuplicate); + } + + // More authorisation checks + if (workflowItem != 
null) { + Collection c = workflowItem.getCollection(); + if (groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep1(context)) || + groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep2(context)) || + groupService.isMember(context, context.getCurrentUser(), c.getWorkflowStep3(context))) { + // Current user is a member of one of the workflow role groups + potentialDuplicate.setWorkflowItemId(workflowItem.getID()); + return Optional.of(potentialDuplicate); + } + } else if (resultItem.isArchived() && !resultItem.isWithdrawn() && resultItem.isDiscoverable()) { + // Not a workspace or workflow item, but is it archived, not withdrawn, and discoverable? + // Is it readable by the current user? + if (authorizeService.authorizeActionBoolean(context, resultItem, Constants.READ)) { + return Optional.of(potentialDuplicate); + } + } else if (authorizeService.isAdmin(context, resultItem)) { + // Admins can always read, return immediately + return Optional.of(potentialDuplicate); + } else { + log.info("Potential duplicate result is not readable by the current user, skipping item={}", + potentialDuplicate.getUuid()); + } + + // By default, return an empty result + return Optional.empty(); + } + + /** + * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) + * and a single-term "comparison value" constructed out of the item title + * + * @param context DSpace context + * @param item The item to check + * @return DiscoverResult as a result of performing search. Null if invalid. + * + * @throws SearchServiceException if an error was encountered during the discovery search itself. 
+ */ + @Override + public DiscoverResult searchDuplicates(Context context, Item item) throws SearchServiceException { + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("Duplicate search error: item is null"); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + // Build normalised comparison value + String comparisonValue = buildComparisonValue(context, item); + + // Construct query + if (StringUtils.isNotBlank(comparisonValue)) { + // Get search service + SearchService searchService = SearchUtils.getSearchService(); + + // Escape reserved solr characters + comparisonValue = searchService.escapeQueryChars(comparisonValue); + + // Construct discovery query based on comparison value + DiscoverQuery discoverQuery = new DiscoverQuery(); + discoverQuery.setQuery("(" + configurationService.getProperty("duplicate.comparison.solr.field", + "deduplication_keyword") + ":" + comparisonValue + "~" + + configurationService.getIntProperty("duplicate.comparison.distance", 0) + ")"); + // Add filter queries for the resource type + discoverQuery.addFilterQueries("(search.resourcetype:Item OR " + + "search.resourcetype:WorkspaceItem OR " + + "search.resourcetype:XmlWorkflowItem OR search.resourcetype:WorkflowItem)"); + // Skip this item itself so it isn't a false positive + discoverQuery.addFilterQueries("-search.resourceid:" + item.getID()); + + // Perform search and populate list with results, update total count integer + return searchService.search(context, discoverQuery); + } else { + log.warn("empty item comparison value, ignoring for duplicate search"); + } + + // Return null by default + return null; + + } + + /** + * Build a comparison value string made up of values of configured fields, used when indexing and querying + * items for deduplication + * @param context DSpace context + * @param 
item The DSpace item + * @return a constructed, normalised string + */ + @Override + public String buildComparisonValue(Context context, Item item) { + // Get configured fields to use for comparison values + String[] comparisonFields = configurationService.getArrayProperty("duplicate.comparison.metadata.field", + new String[]{"dc.title"}); + // Get all values, in order, for these fields + StringBuilder comparisonValueBuilder = new StringBuilder(); + String comparisonValue = null; + for (String field : comparisonFields) { + try { + // Get field components + String[] fieldParts = MetadataUtilities.parseCompoundForm(field); + // Get all values of this field + List metadataValues = itemService.getMetadata(item, + fieldParts[0], fieldParts[1], (fieldParts.length > 2 ? fieldParts[2] : null), Item.ANY); + // Sort metadata values by text value, so their 'position' in db doesn't matter for dedupe purposes + metadataValues.sort(comparing(MetadataValue::getValue, naturalOrder())); + for (MetadataValue metadataValue : metadataValues) { + // Add each found value to the string builder (null values interpreted as empty) + if (metadataValue != null) { + comparisonValueBuilder.append(metadataValue.getValue()); + } + } + } catch (ParseException e) { + // Log error and continue processing + log.error("Error parsing configured field for deduplication comparison: item={}, field={}", + item.getID(), field); + } catch (NullPointerException e) { + log.error("Null pointer encountered, probably during metadata value sort, when deduping:" + + "item={}, field={}", item.getID(), field); + } + } + + // Build string + comparisonValue = comparisonValueBuilder.toString(); + + // Normalise according to configuration + if (!StringUtils.isBlank(comparisonValue)) { + if (configurationService.getBooleanProperty("duplicate.comparison.normalise.lowercase")) { + comparisonValue = comparisonValue.toLowerCase(context.getCurrentLocale()); + } + if 
(configurationService.getBooleanProperty("duplicate.comparison.normalise.whitespace")) { + comparisonValue = comparisonValue.replaceAll("\\s+", ""); + } + } + + // Return comparison value + return comparisonValue; + } + +} diff --git a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java index 0b06b34038e1..3a897081f07c 100644 --- a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java +++ b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactory.java @@ -20,6 +20,7 @@ import org.dspace.content.service.CommunityService; import org.dspace.content.service.DSpaceObjectLegacySupportService; import org.dspace.content.service.DSpaceObjectService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.EntityService; import org.dspace.content.service.EntityTypeService; import org.dspace.content.service.InProgressSubmissionService; @@ -113,6 +114,13 @@ public InProgressSubmissionService getInProgressSubmissionService(InProgressSubm } } + /** + * Return the implementation of the DuplicateDetectionService interface + * + * @return the DuplicateDetectionService + */ + public abstract DuplicateDetectionService getDuplicateDetectionService(); + public DSpaceObjectService getDSpaceObjectService(T dso) { return getDSpaceObjectService(dso.getType()); } diff --git a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java index e970f0bdab12..3c3c2bf162bb 100644 --- a/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/factory/ContentServiceFactoryImpl.java @@ -18,6 +18,7 @@ import org.dspace.content.service.CommunityService; import org.dspace.content.service.DSpaceObjectLegacySupportService; import 
org.dspace.content.service.DSpaceObjectService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.EntityService; import org.dspace.content.service.EntityTypeService; import org.dspace.content.service.InstallItemService; @@ -81,6 +82,8 @@ public class ContentServiceFactoryImpl extends ContentServiceFactory { private EntityTypeService entityTypeService; @Autowired(required = true) private EntityService entityService; + @Autowired(required = true) + private DuplicateDetectionService duplicateDetectionService; @Override public List> getDSpaceObjectServices() { @@ -181,4 +184,9 @@ public EntityService getEntityService() { public RelationshipMetadataService getRelationshipMetadataService() { return relationshipMetadataService; } + + @Override + public DuplicateDetectionService getDuplicateDetectionService() { + return duplicateDetectionService; + } } diff --git a/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java new file mode 100644 index 000000000000..1f0d3495b1d6 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/service/DuplicateDetectionService.java @@ -0,0 +1,85 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content.service; + +import java.sql.SQLException; +import java.util.List; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.DuplicateDetectionServiceImpl; +import org.dspace.content.Item; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Context; +import org.dspace.discovery.DiscoverResult; +import 
org.dspace.discovery.IndexableObject; +import org.dspace.discovery.SearchServiceException; + +/** + * Duplicate Detection Service handles get, search and validation operations for duplicate detection. + * @see DuplicateDetectionServiceImpl for implementation details + * + * @author Kim Shepherd + */ +public interface DuplicateDetectionService { + + /** + * Logger + */ + Logger log = LogManager.getLogger(DuplicateDetectionService.class); + + /** + * Get a list of PotentialDuplicate objects (wrappers with some metadata included for previewing) that + * are identified as potential duplicates of the given item + * + * @param context DSpace context + * @param item Item to check + * @return List of potential duplicates (empty if none found) + * @throws SearchServiceException if an error occurs performing the discovery search + */ + List getPotentialDuplicates(Context context, Item item) + throws SearchServiceException; + + /** + * Validate an indexable object (returned by discovery search) to ensure it is permissible, readable and valid + * and can be added to a list of results. + * An Optional is returned, if it is empty then it was invalid or did not pass validation. + * + * @param context The DSpace context + * @param indexableObject The discovery search result + * @param original The original item (to compare IDs, submitters, etc) + * @return An Optional potential duplicate + * @throws SQLException + * @throws AuthorizeException + */ + Optional validateDuplicateResult(Context context, IndexableObject indexableObject, + Item original) throws SQLException, AuthorizeException; + + /** + * Search discovery for potential duplicates of a given item. The search uses levenshtein distance (configurable) + * and a single-term "comparison value" constructed out of the item title + * + * @param context DSpace context + * @param item The item to check + * @return DiscoverResult as a result of performing search. Null if invalid. 
+ * + * @throws SearchServiceException if an error was encountered during the discovery search itself. + */ + DiscoverResult searchDuplicates(Context context, Item item) throws SearchServiceException; + + /** + * Build a comparison value string made up of values of configured fields, used when indexing and querying + * items for deduplication + * @param context DSpace context + * @param item The DSpace item + * @return a constructed, normalised string + */ + String buildComparisonValue(Context context, Item item); +} diff --git a/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java new file mode 100644 index 000000000000..6c193bb28506 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/content/virtual/PotentialDuplicate.java @@ -0,0 +1,176 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content.virtual; + +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import org.dspace.content.Item; +import org.dspace.content.MetadataValue; + +/** + * Model of potential duplicate item. Provides as little data as possible, but enough to be useful + * about the context / state of the duplicate, and metadata for preview purposes. + * This class lives in the virtual package because it is not stored, addressable data, it's a stub / preview + * based on an items' search result and metadata. 
+ * + * @author Kim Shepherd + */ +public class PotentialDuplicate { + /** + * Title of duplicate object + */ + private String title; + /** + * UUID of duplicate object + */ + private UUID uuid; + /** + * Owning collection name (title) for duplicate item + */ + private String owningCollectionName; + /** + * Workspace item ID, if the duplicate is a workspace item + */ + private Integer workspaceItemId; + /** + * Workflow item ID, if the duplicate is a workflow item + */ + private Integer workflowItemId; + + /** + * List of configured metadata values copied across from the duplicate item + */ + private List metadataValueList; + + /** + * Default constructor + */ + public PotentialDuplicate() { + this.metadataValueList = new LinkedList<>(); + } + + /** + * Constructor that accepts an item and sets some values accordingly + * @param item the potential duplicate item + */ + public PotentialDuplicate(Item item) { + // Throw error if item is null + if (item == null) { + throw new NullPointerException("Null item passed to potential duplicate constructor"); + } + // Instantiate metadata value list + this.metadataValueList = new LinkedList<>(); + // Set title + this.title = item.getName(); + // Set UUID + this.uuid = item.getID(); + // Set owning collection name + if (item.getOwningCollection() != null) { + this.owningCollectionName = item.getOwningCollection().getName(); + } + } + + /** + * Get UUID of duplicate item + * @return UUID of duplicate item + */ + public UUID getUuid() { + return uuid; + } + + /** + * Set UUID of duplicate item + * @param uuid UUID of duplicate item + */ + public void setUuid(UUID uuid) { + this.uuid = uuid; + } + + /** + * Get title of duplicate item + * @return title of duplicate item + */ + public String getTitle() { + return title; + } + + /** + * Set title of duplicate item + * @param title of duplicate item + */ + public void setTitle(String title) { + this.title = title; + } + + /** + * Get owning collection name (title) of duplicate item 
+ * @return owning collection name (title) of duplicate item + */ + public String getOwningCollectionName() { + return owningCollectionName; + } + + /** + * Set owning collection name (title) of duplicate item + * @param owningCollectionName owning collection name (title) of duplicate item + */ + public void setOwningCollectionName(String owningCollectionName) { + this.owningCollectionName = owningCollectionName; + } + + /** + * Get workspace ID for duplicate item, if any + * @return workspace item ID or null + */ + public Integer getWorkspaceItemId() { + return workspaceItemId; + } + + /** + * Set workspace ID for duplicate item + * @param workspaceItemId workspace item ID + */ + public void setWorkspaceItemId(Integer workspaceItemId) { + this.workspaceItemId = workspaceItemId; + } + + /** + * Get workflow ID for duplicate item, if anh + * @return workflow item ID or null + */ + public Integer getWorkflowItemId() { + return workflowItemId; + } + + /** + * Set workflow ID for duplicate item + * @param workflowItemId workspace item ID + */ + public void setWorkflowItemId(Integer workflowItemId) { + this.workflowItemId = workflowItemId; + } + + /** + * Get metadata (sorted, field->value list) for duplicate item + * @return (sorted, field->value list) for duplicate item + */ + public List getMetadataValueList() { + return metadataValueList; + } + + /** + * Set metadata (sorted, field->value list) for duplicate item + * @param metadataValueList MetadataRest list of values mapped to field keys + */ + public void setMetadataValueList(List metadataValueList) { + this.metadataValueList = metadataValueList; + } + +} diff --git a/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java new file mode 100644 index 000000000000..001d1c51a4dd --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/discovery/SolrServiceIndexComparisonPlugin.java @@ -0,0 +1,95 @@ +/** + 
* The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.discovery; + +import org.apache.logging.log4j.Logger; +import org.apache.solr.common.SolrInputDocument; +import org.apache.tika.utils.StringUtils; +import org.dspace.content.Item; +import org.dspace.content.WorkspaceItem; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; +import org.dspace.core.Context; +import org.dspace.discovery.indexobject.IndexableItem; +import org.dspace.discovery.indexobject.IndexableWorkflowItem; +import org.dspace.discovery.indexobject.IndexableWorkspaceItem; +import org.dspace.services.ConfigurationService; +import org.dspace.workflow.WorkflowItem; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Indexes special normalised values used for comparing items, to be used in e.g. basic duplicate detection + * + * @author Kim Shepherd + */ +public class SolrServiceIndexComparisonPlugin implements SolrServiceIndexPlugin { + + @Autowired + ConfigurationService configurationService; + @Autowired + ItemService itemService; + @Autowired + DuplicateDetectionService duplicateDetectionService; + + private static final Logger log = org.apache.logging.log4j.LogManager + .getLogger(SolrServiceIndexComparisonPlugin.class); + + /** + * Index the normalised name of the item to a solr field + * + * @param context DSpace context + * @param idxObj the indexable item + * @param document the Solr document + */ + @Override + public void additionalIndex(Context context, IndexableObject idxObj, SolrInputDocument document) { + // Immediately return if this feature is not configured + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + return; + } + // Otherwise, continue with item indexing. 
Handle items, workflow items, and workspace items + if (idxObj instanceof IndexableItem) { + indexItemComparisonValue(context, ((IndexableItem) idxObj).getIndexedObject(), document); + } else if (idxObj instanceof IndexableWorkspaceItem) { + WorkspaceItem workspaceItem = ((IndexableWorkspaceItem) idxObj).getIndexedObject(); + if (workspaceItem != null) { + Item item = workspaceItem.getItem(); + if (item != null) { + indexItemComparisonValue(context, item, document); + } + } + } else if (idxObj instanceof IndexableWorkflowItem) { + WorkflowItem workflowItem = ((IndexableWorkflowItem) idxObj).getIndexedObject(); + if (workflowItem != null) { + Item item = workflowItem.getItem(); + if (item != null) { + indexItemComparisonValue(context, item, document); + } + } + } + } + + /** + * Add the actual comparison value field to the given solr doc + * + * @param context DSpace context + * @param item DSpace item + * @param document Solr document + */ + private void indexItemComparisonValue(Context context, Item item, SolrInputDocument document) { + if (item != null) { + // Build normalised comparison value and add to the document + String comparisonValue = duplicateDetectionService.buildComparisonValue(context, item); + if (!StringUtils.isBlank(comparisonValue)) { + // Add the field to the document + document.addField(configurationService.getProperty("duplicate.comparison.solr.field", + "deduplication_keyword"), comparisonValue); + } + } + } +} diff --git a/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml b/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml index 0212e5efcca1..3a305d0ccdb9 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml +++ b/dspace-api/src/test/data/dspaceFolder/config/item-submission.xml @@ -27,6 +27,7 @@ + @@ -180,6 +181,13 @@ submission + + + submit.progressbar.duplicates + org.dspace.app.rest.submit.step.DuplicateDetectionStep + duplicates + + submit.progressbar.coarnotify 
org.dspace.app.rest.submit.step.NotifyStep @@ -281,6 +289,12 @@ + + + + + + diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 3dc4e398c11b..9e7050bfdd5a 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -175,6 +175,8 @@ authority.controlled.dspace.object.owner = true webui.browse.link.1 = author:dc.contributor.* webui.browse.link.2 = subject:dc.subject.* +# Enable duplicate detection for tests +duplicate.enable = true ########################################### # LDN CONFIGURATIONS # diff --git a/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java new file mode 100644 index 000000000000..0b6c909f03e8 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/content/DuplicateDetectionTest.java @@ -0,0 +1,430 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.content; + +import static junit.framework.TestCase.assertEquals; +import static junit.framework.TestCase.assertNull; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.fail; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.builder.WorkflowItemBuilder; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.DuplicateDetectionService; +import 
org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.discovery.SearchServiceException; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; +import org.junit.Before; +import org.junit.Test; + +/** + * + * Integration tests for the duplicate detection service + * + * @author Kim Shepherd + */ +public class DuplicateDetectionTest extends AbstractIntegrationTestWithDatabase { + private DuplicateDetectionService duplicateDetectionService = ContentServiceFactory.getInstance() + .getDuplicateDetectionService(); + private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + private Collection col; + private Collection workflowCol; + private Item item1; + private Item item2; + private Item item3; + private final String item1IssueDate = "2011-10-17"; + private final String item1Subject = "ExtraEntry 1"; + private final String item1Title = "Public item I"; + private final String item1Author = "Smith, Donald"; + + private static final Logger log = LogManager.getLogger(); + + @Before + public void setUp() throws Exception { + super.setUp(); + // Temporarily enable duplicate detection and set comparison distance to 1 + configurationService.setProperty("duplicate.enable", true); + configurationService.setProperty("duplicate.comparison.distance", 1); + configurationService.setProperty("duplicate.comparison.normalise.lowercase", true); + configurationService.setProperty("duplicate.comparison.normalise.whitespace", true); + configurationService.setProperty("duplicate.comparison.solr.field", "deduplication_keyword"); + configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); + configurationService.setProperty("duplicate.preview.metadata.field", + new String[]{"dc.date.issued", "dc.subject"}); + + context.turnOffAuthorisationSystem(); + 
context.setDispatcher("default"); + + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + col = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection").build(); + workflowCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Workflow Collection") + .withWorkflowGroup("reviewer", admin) + .build(); + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + item1 = ItemBuilder.createItem(context, col) + .withTitle(item1Title) // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + item2 = ItemBuilder.createItem(context, col) + .withTitle("Public item II") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + item3 = ItemBuilder.createItem(context, col) + .withTitle("Public item III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .build(); + + + } + + /** + * Test instantiation of simple potential duplicate object + */ + @Test + public void testPotentialDuplicateInstantatation() { + PotentialDuplicate potentialDuplicate = new PotentialDuplicate(); + // The constructor should instantiate a new list for metadata + assertEquals("Metadata value list size should be 0", + 0, potentialDuplicate.getMetadataValueList().size()); + // Other properties should not be set + assertNull("Title should be null", potentialDuplicate.getTitle()); + //StringUtils.getLevenshteinDistance() + } + + /** + * Test instantiation of simple potential duplicate object given an item as a constructor argument + */ + @Test + public void testPotentialDuplicateInstantiationWithItem() { + PotentialDuplicate potentialDuplicate = new PotentialDuplicate(item1); + // We should have title, uuid, owning collection name set and 
metadata value list instantiated to empty + assertEquals("UUID should match item1 uuid", item1.getID(), potentialDuplicate.getUuid()); + assertEquals("Title should match item1 title", item1Title, potentialDuplicate.getTitle()); + assertEquals("Owning collection should match item1 owning collection", + item1.getOwningCollection().getName(), potentialDuplicate.getOwningCollectionName()); + assertEquals("Metadata value list size should be 0", + 0, potentialDuplicate.getMetadataValueList().size()); + } + + /** + * Test that a search for getPotentialDuplicates returns the expected results, populated with the expected + * preview values and metadata. This is the core method used by the duplicate item link repository and + * detect duplicates submission step. + * + * @throws Exception + */ + @Test + public void testSearchDuplicates() throws Exception { + + // Get potential duplicates of item 1: + // Expected: Public item II should appear as it has the configured levenshtein distance of 1 + List potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item1); + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item1 should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be Public item II (one distance from public item I) + assertEquals("Item II should be be the detected duplicate", + item2.getID(), potentialDuplicates.get(0).getUuid()); + + // Get potential duplicates of item2: + // Expected: BOTH other items should appear as they are both 1 distance away from "Public item II" + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item2); + + // Sort by title + potentialDuplicates.sort(Comparator.comparing(PotentialDuplicate::getTitle)); + + // Make sure result list is size 2 + size = 2; + assertEquals("Potential duplicates of item2 should have size " + size, + size, potentialDuplicates.size()); + + // The result list should contain both item1
and item3 in the expected order + assertEquals("item1 should be the first detected duplicate", + item1.getID(), potentialDuplicates.get(0).getUuid()); + assertEquals("item3 should be be the second detected duplicate", + item3.getID(), potentialDuplicates.get(1).getUuid()); + + // Check metadata is populated as per configuration, using item1 (first in results) + // Check for date + Optional foundDate = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.') + .equals("dc.date.issued")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should be an issue date found", foundDate.isPresent()); + assertEquals("item1 issue date should match the duplicate obj metadata issue date", + item1IssueDate, foundDate.get()); + // Check for subject + Optional foundSubject = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.').equals("dc.subject")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should be a subject found", foundSubject.isPresent()); + assertEquals("item1 subject should match the duplicate obj metadata subject", + item1Subject, foundSubject.get()); + + // Check for author, which was NOT configured to be copied + Optional foundAuthor = potentialDuplicates.get(0).getMetadataValueList().stream() + .filter(metadataValue -> metadataValue.getMetadataField().toString('.') + .equals("dc.contributor.author")) + .map(MetadataValue::getValue).findFirst(); + assertThat("There should NOT be an author found", foundAuthor.isEmpty()); + + } + + /** + * Test that a search for getPotentialDuplicates properly escapes Solr reserved characters + * e.g. + - && | | ! ( ) { } [ ] ^ " ~ * ? 
: \ + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithReservedSolrCharacters() throws Exception { + + + + Item item4 = ItemBuilder.createItem(context, col) + .withTitle("Testing: An Important Development Step") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + Item item5 = ItemBuilder.createItem(context, col) + .withTitle("Testing an important development step") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 4 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item4); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters should NOT result in search exception (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item4 (special characters) should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 5 + assertEquals("Item 5 should be be the detected duplicate", + item5.getID(), potentialDuplicates.get(0).getUuid()); + + } + + //configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); + + /** + * Test that a search for a very long title which also contains reserved characters finds its near-duplicate + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithVeryLongTitle() throws Exception { + + Item item6 = ItemBuilder.createItem(context, col) + .withTitle("Testing: This title is over 200 characters long and should behave just the same as a " + + "shorter title, with or without reserved characters.
This integration test will prove that " + + "long titles are detected as potential duplicates.") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + // This item is the same as above, just missing a comma from the title. + Item item7 = ItemBuilder.createItem(context, col) + .withTitle("Testing: This title is over 200 characters long and should behave just the same as a " + + "shorter title with or without reserved characters. This integration test will prove that " + + "long titles are detected as potential duplicates.") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 6 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item6); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters (long title) should NOT result in search exception (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item6 (long title) should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 7 + assertEquals("Item 7's long title should match Item 6 as a potential duplicate", + item7.getID(), potentialDuplicates.get(0).getUuid()); + + } + + /** + * Test that a comparison distance of 0 only returns exact matches, excluding titles one edit away + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesExactMatch() throws Exception { + + // Set distance to 0 manually + configurationService.setProperty("duplicate.comparison.distance", 0); + + Item item8 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match") + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject)
+ .build(); + // This item is the same as above + Item item9 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry") + .build(); + // This item has one character different, greater than the edit distance + Item item10 = ItemBuilder.createItem(context, col) + .withTitle("This integration test will prove that the edit distance of 0 results in an exact match.") + .withIssueDate("2012-10-17") + .withAuthor("Smith, Donald X.") + .withSubject("ExtraEntry") + .build(); + + // Get potential duplicates of item 8 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item8); + } catch (SearchServiceException e) { + fail("Duplicate search with special characters (long title) should NOT result in search exception (" + + e.getMessage() + ")"); + } + + // Make sure result list is size 1 - we do NOT expect item 10 to appear + int size = 1; + assertEquals("ONLY one exact match should be found (item 9) " + size, + size, potentialDuplicates.size()); + + // The only member should be item 9 + assertEquals("Item 9 should match Item 8 as a potential duplicate", + item9.getID(), potentialDuplicates.get(0).getUuid()); + + } + + /** + * Test that potential duplicates are also detected for items that are still in workflow + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesInWorkflow() throws Exception { + // Create two workflow items with the same title: + // Expected: workflow item 2 should appear as a potential duplicate of workflow item 1 + context.turnOffAuthorisationSystem(); + //context.setDispatcher("default"); + XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) + .build(); + XmlWorkflowItem workflowItem2 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) + .withTitle("Unique title") +
.withSubmitter(eperson) + .build(); + + //indexingService.commit(); + context.restoreAuthSystemState(); + context.setCurrentUser(admin); + List potentialDuplicates = + duplicateDetectionService.getPotentialDuplicates(context, workflowItem1.getItem()); + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item1 should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be workflow item 2 + assertEquals("Workflow item 2 should be be the detected duplicate", + workflowItem2.getItem().getID(), potentialDuplicates.get(0).getUuid()); + } + + /** + * Test that a search for getPotentialDuplicates with multiple fields configured as comparison value + * gives the expected results + * + * @throws Exception + */ + @Test + public void testSearchDuplicatesWithMultipleFields() throws Exception { + // Set configure to use both title and author fields + configurationService.setProperty("duplicate.comparison.metadata.field", + new String[]{"dc.title", "dc.contributor.author"}); + + Item item10 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate(item1IssueDate) + .withAuthor("Surname, F.") + .withSubject(item1Subject) + .build(); + Item item11 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate("2012-10-17") + .withAuthor("Surname, F.") + .withSubject("ExtraEntry 2") + .build(); + + Item item12 = ItemBuilder.createItem(context, col) + .withTitle("Compare both title and author") + .withIssueDate("2012-10-17") + .withAuthor("Lastname, First.") + .withSubject("ExtraEntry 2") + .build(); + + // Get potential duplicates of item 10 and make sure no exceptions are thrown + List potentialDuplicates = new ArrayList<>(); + try { + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item10); + } catch (SearchServiceException e) { + fail("Duplicate search with title and author (" + + 
e.getMessage() + ")"); + } + + // Make sure result list is size 1 + int size = 1; + assertEquals("Potential duplicates of item10 (title + author) should have size " + size, + size, potentialDuplicates.size()); + + // The only member should be item 11 since item 12 has a different author (but hte same title + assertEquals("Item 11 should be be the detected duplicate", + item11.getID(), potentialDuplicates.get(0).getUuid()); + + } + +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java new file mode 100644 index 000000000000..8d2814b611e5 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/PotentialDuplicateConverter.java @@ -0,0 +1,68 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.converter; + +import org.dspace.app.rest.model.MetadataValueList; +import org.dspace.app.rest.model.PotentialDuplicateRest; +import org.dspace.app.rest.projection.Projection; +import org.dspace.content.virtual.PotentialDuplicate; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Lazy; +import org.springframework.stereotype.Component; + +/** + * Convert DSpace PotentialDuplicate object to a PotentialDuplicateRest REST resource + * for use in REST results. + * + * @author Kim Shepherd + */ +@Component +public class PotentialDuplicateConverter implements DSpaceConverter { + @Lazy + @Autowired + private ConverterService converter; + + /** + * Convert a PotentialDuplicate model object into its equivalent REST resource, applying + * a given projection. 
+ * @see PotentialDuplicate + * @see PotentialDuplicateRest + * + * @param modelObject a PotentialDuplicate object + * @param projection current projection + * @return a converted PotentialDuplicateRest REST object + */ + @Override + public PotentialDuplicateRest convert(PotentialDuplicate modelObject, Projection projection) { + if (modelObject == null) { + return null; + } + // Instantiate new REST model object + PotentialDuplicateRest rest = new PotentialDuplicateRest(); + // Set or otherwise transform things here, then return + rest.setUuid(modelObject.getUuid()); + rest.setTitle(modelObject.getTitle()); + rest.setOwningCollectionName(modelObject.getOwningCollectionName()); + rest.setWorkflowItemId(modelObject.getWorkflowItemId()); + rest.setWorkspaceItemId(modelObject.getWorkspaceItemId()); + rest.setMetadata(converter.toRest(new MetadataValueList(modelObject.getMetadataValueList()), projection)); + + // Return converted object + return rest; + } + + /** + * For what DSpace API model class does this converter convert? + * @return Class of model objects represented. 
+ */ + @Override + public Class getModelClass() { + return PotentialDuplicate.class; + } + +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java new file mode 100644 index 000000000000..9d706a8d5791 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/PotentialDuplicateRest.java @@ -0,0 +1,199 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +import java.util.UUID; + +/** + * REST Model defining a Potential Duplicate for serialisation to JSON + * This is used in lists of potential duplicates for submission section data and item link / embeds. + * + * @author Kim Shepherd + */ +public class PotentialDuplicateRest extends RestAddressableModel { + + public static final String CATEGORY = RestModel.SUBMISSION; + public static final String NAME = RestModel.DUPLICATES; + + /** + * Type of REST resource + */ + private static final String TYPE = "DUPLICATE"; + /** + * Plural type of REST resource + */ + private static final String TYPE_PLURAL = "DUPLICATES"; + /** + * Title of duplicate object + */ + private String title; + /** + * UUID of duplicate object + */ + private UUID uuid; + /** + * Owning collection name (title) for duplicate item + */ + private String owningCollectionName; + /** + * Workspace item ID, if the duplicate is a workspace item + */ + private Integer workspaceItemId; + /** + * Workflow item ID, if the duplicate is a workflow item + */ + private Integer workflowItemId; + /** + * List of configured metadata copied across from the duplicate item + */ + private MetadataRest metadata; + + /** + * Default constructor + */ + public PotentialDuplicateRest() { + } + + /** + * Get 
UUID of duplicate item + * @return UUID of duplicate item + */ + public UUID getUuid() { + return uuid; + } + + /** + * Set UUID of duplicate item + * @param uuid UUID of duplicate item + */ + public void setUuid(UUID uuid) { + this.uuid = uuid; + } + + /** + * Get title of duplicate item + * @return title of duplicate item + */ + public String getTitle() { + return title; + } + + /** + * Set title of duplicate item + * @param title title of duplicate item + */ + public void setTitle(String title) { + this.title = title; + } + + /** + * Get owning collection name (title) of duplicate item + * @return owning collection name (title) of duplicate item + */ + public String getOwningCollectionName() { + return owningCollectionName; + } + + /** + * Set owning collection name (title) of duplicate item + * @param owningCollectionName owning collection name (title) of duplicate item + */ + public void setOwningCollectionName(String owningCollectionName) { + this.owningCollectionName = owningCollectionName; + } + + /** + * Get metadata (sorted, field->value list) for duplicate item + * @return metadata (sorted, field->value list) for duplicate item + */ + public MetadataRest getMetadata() { + return metadata; + } + + /** + * Set metadata (sorted, field->value list) for duplicate item + * @param metadata MetadataRest list of values mapped to field keys + */ + public void setMetadata(MetadataRest metadata) { + this.metadata = metadata; + } + + /** + * Get workspace ID for duplicate item, if any + * @return workspace item ID or null + */ + public Integer getWorkspaceItemId() { + return workspaceItemId; + } + + /** + * Set workspace ID for duplicate item + * @param workspaceItemId workspace item ID + */ + public void setWorkspaceItemId(Integer workspaceItemId) { + this.workspaceItemId = workspaceItemId; + } + + /** + * Get workflow ID for duplicate item, if any + * @return workflow item ID or null + */ + public Integer getWorkflowItemId() { + return workflowItemId; + } + + /** + * Set workflow
ID for duplicate item + * @param workflowItemId workflow item ID + */ + public void setWorkflowItemId(Integer workflowItemId) { + this.workflowItemId = workflowItemId; + } + + /** + * Get REST resource type name + * @return REST resource type (see static final string) + */ + @Override + public String getType() { + return TYPE; + } + + /** + * Get REST resource type plural name + * @return REST resource type plural name (see static final string) + */ + @Override + public String getTypePlural() { + return TYPE_PLURAL; + } + + /** + * Get REST resource category. + * Not implemented as this model is intended for use only as an ItemLink repository and submission section data, + * it is actually a simple RestModel but has to 'implement' RestAddressableModel to serialize correctly + * + * @return null (not implemented) + */ + @Override + public String getCategory() { + return null; + } + + /** + * Get REST controller for this model. + * Not implemented as this model is intended for use only as an ItemLink repository and submission section data, + * it is actually a simple RestModel but has to 'implement' RestAddressableModel to serialize correctly + * + * @return null (not implemented) + */ + @Override + public Class getController() { + return null; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java index b575ddb59815..72aae3b25a7e 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RestModel.java @@ -24,6 +24,7 @@ public interface RestModel extends Serializable { public static final String CORE = "core"; public static final String EPERSON = "eperson"; public static final String DISCOVER = "discover"; + public static final String DUPLICATES = "duplicates"; public static final String CONFIGURATION = "config"; public static final String INTEGRATION =
"integration"; public static final String STATISTICS = "statistics"; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java new file mode 100644 index 000000000000..e753126133b4 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/PotentialDuplicateResource.java @@ -0,0 +1,22 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.hateoas; + +import org.dspace.app.rest.model.PotentialDuplicateRest; + +/** + * + * Wrap PotentialDuplicatesRest REST resource in a very simple HALResource class + * + * @author Kim Shepherd + */ +public class PotentialDuplicateResource extends HALResource { + public PotentialDuplicateResource(PotentialDuplicateRest data) { + super(data); + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java new file mode 100644 index 000000000000..9506e9676e03 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/step/DataDuplicateDetection.java @@ -0,0 +1,46 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.step; + +import java.util.List; + +import com.fasterxml.jackson.annotation.JsonUnwrapped; +import org.dspace.app.rest.model.PotentialDuplicateRest; + +/** + * Section data model for potential duplicate items detected during submission + * + * @author Kim Shepherd + 
*/ +public class DataDuplicateDetection implements SectionData { + public DataDuplicateDetection() { + } + + /** + * A list of potential duplicate items found by DuplicateDetectionService, in their REST model form + */ + @JsonUnwrapped + private List potentialDuplicates; + + /** + * Return the list of detected potential duplicates in REST model form + * @return list of potential duplicate REST models + */ + public List getPotentialDuplicates() { + return potentialDuplicates; + } + + /** + * Set list of potential duplicates. + * @see org.dspace.app.rest.converter.PotentialDuplicateConverter + * @param potentialDuplicates list of potential duplicates + */ + public void setPotentialDuplicates(List potentialDuplicates) { + this.potentialDuplicates = potentialDuplicates; + } +} \ No newline at end of file diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java new file mode 100644 index 000000000000..8730211a5eba --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/DuplicateRestRepository.java @@ -0,0 +1,186 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.repository; + +import java.sql.SQLException; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.UUID; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.DiscoverableEndpointsService; +import org.dspace.app.rest.Parameter; +import org.dspace.app.rest.SearchRestMethod; +import org.dspace.app.rest.exception.RepositoryMethodNotImplementedException; +import org.dspace.app.rest.model.PotentialDuplicateRest; +import 
org.dspace.app.rest.utils.ContextUtil; +import org.dspace.content.Item; +import org.dspace.content.service.DuplicateDetectionService; +import org.dspace.content.service.ItemService; +import org.dspace.content.virtual.PotentialDuplicate; +import org.dspace.core.Context; +import org.dspace.discovery.SearchServiceException; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; +import org.springframework.hateoas.Link; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.stereotype.Component; + +/** + * The REST repository for the api/submission/duplicates endpoint, which handles requests for finding + * potential duplicates of a given item (archived or in-progress). + * + * Find one and find all are not implemented as actual REST methods because a duplicate is the result + * of comparing an item with other indexed items, not an object that can be referenced by some kind of ID, but + * we must at least implement the Java methods here in order to extend DSpaceRestRepository and implement + * SearchRestMethods. + * + * @author Kim Shepherd + */ +@ConditionalOnProperty("duplicate.enable") +@Component(PotentialDuplicateRest.CATEGORY + "." 
+ PotentialDuplicateRest.NAME) +public class DuplicateRestRepository extends DSpaceRestRepository + implements InitializingBean { + + /** + * Discoverable endpoints service + */ + @Autowired + DiscoverableEndpointsService discoverableEndpointsService; + + /** + * Duplicate detection service + */ + @Autowired + DuplicateDetectionService duplicateDetectionService; + + /** + * Item service + */ + @Autowired + ItemService itemService; + + /** + * Logger + */ + private final static Logger log = LogManager.getLogger(); + + /** + * Register this repository endpoint as /api/submission/duplicates + * @throws Exception + */ + @Override + public void afterPropertiesSet() throws Exception { + discoverableEndpointsService + .register(this, Arrays.asList(Link.of( + "/api/" + PotentialDuplicateRest.CATEGORY + "/" + PotentialDuplicateRest.NAME + "/search", + PotentialDuplicateRest.NAME + "-search"))); + } + + /** + * This REST method is NOT IMPLEMENTED - it does not make sense in duplicate detection, in which the only + * real addressable objects involved are Items. 
+ * + * @param context + * the dspace context + * @param name + * the rest object id + * @return not implemented + * @throws RepositoryMethodNotImplementedException + */ + @PreAuthorize("permitAll()") + @Override + public PotentialDuplicateRest findOne(Context context, String name) { + throw new RepositoryMethodNotImplementedException("Duplicate detection endpoint only implements searchBy", ""); + } + + /** + * This REST method is NOT IMPLEMENTED - it does not make sense in duplicate detection, where there can be no "all" + * + * @param context + * the dspace context + * @return not implemented + * @throws RepositoryMethodNotImplementedException + */ + @PreAuthorize("permitAll()") + @Override + public Page findAll(Context context, Pageable pageable) { + throw new RepositoryMethodNotImplementedException("Duplicate detection endpoint only implements searchBy", ""); + } + + /** + * Return a paged list of potential duplicate matches for the given item ID. This may be an item wrapped in + * an in-progress item wrapper like workspace or workflow, as long as the current user has READ access to this item. + * Results from the service search method will only contain matches that lead to items which are readable by + * the current user. 
+ * + * @param uuid The item UUID to search + * @param pageable Pagination options, applied to the complete list of matches + * @return Paged list of potential duplicates (an empty page if the Discovery search fails) + * @throws ResourceNotFoundException if the item cannot be loaded or does not exist + * @throws IllegalArgumentException if the item is a template item + */ + @PreAuthorize("hasPermission(#uuid, 'ITEM', 'READ')") + @SearchRestMethod(name = "findByItem") + public Page findByItem(@Parameter(value = "uuid", required = true) UUID uuid, + Pageable pageable) { + // Item resolved from the UUID parameter + Item item; + // Potential duplicates to convert and return as a page; stays empty if the search fails + List potentialDuplicates = new LinkedList<>(); + // Obtain context + Context context = ContextUtil.obtainCurrentRequestContext(); + + // Try to get item based on UUID parameter + try { + item = itemService.find(context, uuid); + } catch (SQLException e) { + // Keep the SQL exception as the cause so the underlying database error is not lost + throw new ResourceNotFoundException(e.getMessage(), e); + } + + // If the item is null or otherwise invalid (template, etc) then throw an appropriate error + if (item == null) { + throw new ResourceNotFoundException("No such item: " + uuid); + } + if (item.getTemplateItemOf() != null) { + throw new IllegalArgumentException("Cannot get duplicates for template item"); + } + + try { + // Search for the list of potential duplicates + potentialDuplicates = duplicateDetectionService.getPotentialDuplicates(context, item); + } catch (SearchServiceException e) { + // If the search fails, log the error with its stack trace and return an empty list rather than + // throwing a fatal error + log.error("Search service error retrieving duplicates: {}", e.getMessage(), e); + } + + // Let the converter slice the complete list for the requested page and report the real total. + // Passing a hard-coded total of 0 to the pre-paginated overload ignored page and size entirely. + return converter.toRestPage(potentialDuplicates, pageable, utils.obtainProjection()); + + } + + /** + * Return the domain class for potential duplicate objects + * @return PotentialDuplicateRest.class + */ + @Override + public Class getDomainClass() { + return PotentialDuplicateRest.class; + } + +} diff --git 
a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java index a1659c58d38c..b0f3a8c17d96 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ItemRestRepository.java @@ -365,4 +365,5 @@ protected ItemRest createAndReturn(Context context, List stringList) Item item = uriListHandlerService.handle(context, req, stringList, Item.class); return converter.toRest(item, utils.obtainProjection()); } + } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java index 93d3867d9205..0a3b2e859eee 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/SubmissionService.java @@ -10,6 +10,7 @@ import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.UUID; import javax.servlet.http.HttpServletRequest; @@ -26,9 +27,11 @@ import org.dspace.app.rest.model.CheckSumRest; import org.dspace.app.rest.model.ErrorRest; import org.dspace.app.rest.model.MetadataValueRest; +import org.dspace.app.rest.model.PotentialDuplicateRest; import org.dspace.app.rest.model.WorkspaceItemRest; import org.dspace.app.rest.model.patch.Operation; import org.dspace.app.rest.model.step.DataCCLicense; +import org.dspace.app.rest.model.step.DataDuplicateDetection; import org.dspace.app.rest.model.step.DataUpload; import org.dspace.app.rest.model.step.UploadBitstreamRest; import org.dspace.app.rest.projection.Projection; @@ -47,11 +50,14 @@ import org.dspace.content.MetadataValue; import org.dspace.content.WorkspaceItem; import 
org.dspace.content.service.CollectionService; +import org.dspace.content.service.DuplicateDetectionService; import org.dspace.content.service.ItemService; import org.dspace.content.service.WorkspaceItemService; +import org.dspace.content.virtual.PotentialDuplicate; import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.core.Utils; +import org.dspace.discovery.SearchServiceException; import org.dspace.license.service.CreativeCommonsService; import org.dspace.services.ConfigurationService; import org.dspace.services.RequestService; @@ -64,6 +70,7 @@ import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Lazy; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; import org.springframework.data.rest.webmvc.json.patch.PatchException; import org.springframework.jdbc.datasource.init.UncategorizedScriptException; import org.springframework.stereotype.Component; @@ -101,6 +108,8 @@ public class SubmissionService { @Autowired private org.dspace.app.rest.utils.Utils utils; private SubmissionConfigService submissionConfigService; + @Autowired + private DuplicateDetectionService duplicateDetectionService; public SubmissionService() throws SubmissionConfigReaderException { submissionConfigService = SubmissionServiceFactory.getInstance().getSubmissionConfigService(); @@ -313,6 +322,51 @@ public DataCCLicense getDataCCLicense(InProgressSubmission obj) return result; } + /** + * Prepare section data containing a list of potential duplicates, for use in submission steps. + * This method belongs in SubmissionService and not DuplicateDetectionService because it depends on + * the DataDuplicateDetection class which only appears in the REST project. 
+ * + * @param context DSpace context + * @param obj The in-progress submission object + * @return A DataDuplicateDetection section data object holding the converted list of potential + * duplicates (see DuplicateDetectionStep) + * @throws SearchServiceException if an error is encountered during Discovery search + */ + public DataDuplicateDetection getDataDuplicateDetection(Context context, InProgressSubmission obj) + throws SearchServiceException { + // A missing submission object is reported as "not found" + if (obj == null) { + throw new ResourceNotFoundException("Duplicate data step could not find valid in-progress submission obj"); + } + // Section data object that will carry the results + DataDuplicateDetection sectionData = new DataDuplicateDetection(); + + // The wrapped item must exist as well + Item submittedItem = obj.getItem(); + if (submittedItem == null) { + throw new ResourceNotFoundException("Duplicate data step could not find valid item for the" + + " current in-progress submission obj id=" + obj.getID()); + } + + // Run the duplicate detection search for this item + List matches = duplicateDetectionService.getPotentialDuplicates(context, submittedItem); + + // Convert each match to its REST representation + List restMatches = new LinkedList<>(); + for (PotentialDuplicate match : matches) { + restMatches.add(converter.toRest(match, utils.obtainProjection())); + } + + // Attach the converted list and hand back the section data + sectionData.setPotentialDuplicates(restMatches); + return sectionData; + } + /** * Utility method used by the {@link WorkspaceItemRestRepository} and * {@link WorkflowItemRestRepository} to deal with the upload in an inprogress diff --git 
a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java new file mode 100644 index 000000000000..d7ecefa78289 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DuplicateDetectionStep.java @@ -0,0 +1,113 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.submit.step; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.app.rest.model.patch.Operation; +import org.dspace.app.rest.model.step.DataDuplicateDetection; +import org.dspace.app.rest.submit.AbstractProcessingStep; +import org.dspace.app.rest.submit.SubmissionService; +import org.dspace.app.rest.utils.ContextUtil; +import org.dspace.app.util.SubmissionStepConfig; +import org.dspace.content.InProgressSubmission; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.core.Context; +import org.dspace.handle.service.HandleService; +import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.services.model.Request; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Submission processing step to detect potential duplicates of this item and list them so that + * the submitter can choose to cancel or continue with their submission + * + * @author Kim Shepherd + */ +public class DuplicateDetectionStep extends AbstractProcessingStep { + + private static final Logger log = LogManager.getLogger(DuplicateDetectionStep.class); + + @Autowired(required = true) + protected HandleService handleService; + @Autowired(required = true) + protected 
ContentServiceFactory contentServiceFactory; + + /** + * Override DataProcessing.getData, return a list of potential duplicates + * + * @param submissionService The submission service + * @param obj The workspace or workflow item + * @param config The submission step configuration + * @return A DataDuplicateDetection section data object; empty when duplicate detection is not enabled + * @throws IllegalArgumentException if the in-progress object or its wrapped item is null + * @throws ServletException if no DSpace context could be obtained + */ + @Override + public DataDuplicateDetection getData(SubmissionService submissionService, InProgressSubmission obj, + SubmissionStepConfig config) throws Exception { + // Validate in progress submission object and wrapped item + if (obj == null) { + throw new IllegalArgumentException("Null in-progress wrapper object"); + } + if (obj.getItem() == null) { + throw new IllegalArgumentException("Null in-progress item"); + } + // Immediately return an empty section if this feature is not enabled ("duplicate.enable" defaults to false) + if (!configurationService.getBooleanProperty("duplicate.enable", false)) { + log.debug("Duplicate detection is not enabled, returning empty section"); + return new DataDuplicateDetection(); + } + // Validate context + Context context = getContext(); + if (context == null) { + throw new ServletException("Null context"); + } + + // Delegate to the submission service to build the section data for this item + return submissionService.getDataDuplicateDetection(context, obj); + } + + /** + * Utility method to get DSpace context from the HTTP request. If there is no current request, + * a new Context is created instead. + * NOTE(review): nothing in this class closes that fallback Context - confirm who completes it. + * @return DSpace context + */ + private Context getContext() { + Context context; + Request currentRequest = DSpaceServicesFactory.getInstance().getRequestService().getCurrentRequest(); + if (currentRequest != null) { + HttpServletRequest request = currentRequest.getHttpServletRequest(); + context = ContextUtil.obtainContext(request); + } else { + context = new Context(); + } + + return context; + } + + /** + * This step only lists potential duplicates and does not take additional patch operations 
@param source + * the in progress submission + * @param op + * the json patch operation + * @param stepConf + * @throws Exception + */ + @Override + public void doPatchProcessing(Context context, HttpServletRequest currentRequest, InProgressSubmission source, + Operation op, SubmissionStepConfig stepConf) throws Exception { + log.warn("Not implemented"); + } + +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java new file mode 100644 index 000000000000..aa0ebb520185 --- /dev/null +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/DuplicateDetectionRestIT.java @@ -0,0 +1,407 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.patch; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.ws.rs.core.MediaType; + +import org.dspace.app.rest.model.patch.Operation; +import org.dspace.app.rest.model.patch.ReplaceOperation; +import org.dspace.app.rest.test.AbstractControllerIntegrationTest; +import org.dspace.authorize.service.AuthorizeService; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.EPersonBuilder; +import org.dspace.builder.ItemBuilder; +import 
org.dspace.builder.WorkflowItemBuilder; +import org.dspace.builder.WorkspaceItemBuilder; +import org.dspace.content.Collection; +import org.dspace.content.Item; +import org.dspace.content.WorkspaceItem; +import org.dspace.content.service.CollectionService; +import org.dspace.content.service.ItemService; +import org.dspace.content.service.WorkspaceItemService; +import org.dspace.core.I18nUtil; +import org.dspace.discovery.IndexingService; +import org.dspace.eperson.EPerson; +import org.dspace.eperson.service.EPersonService; +import org.dspace.handle.service.HandleService; +import org.dspace.identifier.service.IdentifierService; +import org.dspace.services.ConfigurationService; +import org.dspace.xmlworkflow.service.XmlWorkflowService; +import org.dspace.xmlworkflow.storedcomponents.XmlWorkflowItem; +import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Test item link and section data REST endpoints for duplicate detection. + * @see DuplicateDetectionTest (dspace-api) for lower level integration tests. 
+ * + * @author Kim Shepherd + */ +public class DuplicateDetectionRestIT extends AbstractControllerIntegrationTest { + + @Autowired + ConfigurationService configurationService; + @Autowired + ItemService itemService; + @Autowired + IndexingService indexingService; + @Autowired + CollectionService collectionService; + @Autowired + HandleService handleService; + @Autowired + WorkspaceItemService workspaceItemService; + @Autowired + XmlWorkflowItemService workflowItemService; + @Autowired + IdentifierService identifierService; + @Autowired + AuthorizeService authorizeService; + @Autowired + XmlWorkflowService workflowService; + @Autowired + EPersonService ePersonService; + + private Collection col; + private Collection simpleCol; + private final String item1IssueDate = "2011-10-17"; + private final String item1Subject = "ExtraEntry 1"; + private final String item1Title = "Public item I"; + private final String item1Author = "Smith, Donald"; + private final String item2Subject = "ExtraEntry 2"; + private final String item2IssueDate = "2012-10-17"; + private EPerson anotherEPerson; + + @Override + public void setUp() throws Exception { + super.setUp(); + + // Temporarily enable duplicate detection and set comparison value distance to 1 + configurationService.setProperty("duplicate.enable", true); + configurationService.setProperty("duplicate.comparison.distance", 1); + configurationService.setProperty("duplicate.comparison.normalise.lowercase", true); + configurationService.setProperty("duplicate.comparison.normalise.whitespace", true); + configurationService.setProperty("duplicate.comparison.solr.field", "deduplication_keyword"); + configurationService.setProperty("duplicate.comparison.metadata.field", new String[]{"dc.title"}); + configurationService.setProperty("duplicate.preview.metadata.field", + new String[]{"dc.date.issued", "dc.subject"}); + + context.turnOffAuthorisationSystem(); + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent 
Community").build(); + + col = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection") + .withWorkflowGroup(1, admin) + .build(); + simpleCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection without Workflow") + .build(); + eperson.setFirstName(context, "first"); + eperson.setLastName(context, "last"); + + anotherEPerson = EPersonBuilder.createEPerson(context) + .withEmail("test-another-user@email.com") + .withNameInMetadata("first", "last") + .withCanLogin(true) + .withLanguage(I18nUtil.getDefaultLocale().getLanguage()) + .withPassword(password) + .build(); + + context.restoreAuthSystemState(); + } + + @Test + public void searchDuplicatesBySearchMethodTest() throws Exception { + String token = getAuthToken(admin.getEmail(), password); + + context.turnOffAuthorisationSystem(); + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + WorkspaceItem workspaceItem1 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle(item1Title) + .withSubject(item1Subject) + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle("Public item II") + .withIssueDate(item2IssueDate) + .withAuthor("Smith, Donald X.") + .withSubject(item2Subject) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem3 = WorkspaceItemBuilder.createWorkspaceItem(context, simpleCol) + .withTitle(item1Title) + .withTitle("Public item III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .withSubmitter(eperson) + .build(); + + XmlWorkflowItem wfi1 = workflowService.start(context, workspaceItem1); + XmlWorkflowItem wfi2 = workflowService.start(context, workspaceItem2); + Item item1 = 
wfi1.getItem(); + Item item2 = wfi2.getItem(); + + context.restoreAuthSystemState(); + + getClient(token).perform(get("/api/submission/duplicates/search/findByItem?uuid=" + item1.getID())) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.potentialDuplicateResources", Matchers.hasSize(1))) + // UUID of only array member matches item2 ID + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0].uuid") + .value(item2.getID().toString())) + // First item has subject and issue date metadata populated as expected + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.subject'][0].value") + .value(item2Subject)) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.date.issued'][0].value") + .value(item2IssueDate)) + // Does NOT have other metadata e.g. author, title + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.contributor.author']").doesNotExist()) + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0]" + + ".metadata['dc.title']").doesNotExist()); + } + + /** + * Duplicates should be accessible via section data. Data should update as comparison value (title) is changed. 
+ * + * @throws Exception + */ + @Test + public void submissionSectionDataTest() throws Exception { + // Test publication + context.turnOffAuthorisationSystem(); + + Collection workspaceCollection = + CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") + .withName("Test Collection Workspace").build(); + + // Ingest three example items with slightly different titles + // item2 is 1 edit distance from item1 and item3 + // item1 and item3 are 2 edit distance from each other + Item item1 = ItemBuilder.createItem(context, col) + .withTitle("Submission section test I") // Public item I + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubject(item1Subject) + .build(); + Item item2 = ItemBuilder.createItem(context, col) + .withTitle("Submission section test II") + .withIssueDate(item2IssueDate) + .withAuthor("Smith, Donald X.") + .withSubject(item2Subject) + .build(); + Item item3 = ItemBuilder.createItem(context, col) + .withTitle("Submission section test III") + .withIssueDate("2013-10-17") + .withAuthor("Smith, Donald Y.") + .withSubject("ExtraEntry 3") + .build(); + // Create a new workspace item with a similar title to Item 1 (1 edit distance). Reuse other items + // metadata for the rest, as it is not relevant. 
+ WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, workspaceCollection) + .withTitle("Submission section test 1") + .withSubject(item2Subject) + .withIssueDate(item2IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + String submitterToken = getAuthToken(eperson.getEmail(), password); + context.restoreAuthSystemState(); + + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data of size 1 + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(1))) + // The UUID of the first duplicate matches item 1 (which is 1 distance from this new title) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].uuid") + .value(item1.getID().toString())) + // Metadata for subject and issue date is populated as expected + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.subject'][0].value") + .value(item1Subject)) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.date.issued'][0].value") + .value(item1IssueDate)) + // Metadata for other metadata fields has not been copied across, as expected + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.contributor.author']").doesNotExist()) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0]" + + ".metadata['dc.title']").doesNotExist()); + + List updateOperations = new ArrayList(); + Map value = new HashMap(); + value.put("value", "Submission section test II"); + updateOperations.add(new ReplaceOperation("/sections/traditionalpageone/dc.title/0", value)); + String patchBody = getPatchContent(updateOperations); + getClient(submitterToken).perform(patch("/api/submission/workspaceitems/" + 
workspaceItem.getID()) + .content(patchBody) + .contentType(MediaType.APPLICATION_JSON_PATCH_JSON)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.errors").doesNotExist()); + + // Now there should be 3 results + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data (even if empty) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(3))); + + // Now, change the title to something completely different + updateOperations = new ArrayList<>(); + value.put("value", "Research article"); + updateOperations.add(new ReplaceOperation("/sections/traditionalpageone/dc.title/0", value)); + patchBody = getPatchContent(updateOperations); + getClient(submitterToken).perform(patch("/api/submission/workspaceitems/" + workspaceItem.getID()) + .content(patchBody) + .contentType(MediaType.APPLICATION_JSON_PATCH_JSON)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.errors").doesNotExist()); + + // Now there should be NO results + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data (even if empty) + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(0))); + } + + /** + * If there is a potential duplicate that is also in submission (workspace item), it will + * ONLY be shown if the current user is the submitter / item owner. 
+ * + * @throws Exception + */ + @Test + public void submissionSectionWorkspaceItemVisibilityTest() throws Exception { + // Build test data with the authorisation system turned off + context.turnOffAuthorisationSystem(); + // Create a new collection with handle that maps to the test-duplicate-detection submission config + col = CollectionBuilder.createCollection(context, parentCommunity, "123456789/test-duplicate-detection") + .withName("Test Collection with Duplicate Detection") + .withWorkflowGroup(1, admin) + .build(); + // Create three workspace items that all share the same title: the first two are submitted by + // 'eperson', the third by 'admin'. The remaining metadata is not relevant to the comparison. + WorkspaceItem workspaceItem = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject(item1Subject) + .withIssueDate(item1IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem2 = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject(item2Subject) + .withIssueDate(item2IssueDate) + .withAuthor(item1Author) + .withSubmitter(eperson) + .build(); + WorkspaceItem workspaceItem3 = WorkspaceItemBuilder.createWorkspaceItem(context, col) + .withTitle("Unique title") + .withSubject("asdf") + .withIssueDate("2000-01-01") + .withAuthor("asdfasf") + .withSubmitter(admin) + .build(); + String submitterToken = getAuthToken(eperson.getEmail(), password); + + context.restoreAuthSystemState(); + + // Even though there are 3 items with the same title, this 'eperson' user should only see 1 duplicate + // as workspaceItem3 is owned by a different submitter, and self-references are skipped + getClient(submitterToken).perform(get("/api/submission/workspaceitems/" + workspaceItem.getID())) + .andExpect(status().isOk()) + // The duplicates section is present + .andExpect(jsonPath("$.sections.duplicates").exists()) + // There is a potentialDuplicates array in the section data of size 1 + 
.andExpect(jsonPath("$.sections.duplicates.potentialDuplicates", Matchers.hasSize(1))) + // The UUID of the only duplicate matches the item wrapped by workspaceItem2 + .andExpect(jsonPath("$.sections.duplicates.potentialDuplicates[0].uuid") + .value(workspaceItem2.getItem().getID().toString())); + } + + /** + * If there is a potential duplicate that is also in workflow, it will + * ONLY be shown if the current user is in a workflow group for step 1, 2, or 3, or is an admin, or otherwise + * has READ permission + * + * @throws Exception + */ + @Test + public void submissionSectionWorkflowItemVisibilityTest() throws Exception { + + context.turnOffAuthorisationSystem(); + // Create a new parent community and a collection built with withWorkflowGroup("reviewer", admin) + parentCommunity = CommunityBuilder.createCommunity(context).withName("Parent Community").build(); + Collection workflowCol = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Test Collection with Duplicate Detection") + .withWorkflowGroup("reviewer", admin) + .build(); + + XmlWorkflowItem workflowItem1 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(anotherEPerson) + .build(); + XmlWorkflowItem workflowItem2 = WorkflowItemBuilder.createWorkflowItem(context, workflowCol) + .withTitle("Unique title") + .withSubmitter(eperson) + .build(); + context.restoreAuthSystemState(); + + context.setCurrentUser(admin); + String reviewerToken = getAuthToken(admin.getEmail(), password); + + // The reviewer should be able to see the workflow item as a potential duplicate of the test item + getClient(reviewerToken).perform(get("/api/submission/duplicates/search/findByItem?uuid=" + + workflowItem1.getItem().getID())) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.potentialDuplicateResources", Matchers.hasSize(1))) + // UUID of 
only array member matches the new workflow item ID + .andExpect(jsonPath("$._embedded.potentialDuplicateResources[0].uuid") + .value(workflowItem2.getItem().getID().toString())); + + // Another random user will NOT see this + getClient(getAuthToken(anotherEPerson.getEmail(), password)) + .perform(get("/api/submission/duplicates/search/findByItem?uuid=" + + workflowItem1.getItem().getID())) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + // Valid duplicates array + .andExpect(jsonPath("$._embedded.potentialDuplicateResources").doesNotExist()); + } + +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java index 3b8c87ce5f2c..a6f3999b94ac 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/SubmissionDefinitionsControllerIT.java @@ -32,6 +32,11 @@ */ public class SubmissionDefinitionsControllerIT extends AbstractControllerIntegrationTest { + // The total number of expected submission definitions is referred to in multiple tests and assertions as + // is the last page (totalDefinitions - 1) + // This integer should be maintained along with any changes to item-submissions.xml + private static final int totalDefinitions = 11; + @Test public void findAll() throws Exception { //When we call the root endpoint as anonymous user @@ -258,10 +263,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=1"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) 
.andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(0))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -284,10 +289,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=1"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(1))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -310,10 +315,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=2"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) 
.andExpect(jsonPath("$.page.number", is(2))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -336,10 +341,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=3"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(3))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -362,10 +367,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=4"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(4))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -388,10 +393,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=5"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", 
Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=" + (totalDefinitions - 1)), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(5))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -414,10 +419,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=5"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(5))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -440,10 +445,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=6"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - 
.andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(6))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -466,10 +471,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=7"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(7))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -492,10 +497,10 @@ public void findAllPaginationTest() throws Exception { Matchers.containsString("page=8"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(8))); getClient(tokenAdmin).perform(get("/api/config/submissiondefinitions") @@ -515,10 +520,10 @@ public void 
findAllPaginationTest() throws Exception { Matchers.containsString("page=9"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$._links.last.href", Matchers.allOf( Matchers.containsString("/api/config/submissiondefinitions?"), - Matchers.containsString("page=9"), Matchers.containsString("size=1")))) + Matchers.containsString("page=10"), Matchers.containsString("size=1")))) .andExpect(jsonPath("$.page.size", is(1))) - .andExpect(jsonPath("$.page.totalElements", is(10))) - .andExpect(jsonPath("$.page.totalPages", is(10))) + .andExpect(jsonPath("$.page.totalElements", is(totalDefinitions))) + .andExpect(jsonPath("$.page.totalPages", is(totalDefinitions))) .andExpect(jsonPath("$.page.number", is(9))); } diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index e860d59d1127..98fa28dea664 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1647,6 +1647,7 @@ include = ${module_dir}/clamav.cfg include = ${module_dir}/curate.cfg include = ${module_dir}/discovery.cfg include = ${module_dir}/doi-curation.cfg +include = ${module_dir}/duplicate-detection.cfg include = ${module_dir}/google-analytics.cfg include = ${module_dir}/healthcheck.cfg include = ${module_dir}/identifiers.cfg diff --git a/dspace/config/item-submission.xml b/dspace/config/item-submission.xml index 6bf9c3a15616..b5278d2f3d87 100644 --- a/dspace/config/item-submission.xml +++ b/dspace/config/item-submission.xml @@ -247,6 +247,13 @@ coarnotify + + + submit.progressbar.duplicates + org.dspace.app.rest.submit.step.DuplicateDetectionStep + duplicates + + @@ -284,6 +291,11 @@ + + + diff --git a/dspace/config/modules/duplicate-detection.cfg b/dspace/config/modules/duplicate-detection.cfg new file mode 100644 index 000000000000..665790384e07 --- /dev/null +++ b/dspace/config/modules/duplicate-detection.cfg @@ -0,0 +1,35 @@ +### +# Duplicate detection settings +## + +# Enable this feature. Default: false +#duplicate.enable = true + +## +# Normalisation rules. 
If these are changed, a full index-discovery re-index should be performed to force +# stored comparison values to be updated. +## +# Should the comparison query/index value be normalised for case? Default: true +duplicate.comparison.normalise.lowercase = true +# Should the comparison query/index value be normalised for whitespace? Default: true +# (highly recommended - if this is *not* used, some other placeholder needs to be used to force the value +# to be treated as a single term by Lucene) +duplicate.comparison.normalise.whitespace = true + +# Levenshtein edit distance. Default: 0 (exact match only; e.g. a distance of 1 lets Test match Txst but not Txxt) +# Valid distances are 0, 1, 2 as per Solr documentation. Note that this distance is applied *after* normalisation +# rules above, so capitalisation and whitespace will not count as 'edits' if you have the above rules enabled. +duplicate.comparison.distance = 0 +# DSpace metadata field(s) to use. They will be concatenated before normalisation. +# Repeat the configuration property for multiple fields. 
+duplicate.comparison.metadata.field = dc.title +#duplicate.comparison.metadata.field = dc.contributor.author + +# Solr field used for storing the indexed comparison string +duplicate.comparison.solr.field = deduplication_keyword + +## Metadata to populate in the potential duplicate +duplicate.preview.metadata.field = dc.title +duplicate.preview.metadata.field = dc.date.issued +duplicate.preview.metadata.field = dc.type +duplicate.preview.metadata.field = dspace.entity.type \ No newline at end of file diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index ef4f985f0d78..3bb620510e59 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -58,3 +58,4 @@ rest.properties.exposed = ldn.enabled rest.properties.exposed = ldn.notify.inbox rest.properties.exposed = handle.canonical.prefix rest.properties.exposed = contentreport.enable +rest.properties.exposed = duplicate.enable diff --git a/dspace/config/spring/api/core-services.xml b/dspace/config/spring/api/core-services.xml index 20b5bc74736d..835b4ea0eed7 100644 --- a/dspace/config/spring/api/core-services.xml +++ b/dspace/config/spring/api/core-services.xml @@ -55,6 +55,7 @@ + diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index 15ae4f45bf28..08e015355834 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -44,6 +44,9 @@ + + +