From 98f58abe40c2bfc2cc2b7f7d2ef25e7c645a9d5c Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Thu, 11 Jan 2024 11:01:34 +0100 Subject: [PATCH 1/3] [DQ-26] Adds data-quality addon configuration --- .../service/impl/SolrDedupServiceImpl.java | 8 +- .../app/deduplication/utils/DedupUtils.java | 61 +++++---- .../utils/DuplicateInfoList.java | 34 ----- .../app/deduplication/utils/IDedupUtils.java | 79 +++++++++++ .../service/DeduplicationService.java | 11 +- .../DetectPotentialDuplicateValidator.java | 4 +- .../entities/merge-relationship-types.xml | 126 ++++++++++++++++++ dspace-server-webapp/pom.xml | 17 +++ .../DetectDuplicateAddPatchOperation.java | 12 +- .../step/DetectPotentialDuplicateStep.java | 8 +- dspace/config/dspace.cfg | 10 ++ .../entities/merge-relationship-types.xml | 126 ++++++++++++++++++ dspace/config/modules/authority.cfg | 11 ++ dspace/config/modules/rest.cfg | 4 + dspace/config/registries/dspace-types.xml | 8 ++ dspace/modules/additions/pom.xml | 49 ++++--- pom.xml | 21 +++ 17 files changed, 495 insertions(+), 94 deletions(-) delete mode 100644 dspace-api/src/main/java/org/dspace/app/deduplication/utils/DuplicateInfoList.java create mode 100644 dspace-api/src/main/java/org/dspace/app/deduplication/utils/IDedupUtils.java create mode 100644 dspace-api/src/test/data/dspaceFolder/config/entities/merge-relationship-types.xml create mode 100644 dspace/config/entities/merge-relationship-types.xml diff --git a/dspace-api/src/main/java/org/dspace/app/deduplication/service/impl/SolrDedupServiceImpl.java b/dspace-api/src/main/java/org/dspace/app/deduplication/service/impl/SolrDedupServiceImpl.java index cc5c0f2bc861..e12f1100be10 100644 --- a/dspace-api/src/main/java/org/dspace/app/deduplication/service/impl/SolrDedupServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/app/deduplication/service/impl/SolrDedupServiceImpl.java @@ -43,8 +43,8 @@ import org.dspace.app.deduplication.service.DedupService; import 
org.dspace.app.deduplication.service.SearchDeduplication; import org.dspace.app.deduplication.service.SolrDedupServiceIndexPlugin; -import org.dspace.app.deduplication.utils.DedupUtils; import org.dspace.app.deduplication.utils.DuplicateItemInfo; +import org.dspace.app.deduplication.utils.IDedupUtils; import org.dspace.app.deduplication.utils.Signature; import org.dspace.app.util.Util; import org.dspace.authorize.AuthorizeException; @@ -174,7 +174,7 @@ public class SolrDedupServiceImpl implements DedupService { protected VersioningService versioningService; @Autowired(required = true) - protected DedupUtils dedupUtils; + protected IDedupUtils dedupUtils; /*** * Deduplication status @@ -750,8 +750,8 @@ private void setDuplicateDecision(Context context, Item item, UUID duplicatedIte private List findDuplicationWithDecisions(Context context, Item item) { try { return dedupUtils.getAdminDuplicateByIdAndType(context, item.getID(), item.getType()).stream() - .filter(duplication -> isNotEmpty(duplication.getDecisionTypes())) - .collect(Collectors.toList()); + .filter(duplication -> isNotEmpty(duplication.getDecisionTypes())) + .collect(Collectors.toList()); } catch (SQLException | SearchServiceException e) { throw new RuntimeException(e); } diff --git a/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DedupUtils.java b/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DedupUtils.java index 97bf4a334652..b4c29d8780c9 100644 --- a/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DedupUtils.java +++ b/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DedupUtils.java @@ -47,12 +47,15 @@ import org.dspace.services.ConfigurationService; import org.dspace.util.ItemUtils; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; /** * Utility class used to search for duplicates inside the dedup solr core. 
* */ -public class DedupUtils { + +@Service +public class DedupUtils implements IDedupUtils { private static Logger log = LogManager.getLogger(DedupUtils.class); @@ -64,11 +67,14 @@ public class DedupUtils { @Autowired(required = true) protected ConfigurationService configurationService; - public DuplicateInfoList findSignatureWithDuplicate(Context context, String signatureType, int resourceType, - int limit, int offset, int rule) throws SearchServiceException, SQLException { + @Override + public Collection findSignatureWithDuplicate(Context context, String signatureType, int resourceType, + int limit, int offset, int rule) + throws SearchServiceException, SQLException { return findPotentialMatch(context, signatureType, resourceType, limit, offset, rule); } + @Override public Map countSignaturesWithDuplicates(String query, int resourceTypeId) throws SearchServiceException { Map results = new HashMap(); @@ -113,6 +119,7 @@ public Map countSignaturesWithDuplicates(String query, int reso return results; } + @Override public Map countSuggestedDuplicate(String query, int resourceTypeId) throws SearchServiceException { Map results = new HashMap(); @@ -241,8 +248,9 @@ private boolean hasStoredDecision(UUID firstItemID, UUID secondItemID, Duplicate return !response.getResults().isEmpty(); } + @Override public boolean matchExist(Context context, UUID itemID, UUID targetItemID, Integer resourceType, - String signatureType, Boolean isInWorkflow) throws SQLException, SearchServiceException { + String signatureType, Boolean isInWorkflow) throws SQLException, SearchServiceException { boolean exist = false; List potentialDuplicates = findDuplicate(context, itemID, resourceType, null, isInWorkflow); for (DuplicateItemInfo match : potentialDuplicates) { @@ -256,6 +264,7 @@ public boolean matchExist(Context context, UUID itemID, UUID targetItemID, Integ } + @Override public boolean rejectAdminDups(Context context, UUID firstId, UUID secondId, Integer type) throws SQLException, 
AuthorizeException { if (firstId == secondId) { @@ -309,6 +318,7 @@ public boolean rejectAdminDups(Context context, UUID firstId, UUID secondId, Int * @throws AuthorizeException * @throws SearchServiceException */ + @Override public boolean rejectAdminDups(Context context, UUID itemID, String signatureType, int resourceType) throws SQLException, AuthorizeException, SearchServiceException { @@ -336,6 +346,7 @@ public boolean rejectAdminDups(Context context, UUID itemID, String signatureTyp } + @Override public void rejectAdminDups(Context context, List items, String signatureID) throws SQLException, AuthorizeException, SearchServiceException { for (DSpaceObject item : items) { @@ -343,8 +354,9 @@ public void rejectAdminDups(Context context, List items, String si } } + @Override public void verify(Context context, int dedupId, UUID firstId, UUID secondId, int type, boolean toFix, String note, - boolean check) throws SQLException, AuthorizeException { + boolean check) throws SQLException, AuthorizeException { UUID[] sortedIds = new UUID[] { firstId, secondId }; Arrays.sort(sortedIds); firstId = sortedIds[0]; @@ -417,8 +429,9 @@ private Deduplication retrieveDuplicationRow(Context context, UUID firstId, UUID return row; } + @Override public void setDuplicateDecision(Context context, UUID firstId, UUID secondId, Integer type, - DuplicateDecisionObjectRest decisionObject) + DuplicateDecisionObjectRest decisionObject) throws AuthorizeException, SQLException, SearchServiceException { if (hasAuthorization(context, firstId, secondId)) { @@ -478,6 +491,7 @@ public void setDuplicateDecision(Context context, UUID firstId, UUID secondId, I } } + @Override public boolean validateDecision(DuplicateDecisionObjectRest decisionObject) { boolean valid = false; @@ -500,8 +514,9 @@ public boolean validateDecision(DuplicateDecisionObjectRest decisionObject) { return valid; } + @Override public boolean rejectDups(Context context, UUID firstId, UUID secondId, Integer type, boolean notDupl, 
String note, - boolean check) throws SQLException { + boolean check) throws SQLException { UUID[] sortedIds = new UUID[] { firstId, secondId }; Arrays.sort(sortedIds); Deduplication row = null; @@ -547,11 +562,9 @@ public boolean rejectDups(Context context, UUID firstId, UUID secondId, Integer return false; } - private DuplicateInfoList findPotentialMatch(Context context, String signatureType, int resourceType, int start, + private List findPotentialMatch(Context context, String signatureType, int resourceType, int start, int rows, int rule) throws SearchServiceException, SQLException { - DuplicateInfoList dil = new DuplicateInfoList(); - if (StringUtils.isNotEmpty(signatureType)) { if (!StringUtils.contains(signatureType, "_signature")) { signatureType += "_signature"; @@ -594,7 +607,7 @@ private DuplicateInfoList findPotentialMatch(Context context, String signatureTy FacetField facetField = responseFacet.getFacetField(signatureType); - List result = new ArrayList(); + List result = new ArrayList<>(); int index = 0; for (Count facetHit : facetField.getValues()) { @@ -653,10 +666,7 @@ private DuplicateInfoList findPotentialMatch(Context context, String signatureTy } index++; } - - dil.setDsi(result); - dil.setSize(facetField.getValues().size()); - return dil; + return result; } private DuplicateSignatureInfo findPotentialMatchByID(Context context, String signatureType, int resourceType, @@ -699,38 +709,45 @@ private DuplicateSignatureInfo findPotentialMatchByID(Context context, String si return dsi; } + @Override public DedupService getDedupService() { return dedupService; } + @Override public void setDedupService(DedupService dedupService) { this.dedupService = dedupService; } + @Override public void commit() { dedupService.commit(); } + @Override public List getDuplicateByIDandType(Context context, UUID itemID, int typeID, - boolean isInWorkflow) throws SQLException, SearchServiceException { + boolean isInWorkflow) + throws SQLException, SearchServiceException { 
return getDuplicateByIdAndTypeAndSignatureType(context, itemID, typeID, null, isInWorkflow); } + @Override public List getDuplicateByIdAndTypeAndSignatureType(Context context, UUID itemID, int typeID, - String signatureType, boolean isInWorkflow) throws SQLException, SearchServiceException { + String signatureType, boolean isInWorkflow) + throws SQLException, SearchServiceException { return findDuplicate(context, itemID, typeID, signatureType, isInWorkflow); } + @Override public List getAdminDuplicateByIdAndType(Context context, UUID itemID, int typeID) throws SQLException, SearchServiceException { return findDuplicate(context, itemID, typeID, null, null); } - public DuplicateInfoList findSuggestedDuplicate(Context context, int resourceType, int start, int rows) + @Override + public List findSuggestedDuplicate(Context context, int resourceType, int start, int rows) throws SearchServiceException, SQLException { - DuplicateInfoList dil = new DuplicateInfoList(); - SolrQuery solrQueryInternal = new SolrQuery(); solrQueryInternal.setQuery(SolrDedupServiceImpl.SUBQUERY_NOT_IN_REJECTED); @@ -774,8 +791,6 @@ public DuplicateInfoList findSuggestedDuplicate(Context context, int resourceTyp index++; } - dil.setDsi(result); - dil.setSize(solrDocumentList.getNumFound()); - return dil; + return result; } } diff --git a/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DuplicateInfoList.java b/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DuplicateInfoList.java deleted file mode 100644 index 3935944ffa77..000000000000 --- a/dspace-api/src/main/java/org/dspace/app/deduplication/utils/DuplicateInfoList.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.app.deduplication.utils; - -import java.util.List; - -public class 
DuplicateInfoList { - - private long size; - - private List dsi; - - public long getSize() { - return size; - } - - public void setSize(long size) { - this.size = size; - } - - public List getDsi() { - return dsi; - } - - public void setDsi(List dsi) { - this.dsi = dsi; - } - -} \ No newline at end of file diff --git a/dspace-api/src/main/java/org/dspace/app/deduplication/utils/IDedupUtils.java b/dspace-api/src/main/java/org/dspace/app/deduplication/utils/IDedupUtils.java new file mode 100644 index 000000000000..774735eaac3a --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/app/deduplication/utils/IDedupUtils.java @@ -0,0 +1,79 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.deduplication.utils; + +import java.sql.SQLException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.dspace.app.deduplication.model.DuplicateDecisionObjectRest; +import org.dspace.app.deduplication.service.DedupService; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.DSpaceObject; +import org.dspace.core.Context; +import org.dspace.discovery.SearchServiceException; + +/** + * @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com) + **/ +public interface IDedupUtils { + Collection findSignatureWithDuplicate(Context context, String signatureType, int resourceType, + int limit, int offset, int rule) throws SearchServiceException, + SQLException; + + Map countSignaturesWithDuplicates(String query, int resourceTypeId) + throws SearchServiceException; + + Map countSuggestedDuplicate(String query, int resourceTypeId) + throws SearchServiceException; + + boolean matchExist(Context context, UUID itemID, UUID targetItemID, Integer resourceType, + String signatureType, Boolean 
isInWorkflow) throws SQLException, SearchServiceException; + + boolean rejectAdminDups(Context context, UUID firstId, UUID secondId, Integer type) + throws SQLException, AuthorizeException; + + boolean rejectAdminDups(Context context, UUID itemID, String signatureType, int resourceType) + throws SQLException, AuthorizeException, SearchServiceException; + + void rejectAdminDups(Context context, List items, String signatureID) + throws SQLException, AuthorizeException, SearchServiceException; + + void verify(Context context, int dedupId, UUID firstId, UUID secondId, int type, boolean toFix, String note, + boolean check) throws SQLException, AuthorizeException; + + void setDuplicateDecision(Context context, UUID firstId, UUID secondId, Integer type, + DuplicateDecisionObjectRest decisionObject) + throws AuthorizeException, SQLException, SearchServiceException; + + boolean validateDecision(DuplicateDecisionObjectRest decisionObject); + + boolean rejectDups(Context context, UUID firstId, UUID secondId, Integer type, boolean notDupl, String note, + boolean check) throws SQLException; + + DedupService getDedupService(); + + void setDedupService(DedupService dedupService); + + void commit(); + + List getDuplicateByIDandType(Context context, UUID itemID, int typeID, + boolean isInWorkflow) throws SQLException, SearchServiceException; + + List getDuplicateByIdAndTypeAndSignatureType(Context context, UUID itemID, int typeID, + String signatureType, boolean isInWorkflow) + throws SQLException, SearchServiceException; + + List getAdminDuplicateByIdAndType(Context context, UUID itemID, int typeID) + throws SQLException, SearchServiceException; + + Collection findSuggestedDuplicate(Context context, int resourceType, int start, int rows) + throws SearchServiceException, SQLException; +} diff --git a/dspace-api/src/main/java/org/dspace/deduplication/service/DeduplicationService.java b/dspace-api/src/main/java/org/dspace/deduplication/service/DeduplicationService.java index 
ab36dc46b4c1..b2826998cccc 100644 --- a/dspace-api/src/main/java/org/dspace/deduplication/service/DeduplicationService.java +++ b/dspace-api/src/main/java/org/dspace/deduplication/service/DeduplicationService.java @@ -15,6 +15,7 @@ import org.dspace.deduplication.Deduplication; public interface DeduplicationService { + /** * Create a new Deduplication object * @@ -23,7 +24,7 @@ public interface DeduplicationService { * @throws SQLException An exception that provides information on a database * access error or other errors. */ - public Deduplication create(Context context, Deduplication dedup) throws SQLException; + Deduplication create(Context context, Deduplication dedup) throws SQLException; /*** * Return all deduplication objects @@ -35,7 +36,7 @@ public interface DeduplicationService { * @throws SQLException An exception that provides information on a database * access error or other errors. */ - public List findAll(Context context, int pageSize, int offset) throws SQLException; + List findAll(Context context, int pageSize, int offset) throws SQLException; /** * Count all accounts. @@ -55,11 +56,11 @@ public interface DeduplicationService { * @throws SQLException An exception that provides information on a database * access error or other errors. 
*/ - public void update(Context context, Deduplication dedup) throws SQLException; + void update(Context context, Deduplication dedup) throws SQLException; - public List getDeduplicationByFirstAndSecond(Context context, UUID firstId, UUID secondId) + List getDeduplicationByFirstAndSecond(Context context, UUID firstId, UUID secondId) throws SQLException; - public Deduplication uniqueDeduplicationByFirstAndSecond(Context context, UUID firstId, UUID secondId) + Deduplication uniqueDeduplicationByFirstAndSecond(Context context, UUID firstId, UUID secondId) throws SQLException; } diff --git a/dspace-api/src/main/java/org/dspace/validation/DetectPotentialDuplicateValidator.java b/dspace-api/src/main/java/org/dspace/validation/DetectPotentialDuplicateValidator.java index 8a9a7aba10bd..4b4e237a3772 100644 --- a/dspace-api/src/main/java/org/dspace/validation/DetectPotentialDuplicateValidator.java +++ b/dspace-api/src/main/java/org/dspace/validation/DetectPotentialDuplicateValidator.java @@ -21,8 +21,8 @@ import org.dspace.app.deduplication.model.DuplicateDecisionType; import org.dspace.app.deduplication.model.DuplicateDecisionValue; -import org.dspace.app.deduplication.utils.DedupUtils; import org.dspace.app.deduplication.utils.DuplicateItemInfo; +import org.dspace.app.deduplication.utils.IDedupUtils; import org.dspace.app.util.SubmissionStepConfig; import org.dspace.content.DSpaceObject; import org.dspace.content.InProgressSubmission; @@ -47,7 +47,7 @@ public class DetectPotentialDuplicateValidator implements SubmissionStepValidato private static final String ERROR_VALIDATION_DUPLICATION = "error.validation.detect-duplicate"; @Autowired - private DedupUtils dedupUtils; + private IDedupUtils dedupUtils; @Autowired private ItemService itemService; diff --git a/dspace-api/src/test/data/dspaceFolder/config/entities/merge-relationship-types.xml b/dspace-api/src/test/data/dspaceFolder/config/entities/merge-relationship-types.xml new file mode 100644 index 
000000000000..8db947319542 --- /dev/null +++ b/dspace-api/src/test/data/dspaceFolder/config/entities/merge-relationship-types.xml @@ -0,0 +1,126 @@ + + + + + + Project + Project + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Person + Person + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Funding + Funding + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + OrgUnit + OrgUnit + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Journal + Journal + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Publication + Publication + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Product + Product + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Patent + Patent + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Event + Event + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Equipment + Equipment + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + \ No newline at end of file diff --git a/dspace-server-webapp/pom.xml b/dspace-server-webapp/pom.xml index 0a0b394d576a..c83aa538698f 100644 --- a/dspace-server-webapp/pom.xml +++ b/dspace-server-webapp/pom.xml @@ -350,6 +350,23 @@ + + + addon-dataquality + + + dq.on + + + + + it.4science.dspace + addon-dataquality + ${addon-dataquality.version} + jar + + + diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/factory/impl/DetectDuplicateAddPatchOperation.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/factory/impl/DetectDuplicateAddPatchOperation.java index 4561a8a9c807..819bba0c1423 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/factory/impl/DetectDuplicateAddPatchOperation.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/factory/impl/DetectDuplicateAddPatchOperation.java @@ -12,7 +12,7 @@ import org.dspace.app.deduplication.model.DuplicateDecisionObjectRest; import org.dspace.app.deduplication.model.DuplicateDecisionType; -import 
org.dspace.app.deduplication.utils.DedupUtils; +import org.dspace.app.deduplication.utils.IDedupUtils; import org.dspace.app.rest.exception.UnprocessableEntityException; import org.dspace.app.rest.model.patch.LateObjectEvaluator; import org.dspace.content.InProgressSubmission; @@ -43,7 +43,7 @@ void add(Context context, HttpServletRequest currentRequest, InProgressSubmissio String.format("The specified path '%s' is not valid", getAbsolutePath(path))); } - DedupUtils dedupUtils = new DSpace().getServiceManager().getServiceByName("dedupUtils", DedupUtils.class); + IDedupUtils IDedupUtils = new DSpace().getServiceManager().getServiceByName("dedupUtils", IDedupUtils.class); DuplicateDecisionObjectRest decisionObject = evaluateSingleObject((LateObjectEvaluator) value); UUID currentItemID = source.getItem().getID(); @@ -98,7 +98,7 @@ void add(Context context, HttpServletRequest currentRequest, InProgressSubmissio // generate UnprocessableEntityException if decisionObject is invalid try { - if (!dedupUtils.validateDecision(decisionObject)) { + if (!IDedupUtils.validateDecision(decisionObject)) { throw new UnprocessableEntityException( String.format("The specified decision %s is not valid", decisionObject.getValue())); } @@ -106,13 +106,13 @@ void add(Context context, HttpServletRequest currentRequest, InProgressSubmissio throw new UnprocessableEntityException(String.format("The specified decision %s is not valid", subPath)); } - if (!dedupUtils.matchExist(context, currentItemID, duplicateItemID, resourceType, null, isInWorkflow)) { + if (!IDedupUtils.matchExist(context, currentItemID, duplicateItemID, resourceType, null, isInWorkflow)) { throw new UnprocessableEntityException( String.format("Cannot find any duplicate match related to Item %s", duplicateItemID)); } - dedupUtils.setDuplicateDecision(context, source.getItem().getID(), duplicateItemID, source.getItem().getType(), - decisionObject); + IDedupUtils.setDuplicateDecision(context, source.getItem().getID(), 
duplicateItemID, source.getItem().getType(), + decisionObject); } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DetectPotentialDuplicateStep.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DetectPotentialDuplicateStep.java index d7ad62153bcc..90f72afe7f07 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DetectPotentialDuplicateStep.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/submit/step/DetectPotentialDuplicateStep.java @@ -15,8 +15,8 @@ import javax.servlet.http.HttpServletRequest; import org.dspace.app.deduplication.model.DuplicateDecisionType; -import org.dspace.app.deduplication.utils.DedupUtils; import org.dspace.app.deduplication.utils.DuplicateItemInfo; +import org.dspace.app.deduplication.utils.IDedupUtils; import org.dspace.app.rest.converter.factory.ConverterServiceFactoryImpl; import org.dspace.app.rest.model.MetadataValueRest; import org.dspace.app.rest.model.patch.Operation; @@ -54,14 +54,14 @@ public class DetectPotentialDuplicateStep extends AbstractProcessingStep { public DataDetectDuplicate getData(SubmissionService submissionService, InProgressSubmission obj, SubmissionStepConfig config) throws Exception { - DedupUtils dedupUtils = new DSpace().getServiceManager().getServiceByName("dedupUtils", DedupUtils.class); + IDedupUtils IDedupUtils = new DSpace().getServiceManager().getServiceByName("dedupUtils", IDedupUtils.class); UUID itemID = obj.getItem().getID(); int typeID = obj.getItem().getType(); boolean check = !(obj instanceof WorkspaceItem); - List potentialDuplicates = dedupUtils.getDuplicateByIDandType(getContext(), itemID, typeID, - check); + List potentialDuplicates = IDedupUtils.getDuplicateByIDandType(getContext(), itemID, typeID, + check); Map matches = processPotentialDuplicates(itemID, check, potentialDuplicates); DataDetectDuplicate result = new DataDetectDuplicate(); diff --git a/dspace/config/dspace.cfg 
b/dspace/config/dspace.cfg index 05ebdd660679..477d14e207e2 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1926,6 +1926,16 @@ bulk-export.limit.notLoggedIn = 0 # has 2 threads on which schedule events system-event.thread.size = 2 +#------------------------------------------------------------------# +#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# +#------------------------------------------------------------------# +# # +# Configurations for the Deduplication / DataQuality features # +# # +#------------------------------------------------------------------# +# metadata here listed will be excluded by merge tool logic +merge.excluded-metadata = dc.description.provenance + # Load default module configs # ---------------------------- # To exclude a module configuration, simply comment out its "include" statement. diff --git a/dspace/config/entities/merge-relationship-types.xml b/dspace/config/entities/merge-relationship-types.xml new file mode 100644 index 000000000000..8db947319542 --- /dev/null +++ b/dspace/config/entities/merge-relationship-types.xml @@ -0,0 +1,126 @@ + + + + + + Project + Project + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Person + Person + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Funding + Funding + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + OrgUnit + OrgUnit + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Journal + Journal + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Publication + Publication + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Product + Product + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Patent + Patent + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Event + Event + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + Equipment + Equipment + isMergedFromItem + isMergedInItem + + 0 + + + 0 + + + + \ No newline at end of file diff --git a/dspace/config/modules/authority.cfg 
b/dspace/config/modules/authority.cfg index 037cf01e5d79..ecc9e858df88 100644 --- a/dspace/config/modules/authority.cfg +++ b/dspace/config/modules/authority.cfg @@ -280,3 +280,14 @@ choices.plugin.dc.type = ControlledVocabularyAuthority # DSpace-CRIS stores by default the authority of controlled vocabularies vocabulary.plugin.authority.store = true + +#------------------------------------------------------------------# +#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# +#------------------------------------------------------------------# +org.dspace.content.authority.ItemAuthority = OrgUnitAuthority +# AuthorStrictMatchAuthority configuration +cris.ItemAuthority.AuthorStrictMatchAuthority.forceInternalName = false +# AuthorCoarseMatchAuthority configuration +cris.ItemAuthority.AuthorCoarseMatchAuthority.forceInternalName = false +choices.plugin.green.override.dc.contributor.author = AuthorStrictMatchAuthority +choices.plugin.orange.override.dc.contributor.author = AuthorCoarseMatchAuthority \ No newline at end of file diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index ab28cd13cc25..faf7b248046b 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -95,6 +95,10 @@ rest.properties.exposed = identifiers.item-status.register-doi rest.properties.exposed = authentication-password.domain.valid rest.properties.exposed = request.item.type rest.properties.exposed = handle.canonical.prefix +#------------------------------------------------------------------# +#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# +#------------------------------------------------------------------# +rest.properties.exposed = merge.excluded-metadata #---------------------------------------------------------------# # These configs are used by the deprecated REST (v4-6) module # diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 
861dc67a816a..2cc899dd0cda 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -122,4 +122,12 @@ + + + dspace + merge + target-uri + stores the value of uri of target item + + diff --git a/dspace/modules/additions/pom.xml b/dspace/modules/additions/pom.xml index 7de65e9ca49e..f5ae804f8b8a 100644 --- a/dspace/modules/additions/pom.xml +++ b/dspace/modules/additions/pom.xml @@ -249,22 +249,39 @@ - - addon-analytics - - - analytics.on - - - - - it.4science.dspace - addon-analytics-api - ${addon-analytics.version} - jar - - - + + addon-analytics + + + analytics.on + + + + + it.4science.dspace + addon-analytics-api + ${addon-analytics.version} + jar + + + + + + addon-dataquality + + + dq.on + + + + + it.4science.dspace + addon-dataquality + ${addon-dataquality.version} + jar + + + diff --git a/pom.xml b/pom.xml index 09b01d200b50..c7e823636c09 100644 --- a/pom.xml +++ b/pom.xml @@ -65,6 +65,7 @@ [CRIS-7.1-SNAPSHOT,CRIS-8.0-SNAPSHOT) [CRIS-7.0-SNAPSHOT,CRIS-8.0-SNAPSHOT) [CRIS-7.0-SNAPSHOT,CRIS-8.0-SNAPSHOT) + cris-2023.02.00 UTF-8 @@ -939,6 +940,26 @@ + + + addon-dataquality + + false + + + + + it.4science.dspace + addon-dataquality + ${addon-dataquality.version} + jar + + + + + From 8d73d806644711eca5d8a89cb013a8215b78b477 Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Thu, 8 Feb 2024 17:14:27 +0100 Subject: [PATCH 2/3] [DQ-26] Refactors configurations --- dspace/config/dspace.cfg | 10 ---------- dspace/config/modules/authority.cfg | 13 +------------ dspace/config/modules/deduplication.cfg | 10 ++++++++++ pom.xml | 2 +- 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 477d14e207e2..05ebdd660679 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1926,16 +1926,6 @@ bulk-export.limit.notLoggedIn = 0 # has 2 threads on which schedule events system-event.thread.size = 2 
-#------------------------------------------------------------------# -#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# -#------------------------------------------------------------------# -# # -# Configurations for the Deduplication / DataQuality features # -# # -#------------------------------------------------------------------# -# metadata here listed will be excluded by merge tool logic -merge.excluded-metadata = dc.description.provenance - # Load default module configs # ---------------------------- # To exclude a module configuration, simply comment out its "include" statement. diff --git a/dspace/config/modules/authority.cfg b/dspace/config/modules/authority.cfg index ecc9e858df88..86d0248060e5 100644 --- a/dspace/config/modules/authority.cfg +++ b/dspace/config/modules/authority.cfg @@ -279,15 +279,4 @@ authority.controlled.dc.type = true choices.plugin.dc.type = ControlledVocabularyAuthority # DSpace-CRIS stores by default the authority of controlled vocabularies -vocabulary.plugin.authority.store = true - -#------------------------------------------------------------------# -#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# -#------------------------------------------------------------------# -org.dspace.content.authority.ItemAuthority = OrgUnitAuthority -# AuthorStrictMatchAuthority configuration -cris.ItemAuthority.AuthorStrictMatchAuthority.forceInternalName = false -# AuthorCoarseMatchAuthority configuration -cris.ItemAuthority.AuthorCoarseMatchAuthority.forceInternalName = false -choices.plugin.green.override.dc.contributor.author = AuthorStrictMatchAuthority -choices.plugin.orange.override.dc.contributor.author = AuthorCoarseMatchAuthority \ No newline at end of file +vocabulary.plugin.authority.store = true \ No newline at end of file diff --git a/dspace/config/modules/deduplication.cfg b/dspace/config/modules/deduplication.cfg index 71aa5c8840d7..c2f42dc2902f 100644 --- 
a/dspace/config/modules/deduplication.cfg +++ b/dspace/config/modules/deduplication.cfg @@ -27,3 +27,13 @@ deduplication.tool.duplicatechecker.ignorewithdrawn = true # only reported section don't check submitter suggestion duplicate deduplication.tool.duplicatechecker.ignore.submitter.suggestion = true + +#------------------------------------------------------------------# +#------------DEDUPLICATION / DATAQUALITY CONFIGURATIONS------------# +#------------------------------------------------------------------# +# # +# Configurations for the Deduplication / DataQuality features # +# # +#------------------------------------------------------------------# +# metadata fields listed here will be excluded by the merge tool logic +merge.excluded-metadata = dc.description.provenance diff --git a/pom.xml b/pom.xml index c7e823636c09..8ec60b3a99df 100644 --- a/pom.xml +++ b/pom.xml @@ -65,7 +65,7 @@ [CRIS-7.1-SNAPSHOT,CRIS-8.0-SNAPSHOT) [CRIS-7.0-SNAPSHOT,CRIS-8.0-SNAPSHOT) [CRIS-7.0-SNAPSHOT,CRIS-8.0-SNAPSHOT) - cris-2023.02.00 + [CRIS-2023.02-SNAPSHOT,CRIS-2023.03-SNAPSHOT) UTF-8 From f3e266878b1a82b7eb70f2b3bf745080afe43bf4 Mon Sep 17 00:00:00 2001 From: Vincenzo Mecca Date: Fri, 9 Feb 2024 13:22:52 +0100 Subject: [PATCH 3/3] [DQ-26] Addresses changes for a new dataquality-types file --- dspace/config/dspace.cfg | 1 + .../config/registries/dataquality-types.xml | 19 +++++++++++++++++++ dspace/config/registries/dspace-types.xml | 8 -------- 3 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 dspace/config/registries/dataquality-types.xml diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 05ebdd660679..98a7aba0aa5c 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -1011,6 +1011,7 @@ registry.metadata.load = openaire4-types.xml registry.metadata.load = dspace-types.xml registry.metadata.load = iiif-types.xml registry.metadata.load = bitstream-types.xml +registry.metadata.load = dataquality-types.xml 
#---------------------------------------------------------------# #-----------------UI-Related CONFIGURATIONS---------------------# diff --git a/dspace/config/registries/dataquality-types.xml b/dspace/config/registries/dataquality-types.xml new file mode 100644 index 000000000000..2658d701dc29 --- /dev/null +++ b/dspace/config/registries/dataquality-types.xml @@ -0,0 +1,19 @@ + + + + DataQuality Addon metadata types + + + + dq + http://dspace.org/dq + + + + dq + merge + target-uri + stores the value of uri of target item + + + \ No newline at end of file diff --git a/dspace/config/registries/dspace-types.xml b/dspace/config/registries/dspace-types.xml index 2cc899dd0cda..861dc67a816a 100644 --- a/dspace/config/registries/dspace-types.xml +++ b/dspace/config/registries/dspace-types.xml @@ -122,12 +122,4 @@ - - - dspace - merge - target-uri - stores the value of uri of target item - -