Skip to content

Commit

Permalink
datacite import plugin for Project entities
Browse files Browse the repository at this point in the history
import service for projects extending the existing datacite import plugin implementation and basic import mapping matching the current fields in the submission forms and the existing transformators/extractors for the metadata
DSpace#9636
  • Loading branch information
floriangantner committed Dec 30, 2024
1 parent f6ce766 commit 0e82053
Show file tree
Hide file tree
Showing 12 changed files with 309 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ public class DataCiteImportMetadataSourceServiceImpl
@Autowired
private ConfigurationService configurationService;

/**
* Optional query fragment used to restrict the DataCite result set. When it is not blank,
* getRecordsCount and getRecords append it to the search query, separated by a single space.
* The value is injected through the "entityFilterQuery" property of the Spring bean definition.
* NOTE(review): the fragment is concatenated without parenthesizing the user query, so a filter
* starting with AND may only bind to the last term of a query containing OR - confirm against
* the DataCite query semantics.
*/
private String entityFilterQuery;

/**
* @return the configured filter fragment; may be null or blank when no filter is configured
*/
public String getEntityFilterQuery() {
return entityFilterQuery;
}

/**
* @param entityFilterQuery the filter fragment to append to every query; a blank value disables filtering
*/
public void setEntityFilterQuery(String entityFilterQuery) {
this.entityFilterQuery = entityFilterQuery;
}

@Override
public String getImportSource() {
return "datacite";
Expand Down Expand Up @@ -80,6 +90,9 @@ public int getRecordsCount(String query) throws MetadataSourceException {
if (StringUtils.isBlank(id)) {
id = query;
}
if (StringUtils.isNotBlank(getEntityFilterQuery())) {
id = id + " " + getEntityFilterQuery();
}
uriParameters.put("query", id);
uriParameters.put("page[size]", "1");
int timeoutMs = configurationService.getIntProperty("datacite.timeout", 180000);
Expand Down Expand Up @@ -118,6 +131,9 @@ public Collection<ImportRecord> getRecords(String query, int start, int count) t
if (StringUtils.isBlank(id)) {
id = query;
}
if (StringUtils.isNotBlank(getEntityFilterQuery())) {
id = id + " " + getEntityFilterQuery();
}
uriParameters.put("query", id);
// start = current dspace page / datacite page number starting with 1
// dspace rounds up/down to the next configured pagination settings.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.datacite;

import java.util.Map;

import jakarta.annotation.Resource;
import org.dspace.importer.external.metadatamapping.AbstractMetadataFieldMapping;

/**
* An implementation of {@link AbstractMetadataFieldMapping} used by the DataCite project import.
* It receives its field mapping from the {@code dataciteProjectMetadataFieldMap} bean and hands it
* unchanged to the parent class.
*
* @author Pasquale Cavallo (pasquale.cavallo at 4science dot it)
* @author Florian Gantner ([email protected])
*/
public class DataCiteProjectFieldMapping extends AbstractMetadataFieldMapping {

/**
* Defines which source metadatum is mapped onto which DSpace metadatum. Note that while the key must be
* unique, it only matters here for postprocessing of the value. The mapped MetadatumContributor has full
* control over which metadata field is generated.
*
* @param metadataFieldMap The map linking the retrieved metadata to the metadata that will be set on
* the item; injected by name from the {@code dataciteProjectMetadataFieldMap} resource.
*/
@Override
@Resource(name = "dataciteProjectMetadataFieldMap")
public void setMetadataFieldMap(Map metadataFieldMap) {
super.setMetadataFieldMap(metadataFieldMap);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.importer.external.datacite;

/**
* Implements a data source for querying DataCite for Project resource types.
* This inherits all methods of {@link DataCiteImportMetadataSourceServiceImpl} and only overrides the
* import source identifier. The restriction to projects is not coded here; it comes from the
* entityFilterQuery configured on the bean definition.
*
* @author Florian Gantner ([email protected])
*
*/
public class DataCiteProjectImportMetadataSourceServiceImpl
extends DataCiteImportMetadataSourceServiceImpl {

/**
* Returns the identifier of this import source.
*
* @return the constant {@code "dataciteProject"}
*/
@Override
public String getImportSource() {
return "dataciteProject";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,21 @@
<bean id="DataCiteImportService"
class="org.dspace.importer.external.datacite.DataCiteImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="DataCiteMetadataFieldMapping"/>
<property name="entityFilterQuery" value="${datacite.publicationimport.entityfilterquery}" />
</bean>
<bean id="DataCiteMetadataFieldMapping"
class="org.dspace.importer.external.datacite.DataCiteFieldMapping">
</bean>

<!-- DataCite import for Project records: uses the project-specific service subclass together with its own
field mapping, and appends the filter configured in datacite.projectimport.entityfilterquery to each query. -->
<bean id="DataCiteProjectImportService"
class="org.dspace.importer.external.datacite.DataCiteProjectImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="DataCiteProjectMetadataFieldMapping"/>
<property name="entityFilterQuery" value="${datacite.projectimport.entityfilterquery}" />
</bean>
<!-- Field mapping used by DataCiteProjectImportService. -->
<bean id="DataCiteProjectMetadataFieldMapping"
class="org.dspace.importer.external.datacite.DataCiteProjectFieldMapping">
</bean>

<bean id="ArXivImportService"
class="org.dspace.importer.external.arxiv.service.ArXivImportMetadataSourceServiceImpl" scope="singleton">
<property name="metadataFieldMapping" ref="ArXivMetadataFieldMapping"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,16 @@
</list>
</property>
</bean>

<bean id="dataciteProjectLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="DataCiteProjectImportService"/>
<property name="sourceIdentifier" value="dataciteProject"/>
<property name="recordIdMetadata" value="dc.identifier"/>
<property name="supportedEntityTypes">
<list>
<value>Project</value>
</list>
</property>
</bean>
</beans>

Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.liveimportclient.service.LiveImportClientImpl;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
import org.dspace.kernel.ServiceManager;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
Expand All @@ -44,9 +47,16 @@ public class DataCiteImportMetadataSourceServiceIT extends AbstractLiveImportInt
@Autowired
private LiveImportClientImpl liveImportClientImpl;

@Autowired
//@Autowired
private DataCiteImportMetadataSourceServiceImpl dataCiteServiceImpl;

/**
* Resolves the service under test from the service manager by its bean name before each test.
* NOTE(review): presumably the lookup by name replaces field injection because the
* "DataCiteProjectImportService" bean is a subclass instance of the same type, which would make
* injection by type ambiguous - confirm.
*/
@Before
public void setup() throws Exception {
ServiceManager serviceManager = DSpaceServicesFactory.getInstance().getServiceManager();
dataCiteServiceImpl = serviceManager.getServiceByName("DataCiteImportService",
DataCiteImportMetadataSourceServiceImpl.class);
}

@Test
public void dataCiteImportMetadataGetRecordsTest() throws Exception {
context.turnOffAuthorisationSystem();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.rest;

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.when;

import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.impl.client.CloseableHttpClient;
import org.dspace.importer.external.datacite.DataCiteProjectImportMetadataSourceServiceImpl;
import org.dspace.importer.external.datamodel.ImportRecord;
import org.dspace.importer.external.liveimportclient.service.LiveImportClientImpl;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
import org.dspace.kernel.ServiceManager;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.junit.Test;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
import org.springframework.beans.factory.annotation.Autowired;


/**
* Integration tests for {@link DataCiteProjectImportMetadataSourceServiceImpl}
* General tests for the datacite api are covered in the {@link DataCiteImportMetadataSourceServiceIT}
*
* @author Florian Gantner ([email protected])
*/
public class DataCiteProjectImportMetadataSourceServiceIT extends AbstractLiveImportIntegrationTest {

    @Autowired
    private LiveImportClientImpl liveImportClientImpl;

    /**
     * Verifies that a mocked DataCite response for a project DOI is converted into exactly one
     * {@link ImportRecord} carrying the expected metadata.
     * The HTTP client of the live import client is replaced by a mock for the duration of the test
     * and restored afterwards, so no request leaves the test environment.
     *
     * @throws Exception if the fixture cannot be read or the import fails
     */
    @Test
    public void dataCiteProjectImportMetadataGetRecordsTest() throws Exception {
        context.turnOffAuthorisationSystem();
        // The service is resolved by its bean name rather than injected into a field.
        ServiceManager serviceManager = DSpaceServicesFactory.getInstance().getServiceManager();
        DataCiteProjectImportMetadataSourceServiceImpl dataCiteProjectServiceImpl =
            serviceManager.getServiceByName("DataCiteProjectImportService",
                DataCiteProjectImportMetadataSourceServiceImpl.class);
        CloseableHttpClient originalHttpClient = liveImportClientImpl.getHttpClient();
        CloseableHttpClient httpClient = Mockito.mock(CloseableHttpClient.class);
        try (InputStream dataCiteResp = getClass().getResourceAsStream("dataCiteProject-test.json")) {
            if (dataCiteResp == null) {
                // Fail with a readable message instead of a NullPointerException further down.
                throw new IllegalStateException("Test resource dataCiteProject-test.json not found");
            }
            // The fixture is JSON, so decode it as UTF-8 instead of the platform default charset.
            String dataCiteJsonResp = IOUtils.toString(dataCiteResp, Charset.forName("UTF-8"));

            liveImportClientImpl.setHttpClient(httpClient);
            CloseableHttpResponse response = mockResponse(dataCiteJsonResp, 200, "OK");
            when(httpClient.execute(ArgumentMatchers.any())).thenReturn(response);

            context.restoreAuthSystemState();
            ArrayList<ImportRecord> collection2match = getRecords();
            Collection<ImportRecord> recordsImported = dataCiteProjectServiceImpl.getRecords("10.60872/ror",
                0, -1);
            assertEquals(1, recordsImported.size());
            matchRecords(new ArrayList<>(recordsImported), collection2match);
        } finally {
            // Always put the real client back so the mock does not stay installed after this test.
            liveImportClientImpl.setHttpClient(originalHttpClient);
        }
    }

    /**
     * Builds the record expected from the dataCiteProject-test.json fixture.
     *
     * @return a list holding the single expected {@link ImportRecord}
     */
    private ArrayList<ImportRecord> getRecords() {
        ArrayList<ImportRecord> records = new ArrayList<>();
        //define first record
        List<MetadatumDTO> metadatums = new ArrayList<>();
        MetadatumDTO title = createMetadatumDTO("dc", "title", null,
            "Affiliations and Identifiers for Research Organizations (ROR)");
        MetadatumDTO title1 = createMetadatumDTO("dc", "title", null,
            "Identifying Organizations");
        MetadatumDTO projectidentifier = createMetadatumDTO("dc", "identifier", null, "10.60872/ror");
        MetadatumDTO contributor1 = createMetadatumDTO("project", "investigator", null,
            "Ted Habermann");
        MetadatumDTO description1 = createMetadatumDTO("dc", "description", null,
            "The Research Organization Registry (ROR) is a community-led project launched in January 2019 to " +
            "develop an open, sustainable, usable, and unique identifier for every research organization in the " +
            "world. Metadata Game Changers worked with Dryad in the first large-scale adoption of RORs by a " +
            "repository. We connected to papers related to Dryad datasets, found affiliations from Crossref and " +
            "other sources, searched the early ROR for identifiers, and added them to the Dryad metadata. Since " +
            "that time, we have been involved in re-curating repositories to add RORs and other kinds of " +
            "identifiers.");
        MetadatumDTO subject1 = createMetadatumDTO("dc", "subject", null, "ROR");
        MetadatumDTO subject2 = createMetadatumDTO("dc", "subject", null,
            "Research Organizations");
        MetadatumDTO subject3 = createMetadatumDTO("dc", "subject", null, "Identifiers");
        MetadatumDTO subject4 = createMetadatumDTO("dc", "subject", null, "Affiliations");
        MetadatumDTO subject5 = createMetadatumDTO("dc", "subject", null, "Metadata");
        // Records are added in the same order as before.
        metadatums.add(title);
        metadatums.add(title1);
        metadatums.add(projectidentifier);
        metadatums.add(contributor1);
        metadatums.add(description1);
        metadatums.add(subject1);
        metadatums.add(subject2);
        metadatums.add(subject3);
        metadatums.add(subject4);
        metadatums.add(subject5);

        ImportRecord firstRecord = new ImportRecord(metadatums);

        records.add(firstRecord);
        return records;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public void findAllExternalSources() throws Exception {
ExternalSourceMatcher.matchExternalSource(
"openaireFunding", "openaireFunding", false)
)))
.andExpect(jsonPath("$.page.totalElements", Matchers.is(11)));
.andExpect(jsonPath("$.page.totalElements", Matchers.is(12)));
}

@Test
Expand Down

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion dspace/config/modules/external-providers.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,13 @@ wos.url.search = https://wos-api.clarivate.com/api/wos/?databaseId=WOS&lang=en&u

datacite.url = https://api.datacite.org/dois/
datacite.timeout = 180000
# Additional query fragment appended to the search query to restrict the result set to specific resourceTypes.
# On import, a single space is inserted between the search query and the entityfilterquery value so the combined query stays valid.
datacite.publicationimport.entityfilterquery = AND NOT ((types.resourceTypeGeneral:Project) OR (types.resourceTypeGeneral:Other AND types.resourceType:Project))
datacite.projectimport.entityfilterquery = AND ((types.resourceTypeGeneral:Project) OR (types.resourceTypeGeneral:Other AND types.resourceType:Project))
#################################################################
#--------------------------- ROR -------------------------------#
#---------------------------------------------------------------#

ror.orgunit-import.api-url = https://api.ror.org/organizations
#################################################################
#################################################################
66 changes: 66 additions & 0 deletions dspace/config/spring/api/datacite-integration.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,70 @@
<constructor-arg value="dc.identifier.doi"/>
</bean>

<!-- beans and mapping for project import-->
<util:map id="dataciteProjectMetadataFieldMap" key-type="org.dspace.importer.external.metadatamapping.MetadataFieldConfig"
value-type="org.dspace.importer.external.metadatamapping.contributor.MetadataContributor">
<description>Defines which metadatum is mapped on which metadatum. Note that while the key must be unique it
only matters here for postprocessing of the value. The mapped MetadatumContributor has full control over
what metadatafield is generated.
</description>
<entry key-ref="datacite.title" value-ref="dataciteTitleContrib"/>
<entry key-ref="datacite.id.import" value-ref="dataciteIDImportContrib"/>
<entry key-ref="datacite.project.investigator" value-ref="dataciteProjectInvestigatorContrib"/>
<entry key-ref="datacite.project.description" value-ref="dataciteProjectDescriptionContrib"/>
<entry key-ref="datacite.project.subject" value-ref="dataciteProjectSubjectContrib"/>
<!-- TODO: extend your datacite project import mapping with additional MetadataContributor beans
matching your submission configuration here -->

</util:map>

<!-- This mapping must be present for a record to be importable, since the identifier is used as the recordId.
Project records store the identifier in dc.identifier instead of the dc.identifier.doi field. -->
<bean id="dataciteIDImportContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="field" ref="datacite.id.import"/>
<property name="query" value="/doi"/>
</bean>
<bean id="datacite.id.import" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.identifier"/>
</bean>

<bean id="dataciteProjectInvestigatorContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="metadataProcessor">
<bean class="org.dspace.importer.external.metadatamapping.contributor.ArrayElementAttributeProcessor">
<property name="pathToArray" value="/creators"/>
<property name="elementAttribute" value="/name"/>
</bean>
</property>
<property name="field" ref="datacite.project.investigator"/>
</bean>
<bean id="datacite.project.investigator" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="project.investigator"/>
</bean>

<bean id="dataciteProjectDescriptionContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="metadataProcessor">
<bean class="org.dspace.importer.external.metadatamapping.contributor.ArrayElementAttributeProcessor">
<property name="pathToArray" value="/descriptions"/>
<property name="elementAttribute" value="/description"/>
</bean>
</property>
<property name="field" ref="datacite.project.description"/>
</bean>
<bean id="datacite.project.description" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.description"/>
</bean>

<bean id="dataciteProjectSubjectContrib" class="org.dspace.importer.external.metadatamapping.contributor.SimpleJsonPathMetadataContributor">
<property name="metadataProcessor">
<bean class="org.dspace.importer.external.metadatamapping.contributor.ArrayElementAttributeProcessor">
<property name="pathToArray" value="/subjects"/>
<property name="elementAttribute" value="/subject"/>
</bean>
</property>
<property name="field" ref="datacite.project.subject"/>
</bean>
<bean id="datacite.project.subject" class="org.dspace.importer.external.metadatamapping.MetadataFieldConfig">
<constructor-arg value="dc.subject"/>
</bean>

</beans>
11 changes: 11 additions & 0 deletions dspace/config/spring/api/external-services.xml
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,15 @@
</list>
</property>
</bean>

<bean id="dataciteProjectLiveImportDataProvider" class="org.dspace.external.provider.impl.LiveImportDataProvider">
<property name="metadataSource" ref="DataCiteProjectImportService"/>
<property name="sourceIdentifier" value="dataciteProject"/>
<property name="recordIdMetadata" value="dc.identifier"/>
<property name="supportedEntityTypes">
<list>
<value>Project</value>
</list>
</property>
</bean>
</beans>

0 comments on commit 0e82053

Please sign in to comment.