Skip to content

Commit

Permalink
Requisiti Extraction (#92)
Browse files Browse the repository at this point in the history
* added params to search API

* rights holder is now indexed

* rights holder is now indexed

* rights holder is now indexed

* fixes OWASP

* using a different index name to avoid reindexing
  • Loading branch information
ndc-dxc authored Mar 7, 2024
1 parent 1754bb1 commit da3ec4d
Show file tree
Hide file tree
Showing 30 changed files with 361 additions and 99 deletions.
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-starter-jdbc'
implementation 'org.springframework.boot:spring-boot-starter-security'

implementation group: 'org.springframework', name: 'spring-web', version: '5.3.32'

implementation 'org.springframework.data:spring-data-elasticsearch'
implementation 'org.apache.jena:apache-jena-libs:4.9.0'
implementation 'org.apache.jena:jena-querybuilder:4.9.0'
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package it.gov.innovazione.ndc.controller;

import it.gov.innovazione.ndc.controller.exception.SemanticAssetNotFoundException;
import it.gov.innovazione.ndc.gen.dto.SearchResultItem;
import it.gov.innovazione.ndc.gen.dto.SemanticAssetDetails;
import it.gov.innovazione.ndc.harvester.service.RepositoryService;
import it.gov.innovazione.ndc.model.Builders;
import it.gov.innovazione.ndc.service.SemanticAssetSearchService;
import it.gov.innovazione.ndc.gen.dto.SearchResultItem;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest;
import org.springframework.boot.test.mock.mockito.MockBean;
Expand Down Expand Up @@ -33,6 +35,8 @@ public class SemanticAssetsControllerMvcTest {
private MockMvc mockMvc;
@MockBean
private SemanticAssetSearchService searchService;
@MockBean
private RepositoryService repositoryService;

@Test
void shouldFindByIri() throws Exception {
Expand Down Expand Up @@ -72,7 +76,7 @@ void shouldReturnMatchingAssetsUsingDefaultPageParams() throws Exception {
dto.setDescription("some-description");
dto.setModifiedOn(LocalDate.parse("2020-01-01"));

when(searchService.search(any(), any(), any(), any())
when(searchService.search(any(), any(), any(), any(), any())
).thenReturn(Builders.searchResult()
.limit(10)
.offset(0)
Expand Down Expand Up @@ -103,13 +107,14 @@ void shouldReturnMatchingAssetsUsingDefaultPageParams() throws Exception {
verify(searchService).search("searchText",
Set.of("CONTROLLED_VOCABULARY", "ONTOLOGY"),
Set.of("http://publications.europa.eu/resource/authority/data-theme/AGRI", "http://publications.europa.eu/resource/authority/data-theme/EDUC"),
null,
OffsetBasedPageRequest.of(0, 10));
}

@Test
void shouldReturnMatchingAssetsUsingProvidedPageParams() throws Exception {
SearchResultItem dto = new SearchResultItem();
when(searchService.search(any(), any(), any(), any())
when(searchService.search(any(), any(), any(), any(), any())
).thenReturn(Builders.searchResult()
.limit(20)
.offset(100)
Expand All @@ -123,7 +128,7 @@ void shouldReturnMatchingAssetsUsingProvidedPageParams() throws Exception {
.accept(MediaType.APPLICATION_JSON)
);

verify(searchService).search("", Set.of(), Set.of(), OffsetBasedPageRequest.of(100, 20));
verify(searchService).search("", Set.of(), Set.of(), null, OffsetBasedPageRequest.of(100, 20));

apiResult
.andDo(print())
Expand All @@ -140,7 +145,7 @@ void shouldSearchWithDefaultWhenNoParamsProvided() throws Exception {
.andDo(print())
.andExpect(status().isOk());

verify(searchService).search("", Set.of(), Set.of(), OffsetBasedPageRequest.of(0, 10));
verify(searchService).search("", Set.of(), Set.of(), null, OffsetBasedPageRequest.of(0, 10));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import it.gov.innovazione.ndc.harvester.AgencyRepositoryService;
import it.gov.innovazione.ndc.harvester.HarvesterService;
import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata;
import it.gov.innovazione.ndc.harvester.service.RepositoryService;
import it.gov.innovazione.ndc.repository.TripleStoreProperties;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
Expand All @@ -23,6 +24,7 @@
import java.util.Set;

import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doReturn;
Expand All @@ -49,6 +51,9 @@ public class BaseIntegrationTest {
@SpyBean
AgencyRepositoryService agencyRepositoryService;

@SpyBean
RepositoryService repositoryService;

@Autowired
TripleStoreProperties virtuosoProps;

Expand Down Expand Up @@ -79,6 +84,7 @@ private void dataIsHarvested() throws IOException {
Path cloneDir = Path.of(dir);
doReturn(cloneDir).when(agencyRepositoryService).cloneRepo(REPO_URL, null);
doNothing().when(agencyRepositoryService).removeClonedRepo(cloneDir);
doNothing().when(repositoryService).storeRightsHolders(any(), any());

harvesterService.harvest(asRepo(REPO_URL));

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package it.gov.innovazione.ndc.config;

import it.gov.innovazione.ndc.harvester.model.index.RightsHolder;
import it.gov.innovazione.ndc.model.harvester.Repository;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.Singular;
import lombok.With;

import java.util.ArrayList;
import java.util.List;

@With
@Data
@Builder
Expand All @@ -18,4 +23,10 @@ public class HarvestExecutionContext {
private final String runId;
private final String currentUserId;
private final String rootPath;
@Singular
private final List<RightsHolder> rightsHolders = new ArrayList<>();

/**
 * Appends the given rights holder to the list kept by this execution context.
 *
 * @param agencyId the rights holder to record
 */
public void addRightsHolder(RightsHolder agencyId) {
    this.rightsHolders.add(agencyId);
}
}
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
package it.gov.innovazione.ndc.controller;

import io.swagger.annotations.ApiOperation;
import io.swagger.annotations.ApiResponse;
import io.swagger.annotations.ApiResponses;
import it.gov.innovazione.ndc.gen.api.SemanticAssetsApi;
import it.gov.innovazione.ndc.gen.dto.AssetType;
import it.gov.innovazione.ndc.gen.dto.Direction;
import it.gov.innovazione.ndc.gen.dto.SearchResult;
import it.gov.innovazione.ndc.gen.dto.SemanticAssetDetails;
import it.gov.innovazione.ndc.gen.dto.SortBy;
import it.gov.innovazione.ndc.gen.dto.Theme;
import it.gov.innovazione.ndc.harvester.model.index.RightsHolder;
import it.gov.innovazione.ndc.harvester.service.RepositoryService;
import it.gov.innovazione.ndc.service.SemanticAssetSearchService;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.Pageable;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;

import java.net.URI;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
Expand All @@ -21,15 +31,45 @@
@RestController
public class SemanticAssetsController implements SemanticAssetsApi {
private final SemanticAssetSearchService searchService;
private final RepositoryService repositoryService;

/**
 * GET /semantic-assets/rights-holders
 * Retrieves the rights holders of the semantic assets.
 *
 * @return the rights holders returned by the repository service; OK (status code 200)
 */
// The documented response type must match what the handler actually returns
// (a list of RightsHolder), otherwise the generated API docs describe the wrong payload.
@ApiOperation(value = "", nickname = "getRightsHolders", notes = "Retrieves the rights holders",
        response = RightsHolder.class, responseContainer = "List", tags = {"semantic-assets"})
@ApiResponses(value = {
        @ApiResponse(code = 200, message = "OK", response = RightsHolder.class, responseContainer = "List")})
@RequestMapping(
        method = RequestMethod.GET,
        value = "/semantic-assets/rights-holders",
        produces = {"application/json"}
)
List<RightsHolder> getRightsHolders() {
    return repositoryService.getRightsHolders();
}

@Override
public ResponseEntity<SearchResult> search(String q, Integer offset, Integer limit, Set<AssetType> type, Set<Theme> theme) {
public ResponseEntity<SearchResult> search(
String q,
Integer offset,
Integer limit,
SortBy sortBy,
Direction direction,
Set<AssetType> type,
Set<Theme> theme,
Set<String> rightsHolder) {

Pageable pageable = OffsetBasedPageRequest.of(offset, limit);

return AppJsonResponse.ok(
searchService.search(q,
toEnumStrings(type, AssetType::getValue),
toEnumStrings(theme, Theme::getValue),
rightsHolder,
pageable
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import it.gov.innovazione.ndc.config.HarvestExecutionContext;
import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
import it.gov.innovazione.ndc.harvester.model.index.RightsHolder;
import it.gov.innovazione.ndc.harvester.service.RepositoryService;
import it.gov.innovazione.ndc.model.harvester.Repository;
import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository;
import it.gov.innovazione.ndc.repository.TripleStoreRepository;
Expand All @@ -11,8 +13,15 @@

import java.io.IOException;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;

@Slf4j
@Component
Expand All @@ -22,6 +31,7 @@ public class HarvesterService {
private final List<SemanticAssetHarvester> semanticAssetHarvesters;
private final TripleStoreRepository tripleStoreRepository;
private final SemanticAssetMetadataRepository semanticAssetMetadataRepository;
private final RepositoryService repositoryService;

public void harvest(Repository repository) throws IOException {
harvest(repository, null);
Expand Down Expand Up @@ -76,10 +86,22 @@ private void harvestClonedRepo(Repository repository, Path path) {
clearRepo(repository.getUrl());

harvestSemanticAssets(repository, path);
storeRightsHolders(repository);

log.info("Repo {} processed", repository);
}

/**
 * Gathers the rights holders recorded in the current harvest execution context and
 * passes them to the repository service as a map from identifier to name.
 * When several entries share an identifier, the name of the first one is used.
 * If there is no context, or it exposes no list, an empty map is passed.
 *
 * @param repository the repository the rights holders are stored for
 */
private void storeRightsHolders(Repository repository) {
    HarvestExecutionContext context = HarvestExecutionContextUtils.getContext();
    List<RightsHolder> harvested = context != null ? context.getRightsHolders() : null;
    if (harvested == null) {
        harvested = Collections.emptyList();
    }

    // Bucket the entries by identifier so that duplicates collapse into a single key.
    Map<String, List<RightsHolder>> holdersById = harvested.stream()
            .collect(groupingBy(RightsHolder::getIdentifier, toList()));

    // Keep the name of the first entry of every bucket.
    Map<String, Map<String, String>> namesById = holdersById.entrySet().stream()
            .collect(toMap(Map.Entry::getKey, bucket -> bucket.getValue().get(0).getName()));

    repositoryService.storeRightsHolders(repository, namesById);
}

private void clearRepo(String repoUrl) {
cleanUpWithHarvesters(repoUrl);
cleanUpTripleStore(repoUrl);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import it.gov.innovazione.ndc.harvester.model.extractors.NodeExtractor;
import it.gov.innovazione.ndc.harvester.model.index.Distribution;
import it.gov.innovazione.ndc.harvester.model.index.NodeSummary;
import it.gov.innovazione.ndc.harvester.model.index.RightsHolder;
import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata;
import it.gov.innovazione.ndc.model.profiles.Admsapit;
import lombok.Getter;
Expand Down Expand Up @@ -36,6 +37,7 @@
import static it.gov.innovazione.ndc.harvester.model.extractors.NodeExtractor.requireNodes;
import static it.gov.innovazione.ndc.harvester.model.extractors.NodeSummaryExtractor.extractRequiredNodeSummary;
import static it.gov.innovazione.ndc.harvester.model.extractors.NodeSummaryExtractor.maybeNodeSummaries;
import static it.gov.innovazione.ndc.harvester.model.extractors.RightsHolderExtractor.getAgencyId;
import static java.lang.String.format;
import static java.util.Collections.emptyList;
import static org.apache.jena.rdf.model.ResourceFactory.createResource;
Expand Down Expand Up @@ -200,6 +202,7 @@ protected static List<Distribution> extractDistributionsFilteredByFormat(

public SemanticAssetMetadata extractMetadata() {
Resource mainResource = getMainResource();
RightsHolder agencyId = getAgencyId(mainResource, validationContext);
return SemanticAssetMetadata.builder()
.iri(mainResource.getURI())
.repoUrl(repoUrl)
Expand All @@ -221,6 +224,7 @@ public SemanticAssetMetadata extractMetadata() {
.conformsTo(maybeNodeSummaries(mainResource, conformsTo, FOAF.name))
.distributions(getDistributions())
.status(LiteralExtractor.extractAll(mainResource, Admsapit.status))
.agencyId(agencyId.getIdentifier())
.build();
}

Expand All @@ -243,6 +247,7 @@ public SemanticAssetModelValidationContext validateMetadata() {
.add(v -> extractOptional(getMainResource(), temporal, v.withWarningValidationType().withFieldName(SemanticAssetMetadata.Fields.temporal)))
.add(v -> maybeNodeSummaries(getMainResource(), conformsTo, FOAF.name, v.withWarningValidationType().withFieldName(SemanticAssetMetadata.Fields.conformsTo)))
.add(v -> getDistributions(v.withFieldName(SemanticAssetMetadata.Fields.distributions)))
.add(v -> getAgencyId(getMainResource(), v.withFieldName(SemanticAssetMetadata.Fields.agencyId)))
.build()
.stream()
.map(consumer -> returningValidationContext(this.validationContext, consumer))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

import com.github.jsonldjava.shaded.com.google.common.collect.ImmutableList;
import it.gov.innovazione.ndc.harvester.model.exception.InvalidModelException;
import it.gov.innovazione.ndc.harvester.model.extractors.RightsHolderExtractor;
import it.gov.innovazione.ndc.harvester.model.index.Distribution;
import it.gov.innovazione.ndc.harvester.model.index.RightsHolder;
import it.gov.innovazione.ndc.harvester.model.index.SemanticAssetMetadata;
import it.gov.innovazione.ndc.model.profiles.NDC;
import lombok.extern.slf4j.Slf4j;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.RDF;

import java.util.List;
Expand Down Expand Up @@ -90,29 +91,8 @@ private static void validateKeyConcept(String keyConcept, Resource mainResource,
}
}

public String getAgencyId() {
return getAgencyId(getMainResource(), NO_VALIDATION);
}

public static String getAgencyId(Resource mainResource, SemanticAssetModelValidationContext validationContext) {
Statement rightsHolder;
try {
rightsHolder = mainResource.getRequiredProperty(DCTerms.rightsHolder);
} catch (Exception e) {
InvalidModelException invalidModelException = new InvalidModelException(format("Cannot find required rightsHolder property (%s)", DCTerms.rightsHolder));
validationContext.addValidationException(invalidModelException);
throw invalidModelException;
}
Statement idProperty;
try {
idProperty = rightsHolder.getProperty(DCTerms.identifier);
} catch (Exception e) {
String rightsHolderIri = rightsHolder.getObject().toString();
InvalidModelException invalidModelException = new InvalidModelException(format("Cannot find required id (%s) for rightsHolder '%s'", DCTerms.identifier, rightsHolderIri));
validationContext.addValidationException(invalidModelException);
throw invalidModelException;
}
return idProperty.getString();
/**
 * Extracts the rights holder of this model's main resource by delegating to
 * {@link RightsHolderExtractor}, passing the NO_VALIDATION context.
 *
 * @return the rights holder produced by the extractor
 */
public RightsHolder getAgencyId() {
    Resource mainResource = getMainResource();
    return RightsHolderExtractor.getAgencyId(mainResource, NO_VALIDATION);
}

public void addNdcDataServiceProperties(String baseUrl) {
Expand All @@ -125,15 +105,15 @@ public void addNdcDataServiceProperties(String baseUrl) {
}

/**
 * Builds the URI of the data-service individual from the rights holder identifier
 * and the key concept of this model.
 *
 * @return the formatted data-service individual URI
 */
private String buildDataServiceIndividualUri() {
    // getAgencyId() returns a RightsHolder, so the identifier must be formatted
    // explicitly rather than the object itself.
    return format("https://w3id.org/italia/data/data-service/%s-%s", getAgencyId().getIdentifier(), getKeyConcept());
}

/**
 * Returns the endpoint URL currently held by this model.
 *
 * @return the value of the {@code endpointUrl} field
 */
public String getEndpointUrl() {
    return this.endpointUrl;
}

/**
 * Builds the endpoint URL by filling NDC_ENDPOINT_URL_TEMPLATE with the base URL,
 * the rights holder identifier and the key concept.
 *
 * @param baseUrl the base URL inserted as the first template argument
 * @return the formatted endpoint URL
 */
private String buildEndpointUrl(String baseUrl) {
    // getAgencyId() returns a RightsHolder, so the identifier must be formatted
    // explicitly rather than the object itself.
    return format(NDC_ENDPOINT_URL_TEMPLATE, baseUrl, getAgencyId().getIdentifier(), getKeyConcept());
}

@Override
Expand All @@ -147,7 +127,6 @@ public SemanticAssetMetadata extractMetadata() {
.type(CONTROLLED_VOCABULARY)
.distributions(getDistributions())
.keyConcept(getKeyConcept())
.agencyId(getAgencyId())
.endpointUrl(getEndpointUrl())
.build();
}
Expand All @@ -159,7 +138,7 @@ public SemanticAssetModelValidationContext validateMetadata() {
SemanticAssetModelValidationContext context = new ImmutableList.Builder<Consumer<SemanticAssetModelValidationContext>>()
.add(v -> getDistributions(v.withFieldName(SemanticAssetMetadata.Fields.distributions)))
.add(v -> getKeyConcept(getMainResource(), v.withWarningValidationType().withFieldName(SemanticAssetMetadata.Fields.keyConcept)))
.add(v -> getAgencyId(getMainResource(), v.withWarningValidationType().withFieldName(SemanticAssetMetadata.Fields.agencyId)))
.add(v -> RightsHolderExtractor.getAgencyId(getMainResource(), v.withWarningValidationType().withFieldName(SemanticAssetMetadata.Fields.agencyId)))
.build()
.stream()
.map(consumer -> returningValidationContext(this.validationContext, consumer))
Expand Down
Loading

0 comments on commit da3ec4d

Please sign in to comment.