From 3e758d3922b3df7d5a9db11229e09d163da778fd Mon Sep 17 00:00:00 2001 From: Giuseppe Nespolino Date: Thu, 8 Feb 2024 16:09:29 +0100 Subject: [PATCH] requisito file scanner --- config/spotbugs/exclude-filter.xml | 5 +- .../ndc/config/HarvestExecutionContext.java | 21 ++++ .../config/HarvestExecutionContextUtils.java | 17 +++ .../SimpleHarvestRepositoryProcessor.java | 9 ++ .../ndc/controller/RepositoryController.java | 2 + .../event/HarvestedFileTooBigEvent.java | 52 +++++++++ .../handler/HarvesterRunUpdatingHandler.java | 2 - .../ndc/harvester/HarvesterService.java | 28 +++-- .../ndc/harvester/SemanticAssetHarvester.java | 4 +- .../BaseSemanticAssetHarvester.java | 65 ++++++++++- .../ControlledVocabularyHarvester.java | 7 +- .../harvesters/OntologyHarvester.java | 7 +- .../harvester/harvesters/SchemaHarvester.java | 7 +- .../ndc/harvester/model/CvPath.java | 12 ++ .../harvester/model/SemanticAssetPath.java | 7 ++ .../harvester/service/RepositoryService.java | 24 ++-- .../ndc/model/harvester/Repository.java | 4 +- .../ndc/repository/TripleStoreRepository.java | 5 +- .../resources/application-local.properties | 2 +- .../db/migration/V3__max_file_size.sql | 2 + .../ndc/harvester/HarvesterServiceTest.java | 20 +++- .../BaseSemanticAssetHarvesterTest.java | 107 ++++++++++++++---- .../ControlledVocabularyHarvesterTest.java | 25 +++- .../harvesters/OntologyHarvesterTest.java | 5 +- .../harvesters/SchemaHarvesterTest.java | 7 +- 25 files changed, 375 insertions(+), 71 deletions(-) create mode 100644 src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContext.java create mode 100644 src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContextUtils.java create mode 100644 src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvestedFileTooBigEvent.java create mode 100644 src/main/resources/db/migration/V3__max_file_size.sql diff --git a/config/spotbugs/exclude-filter.xml b/config/spotbugs/exclude-filter.xml index 10c48be2..b59fd063 100644 --- a/config/spotbugs/exclude-filter.xml +++ b/config/spotbugs/exclude-filter.xml @@ -11,4 +11,7 @@ - \ No newline at end of file + + + + diff --git a/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContext.java b/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContext.java new file mode 100644 index 00000000..d61e83bb --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContext.java @@ -0,0 +1,21 @@ +package it.gov.innovazione.ndc.config; + +import it.gov.innovazione.ndc.model.harvester.Repository; +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Data; +import lombok.RequiredArgsConstructor; +import lombok.With; + +@With +@Data +@Builder +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +public class HarvestExecutionContext { + private final Repository repository; + private final String revision; + private final String correlationId; + private final String runId; + private final String currentUserId; + private final String rootPath; +} diff --git a/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContextUtils.java b/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContextUtils.java new file mode 100644 index 00000000..4b065397 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/config/HarvestExecutionContextUtils.java @@ -0,0 +1,17 @@ +package it.gov.innovazione.ndc.config; + +import lombok.NoArgsConstructor; + +@NoArgsConstructor(access = lombok.AccessLevel.PRIVATE) +public class HarvestExecutionContextUtils { + + private static final ThreadLocal CONTEXT_HOLDER = new ThreadLocal<>(); + + public static HarvestExecutionContext getContext() { + return CONTEXT_HOLDER.get(); + } + + public static void setContext(HarvestExecutionContext context) { + CONTEXT_HOLDER.set(context); + } +} diff --git a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java index fbe68084..7823faa8 100644 --- a/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java +++ b/src/main/java/it/gov/innovazione/ndc/config/SimpleHarvestRepositoryProcessor.java @@ -83,6 +83,15 @@ public void execute(String runId, Repository repository, String correlationId, S verifySameRunWasNotExecuted(repository, revision); } + HarvestExecutionContextUtils.setContext( + HarvestExecutionContext.builder() + .repository(repository) + .revision(revision) + .correlationId(correlationId) + .runId(runId) + .currentUserId(currentUserLogin) + .build()); + harvesterService.harvest(repository, revision); publishHarvesterSuccessfulEvent(repository, correlationId, revision, runId, currentUserLogin); diff --git a/src/main/java/it/gov/innovazione/ndc/controller/RepositoryController.java b/src/main/java/it/gov/innovazione/ndc/controller/RepositoryController.java index 77dd0143..9ce315fe 100644 --- a/src/main/java/it/gov/innovazione/ndc/controller/RepositoryController.java +++ b/src/main/java/it/gov/innovazione/ndc/controller/RepositoryController.java @@ -58,6 +58,7 @@ public void createRepository( repository.getUrl(), repository.getName(), repository.getDescription(), + repository.getMaxFileSizeBytes(), principal); } @@ -104,5 +105,6 @@ public static class CreateRepository { private String url; private String name; private String description; + private Long maxFileSizeBytes; } } diff --git a/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvestedFileTooBigEvent.java b/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvestedFileTooBigEvent.java new file mode 100644 index 00000000..9e24d2c4 --- /dev/null +++ b/src/main/java/it/gov/innovazione/ndc/eventhandler/event/HarvestedFileTooBigEvent.java @@ -0,0 +1,52 @@ +package it.gov.innovazione.ndc.eventhandler.event; + +import it.gov.innovazione.ndc.model.harvester.Repository; +import lombok.AccessLevel; +import lombok.Builder; +import lombok.Data; +import lombok.RequiredArgsConstructor; + +import java.io.File; +import java.util.List; +import java.util.stream.Collectors; + +@Builder +@Data +public class HarvestedFileTooBigEvent { + private final String runId; + private final Repository repository; + private final String revision; + private final ViolatingSemanticAsset violatingSemanticAsset; + private final String relativePathInRepo; + + @Data + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) + public static class ViolatingSemanticAsset { + private final String pathInRepo; + private final List fileDetails; + + public static ViolatingSemanticAsset fromPath( + String pathInRepo, + List files, + long maxFileSizeBytes) { + return new ViolatingSemanticAsset( + pathInRepo, + files.stream() + .map(file -> new FileDetail( + file.getPath(), + file.length(), + file.length() > maxFileSizeBytes)) + .collect(Collectors.toList())); + } + + } + + + @Data + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) + public static class FileDetail { + private final String path; + private final long size; + private final boolean violatesMaxSize; + } +} diff --git a/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterRunUpdatingHandler.java b/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterRunUpdatingHandler.java index 768c30a0..0ba6e6d1 100644 --- a/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterRunUpdatingHandler.java +++ b/src/main/java/it/gov/innovazione/ndc/eventhandler/handler/HarvesterRunUpdatingHandler.java @@ -5,7 +5,6 @@ import it.gov.innovazione.ndc.eventhandler.event.HarvesterFinishedEvent; import it.gov.innovazione.ndc.eventhandler.event.HarvesterStartedEvent; import it.gov.innovazione.ndc.harvester.service.HarvesterRunService; -import it.gov.innovazione.ndc.harvester.service.RepositoryService; import it.gov.innovazione.ndc.model.harvester.HarvesterRun; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -20,7 +19,6 @@ @Slf4j public class HarvesterRunUpdatingHandler implements NdcEventHandler { - private final RepositoryService repositoryService; private final HarvesterRunService harvesterRunService; private static final Collection> SUPPORTED_EVENTS = Arrays.asList( diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java index fde9f2e9..8c6d6252 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java @@ -1,5 +1,7 @@ package it.gov.innovazione.ndc.harvester; +import it.gov.innovazione.ndc.config.HarvestExecutionContext; +import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils; import it.gov.innovazione.ndc.model.harvester.Repository; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; @@ -10,6 +12,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.List; +import java.util.Objects; @Slf4j @Component @@ -26,13 +29,15 @@ public void harvest(Repository repository) throws IOException { public void harvest(Repository repository, String revision) throws IOException { log.info("Processing repo {}", repository.getUrl()); - String repoUrl = normaliseRepoUrl(repository.getUrl()); + Repository normalisedRepo = repository.withUrl(normaliseRepoUrl(repository.getUrl())); + String repoUrl = normalisedRepo.getUrl(); log.debug("Normalised repo url {}", repoUrl); try { Path path = cloneRepoToTempPath(repoUrl, revision); try { - harvestClonedRepo(repoUrl, path); + updateContextWithRootPath(path); + harvestClonedRepo(normalisedRepo, path); } finally { agencyRepositoryService.removeClonedRepo(path); } @@ -43,6 +48,13 @@ public void harvest(Repository repository, String revision) throws IOException { } } + private static void updateContextWithRootPath(Path path) { + HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + if (Objects.nonNull(context)) { + HarvestExecutionContextUtils.setContext(context.withRootPath(path.toString())); + } + } + public void clear(String repoUrl) { log.info("Clearing repo {}", repoUrl); repoUrl = normaliseRepoUrl(repoUrl); @@ -60,12 +72,12 @@ private String normaliseRepoUrl(String repoUrl) { return repoUrl.replace(".git", ""); } - private void harvestClonedRepo(String repoUrl, Path path) { - clearRepo(repoUrl); + private void harvestClonedRepo(Repository repository, Path path) { + clearRepo(repository.getUrl()); - harvestSemanticAssets(repoUrl, path); + harvestSemanticAssets(repository, path); - log.info("Repo {} processed", repoUrl); + log.info("Repo {} processed", repository); } private void clearRepo(String repoUrl) { @@ -83,10 +95,10 @@ private void cleanUpWithHarvesters(String repoUrl) { }); } - private void harvestSemanticAssets(String repoUrl, Path path) { + private void harvestSemanticAssets(Repository repository, Path path) { semanticAssetHarvesters.forEach(h -> { log.debug("Harvesting {} for {} assets", path, h.getType()); - h.harvest(repoUrl, path); + h.harvest(repository, path); log.debug("Harvested {} for {} assets", path, h.getType()); }); diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java index 75b0f58e..19901815 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java @@ -1,5 +1,7 @@ package it.gov.innovazione.ndc.harvester; +import it.gov.innovazione.ndc.model.harvester.Repository; + import java.nio.file.Path; public interface SemanticAssetHarvester { @@ -7,5 +9,5 @@ public interface SemanticAssetHarvester { void cleanUpBeforeHarvesting(String repoUrl); - void harvest(String repoUrl, Path rootPath); + void harvest(Repository repository, Path rootPath); } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java index 068991b4..680ec467 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java @@ -1,12 +1,19 @@ package it.gov.innovazione.ndc.harvester.harvesters; -import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; +import it.gov.innovazione.ndc.config.HarvestExecutionContext; +import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils; +import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; +import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent; +import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent.ViolatingSemanticAsset; import it.gov.innovazione.ndc.harvester.SemanticAssetHarvester; import it.gov.innovazione.ndc.harvester.SemanticAssetType; import it.gov.innovazione.ndc.harvester.exception.SinglePathProcessingException; +import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; +import it.gov.innovazione.ndc.model.harvester.Repository; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import java.io.File; import java.nio.file.Path; import java.util.List; @@ -14,6 +21,7 @@ @RequiredArgsConstructor public abstract class BaseSemanticAssetHarvester

implements SemanticAssetHarvester { private final SemanticAssetType type; + private final NdcEventPublisher eventPublisher; @Override public SemanticAssetType getType() { @@ -21,23 +29,72 @@ public SemanticAssetType getType() { } @Override - public void harvest(String repoUrl, Path rootPath) { + public void harvest(Repository repository, Path rootPath) { log.debug("Looking for {} paths", type); List

paths = scanForPaths(rootPath); + + paths.forEach(this::notifyIfSizeExceed); + log.debug("Found {} {} path(s) for processing", paths.size(), type); for (P path : paths) { try { - processPath(repoUrl, path); + processPath(repository.getUrl(), path); log.debug("Path {} processed correctly for {}", path, type); } catch (SinglePathProcessingException e) { - log.error("Error processing {} {} in repo {}", type, path, repoUrl, e); + log.error("Error processing {} {} in repo {}", type, path, repository.getUrl(), e); + } + } + } + + private void notifyIfSizeExceed(P path) { + HarvestExecutionContext context = HarvestExecutionContextUtils.getContext(); + if (context != null) { + List files = path.getAllFiles(); + if (isBiggerThan(context.getRepository().getMaxFileSizeBytes(), files)) { + notify(context, path); } } } + private void notify(HarvestExecutionContext context, P path) { + eventPublisher.publishEvent( + "harvester", + "harvester.max-file-size-exceeded", + context.getCorrelationId(), + context.getCurrentUserId(), + HarvestedFileTooBigEvent.builder() + .runId(context.getRunId()) + .repository(context.getRepository()) + .revision(context.getRevision()) + .violatingSemanticAsset( + ViolatingSemanticAsset.fromPath( + relativize( + path.getTtlPath(), + context), + path.getAllFiles(), + context.getRepository().getMaxFileSizeBytes())) + .build()); + log.warn("File {} is bigger than maxFileSizeBytes", path.getTtlPath()); + } + + private String relativize(String ttlFile, HarvestExecutionContext context) { + return getFolderNameFromFile(ttlFile).replace( + context.getRootPath(), ""); + } + + private String getFolderNameFromFile(String ttlFile) { + return new File(ttlFile).getParent(); + } + + private boolean isBiggerThan(Long maxFileSizeBytes, List files) { + return maxFileSizeBytes != null + && maxFileSizeBytes > 0 + && files.stream().anyMatch(file -> file.length() > maxFileSizeBytes); + } + @Override public void cleanUpBeforeHarvesting(String repoUrl) { // by default nothing specific diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java index 925e059c..15ca91e5 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java @@ -1,9 +1,10 @@ package it.gov.innovazione.ndc.harvester.harvesters; +import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; import it.gov.innovazione.ndc.harvester.AgencyRepositoryService; +import it.gov.innovazione.ndc.harvester.SemanticAssetType; import it.gov.innovazione.ndc.harvester.model.CvPath; import it.gov.innovazione.ndc.harvester.pathprocessors.ControlledVocabularyPathProcessor; -import it.gov.innovazione.ndc.harvester.SemanticAssetType; import org.springframework.stereotype.Component; import java.nio.file.Path; @@ -14,8 +15,8 @@ public class ControlledVocabularyHarvester extends BaseSemanticAssetHarvester getAllFiles() { + if (Objects.nonNull(csvPath)) { + return List.of( + new File(getTtlPath()), new File(getCsvPath().get())); + } + return super.getAllFiles(); + } + @Override public String toString() { return "CvPath{ttlPath='" + ttlPath + "', csvPath='" + csvPath + "'}"; diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java index 99cd07f5..71bfffb0 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java @@ -3,6 +3,9 @@ import lombok.EqualsAndHashCode; import lombok.Getter; +import java.io.File; +import java.util.List; + @Getter @EqualsAndHashCode public class SemanticAssetPath { @@ -20,4 +23,8 @@ public static SemanticAssetPath of(String ttlPath) { public String toString() { return "SemanticAssetPath{ttlPath='" + ttlPath + "'}"; } + + public List getAllFiles() { + return List.of(new File(ttlPath)); + } } diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java index 7ee501f4..59daf66f 100644 --- a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java +++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java @@ -34,7 +34,8 @@ public class RepositoryService { + "CREATED, " + "CREATED_BY, " + "UPDATED, " - + "UPDATED_BY " + + "UPDATED_BY, " + + "MAX_FILE_SIZE_BYTES " + "FROM REPOSITORY"; private final JdbcTemplate jdbcTemplate; @@ -57,6 +58,7 @@ public List getAllRepos() { .createdBy(rs.getString("CREATED_BY")) .updatedAt(rs.getTimestamp("UPDATED").toInstant()) .updatedBy(rs.getString("UPDATED_BY")) + .maxFileSizeBytes(rs.getLong("MAX_FILE_SIZE_BYTES")) .build()); if (!allRepos.isEmpty()) { @@ -96,7 +98,8 @@ private void save(Repository repo) { + "CREATED, " + "CREATED_BY, " + "UPDATED, " - + "UPDATED_BY) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + + "UPDATED_BY, " + + "MAX_FILE_SIZE_BYTES) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; jdbcTemplate.update(query, repo.getId(), repo.getUrl(), @@ -107,7 +110,8 @@ private void save(Repository repo) { repo.getCreatedAt(), repo.getCreatedBy(), repo.getUpdatedAt(), - repo.getUpdatedBy()); + repo.getUpdatedBy(), + repo.getMaxFileSizeBytes()); } public Optional findRepoById(String id) { @@ -118,7 +122,7 @@ public Optional findRepoById(String id) { } @SneakyThrows - public void createRepo(String url, String name, String description, Principal principal) { + public void createRepo(String url, String name, String description, Long maxFileSizeBytes, Principal principal) { boolean isDuplicate = getAllRepos().stream() .anyMatch(repo -> startsWithIgnoreCase( @@ -142,7 +146,9 @@ public void createRepo(String url, String name, String description, Principal pr + "CREATED, " + "CREATED_BY, " + "UPDATED, " - + "UPDATED_BY) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + + "UPDATED_BY," + + "MAX_FILE_SIZE_BYTES) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"; + jdbcTemplate.update(query, RepositoryUtils.generateId(), url, @@ -153,7 +159,8 @@ public void createRepo(String url, String name, String description, Principal pr java.sql.Timestamp.from(java.time.Instant.now()), principal.getName(), java.sql.Timestamp.from(java.time.Instant.now()), - principal.getName()); + principal.getName(), + maxFileSizeBytes); } public int updateRepo(String id, RepositoryController.CreateRepository loadedRepo, Principal principal) { @@ -162,7 +169,8 @@ public int updateRepo(String id, RepositoryController.CreateRepository loadedRep + "NAME = ?, " + "DESCRIPTION = ?, " + "UPDATED = ?, " - + "UPDATED_BY = ? " + + "UPDATED_BY = ?, " + + "MAX_FILE_SIZE_BYTES = ? " + "WHERE ID = ?"; return jdbcTemplate.update(query, loadedRepo.getUrl(), @@ -170,6 +178,7 @@ public int updateRepo(String id, RepositoryController.CreateRepository loadedRep loadedRepo.getDescription(), java.sql.Timestamp.from(java.time.Instant.now()), principal.getName(), + loadedRepo.getMaxFileSizeBytes(), id); } @@ -185,5 +194,4 @@ public int delete(String id, Principal principal) { principal.getName(), id); } - } diff --git a/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java b/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java index 11bbd385..2041756b 100644 --- a/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java +++ b/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java @@ -3,6 +3,7 @@ import lombok.Builder; import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.With; import java.time.Instant; @@ -11,6 +12,7 @@ @EqualsAndHashCode(exclude = {"id", "createdAt", "updatedAt"}) public class Repository { private String id; + @With private String url; private String name; private String description; @@ -20,5 +22,5 @@ public class Repository { private String createdBy; private Instant updatedAt; private String updatedBy; - private Long maxSizeBytes; + private Long maxFileSizeBytes; } diff --git a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java index 5557a415..445643e7 100644 --- a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java +++ b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java @@ -47,7 +47,10 @@ private void saveWithConnection(String graphName, Model model, RDFConnection con public void clearExistingNamedGraph(String repoUrl) { try { String sparqlEndpoint = virtuosoClient.getSparqlEndpoint(); - UpdateExecution.service(sparqlEndpoint).updateString(getUpdateCommand(repoUrl)).execute(); + UpdateExecution + .service(sparqlEndpoint) + .updateString(getUpdateCommand(repoUrl)) + .execute(); } catch (Exception e) { log.error(format("Could not clear existing named graph! - %s", repoUrl), e); if (e instanceof HttpException) { diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties index 173e389f..91080b64 100644 --- a/src/main/resources/application-local.properties +++ b/src/main/resources/application-local.properties @@ -1,5 +1,5 @@ spring.elasticsearch.uris=http://localhost:9200 -harvester.repositories=https://github.com/teamdigitale/openapi,https://github.com/italia/daf-ontologie-vocabolari-controllati,https://github.com/InailUfficio5/inail-ndc +harvester.repositories=https://github.com/istat/ts-ontologie-vocabolari-controllati virtuoso.sparql=http://localhost:8890/sparql-auth virtuoso.sparql-graph-store=http://localhost:8890/sparql-graph-crud-auth diff --git a/src/main/resources/db/migration/V3__max_file_size.sql b/src/main/resources/db/migration/V3__max_file_size.sql new file mode 100644 index 00000000..75cdaca2 --- /dev/null +++ b/src/main/resources/db/migration/V3__max_file_size.sql @@ -0,0 +1,2 @@ +ALTER TABLE REPOSITORY + add MAX_FILE_SIZE_BYTES LONG null; diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java index 08d78cc7..1dda94c6 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java @@ -1,10 +1,12 @@ package it.gov.innovazione.ndc.harvester; +import it.gov.innovazione.ndc.model.harvester.Repository; import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository; import it.gov.innovazione.ndc.repository.TripleStoreRepository; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.ArgumentCaptor; import org.mockito.InOrder; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; @@ -15,6 +17,8 @@ import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.verify; @@ -53,27 +57,32 @@ void shouldHarvestAssets() throws IOException { harvesterService.harvest(asRepo(repoUrl)); verify(agencyRepoService).cloneRepo("someRepoUri", null); - verify(harvester).harvest(sanitizedRepoUrl, clonedRepoPath); + + ArgumentCaptor repoCaptor = ArgumentCaptor.forClass(Repository.class); + verify(harvester).harvest(repoCaptor.capture(), any()); + assertEquals(sanitizedRepoUrl, repoCaptor.getValue().getUrl()); } @Test void shouldGiveUpOnRepoWhenGenericExceptionIsThrown() throws IOException { String repoUrl = "someRepoUri"; + Repository repo = asRepo(repoUrl); when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath); doThrow(new RuntimeException("Something else went wrong")).when( - harvester).harvest(repoUrl, clonedRepoPath); + harvester).harvest(repo, clonedRepoPath); assertThatThrownBy(() -> harvesterService.harvest(asRepo(repoUrl))) .isInstanceOf(RuntimeException.class) .hasMessage("Something else went wrong"); - verify(harvester).harvest(repoUrl, clonedRepoPath); + verify(harvester).harvest(repo, clonedRepoPath); } @Test void shouldClearNamedGraphAndMetadataBeforeProcessingData() throws IOException { String repoUrl = "someRepoUri"; + Repository repo = asRepo(repoUrl); when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath); @@ -83,7 +92,7 @@ void shouldClearNamedGraphAndMetadataBeforeProcessingData() throws IOException { order.verify(harvester).cleanUpBeforeHarvesting(repoUrl); order.verify(tripleStoreRepository).clearExistingNamedGraph(repoUrl); order.verify(metadataRepository).deleteByRepoUrl(repoUrl); - order.verify(harvester).harvest(repoUrl, clonedRepoPath); + order.verify(harvester).harvest(repo, clonedRepoPath); } @Test @@ -100,10 +109,11 @@ void shouldCleanUpTemporaryFolderWithRepoAfterProcessing() throws IOException { @Test void shouldCleanUpTemporaryFolderWithRepoAfterFailure() throws IOException { String repoUrl = "someRepoUri"; + Repository repo = asRepo(repoUrl); when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath); doThrow(new RuntimeException("network disaster")).when(harvester) - .harvest(repoUrl, clonedRepoPath); + .harvest(repo, clonedRepoPath); assertThatThrownBy(() -> harvesterService.harvest(asRepo(repoUrl))) .hasMessage("network disaster"); diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java index 7f248044..8af34f1c 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java @@ -1,42 +1,37 @@ package it.gov.innovazione.ndc.harvester.harvesters; +import it.gov.innovazione.ndc.config.HarvestExecutionContext; +import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils; +import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher; +import it.gov.innovazione.ndc.harvester.SemanticAssetType; import it.gov.innovazione.ndc.harvester.exception.InvalidAssetException; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; -import it.gov.innovazione.ndc.harvester.SemanticAssetType; +import it.gov.innovazione.ndc.model.harvester.Repository; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; +import org.mockito.MockedStatic; import org.mockito.junit.jupiter.MockitoExtension; +import java.io.File; import java.nio.file.Path; import java.util.List; import java.util.function.BiConsumer; +import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) class BaseSemanticAssetHarvesterTest { - private class TestHarvester extends BaseSemanticAssetHarvester { - public TestHarvester() { - super(SemanticAssetType.ONTOLOGY); - } - - @Override - protected void processPath(String repoUrl, SemanticAssetPath path) { - processor.accept(repoUrl, path); - } - - @Override - protected List scanForPaths(Path rootPath) { - return paths; - } - } - - private TestHarvester harvester = new TestHarvester(); - private List paths; @Mock - private BiConsumer processor; + private NdcEventPublisher eventPublisher; @Test void shouldMoveOnToNextOntologyIfProcessingOneFails() { @@ -44,14 +39,80 @@ void shouldMoveOnToNextOntologyIfProcessingOneFails() { Path basePath = Path.of("ontologyRoot"); SemanticAssetPath path1 = SemanticAssetPath.of("test1.ttl"); SemanticAssetPath path2 = SemanticAssetPath.of("test2.ttl"); - paths = List.of(path1, path2); + List paths = List.of(path1, path2); + + TestHarvester harvester = new TestHarvester(paths); doThrow(new InvalidAssetException("Something went wrong")).when(processor) .accept(repoUrl, path1); - harvester.harvest(repoUrl, basePath); + harvester.harvest(asRepo(repoUrl), basePath); verify(processor).accept(repoUrl, path1); verify(processor).accept(repoUrl, path2); } -} \ No newline at end of file + + @Mock + private BiConsumer processor; + + @Test + void shouldNotifyIfSizeExceed() { + String repoUrl = "someRepoUri"; + Path basePath = Path.of("ontologyRoot"); + SemanticAssetPath path = mock(SemanticAssetPath.class); + + File file = mock(File.class); + when(path.getAllFiles()).thenReturn(List.of(file)); + when(path.getTtlPath()).thenReturn("someRootPath/someFile.ttl"); + when(file.length()).thenReturn(2L); + + Repository repository = asRepo(repoUrl).toBuilder() + .maxFileSizeBytes(1L) + .build(); + + doNothing().when(processor).accept(repoUrl, path); + + try (MockedStatic contextUtils = mockStatic(HarvestExecutionContextUtils.class)) { + contextUtils.when(HarvestExecutionContextUtils::getContext) + .thenReturn( + HarvestExecutionContext.builder() + .repository(repository) + .revision("someRevision") + .correlationId("someCorrelationId") + .runId("someRunId") + .currentUserId("someUserId") + .rootPath("someRootPath") + .build()); + + TestHarvester harvester = new TestHarvester(List.of(path)); + + harvester.harvest(repository, basePath); + verify(eventPublisher).publishEvent( + anyString(), + anyString(), + anyString(), + anyString(), + any()); + } + } + + private class TestHarvester extends BaseSemanticAssetHarvester { + + private final List paths; + + public TestHarvester(List paths) { + super(SemanticAssetType.ONTOLOGY, eventPublisher); + this.paths = paths; + } + + @Override + protected void processPath(String repoUrl, SemanticAssetPath path) { + processor.accept(repoUrl, path); + } + + @Override + protected List scanForPaths(Path rootPath) { + return paths; + } + } +} diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java index b9780bd0..37652556 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java @@ -9,9 +9,12 @@ import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import java.io.File; import java.nio.file.Path; import java.util.List; +import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -32,7 +35,7 @@ void shouldProcessAllScannedPaths() { final CvPath path2 = CvPath.of("onto2.ttl", "onto2.csv"); when(agencyRepositoryService.getControlledVocabularyPaths(cvBasePath)).thenReturn(List.of(path1, path2)); - harvester.harvest(repoUrl, cvBasePath); + harvester.harvest(asRepo(repoUrl), cvBasePath); verify(agencyRepositoryService).getControlledVocabularyPaths(cvBasePath); verify(pathProcessor).process(repoUrl, path1); @@ -47,4 +50,22 @@ void shouldCleanIndicesBeforeHarvesting() { verify(pathProcessor).dropCsvIndicesForRepo(repoUrl); } -} \ No newline at end of file + + @Test + void shouldReturnAllFiles() { + final CvPath path = CvPath.of("onto1.ttl", "onto1.csv"); + List allFiles = path.getAllFiles(); + + assertEquals("onto1.ttl", allFiles.get(0).getName()); + assertEquals("onto1.csv", allFiles.get(1).getName()); + } + + @Test + void shouldReturnTTLOnly() { + final CvPath path = CvPath.of("onto1.ttl", null); + List allFiles = path.getAllFiles(); + + assertEquals("onto1.ttl", allFiles.get(0).getName()); + assertEquals(1, allFiles.size()); + } +} diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java index 96d91491..2afd9525 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java @@ -12,6 +12,7 @@ import java.nio.file.Path; import java.util.List; +import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -32,10 +33,10 @@ void shouldProcessAllScannedPaths() { final SemanticAssetPath path2 = SemanticAssetPath.of("onto2.ttl"); when(agencyRepositoryService.getOntologyPaths(ontologyBasePath)).thenReturn(List.of(path1, path2)); - harvester.harvest(repoUrl, ontologyBasePath); + harvester.harvest(asRepo(repoUrl), ontologyBasePath); verify(agencyRepositoryService).getOntologyPaths(ontologyBasePath); verify(pathProcessor).process(repoUrl, path1); verify(pathProcessor).process(repoUrl, path2); } -} \ No newline at end of file +} diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java index 6b1ecf45..facc6bbd 100644 --- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java +++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java @@ -1,8 +1,8 @@ package it.gov.innovazione.ndc.harvester.harvesters; -import it.gov.innovazione.ndc.harvester.pathprocessors.SchemaPathProcessor; import it.gov.innovazione.ndc.harvester.AgencyRepositoryService; import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath; +import it.gov.innovazione.ndc.harvester.pathprocessors.SchemaPathProcessor; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.InjectMocks; @@ -12,6 +12,7 @@ import java.nio.file.Path; import java.util.List; +import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -32,11 +33,11 @@ void shouldProcessAllScannedPaths() { final SemanticAssetPath path2 = SemanticAssetPath.of("schema2.ttl"); when(agencyRepositoryService.getSchemaPaths(schemaBasePath)).thenReturn(List.of(path1, path2)); - harvester.harvest(repoUrl, schemaBasePath); + harvester.harvest(asRepo(repoUrl), schemaBasePath); verify(agencyRepositoryService).getSchemaPaths(schemaBasePath); verify(pathProcessor).process(repoUrl, path1); verify(pathProcessor).process(repoUrl, path2); } -} \ No newline at end of file +}