> SUPPORTED_EVENTS = Arrays.asList(
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java
index fde9f2e9..8c6d6252 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/HarvesterService.java
@@ -1,5 +1,7 @@
package it.gov.innovazione.ndc.harvester;
+import it.gov.innovazione.ndc.config.HarvestExecutionContext;
+import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
import it.gov.innovazione.ndc.model.harvester.Repository;
import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository;
import it.gov.innovazione.ndc.repository.TripleStoreRepository;
@@ -10,6 +12,7 @@
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
+import java.util.Objects;
@Slf4j
@Component
@@ -26,13 +29,15 @@ public void harvest(Repository repository) throws IOException {
public void harvest(Repository repository, String revision) throws IOException {
log.info("Processing repo {}", repository.getUrl());
- String repoUrl = normaliseRepoUrl(repository.getUrl());
+ Repository normalisedRepo = repository.withUrl(normaliseRepoUrl(repository.getUrl()));
+ String repoUrl = normalisedRepo.getUrl();
log.debug("Normalised repo url {}", repoUrl);
try {
Path path = cloneRepoToTempPath(repoUrl, revision);
try {
- harvestClonedRepo(repoUrl, path);
+ updateContextWithRootPath(path);
+ harvestClonedRepo(normalisedRepo, path);
} finally {
agencyRepositoryService.removeClonedRepo(path);
}
@@ -43,6 +48,13 @@ public void harvest(Repository repository, String revision) throws IOException {
}
}
+ private static void updateContextWithRootPath(Path path) {
+ HarvestExecutionContext context = HarvestExecutionContextUtils.getContext();
+ if (Objects.nonNull(context)) {
+ HarvestExecutionContextUtils.setContext(context.withRootPath(path.toString()));
+ }
+ }
+
public void clear(String repoUrl) {
log.info("Clearing repo {}", repoUrl);
repoUrl = normaliseRepoUrl(repoUrl);
@@ -60,12 +72,12 @@ private String normaliseRepoUrl(String repoUrl) {
return repoUrl.replace(".git", "");
}
- private void harvestClonedRepo(String repoUrl, Path path) {
- clearRepo(repoUrl);
+ private void harvestClonedRepo(Repository repository, Path path) {
+ clearRepo(repository.getUrl());
- harvestSemanticAssets(repoUrl, path);
+ harvestSemanticAssets(repository, path);
- log.info("Repo {} processed", repoUrl);
+ log.info("Repo {} processed", repository);
}
private void clearRepo(String repoUrl) {
@@ -83,10 +95,10 @@ private void cleanUpWithHarvesters(String repoUrl) {
});
}
- private void harvestSemanticAssets(String repoUrl, Path path) {
+ private void harvestSemanticAssets(Repository repository, Path path) {
semanticAssetHarvesters.forEach(h -> {
log.debug("Harvesting {} for {} assets", path, h.getType());
- h.harvest(repoUrl, path);
+ h.harvest(repository, path);
log.debug("Harvested {} for {} assets", path, h.getType());
});
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java
index 75b0f58e..19901815 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/SemanticAssetHarvester.java
@@ -1,5 +1,7 @@
package it.gov.innovazione.ndc.harvester;
+import it.gov.innovazione.ndc.model.harvester.Repository;
+
import java.nio.file.Path;
public interface SemanticAssetHarvester {
@@ -7,5 +9,5 @@ public interface SemanticAssetHarvester {
void cleanUpBeforeHarvesting(String repoUrl);
- void harvest(String repoUrl, Path rootPath);
+ void harvest(Repository repository, Path rootPath);
}
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java
index 068991b4..680ec467 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvester.java
@@ -1,12 +1,19 @@
package it.gov.innovazione.ndc.harvester.harvesters;
-import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
+import it.gov.innovazione.ndc.config.HarvestExecutionContext;
+import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
+import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
+import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent;
+import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent.ViolatingSemanticAsset;
import it.gov.innovazione.ndc.harvester.SemanticAssetHarvester;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.exception.SinglePathProcessingException;
+import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
+import it.gov.innovazione.ndc.model.harvester.Repository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
+import java.io.File;
import java.nio.file.Path;
import java.util.List;
@@ -14,6 +21,7 @@
@RequiredArgsConstructor
public abstract class BaseSemanticAssetHarvester implements SemanticAssetHarvester {
private final SemanticAssetType type;
+ private final NdcEventPublisher eventPublisher;
@Override
public SemanticAssetType getType() {
@@ -21,23 +29,72 @@ public SemanticAssetType getType() {
}
@Override
- public void harvest(String repoUrl, Path rootPath) {
+ public void harvest(Repository repository, Path rootPath) {
log.debug("Looking for {} paths", type);
List
paths = scanForPaths(rootPath);
+
+ paths.forEach(this::notifyIfSizeExceed);
+
log.debug("Found {} {} path(s) for processing", paths.size(), type);
for (P path : paths) {
try {
- processPath(repoUrl, path);
+ processPath(repository.getUrl(), path);
log.debug("Path {} processed correctly for {}", path, type);
} catch (SinglePathProcessingException e) {
- log.error("Error processing {} {} in repo {}", type, path, repoUrl, e);
+ log.error("Error processing {} {} in repo {}", type, path, repository.getUrl(), e);
+ }
+ }
+ }
+
+ private void notifyIfSizeExceed(P path) {
+ HarvestExecutionContext context = HarvestExecutionContextUtils.getContext();
+ if (context != null) {
+ List files = path.getAllFiles();
+ if (isBiggerThan(context.getRepository().getMaxFileSizeBytes(), files)) {
+ notify(context, path);
}
}
}
+ private void notify(HarvestExecutionContext context, P path) {
+ eventPublisher.publishEvent(
+ "harvester",
+ "harvester.max-file-size-exceeded",
+ context.getCorrelationId(),
+ context.getCurrentUserId(),
+ HarvestedFileTooBigEvent.builder()
+ .runId(context.getRunId())
+ .repository(context.getRepository())
+ .revision(context.getRevision())
+ .violatingSemanticAsset(
+ ViolatingSemanticAsset.fromPath(
+ relativize(
+ path.getTtlPath(),
+ context),
+ path.getAllFiles(),
+ context.getRepository().getMaxFileSizeBytes()))
+ .build());
+ log.warn("File {} is bigger than maxFileSizeBytes", path.getTtlPath());
+ }
+
+ private String relativize(String ttlFile, HarvestExecutionContext context) {
+ return getFolderNameFromFile(ttlFile).replace(
+ context.getRootPath(), "");
+ }
+
+ private String getFolderNameFromFile(String ttlFile) {
+ return new File(ttlFile).getParent();
+ }
+
+ private boolean isBiggerThan(Long maxFileSizeBytes, List files) {
+ return maxFileSizeBytes != null
+ && maxFileSizeBytes > 0
+ && files.stream().anyMatch(file -> file.length() > maxFileSizeBytes);
+ }
+
@Override
public void cleanUpBeforeHarvesting(String repoUrl) {
// by default nothing specific
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java
index 925e059c..15ca91e5 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvester.java
@@ -1,9 +1,10 @@
package it.gov.innovazione.ndc.harvester.harvesters;
+import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
import it.gov.innovazione.ndc.harvester.AgencyRepositoryService;
+import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.model.CvPath;
import it.gov.innovazione.ndc.harvester.pathprocessors.ControlledVocabularyPathProcessor;
-import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import org.springframework.stereotype.Component;
import java.nio.file.Path;
@@ -14,8 +15,8 @@ public class ControlledVocabularyHarvester extends BaseSemanticAssetHarvester getAllFiles() {
+ if (Objects.nonNull(csvPath)) {
+ return List.of(
+ new File(getTtlPath()), new File(getCsvPath().get()));
+ }
+ return super.getAllFiles();
+ }
+
@Override
public String toString() {
return "CvPath{ttlPath='" + ttlPath + "', csvPath='" + csvPath + "'}";
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java
index 99cd07f5..71bfffb0 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/model/SemanticAssetPath.java
@@ -3,6 +3,9 @@
import lombok.EqualsAndHashCode;
import lombok.Getter;
+import java.io.File;
+import java.util.List;
+
@Getter
@EqualsAndHashCode
public class SemanticAssetPath {
@@ -20,4 +23,8 @@ public static SemanticAssetPath of(String ttlPath) {
public String toString() {
return "SemanticAssetPath{ttlPath='" + ttlPath + "'}";
}
+
+ public List getAllFiles() {
+ return List.of(new File(ttlPath));
+ }
}
diff --git a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java
index 7ee501f4..59daf66f 100644
--- a/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java
+++ b/src/main/java/it/gov/innovazione/ndc/harvester/service/RepositoryService.java
@@ -34,7 +34,8 @@ public class RepositoryService {
+ "CREATED, "
+ "CREATED_BY, "
+ "UPDATED, "
- + "UPDATED_BY "
+ + "UPDATED_BY, "
+ + "MAX_FILE_SIZE_BYTES "
+ "FROM REPOSITORY";
private final JdbcTemplate jdbcTemplate;
@@ -57,6 +58,7 @@ public List getAllRepos() {
.createdBy(rs.getString("CREATED_BY"))
.updatedAt(rs.getTimestamp("UPDATED").toInstant())
.updatedBy(rs.getString("UPDATED_BY"))
+ .maxFileSizeBytes(rs.getLong("MAX_FILE_SIZE_BYTES"))
.build());
if (!allRepos.isEmpty()) {
@@ -96,7 +98,8 @@ private void save(Repository repo) {
+ "CREATED, "
+ "CREATED_BY, "
+ "UPDATED, "
- + "UPDATED_BY) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
+ + "UPDATED_BY, "
+ + "MAX_FILE_SIZE_BYTES) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
jdbcTemplate.update(query,
repo.getId(),
repo.getUrl(),
@@ -107,7 +110,8 @@ private void save(Repository repo) {
repo.getCreatedAt(),
repo.getCreatedBy(),
repo.getUpdatedAt(),
- repo.getUpdatedBy());
+ repo.getUpdatedBy(),
+ repo.getMaxFileSizeBytes());
}
public Optional findRepoById(String id) {
@@ -118,7 +122,7 @@ public Optional findRepoById(String id) {
}
@SneakyThrows
- public void createRepo(String url, String name, String description, Principal principal) {
+ public void createRepo(String url, String name, String description, Long maxFileSizeBytes, Principal principal) {
boolean isDuplicate = getAllRepos().stream()
.anyMatch(repo ->
startsWithIgnoreCase(
@@ -142,7 +146,9 @@ public void createRepo(String url, String name, String description, Principal pr
+ "CREATED, "
+ "CREATED_BY, "
+ "UPDATED, "
- + "UPDATED_BY) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
+ + "UPDATED_BY,"
+ + "MAX_FILE_SIZE_BYTES) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
+
jdbcTemplate.update(query,
RepositoryUtils.generateId(),
url,
@@ -153,7 +159,8 @@ public void createRepo(String url, String name, String description, Principal pr
java.sql.Timestamp.from(java.time.Instant.now()),
principal.getName(),
java.sql.Timestamp.from(java.time.Instant.now()),
- principal.getName());
+ principal.getName(),
+ maxFileSizeBytes);
}
public int updateRepo(String id, RepositoryController.CreateRepository loadedRepo, Principal principal) {
@@ -162,7 +169,8 @@ public int updateRepo(String id, RepositoryController.CreateRepository loadedRep
+ "NAME = ?, "
+ "DESCRIPTION = ?, "
+ "UPDATED = ?, "
- + "UPDATED_BY = ? "
+ + "UPDATED_BY = ?, "
+ + "MAX_FILE_SIZE_BYTES = ? "
+ "WHERE ID = ?";
return jdbcTemplate.update(query,
loadedRepo.getUrl(),
@@ -170,6 +178,7 @@ public int updateRepo(String id, RepositoryController.CreateRepository loadedRep
loadedRepo.getDescription(),
java.sql.Timestamp.from(java.time.Instant.now()),
principal.getName(),
+ loadedRepo.getMaxFileSizeBytes(),
id);
}
@@ -185,5 +194,4 @@ public int delete(String id, Principal principal) {
principal.getName(),
id);
}
-
}
diff --git a/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java b/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java
index 11bbd385..2041756b 100644
--- a/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java
+++ b/src/main/java/it/gov/innovazione/ndc/model/harvester/Repository.java
@@ -3,6 +3,7 @@
import lombok.Builder;
import lombok.Data;
import lombok.EqualsAndHashCode;
+import lombok.With;
import java.time.Instant;
@@ -11,6 +12,7 @@
@EqualsAndHashCode(exclude = {"id", "createdAt", "updatedAt"})
public class Repository {
private String id;
+ @With
private String url;
private String name;
private String description;
@@ -20,5 +22,5 @@ public class Repository {
private String createdBy;
private Instant updatedAt;
private String updatedBy;
- private Long maxSizeBytes;
+ private Long maxFileSizeBytes;
}
diff --git a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java
index 5557a415..445643e7 100644
--- a/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java
+++ b/src/main/java/it/gov/innovazione/ndc/repository/TripleStoreRepository.java
@@ -47,7 +47,10 @@ private void saveWithConnection(String graphName, Model model, RDFConnection con
public void clearExistingNamedGraph(String repoUrl) {
try {
String sparqlEndpoint = virtuosoClient.getSparqlEndpoint();
- UpdateExecution.service(sparqlEndpoint).updateString(getUpdateCommand(repoUrl)).execute();
+ UpdateExecution
+ .service(sparqlEndpoint)
+ .updateString(getUpdateCommand(repoUrl))
+ .execute();
} catch (Exception e) {
log.error(format("Could not clear existing named graph! - %s", repoUrl), e);
if (e instanceof HttpException) {
diff --git a/src/main/resources/application-local.properties b/src/main/resources/application-local.properties
index 173e389f..91080b64 100644
--- a/src/main/resources/application-local.properties
+++ b/src/main/resources/application-local.properties
@@ -1,5 +1,5 @@
spring.elasticsearch.uris=http://localhost:9200
-harvester.repositories=https://github.com/teamdigitale/openapi,https://github.com/italia/daf-ontologie-vocabolari-controllati,https://github.com/InailUfficio5/inail-ndc
+harvester.repositories=https://github.com/istat/ts-ontologie-vocabolari-controllati
virtuoso.sparql=http://localhost:8890/sparql-auth
virtuoso.sparql-graph-store=http://localhost:8890/sparql-graph-crud-auth
diff --git a/src/main/resources/db/migration/V3__max_file_size.sql b/src/main/resources/db/migration/V3__max_file_size.sql
new file mode 100644
index 00000000..75cdaca2
--- /dev/null
+++ b/src/main/resources/db/migration/V3__max_file_size.sql
@@ -0,0 +1,2 @@
+ALTER TABLE REPOSITORY
+ add MAX_FILE_SIZE_BYTES LONG null;
diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java
index 08d78cc7..1dda94c6 100644
--- a/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java
+++ b/src/test/java/it/gov/innovazione/ndc/harvester/HarvesterServiceTest.java
@@ -1,10 +1,12 @@
package it.gov.innovazione.ndc.harvester;
+import it.gov.innovazione.ndc.model.harvester.Repository;
import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository;
import it.gov.innovazione.ndc.repository.TripleStoreRepository;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.ArgumentCaptor;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
@@ -15,6 +17,8 @@
import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.verify;
@@ -53,27 +57,32 @@ void shouldHarvestAssets() throws IOException {
harvesterService.harvest(asRepo(repoUrl));
verify(agencyRepoService).cloneRepo("someRepoUri", null);
- verify(harvester).harvest(sanitizedRepoUrl, clonedRepoPath);
+
+ ArgumentCaptor repoCaptor = ArgumentCaptor.forClass(Repository.class);
+ verify(harvester).harvest(repoCaptor.capture(), any());
+ assertEquals(sanitizedRepoUrl, repoCaptor.getValue().getUrl());
}
@Test
void shouldGiveUpOnRepoWhenGenericExceptionIsThrown() throws IOException {
String repoUrl = "someRepoUri";
+ Repository repo = asRepo(repoUrl);
when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath);
doThrow(new RuntimeException("Something else went wrong")).when(
- harvester).harvest(repoUrl, clonedRepoPath);
+ harvester).harvest(repo, clonedRepoPath);
assertThatThrownBy(() -> harvesterService.harvest(asRepo(repoUrl)))
.isInstanceOf(RuntimeException.class)
.hasMessage("Something else went wrong");
- verify(harvester).harvest(repoUrl, clonedRepoPath);
+ verify(harvester).harvest(repo, clonedRepoPath);
}
@Test
void shouldClearNamedGraphAndMetadataBeforeProcessingData() throws IOException {
String repoUrl = "someRepoUri";
+ Repository repo = asRepo(repoUrl);
when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath);
@@ -83,7 +92,7 @@ void shouldClearNamedGraphAndMetadataBeforeProcessingData() throws IOException {
order.verify(harvester).cleanUpBeforeHarvesting(repoUrl);
order.verify(tripleStoreRepository).clearExistingNamedGraph(repoUrl);
order.verify(metadataRepository).deleteByRepoUrl(repoUrl);
- order.verify(harvester).harvest(repoUrl, clonedRepoPath);
+ order.verify(harvester).harvest(repo, clonedRepoPath);
}
@Test
@@ -100,10 +109,11 @@ void shouldCleanUpTemporaryFolderWithRepoAfterProcessing() throws IOException {
@Test
void shouldCleanUpTemporaryFolderWithRepoAfterFailure() throws IOException {
String repoUrl = "someRepoUri";
+ Repository repo = asRepo(repoUrl);
when(agencyRepoService.cloneRepo(repoUrl, null)).thenReturn(clonedRepoPath);
doThrow(new RuntimeException("network disaster")).when(harvester)
- .harvest(repoUrl, clonedRepoPath);
+ .harvest(repo, clonedRepoPath);
assertThatThrownBy(() -> harvesterService.harvest(asRepo(repoUrl)))
.hasMessage("network disaster");
diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java
index 7f248044..8af34f1c 100644
--- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java
+++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/BaseSemanticAssetHarvesterTest.java
@@ -1,42 +1,37 @@
package it.gov.innovazione.ndc.harvester.harvesters;
+import it.gov.innovazione.ndc.config.HarvestExecutionContext;
+import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
+import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
+import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.exception.InvalidAssetException;
import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
-import it.gov.innovazione.ndc.harvester.SemanticAssetType;
+import it.gov.innovazione.ndc.model.harvester.Repository;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
+import org.mockito.MockedStatic;
import org.mockito.junit.jupiter.MockitoExtension;
+import java.io.File;
import java.nio.file.Path;
import java.util.List;
import java.util.function.BiConsumer;
+import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
@ExtendWith(MockitoExtension.class)
class BaseSemanticAssetHarvesterTest {
- private class TestHarvester extends BaseSemanticAssetHarvester {
- public TestHarvester() {
- super(SemanticAssetType.ONTOLOGY);
- }
-
- @Override
- protected void processPath(String repoUrl, SemanticAssetPath path) {
- processor.accept(repoUrl, path);
- }
-
- @Override
- protected List scanForPaths(Path rootPath) {
- return paths;
- }
- }
-
- private TestHarvester harvester = new TestHarvester();
- private List paths;
@Mock
- private BiConsumer processor;
+ private NdcEventPublisher eventPublisher;
@Test
void shouldMoveOnToNextOntologyIfProcessingOneFails() {
@@ -44,14 +39,80 @@ void shouldMoveOnToNextOntologyIfProcessingOneFails() {
Path basePath = Path.of("ontologyRoot");
SemanticAssetPath path1 = SemanticAssetPath.of("test1.ttl");
SemanticAssetPath path2 = SemanticAssetPath.of("test2.ttl");
- paths = List.of(path1, path2);
+ List paths = List.of(path1, path2);
+
+ TestHarvester harvester = new TestHarvester(paths);
doThrow(new InvalidAssetException("Something went wrong")).when(processor)
.accept(repoUrl, path1);
- harvester.harvest(repoUrl, basePath);
+ harvester.harvest(asRepo(repoUrl), basePath);
verify(processor).accept(repoUrl, path1);
verify(processor).accept(repoUrl, path2);
}
-}
\ No newline at end of file
+
+ @Mock
+ private BiConsumer processor;
+
+ @Test
+ void shouldNotifyIfSizeExceed() {
+ String repoUrl = "someRepoUri";
+ Path basePath = Path.of("ontologyRoot");
+ SemanticAssetPath path = mock(SemanticAssetPath.class);
+
+ File file = mock(File.class);
+ when(path.getAllFiles()).thenReturn(List.of(file));
+ when(path.getTtlPath()).thenReturn("someRootPath/someFile.ttl");
+ when(file.length()).thenReturn(2L);
+
+ Repository repository = asRepo(repoUrl).toBuilder()
+ .maxFileSizeBytes(1L)
+ .build();
+
+ doNothing().when(processor).accept(repoUrl, path);
+
+ try (MockedStatic contextUtils = mockStatic(HarvestExecutionContextUtils.class)) {
+ contextUtils.when(HarvestExecutionContextUtils::getContext)
+ .thenReturn(
+ HarvestExecutionContext.builder()
+ .repository(repository)
+ .revision("someRevision")
+ .correlationId("someCorrelationId")
+ .runId("someRunId")
+ .currentUserId("someUserId")
+ .rootPath("someRootPath")
+ .build());
+
+ TestHarvester harvester = new TestHarvester(List.of(path));
+
+ harvester.harvest(repository, basePath);
+ verify(eventPublisher).publishEvent(
+ anyString(),
+ anyString(),
+ anyString(),
+ anyString(),
+ any());
+ }
+ }
+
+ private class TestHarvester extends BaseSemanticAssetHarvester {
+
+ private final List paths;
+
+ public TestHarvester(List paths) {
+ super(SemanticAssetType.ONTOLOGY, eventPublisher);
+ this.paths = paths;
+ }
+
+ @Override
+ protected void processPath(String repoUrl, SemanticAssetPath path) {
+ processor.accept(repoUrl, path);
+ }
+
+ @Override
+ protected List scanForPaths(Path rootPath) {
+ return paths;
+ }
+ }
+}
diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java
index b9780bd0..37652556 100644
--- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java
+++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/ControlledVocabularyHarvesterTest.java
@@ -9,9 +9,12 @@
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
+import java.io.File;
import java.nio.file.Path;
import java.util.List;
+import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -32,7 +35,7 @@ void shouldProcessAllScannedPaths() {
final CvPath path2 = CvPath.of("onto2.ttl", "onto2.csv");
when(agencyRepositoryService.getControlledVocabularyPaths(cvBasePath)).thenReturn(List.of(path1, path2));
- harvester.harvest(repoUrl, cvBasePath);
+ harvester.harvest(asRepo(repoUrl), cvBasePath);
verify(agencyRepositoryService).getControlledVocabularyPaths(cvBasePath);
verify(pathProcessor).process(repoUrl, path1);
@@ -47,4 +50,22 @@ void shouldCleanIndicesBeforeHarvesting() {
verify(pathProcessor).dropCsvIndicesForRepo(repoUrl);
}
-}
\ No newline at end of file
+
+ @Test
+ void shouldReturnAllFiles() {
+ final CvPath path = CvPath.of("onto1.ttl", "onto1.csv");
+ List allFiles = path.getAllFiles();
+
+ assertEquals("onto1.ttl", allFiles.get(0).getName());
+ assertEquals("onto1.csv", allFiles.get(1).getName());
+ }
+
+ @Test
+ void shouldReturnTTLOnly() {
+ final CvPath path = CvPath.of("onto1.ttl", null);
+ List allFiles = path.getAllFiles();
+
+ assertEquals("onto1.ttl", allFiles.get(0).getName());
+ assertEquals(1, allFiles.size());
+ }
+}
diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java
index 96d91491..2afd9525 100644
--- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java
+++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/OntologyHarvesterTest.java
@@ -12,6 +12,7 @@
import java.nio.file.Path;
import java.util.List;
+import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -32,10 +33,10 @@ void shouldProcessAllScannedPaths() {
final SemanticAssetPath path2 = SemanticAssetPath.of("onto2.ttl");
when(agencyRepositoryService.getOntologyPaths(ontologyBasePath)).thenReturn(List.of(path1, path2));
- harvester.harvest(repoUrl, ontologyBasePath);
+ harvester.harvest(asRepo(repoUrl), ontologyBasePath);
verify(agencyRepositoryService).getOntologyPaths(ontologyBasePath);
verify(pathProcessor).process(repoUrl, path1);
verify(pathProcessor).process(repoUrl, path2);
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java
index 6b1ecf45..facc6bbd 100644
--- a/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java
+++ b/src/test/java/it/gov/innovazione/ndc/harvester/harvesters/SchemaHarvesterTest.java
@@ -1,8 +1,8 @@
package it.gov.innovazione.ndc.harvester.harvesters;
-import it.gov.innovazione.ndc.harvester.pathprocessors.SchemaPathProcessor;
import it.gov.innovazione.ndc.harvester.AgencyRepositoryService;
import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
+import it.gov.innovazione.ndc.harvester.pathprocessors.SchemaPathProcessor;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
@@ -12,6 +12,7 @@
import java.nio.file.Path;
import java.util.List;
+import static it.gov.innovazione.ndc.harvester.service.RepositoryUtils.asRepo;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -32,11 +33,11 @@ void shouldProcessAllScannedPaths() {
final SemanticAssetPath path2 = SemanticAssetPath.of("schema2.ttl");
when(agencyRepositoryService.getSchemaPaths(schemaBasePath)).thenReturn(List.of(path1, path2));
- harvester.harvest(repoUrl, schemaBasePath);
+ harvester.harvest(asRepo(repoUrl), schemaBasePath);
verify(agencyRepositoryService).getSchemaPaths(schemaBasePath);
verify(pathProcessor).process(repoUrl, path1);
verify(pathProcessor).process(repoUrl, path2);
}
-}
\ No newline at end of file
+}