Skip to content

Commit

Permalink
requisito file scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
gnespolino committed Feb 8, 2024
1 parent fcc1e82 commit 3e758d3
Show file tree
Hide file tree
Showing 25 changed files with 375 additions and 71 deletions.
5 changes: 4 additions & 1 deletion config/spotbugs/exclude-filter.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@
<Match>
<Bug pattern="EI_EXPOSE_REP2"/><!-- False positive when using lombok -->
</Match>
</FindBugsFilter>
<Match>
<Bug pattern="ES_COMPARING_PARAMETER_STRING_WITH_EQ"/><!-- False positive when using lombok -->
</Match>
</FindBugsFilter>
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package it.gov.innovazione.ndc.config;

import it.gov.innovazione.ndc.model.harvester.Repository;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.With;

@With
@Data
@Builder
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
public class HarvestExecutionContext {
private final Repository repository;
private final String revision;
private final String correlationId;
private final String runId;
private final String currentUserId;
private final String rootPath;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package it.gov.innovazione.ndc.config;

import lombok.NoArgsConstructor;

@NoArgsConstructor(access = lombok.AccessLevel.PRIVATE)
public class HarvestExecutionContextUtils {

private static final ThreadLocal<HarvestExecutionContext> CONTEXT_HOLDER = new ThreadLocal<>();

public static HarvestExecutionContext getContext() {
return CONTEXT_HOLDER.get();
}

public static void setContext(HarvestExecutionContext context) {
CONTEXT_HOLDER.set(context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,15 @@ public void execute(String runId, Repository repository, String correlationId, S
verifySameRunWasNotExecuted(repository, revision);
}

HarvestExecutionContextUtils.setContext(
HarvestExecutionContext.builder()
.repository(repository)
.revision(revision)
.correlationId(correlationId)
.runId(runId)
.currentUserId(currentUserLogin)
.build());

harvesterService.harvest(repository, revision);

publishHarvesterSuccessfulEvent(repository, correlationId, revision, runId, currentUserLogin);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public void createRepository(
repository.getUrl(),
repository.getName(),
repository.getDescription(),
repository.getMaxFileSizeBytes(),
principal);
}

Expand Down Expand Up @@ -104,5 +105,6 @@ public static class CreateRepository {
private String url;
private String name;
private String description;
private Long maxFileSizeBytes;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package it.gov.innovazione.ndc.eventhandler.event;

import it.gov.innovazione.ndc.model.harvester.Repository;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.Data;
import lombok.RequiredArgsConstructor;

import java.io.File;
import java.util.List;
import java.util.stream.Collectors;

@Builder
@Data
public class HarvestedFileTooBigEvent {
private final String runId;
private final Repository repository;
private final String revision;
private final ViolatingSemanticAsset violatingSemanticAsset;
private final String relativePathInRepo;

@Data
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
public static class ViolatingSemanticAsset {
private final String pathInRepo;
private final List<FileDetail> fileDetails;

public static ViolatingSemanticAsset fromPath(
String pathInRepo,
List<File> files,
long maxFileSizeBytes) {
return new ViolatingSemanticAsset(
pathInRepo,
files.stream()
.map(file -> new FileDetail(
file.getPath(),
file.length(),
file.length() > maxFileSizeBytes))
.collect(Collectors.toList()));
}

}


@Data
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
public static class FileDetail {
private final String path;
private final long size;
private final boolean violatesMaxSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import it.gov.innovazione.ndc.eventhandler.event.HarvesterFinishedEvent;
import it.gov.innovazione.ndc.eventhandler.event.HarvesterStartedEvent;
import it.gov.innovazione.ndc.harvester.service.HarvesterRunService;
import it.gov.innovazione.ndc.harvester.service.RepositoryService;
import it.gov.innovazione.ndc.model.harvester.HarvesterRun;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -20,7 +19,6 @@
@Slf4j
public class HarvesterRunUpdatingHandler implements NdcEventHandler {

private final RepositoryService repositoryService;
private final HarvesterRunService harvesterRunService;

private static final Collection<Class<?>> SUPPORTED_EVENTS = Arrays.asList(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package it.gov.innovazione.ndc.harvester;

import it.gov.innovazione.ndc.config.HarvestExecutionContext;
import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
import it.gov.innovazione.ndc.model.harvester.Repository;
import it.gov.innovazione.ndc.repository.SemanticAssetMetadataRepository;
import it.gov.innovazione.ndc.repository.TripleStoreRepository;
Expand All @@ -10,6 +12,7 @@
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import java.util.Objects;

@Slf4j
@Component
Expand All @@ -26,13 +29,15 @@ public void harvest(Repository repository) throws IOException {

public void harvest(Repository repository, String revision) throws IOException {
log.info("Processing repo {}", repository.getUrl());
String repoUrl = normaliseRepoUrl(repository.getUrl());
Repository normalisedRepo = repository.withUrl(normaliseRepoUrl(repository.getUrl()));
String repoUrl = normalisedRepo.getUrl();
log.debug("Normalised repo url {}", repoUrl);
try {
Path path = cloneRepoToTempPath(repoUrl, revision);

try {
harvestClonedRepo(repoUrl, path);
updateContextWithRootPath(path);
harvestClonedRepo(normalisedRepo, path);
} finally {
agencyRepositoryService.removeClonedRepo(path);
}
Expand All @@ -43,6 +48,13 @@ public void harvest(Repository repository, String revision) throws IOException {
}
}

private static void updateContextWithRootPath(Path path) {
HarvestExecutionContext context = HarvestExecutionContextUtils.getContext();
if (Objects.nonNull(context)) {
HarvestExecutionContextUtils.setContext(context.withRootPath(path.toString()));
}
}

public void clear(String repoUrl) {
log.info("Clearing repo {}", repoUrl);
repoUrl = normaliseRepoUrl(repoUrl);
Expand All @@ -60,12 +72,12 @@ private String normaliseRepoUrl(String repoUrl) {
return repoUrl.replace(".git", "");
}

private void harvestClonedRepo(String repoUrl, Path path) {
clearRepo(repoUrl);
private void harvestClonedRepo(Repository repository, Path path) {
clearRepo(repository.getUrl());

harvestSemanticAssets(repoUrl, path);
harvestSemanticAssets(repository, path);

log.info("Repo {} processed", repoUrl);
log.info("Repo {} processed", repository);
}

private void clearRepo(String repoUrl) {
Expand All @@ -83,10 +95,10 @@ private void cleanUpWithHarvesters(String repoUrl) {
});
}

private void harvestSemanticAssets(String repoUrl, Path path) {
private void harvestSemanticAssets(Repository repository, Path path) {
semanticAssetHarvesters.forEach(h -> {
log.debug("Harvesting {} for {} assets", path, h.getType());
h.harvest(repoUrl, path);
h.harvest(repository, path);

log.debug("Harvested {} for {} assets", path, h.getType());
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package it.gov.innovazione.ndc.harvester;

import it.gov.innovazione.ndc.model.harvester.Repository;

import java.nio.file.Path;

public interface SemanticAssetHarvester {
SemanticAssetType getType();

void cleanUpBeforeHarvesting(String repoUrl);

void harvest(String repoUrl, Path rootPath);
void harvest(Repository repository, Path rootPath);
}
Original file line number Diff line number Diff line change
@@ -1,43 +1,100 @@
package it.gov.innovazione.ndc.harvester.harvesters;

import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
import it.gov.innovazione.ndc.config.HarvestExecutionContext;
import it.gov.innovazione.ndc.config.HarvestExecutionContextUtils;
import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent;
import it.gov.innovazione.ndc.eventhandler.event.HarvestedFileTooBigEvent.ViolatingSemanticAsset;
import it.gov.innovazione.ndc.harvester.SemanticAssetHarvester;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.exception.SinglePathProcessingException;
import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
import it.gov.innovazione.ndc.model.harvester.Repository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
import java.nio.file.Path;
import java.util.List;

@Slf4j
@RequiredArgsConstructor
public abstract class BaseSemanticAssetHarvester<P extends SemanticAssetPath> implements SemanticAssetHarvester {
private final SemanticAssetType type;
private final NdcEventPublisher eventPublisher;

@Override
public SemanticAssetType getType() {
return type;
}

@Override
public void harvest(String repoUrl, Path rootPath) {
public void harvest(Repository repository, Path rootPath) {
log.debug("Looking for {} paths", type);

List<P> paths = scanForPaths(rootPath);

paths.forEach(this::notifyIfSizeExceed);

log.debug("Found {} {} path(s) for processing", paths.size(), type);

for (P path : paths) {
try {
processPath(repoUrl, path);
processPath(repository.getUrl(), path);
log.debug("Path {} processed correctly for {}", path, type);

} catch (SinglePathProcessingException e) {
log.error("Error processing {} {} in repo {}", type, path, repoUrl, e);
log.error("Error processing {} {} in repo {}", type, path, repository.getUrl(), e);
}
}
}

private void notifyIfSizeExceed(P path) {
HarvestExecutionContext context = HarvestExecutionContextUtils.getContext();
if (context != null) {
List<File> files = path.getAllFiles();
if (isBiggerThan(context.getRepository().getMaxFileSizeBytes(), files)) {
notify(context, path);
}
}
}

private void notify(HarvestExecutionContext context, P path) {
eventPublisher.publishEvent(
"harvester",
"harvester.max-file-size-exceeded",
context.getCorrelationId(),
context.getCurrentUserId(),
HarvestedFileTooBigEvent.builder()
.runId(context.getRunId())
.repository(context.getRepository())
.revision(context.getRevision())
.violatingSemanticAsset(
ViolatingSemanticAsset.fromPath(
relativize(
path.getTtlPath(),
context),
path.getAllFiles(),
context.getRepository().getMaxFileSizeBytes()))
.build());
log.warn("File {} is bigger than maxFileSizeBytes", path.getTtlPath());
}

private String relativize(String ttlFile, HarvestExecutionContext context) {
return getFolderNameFromFile(ttlFile).replace(
context.getRootPath(), "");
}

private String getFolderNameFromFile(String ttlFile) {
return new File(ttlFile).getParent();
}

private boolean isBiggerThan(Long maxFileSizeBytes, List<File> files) {
return maxFileSizeBytes != null
&& maxFileSizeBytes > 0
&& files.stream().anyMatch(file -> file.length() > maxFileSizeBytes);
}

@Override
public void cleanUpBeforeHarvesting(String repoUrl) {
// by default nothing specific
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package it.gov.innovazione.ndc.harvester.harvesters;

import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
import it.gov.innovazione.ndc.harvester.AgencyRepositoryService;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.model.CvPath;
import it.gov.innovazione.ndc.harvester.pathprocessors.ControlledVocabularyPathProcessor;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import org.springframework.stereotype.Component;

import java.nio.file.Path;
Expand All @@ -14,8 +15,8 @@ public class ControlledVocabularyHarvester extends BaseSemanticAssetHarvester<Cv
private final AgencyRepositoryService agencyRepositoryService;
private final ControlledVocabularyPathProcessor pathProcessor;

public ControlledVocabularyHarvester(AgencyRepositoryService agencyRepositoryService, ControlledVocabularyPathProcessor pathProcessor) {
super(SemanticAssetType.CONTROLLED_VOCABULARY);
public ControlledVocabularyHarvester(AgencyRepositoryService agencyRepositoryService, ControlledVocabularyPathProcessor pathProcessor, NdcEventPublisher ndcEventPublisher) {
super(SemanticAssetType.CONTROLLED_VOCABULARY, ndcEventPublisher);
this.agencyRepositoryService = agencyRepositoryService;
this.pathProcessor = pathProcessor;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package it.gov.innovazione.ndc.harvester.harvesters;

import it.gov.innovazione.ndc.eventhandler.NdcEventPublisher;
import it.gov.innovazione.ndc.harvester.AgencyRepositoryService;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import it.gov.innovazione.ndc.harvester.model.SemanticAssetPath;
import it.gov.innovazione.ndc.harvester.pathprocessors.OntologyPathProcessor;
import it.gov.innovazione.ndc.harvester.SemanticAssetType;
import org.springframework.stereotype.Component;

import java.nio.file.Path;
Expand All @@ -14,8 +15,8 @@ public class OntologyHarvester extends BaseSemanticAssetHarvester<SemanticAssetP
private final AgencyRepositoryService agencyRepositoryService;
private final OntologyPathProcessor pathProcessor;

public OntologyHarvester(AgencyRepositoryService agencyRepositoryService, OntologyPathProcessor pathProcessor) {
super(SemanticAssetType.ONTOLOGY);
public OntologyHarvester(AgencyRepositoryService agencyRepositoryService, OntologyPathProcessor pathProcessor, NdcEventPublisher ndcEventPublisher) {
super(SemanticAssetType.ONTOLOGY, ndcEventPublisher);
this.agencyRepositoryService = agencyRepositoryService;
this.pathProcessor = pathProcessor;
}
Expand Down
Loading

0 comments on commit 3e758d3

Please sign in to comment.