From 94033ce7f9f9c62c50f479d92f998a141e3a82a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Wed, 11 Dec 2024 12:25:08 +0100 Subject: [PATCH] Improve dependency checker, JSON output of schema --- pom.xml | 2 +- .../gwdg/metadataqa/api/json/DataElement.java | 1 + .../metadataqa/api/rule/BaseRuleChecker.java | 5 ++- .../api/rule/logical/AndChecker.java | 5 +++ .../singlefieldchecker/DependencyChecker.java | 35 +++++++++++++++++-- .../gwdg/metadataqa/api/cli/VersionTest.java | 2 +- .../api/rule/logical/AndCheckerTest.java | 32 +++++++++++++++++ .../metadataqa/api/schema/BaseSchemaTest.java | 10 +++--- 8 files changed, 82 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index 5043d785..0eb67df8 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ de.gwdg.metadataqa metadata-qa-api jar - 0.9.6-SNAPSHOT + 0.9.7-SNAPSHOT Metadata Quality Assurance Framework API A metadata quality assurance framework. It checks some metrics of diff --git a/src/main/java/de/gwdg/metadataqa/api/json/DataElement.java b/src/main/java/de/gwdg/metadataqa/api/json/DataElement.java index 36cb8b50..1e44a11b 100644 --- a/src/main/java/de/gwdg/metadataqa/api/json/DataElement.java +++ b/src/main/java/de/gwdg/metadataqa/api/json/DataElement.java @@ -172,6 +172,7 @@ public DataElement setChildren(List children) { return this; } + @JsonIgnore public boolean isCollection() { return collection; } diff --git a/src/main/java/de/gwdg/metadataqa/api/rule/BaseRuleChecker.java b/src/main/java/de/gwdg/metadataqa/api/rule/BaseRuleChecker.java index 52ff8c4a..97cdecef 100644 --- a/src/main/java/de/gwdg/metadataqa/api/rule/BaseRuleChecker.java +++ b/src/main/java/de/gwdg/metadataqa/api/rule/BaseRuleChecker.java @@ -91,7 +91,10 @@ else if (outputType.equals(RuleCheckingOutputType.SCORE)) return header + ":" + getId() + suffix; } - protected void addOutput(FieldCounter results, boolean isNA, boolean allPassed, RuleCheckingOutputType outputType) { + protected void 
addOutput(FieldCounter results, + boolean isNA, + boolean allPassed, + RuleCheckingOutputType outputType) { addOutput(results, isNA, allPassed, outputType, null, null); } diff --git a/src/main/java/de/gwdg/metadataqa/api/rule/logical/AndChecker.java b/src/main/java/de/gwdg/metadataqa/api/rule/logical/AndChecker.java index eff52f61..02aa4aa9 100644 --- a/src/main/java/de/gwdg/metadataqa/api/rule/logical/AndChecker.java +++ b/src/main/java/de/gwdg/metadataqa/api/rule/logical/AndChecker.java @@ -59,7 +59,12 @@ public void update(Selector cache, FieldCounter results, Rule MinCountChecker minCountChecker = (MinCountChecker) checker; if (!minCountChecker.isEmptyInstancesAllowed() || minCountChecker.getMinCount() > 0) allPassed = false; + } else if (checker instanceof DependencyChecker) { + DependencyChecker dependencyChecker = (DependencyChecker) checker; + allPassed = dependencyChecker.getResult(outputType, results); } + if (!allPassed) + break; } } addOutput(results, isNA, allPassed, outputType); diff --git a/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/DependencyChecker.java b/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/DependencyChecker.java index 9dfebd2b..df3fccbf 100644 --- a/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/DependencyChecker.java +++ b/src/main/java/de/gwdg/metadataqa/api/rule/singlefieldchecker/DependencyChecker.java @@ -24,7 +24,9 @@ public DependencyChecker(DataElement field, List dependencies) { this(field, field.getLabel(), dependencies, RuleCheckingOutputStatus.FAILED); } - public DependencyChecker(DataElement field, List dependencies, RuleCheckingOutputStatus failedDepencencyStatus) { + public DependencyChecker(DataElement field, + List dependencies, + RuleCheckingOutputStatus failedDepencencyStatus) { this(field, field.getLabel(), dependencies, failedDepencencyStatus); } @@ -42,7 +44,9 @@ public void update(Selector cache, FieldCounter results, Rule update(cache, results, outputType, null); } - 
public void update(Selector cache, FieldCounter localResults, RuleCheckingOutputType outputType, + public void update(Selector cache, + FieldCounter localResults, + RuleCheckingOutputType outputType, FieldCounter globalResults) { if (isDebug()) LOGGER.info(this.getClass().getSimpleName() + " " + this.id); @@ -82,4 +86,31 @@ public void update(Selector cache, FieldCounter localResults, if (isDebug()) LOGGER.info(this.getClass().getSimpleName() + " " + this.id + ") result: " + RuleCheckingOutputStatus.create(isNA, allPassed)); } + + public List getDependencies() { + return dependencies; + } + + public boolean getResult(RuleCheckingOutputType outputType, + FieldCounter globalResults) { + boolean allPassed = true; + for (String ruleId : dependencies) { + String keyEnd = outputType.equals(RuleCheckingOutputType.BOTH) ? ruleId + ":status" : ruleId; + boolean found = false; + for (Map.Entry entry : globalResults.getMap().entrySet()) { + if (entry.getKey().endsWith(keyEnd)) { + found = true; + if (entry.getValue().getStatus().equals(RuleCheckingOutputStatus.FAILED)) { + allPassed = false; + break; + } + } + } + if (!found) { + allPassed = false; + break; + } + } + return allPassed; + } } diff --git a/src/test/java/de/gwdg/metadataqa/api/cli/VersionTest.java b/src/test/java/de/gwdg/metadataqa/api/cli/VersionTest.java index 0cbb4ea4..1e82dba6 100644 --- a/src/test/java/de/gwdg/metadataqa/api/cli/VersionTest.java +++ b/src/test/java/de/gwdg/metadataqa/api/cli/VersionTest.java @@ -6,7 +6,7 @@ public class VersionTest { - private final String EXPECTED_VERSION = "0.9.6-SNAPSHOT"; + private final String EXPECTED_VERSION = "0.9.7-SNAPSHOT"; @Test public void getVersion() { diff --git a/src/test/java/de/gwdg/metadataqa/api/rule/logical/AndCheckerTest.java b/src/test/java/de/gwdg/metadataqa/api/rule/logical/AndCheckerTest.java index c6998133..1aa88dd3 100644 --- a/src/test/java/de/gwdg/metadataqa/api/rule/logical/AndCheckerTest.java +++ 
b/src/test/java/de/gwdg/metadataqa/api/rule/logical/AndCheckerTest.java @@ -101,4 +101,36 @@ public void failure() { assertEquals(RuleCheckingOutputStatus.FAILED, fieldCounter.get(andChecker.getHeader(RuleCheckingOutputType.STATUS)).getStatus()); } + + @Test + public void withDependency() { + schema.getPathByLabel("name") + .setRule(Arrays.asList( + new Rule().withAnd(Arrays.asList( + new Rule().withMinCount(1), + new Rule().withMaxCount(1))) + .withNaScore(-1) + .withId("Q1"), + new Rule().withAnd(Arrays.asList( + new Rule().withDependencies(List.of("Q1")), + new Rule().withHasValue("a"))) + .withId("Q2") + )); + + cache = (CsvSelector) SelectorFactory.getInstance(schema.getFormat(), ",b,a"); + cache.setCsvReader(new CsvReader().setHeader(((CsvAwareSchema) schema).getHeader())); + + FieldCounter fieldCounter = new FieldCounter<>(); + for (RuleChecker checker : schema.getRuleCheckers()) { + checker.update(cache, fieldCounter, RuleCheckingOutputType.STATUS); + } + System.err.println(fieldCounter); + // FieldCounter{fieldMap={ + // name:and:name:minCount:name:maxCount:Q1:status=0, + // name:and:name:minCount:name:maxCount:Q1:score=0, + // name:and:name:dependency:name:hasValue:Q2:status=0, + // name:and:name:dependency:name:hasValue:Q2:score=0 + // }} + + } } \ No newline at end of file diff --git a/src/test/java/de/gwdg/metadataqa/api/schema/BaseSchemaTest.java b/src/test/java/de/gwdg/metadataqa/api/schema/BaseSchemaTest.java index 302a3703..45119ff1 100644 --- a/src/test/java/de/gwdg/metadataqa/api/schema/BaseSchemaTest.java +++ b/src/test/java/de/gwdg/metadataqa/api/schema/BaseSchemaTest.java @@ -924,17 +924,17 @@ public void toJson() { assertTrue(schemaString.contains("\"name\":\"author\"")); assertTrue(schemaString.contains("\"path\":\"author\"")); assertTrue(schemaString.contains("\"categories\":[]")); - // assertTrue(schemaString.contains("\"children\":[]")); - assertTrue(schemaString.contains("\"collection\":false")); + 
assertFalse(schemaString.contains("\"children\":[]")); + assertFalse(schemaString.contains("\"collection\":false")); assertTrue(schemaString.contains("\"hasValue\":\"a\"")); assertTrue(schemaString.contains("\"hidden\":false")); assertTrue(schemaString.contains("\"skip\":false")); assertTrue(schemaString.contains("\"debug\":false")); assertTrue(schemaString.contains("\"allowEmptyInstances\":false")); - // assertTrue(schemaString.contains("\"absolutePath\":\"author\"")); - // assertTrue(schemaString.contains("\"active\":true")); + assertFalse(schemaString.contains("\"absolutePath\":\"author\"")); + assertFalse(schemaString.contains("\"active\":true")); assertTrue(schemaString.contains("\"extractable\":false")); - // assertTrue(schemaString.contains("\"mandatory\":false")); + assertFalse(schemaString.contains("\"mandatory\":false")); assertTrue(schemaString.contains("\"rules\":[")); assertTrue(schemaString.contains("\"groups\":[]")); assertTrue(schemaString.contains("\"categories\":[]"));