Skip to content

Commit

Permalink
hasLanguageTag checker #194
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Aug 26, 2024
1 parent 179cd0b commit 67e7196
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 14 deletions.
73 changes: 73 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,79 @@ fields:
minHeight: 200
```

##### `hasLanguageTag (anyOf|oneOf|allOf)`

(since v0.9.6)

It checks if the data element value has a language tag. In XML the language tag is
found in the `@xml:lang` attribute. In JSON it might be encoded differently. Right now
MQAF supports the following encoding:

```json
"description": {
"de": ["Porträt"]
}
```

Since this kind of structure might be used for purposes other than language annotation,
the field definition must declare explicitly that the field is expected to have language annotation:

```yaml
format: json
fields:
- name: description
path: $.['description']
asLanguageTagged: true
```

The parameter defines whether any, exactly one, or all instances should have a language annotation:

* `anyOf`: the test passes if at least one instance has a language tag
* `oneOf`: the test passes if one and only one instance has a language tag
* `allOf`: the test passes if all instances have a language tag

A full example:

```yaml
format: json
fields:
- name: description
path: $.['description']
asLanguageTagged: true
rules:
- hasLanguageTag: allOf
```

##### `isMultilingual (boolean)`

(since v0.9.6)

It checks if the data element is multilingual, i.e. whether it has at least two instances
with different language annotations.

```json
{
"description":{
"de":["Portr\u00e4t"],
"zh":["\u8096\u50cf"]
}
}
```

An example schema:

```yaml
format: json
fields:
- name: description
path: $.['description']
asLanguageTagged: true
rules:
- isMultilingual: true
```


#### General properties

##### `id <String>`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@
import de.gwdg.metadataqa.api.json.DataElement;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.rule.RuleChecker;
import de.gwdg.metadataqa.api.rule.RuleCheckerOutput;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputStatus;
import de.gwdg.metadataqa.api.rule.RuleCheckingOutputType;
import de.gwdg.metadataqa.api.uniqueness.UniquenessExtractor;

import java.util.List;

Expand All @@ -29,28 +27,41 @@ public LanguageTagChecker(DataElement field, String header) {
/**
 * Evaluates the language-tag rule for {@code field} and records the outcome in {@code results}.
 *
 * <p>The instances returned by {@code cache.get(field)} are scanned, counting those for which
 * {@code hasLanguage()} is true. How the count is judged depends on {@code scope}:
 * {@code anyOf} stops at the first tagged instance, {@code allOf} fails at the first untagged
 * instance, and {@code oneOf} requires the count to be exactly one.
 *
 * <p>NOTE(review): this span comes from a diff rendering; adjacent near-duplicate statements
 * (the two leading LOGGER.info calls, the two {@code isNA = false} assignments, the two
 * "language tag" log calls) look like before/after pairs — confirm against the real file.
 *
 * @param cache the selector the field instances are read from
 * @param results the counter the rule output is added to (via {@code addOutput})
 * @param outputType passed through unchanged to {@code addOutput}
 */
@Override
public void update(Selector cache, FieldCounter<RuleCheckerOutput> results, RuleCheckingOutputType outputType) {
if (isDebug())
LOGGER.info(this.getClass() + " " + this.id);
LOGGER.info(this.getClass() + " " + this.id + ", scope: " + scope);
var allPassed = true;
// isNA stays true when cache.get(field) yields null or an empty list
var isNA = true;
// number of instances seen so far that carry a language tag
int counter = 0;
List<XmlFieldInstance> instances = cache.get(field);
if (instances != null && !instances.isEmpty()) {
for (XmlFieldInstance instance : instances) {
isNA = false;
if (instance.hasLanguage()) {
isNA = false;
counter++;
if (isDebug())
LOGGER.info("language tag: " + instance.hasLanguage());
LOGGER.info("language tag: " + instance.getLanguage());
// anyOf: one tagged instance is enough, no need to look further
if (scope.equals(ApplicationScope.anyOf)) {
break;
}
} else if (scope.equals(ApplicationScope.allOf)) {
// allOf: a single untagged instance already decides the failure
allPassed = false;
break;
}
}
}

// there were instances but none was tagged: fails regardless of scope;
// otherwise oneOf additionally requires exactly one tagged instance
if (!isNA && counter == 0) {
allPassed = false;
} else if (scope.equals(ApplicationScope.oneOf) && counter != 1) {
allPassed = false;
}

addOutput(results, isNA, allPassed, outputType);
if (isDebug())
LOGGER.info(this.getClass().getSimpleName() + " " + this.id + ") result: " + RuleCheckingOutputStatus.create(isNA, allPassed));

}

/**
 * Stores the given scope in this checker's {@code scope} field and returns the checker itself,
 * so the call can be chained.
 *
 * <p>NOTE(review): the two signature lines below look like the before/after pair of a diff
 * (old return type {@code RuleChecker}, new return type {@code LanguageTagChecker}) — confirm
 * against the real file.
 *
 * @param scope the scope to store
 * @return this checker
 */
public RuleChecker withScope(ApplicationScope hasLanguageTag) {
public LanguageTagChecker withScope(ApplicationScope scope) {
this.scope = scope;
return this;
}
Expand Down
9 changes: 5 additions & 4 deletions src/main/java/de/gwdg/metadataqa/api/schema/SchemaUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public static List<RuleChecker> getRuleCheckers(Schema schema) {
}

private static List<RuleChecker> processRule(Schema schema, DataElement dataElement, Rule rule) {
System.err.println("processRule");
List<RuleChecker> ruleCheckers = new ArrayList<>();
if (rule.getSkip().equals(Boolean.TRUE))
return ruleCheckers;
Expand Down Expand Up @@ -128,11 +129,9 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
if (rule.getUnique() != null && rule.getUnique().equals(Boolean.TRUE))
ruleCheckers.add(new UniquenessChecker(dataElement));

// TODO
if (rule.getMultilingual() != null && rule.getMultilingual().equals(Boolean.TRUE))
ruleCheckers.add(new MultilingualChecker(dataElement));

// TODO
if (rule.getHasLanguageTag() != null)
ruleCheckers.add(new LanguageTagChecker(dataElement).withScope(rule.getHasLanguageTag()));

Expand All @@ -142,7 +141,6 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
if (rule.getLessThanOrEquals() != null)
pair(schema, ruleCheckers, dataElement, rule.getLessThan(), "lessThanOrEquals");

//
if (rule.getLessThanOrEquals() != null)
ruleCheckers.add(new DependencyChecker(dataElement, rule.getDependencies()));

Expand All @@ -161,7 +159,8 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
ruleCheckers.add(new NotChecker(dataElement, childRuleCheckers));
}

if (!ruleCheckers.isEmpty())
if (!ruleCheckers.isEmpty()) {
System.err.println("ruleCheckers is not empty");
for (RuleChecker ruleChecker : ruleCheckers) {
ruleChecker.setFailureScore(rule.getFailureScore());
ruleChecker.setSuccessScore(rule.getSuccessScore());
Expand All @@ -171,6 +170,7 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
if (rule.getHidden().equals(Boolean.TRUE))
ruleChecker.setHidden();
if (rule.getDebug().equals(Boolean.TRUE)) {
System.err.println("det debug");
ruleChecker.setDebug();
if (ruleChecker instanceof LogicalChecker) {
for (RuleChecker child : ((LogicalChecker) ruleChecker).getCheckers()) {
Expand All @@ -179,6 +179,7 @@ private static List<RuleChecker> processRule(Schema schema, DataElement dataElem
}
}
}
}

return ruleCheckers;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package de.gwdg.metadataqa.api.rule.singlefieldchecker;

import com.fasterxml.jackson.core.JsonProcessingException;
import de.gwdg.metadataqa.api.configuration.ConfigurationReader;
import de.gwdg.metadataqa.api.configuration.schema.ApplicationScope;
import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.json.JsonUtils;
import de.gwdg.metadataqa.api.model.selector.JsonSelector;
import de.gwdg.metadataqa.api.model.selector.Selector;
import de.gwdg.metadataqa.api.rule.CheckerTestBase;
Expand All @@ -13,6 +16,8 @@
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.regex.Pattern;

import static org.junit.Assert.assertEquals;
Expand All @@ -26,13 +31,12 @@ public void setUp() throws Exception {
schema = ConfigurationReader
.readSchemaYaml("src/test/resources/configuration/schema/rules/languageTag/hasLanguageTag.yaml")
.asSchema();
cache = new JsonSelector(FileUtils
.readFirstLineFromResource("configuration/schema/rules/languageTag/multilingual.json"));
}

@Test
public void success() {
MultilingualChecker checker = new MultilingualChecker(schema.getPathByLabel("description"));
public void allOf_all() {
setCache("multilingual-all.json");
LanguageTagChecker checker = new LanguageTagChecker(schema.getPathByLabel("description"));

FieldCounter<RuleCheckerOutput> fieldCounter = new FieldCounter<>();
checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH);
Expand All @@ -45,4 +49,157 @@ public void success() {
fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus()
);
}

@Test
public void allOf_one() {
  // no withScope() call: relies on the checker's default scope (presumably allOf — confirm)
  assertStatus("multilingual-one.json", null, RuleCheckingOutputStatus.PASSED);
}

@Test
public void allOf_none() {
  // no withScope() call: relies on the checker's default scope (presumably allOf — confirm)
  assertStatus("multilingual-none.json", null, RuleCheckingOutputStatus.FAILED);
}

@Test
public void oneOf_one() {
  assertStatus("multilingual-one.json", ApplicationScope.oneOf, RuleCheckingOutputStatus.PASSED);
}

@Test
public void oneOf_all() {
  assertStatus("multilingual-all.json", ApplicationScope.oneOf, RuleCheckingOutputStatus.FAILED);
}

@Test
public void oneOf_none_fails() {
  assertStatus("multilingual-none.json", ApplicationScope.oneOf, RuleCheckingOutputStatus.FAILED);
}

@Test
public void anyOf_one_passes() {
  assertStatus("multilingual-one.json", ApplicationScope.anyOf, RuleCheckingOutputStatus.PASSED);
}

@Test
public void anyOf_all_passed() {
  assertStatus("multilingual-all.json", ApplicationScope.anyOf, RuleCheckingOutputStatus.PASSED);
}

@Test
public void anyOf_none_failed() {
  assertStatus("multilingual-none.json", ApplicationScope.anyOf, RuleCheckingOutputStatus.FAILED);
}

/**
 * Runs a {@link LanguageTagChecker} on the "description" field of the given fixture and
 * verifies the shared expectations (two outputs, header format) plus the expected status.
 *
 * @param filename fixture file name below {@code configuration/schema/rules/languageTag/}
 * @param scope the scope to set via {@code withScope()}, or {@code null} to leave the
 *              checker with whatever scope it has after construction
 * @param expected the status the checker is expected to report
 */
private void assertStatus(String filename, ApplicationScope scope, RuleCheckingOutputStatus expected) {
  setCache(filename);
  LanguageTagChecker checker = new LanguageTagChecker(schema.getPathByLabel("description"));
  if (scope != null) {
    // withScope() mutates the checker, so the returned reference is not needed here
    checker.withScope(scope);
  }

  FieldCounter<RuleCheckerOutput> fieldCounter = new FieldCounter<>();
  checker.update(cache, fieldCounter, RuleCheckingOutputType.BOTH);

  assertEquals(2, fieldCounter.size());
  assertEquals("description", checker.getHeaderWithoutId());
  assertTrue(Pattern.compile("^description:\\d+$").matcher(checker.getHeader()).matches());
  Assert.assertEquals(
    expected,
    fieldCounter.get(checker.getHeader(RuleCheckingOutputType.STATUS)).getStatus()
  );
}

/**
 * Replaces {@code cache} with a {@link JsonSelector} built from the first line of the given
 * fixture file.
 *
 * @param filename fixture file name below {@code configuration/schema/rules/languageTag/}
 * @throws RuntimeException wrapping the original exception if the fixture cannot be read
 */
private void setCache(String filename) {
  String resource = "configuration/schema/rules/languageTag/" + filename;
  try {
    cache = new JsonSelector(FileUtils.readFirstLineFromResource(resource));
  } catch (URISyntaxException | IOException e) {
    // keep the cause and name the fixture so a broken test setup is easy to diagnose
    throw new RuntimeException("Cannot load test fixture " + resource, e);
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ fields:
asLanguageTagged: true
rules:
- hasLanguageTag: allOf
debug: true
id: 1.1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"1","description":{"de":["Portr\u00e4t"],"zh":["\u8096\u50cf"]}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":"1","description":["Portr\u00e4t","\u8096\u50cf"]}
Loading

0 comments on commit 67e7196

Please sign in to comment.