Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add echo calculator #1

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,19 @@ private void configure() {
addLanguageMeasurement();
addMultilingualSaturationMeasurement();
addUniquenessMeasurement();
addEcho();
}

/**
 * Adds a {@code FieldExtractor} built from the current schema to the
 * calculators, but only when the configuration reports that the field
 * extractor is enabled.
 */
private void addExtractor() {
  if (!configuration.isFieldExtractorEnabled()) {
    return;
  }
  calculators.add(new FieldExtractor(schema));
}

/**
 * Adds an {@code EchoField} calculator built from the current schema to the
 * calculators, but only when the configuration reports that echo is enabled.
 */
private void addEcho() {
  if (!configuration.isEchoEnabled()) {
    return;
  }
  calculators.add(new EchoField(schema));
}

private void addCompleteness() {
if (configuration.isCompletenessMeasurementEnabled()) {
CompletenessCalculator completenessCalculator = new CompletenessCalculator(schema);
Expand Down
71 changes: 71 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/calculator/EchoField.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package de.gwdg.metadataqa.api.calculator;

import com.jayway.jsonpath.InvalidJsonException;
import de.gwdg.metadataqa.api.interfaces.Calculator;
import de.gwdg.metadataqa.api.interfaces.MetricResult;
import de.gwdg.metadataqa.api.model.pathcache.PathCache;
import de.gwdg.metadataqa.api.model.XmlFieldInstance;
import de.gwdg.metadataqa.api.schema.Schema;
import de.gwdg.metadataqa.api.counter.FieldCounter;
import de.gwdg.metadataqa.api.problemcatalog.FieldCounterBasedResult;
import de.gwdg.metadataqa.api.json.JsonBranch;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * Calculator that copies ("echoes") the first value of every echo field
 * declared by the schema into its result, without transforming it.
 *
 * <p>A {@code null} schema is tolerated: in that case both the measured
 * result and the header are empty.
 *
 * @author Péter Király <peter.kiraly at gwdg.de>
 */
public class EchoField implements Calculator, Serializable {

  private static final Logger LOGGER = Logger.getLogger(EchoField.class.getCanonicalName());

  public static final String CALCULATOR_NAME = "echoField";

  protected Schema schema;

  /**
   * Creates the calculator.
   *
   * @param schema The schema whose echo fields are read; may be {@code null}
   */
  public EchoField(Schema schema) {
    this.schema = schema;
  }

  @Override
  public String getCalculatorName() {
    return CALCULATOR_NAME;
  }

  /**
   * Reads the first value of each echo field from the cache.
   *
   * @param cache The cache that is queried with the path of each echo field
   * @return A single-element list holding a {@code FieldCounterBasedResult}
   *   which maps each echo field name to its first value, or to {@code null}
   *   when the cache has no usable value for the field's path
   * @throws InvalidJsonException declared for callers; raised by the cache lookup, if at all
   */
  @Override
  public List<MetricResult> measure(PathCache cache)
      throws InvalidJsonException {
    FieldCounter<String> resultMap = new FieldCounter<>();

    if (schema != null) {
      // Iterate the entries directly instead of looking every key up again.
      for (Map.Entry<String, String> echoField : schema.getEchoFields().entrySet()) {
        List<XmlFieldInstance> values = cache.get(echoField.getValue());
        resultMap.put(echoField.getKey(), firstValueOrNull(values));
      }
    }
    return List.of(new FieldCounterBasedResult<>(getCalculatorName(), resultMap).withNoCompression());
  }

  /**
   * Builds the header: one {@code "echo:" + fieldName} entry per echo field,
   * in the iteration order of the schema's echo field map.
   *
   * @return The list of header names; empty when there is no schema
   */
  @Override
  public List<String> getHeader() {
    List<String> headers = new ArrayList<>();
    // Same null tolerance as measure(): without a schema there is nothing to echo.
    if (schema != null) {
      for (String fieldName : schema.getEchoFields().keySet()) {
        headers.add("echo:" + fieldName);
      }
    }
    return headers;
  }

  /**
   * Returns the value of the first instance in the list.
   *
   * @param values The instances found for a path; may be {@code null} or empty
   * @return The first instance's value, or {@code null} if the list or its first element is missing
   */
  private static String firstValueOrNull(List<XmlFieldInstance> values) {
    if (values == null || values.isEmpty() || values.get(0) == null) {
      return null;
    }
    return values.get(0).getValue();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ public class MeasurementConfiguration {
*/
private boolean fieldExtractorEnabled = false;

/**
* Flag whether or not field echo is enabled (default: false).
*/
private boolean fieldEchoEnabled = false;

/**
* Flag whether or not run the field existence measurement
* (default: true).
Expand Down Expand Up @@ -122,11 +127,13 @@ public MeasurementConfiguration() {}
* Flag whether or not run the problem catalog
*/
public MeasurementConfiguration(final boolean runFieldExistence,
final boolean runFieldEcho,
final boolean runFieldCardinality,
final boolean runCompleteness,
final boolean runTfIdf,
final boolean runProblemCatalog) {
this.fieldExistenceMeasurementEnabled = runFieldExistence;
this.fieldEchoEnabled = runFieldEcho;
this.fieldCardinalityMeasurementEnabled = runFieldCardinality;
this.completenessMeasurementEnabled = runCompleteness;
this.tfIdfMeasurementEnabled = runTfIdf;
Expand Down Expand Up @@ -154,6 +161,23 @@ public boolean isFieldExtractorEnabled() {
return fieldExtractorEnabled;
}

/**
 * Enables the field echo.
 * @return The value returned by {@code enableEcho(true)}
 */
public MeasurementConfiguration enableEcho() {
return enableEcho(true);
}

/**
 * Disables the field echo.
 * @return The value returned by {@code enableEcho(false)}
 */
public MeasurementConfiguration disableEcho() {
return enableEcho(false);
}

/**
 * Sets the flag that controls whether the field echo is enabled.
 * @param flag The new value of the flag
 * @return This configuration object, so calls can be chained
 */
public MeasurementConfiguration enableEcho(boolean flag) {
  fieldEchoEnabled = flag;
  return this;
}

/**
 * Returns whether or not the field echo is enabled.
 * @return The current value of the field echo flag
 */
public boolean isEchoEnabled() {
  return this.fieldEchoEnabled;
}

/**
* Returns whether or not to run the field existence measurement.
* @return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public class Field {
private String path;
private List<String> categories;
private boolean extractable;
private boolean echo;
private List<Rule> rules;
private String indexField;

Expand Down Expand Up @@ -42,6 +43,15 @@ public void setExtractable(boolean extractable) {
this.extractable = extractable;
}

/**
 * Returns the echo flag of this field.
 * @return The current value of the echo flag
 */
public boolean isEcho() {
  return this.echo;
}

/**
 * Sets the echo flag of this field.
 * @param echo The new value of the echo flag
 */
public void setEcho(boolean echo) {
  this.echo = echo;
}


/**
 * Returns the rules attached to this field.
 * @return The stored list of rules (the list itself, not a copy)
 */
public List<Rule> getRules() {
  return this.rules;
}
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/json/JsonBranch.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class JsonBranch implements Cloneable, Serializable {
private boolean collection = false;
private boolean isActive = true;
private boolean isExtractable = false;
private boolean isEcho = false;
private boolean isMandatory = false;
private List<Rule> rules;
private Schema schema;
Expand Down Expand Up @@ -205,6 +206,20 @@ public JsonBranch setExtractable(boolean extractable) {
return this;
}

/**
 * Returns the echo flag of this branch.
 * @return The current value of the echo flag
 */
public boolean isEcho() {
  return this.isEcho;
}

/**
 * Turns the echo flag of this branch on.
 * @return This branch, so calls can be chained
 */
public JsonBranch setEcho() {
  this.isEcho = true;
  return this;
}

/**
 * Sets the echo flag of this branch.
 * @param echo The new value of the echo flag
 * @return This branch, so calls can be chained
 */
public JsonBranch setEcho(boolean echo) {
  this.isEcho = echo;
  return this;
}

/**
 * Returns the rules attached to this branch.
 * @return The stored list of rules (the list itself, not a copy)
 */
public List<Rule> getRules() {
  return this.rules;
}
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/schema/BaseSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class BaseSchema implements Schema, CsvAwareSchema, Serializable {
private final Map<String, JsonBranch> collectionPaths = new LinkedHashMap<>();
private final Map<String, JsonBranch> directChildren = new LinkedHashMap<>();
private Map<String, String> extractableFields = new LinkedHashMap<>();
private Map<String, String> echoFields = new LinkedHashMap<>();
private List<String> categories = null;
private List<RuleChecker> ruleCheckers;
private List<JsonBranch> indexFields;
Expand All @@ -42,6 +43,9 @@ public BaseSchema addField(JsonBranch branch) {
if (branch.isExtractable())
extractableFields.put(branch.getLabel(), branch.getJsonPath());

if (branch.isEcho())
echoFields.put(branch.getLabel(), branch.getJsonPath());

return this;
}

Expand Down Expand Up @@ -123,6 +127,21 @@ public void addExtractableField(String label, String jsonPath) {
extractableFields.put(label, jsonPath);
}

/**
 * Returns the echo fields of this schema.
 * @return The stored map itself (not a copy); keys are field labels,
 *   values are the JSON paths of the fields
 */
@Override
public Map<String, String> getEchoFields() {
  return this.echoFields;
}

/**
 * Replaces the echo fields of this schema with the given map.
 * The map is stored as is, it is not copied.
 * @param echoFields The new map of echo fields
 */
@Override
public void setEchoFields(Map<String, String> echoFields) {
this.echoFields = echoFields;
}

/**
 * Registers a single echo field, replacing any entry stored under the same label.
 * @param label The label of the field (used as the map key)
 * @param jsonPath The JSON path expression of the field
 */
@Override
public void addEchoField(String label, String jsonPath) {
  this.echoFields.put(label, jsonPath);
}

@Override
public List<String> getCategories() {
if (categories == null) {
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/schema/MarcJsonSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class MarcJsonSchema implements Schema, ProblemCatalogSchema, Serializabl
private static final Map<String, JsonBranch> DIRECT_CHILDREN = new LinkedHashMap<>();
public static final String NOT_SUPPORTED_YET = "Not supported yet.";
private static Map<String, String> extractableFields = new LinkedHashMap<>();
private static Map<String, String> echoFields = new LinkedHashMap<>();
private static List<String> categories = null;
private static List<RuleChecker> ruleCheckers = null;

Expand Down Expand Up @@ -438,6 +439,21 @@ public void addExtractableField(String label, String jsonPath) {
extractableFields.put(label, jsonPath);
}

/**
 * Returns the echo fields. The backing map is a static field of
 * {@code MarcJsonSchema}, so it is shared by every instance of the class.
 * @return The stored map itself (not a copy)
 */
@Override
public Map<String, String> getEchoFields() {
  return MarcJsonSchema.echoFields;
}

/**
 * Replaces the echo fields with the given map. The target is a static
 * field of {@code MarcJsonSchema}, so the change is visible through every
 * instance of the class. The map is stored as is, it is not copied.
 * @param echoFields The new map of echo fields
 */
@Override
public void setEchoFields(Map<String, String> echoFields) {
  MarcJsonSchema.echoFields = echoFields;
}

/**
 * Registers a single echo field in the static echo field map of
 * {@code MarcJsonSchema}, replacing any entry stored under the same label.
 * @param label The label of the field (used as the map key)
 * @param jsonPath The JSON path expression of the field
 */
@Override
public void addEchoField(String label, String jsonPath) {
  MarcJsonSchema.echoFields.put(label, jsonPath);
}

private static void addPath(JsonBranch branch) {
PATHS.put(branch.getLabel(), branch);

Expand Down
21 changes: 21 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/schema/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ public interface Schema extends Serializable {
*/
void addExtractableField(String label, String jsonPath);

/**
 * Gets the fields whose values should be echoed from the records.
 * @return The map of echo fields. The key is the label of a field,
 * the value is a JSON path expression.
 */
Map<String, String> getEchoFields();

/**
 * Sets the echo fields.
 * @see #getEchoFields
 * @param echoFields The echo fields. The key is the label of a field,
 * the value is a JSON path expression.
 */
void setEchoFields(Map<String, String> echoFields);

/**
 * Adds a single field to the map of echo fields.
 * @see #getEchoFields
 * @param label The label of the field.
 * @param jsonPath The JSON path expression of the field.
 */
void addEchoField(String label, String jsonPath);

List<String> getCategories();

List<RuleChecker> getRuleCheckers();
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/schema/edm/EdmSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public abstract class EdmSchema implements Schema, ProblemCatalogSchema {
protected List<RuleChecker> ruleCheckers;

protected Map<String, String> extractableFields = new LinkedHashMap<>();
protected Map<String, String> echoFields = new LinkedHashMap<>();

protected void addPath(JsonBranch branch) {
paths.put(branch.getLabel(), branch);
Expand Down Expand Up @@ -128,4 +129,19 @@ public void setExtractableFields(Map<String, String> extractableFields) {
public void addExtractableField(String label, String jsonPath) {
extractableFields.put(label, jsonPath);
}

/**
 * Returns the echo fields of this schema.
 * @return The stored map itself (not a copy)
 */
@Override
public Map<String, String> getEchoFields() {
  return this.echoFields;
}

/**
 * Replaces the echo fields of this schema with the given map.
 * The map is stored as is, it is not copied.
 * @param echoFields The new map of echo fields
 */
@Override
public void setEchoFields(Map<String, String> echoFields) {
this.echoFields = echoFields;
}

/**
 * Registers a single echo field, replacing any entry stored under the same label.
 * @param label The label of the field (used as the map key)
 * @param jsonPath The JSON path expression of the field
 */
@Override
public void addEchoField(String label, String jsonPath) {
  this.echoFields.put(label, jsonPath);
}
}
3 changes: 3 additions & 0 deletions src/main/java/de/gwdg/metadataqa/api/util/SchemaFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public static Schema fromConfig(SchemaConfiguration config) {
if (field.isExtractable())
branch.setExtractable();

if (field.isEcho())
branch.setEcho();

if (field.getRules() != null)
branch.setRule(field.getRules());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public void tearDown() {

@Test
public void testNoAbbreviate() throws URISyntaxException, IOException {
MeasurementConfiguration configuration = new MeasurementConfiguration(true, true, true, false, true);
MeasurementConfiguration configuration = new MeasurementConfiguration(true, false, true, true, false, true);
CalculatorFacade calculatorFacade = new CalculatorFacade(configuration);
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();
Expand All @@ -65,7 +65,7 @@ public void testNoAbbreviate() throws URISyntaxException, IOException {

@Test
public void testNoAbbreviate_map() throws URISyntaxException, IOException {
MeasurementConfiguration configuration = new MeasurementConfiguration(true, true, true, false, true);
MeasurementConfiguration configuration = new MeasurementConfiguration(true, false, true, true, false, true);
CalculatorFacade calculatorFacade = new CalculatorFacade(configuration);
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();
Expand All @@ -79,7 +79,7 @@ public void testNoAbbreviate_map() throws URISyntaxException, IOException {

@Test
public void testNoAbbreviate_list() throws URISyntaxException, IOException {
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, true, true, false, true));
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, false, true, true, false, true));
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();

Expand All @@ -93,7 +93,7 @@ public void testNoAbbreviate_list() throws URISyntaxException, IOException {

@Test
public void testNoAbbreviate_list2() throws URISyntaxException, IOException {
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, true, true, false, true));
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, false, true, true, false, true));
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();

Expand Down Expand Up @@ -140,7 +140,7 @@ public void testNoAbbreviate_list2() throws URISyntaxException, IOException {

@Test
public void testNoAbbreviate_listOfObject() throws URISyntaxException, IOException {
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, true, true, false, true));
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, false, true, true, false, true));
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();

Expand All @@ -154,7 +154,7 @@ public void testNoAbbreviate_listOfObject() throws URISyntaxException, IOExcepti

@Test
public void testWithAbbreviate() throws URISyntaxException, IOException {
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, true, true, false, true));
CalculatorFacade calculatorFacade = new CalculatorFacade(new MeasurementConfiguration(true, false, true, true, false, true));
calculatorFacade.setSchema(new EdmOaiPmhJsonSchema());
calculatorFacade.configure();
String expected = "0.184,1.0,0.181818,0.388889,0.272727,0.5,0.357143,0.75,0.363636,0.4,1,1,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,0,5,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,12,0,0,0.0,0.0,0.0";
Expand Down Expand Up @@ -325,7 +325,7 @@ private CalculatorFacade createCalculatorFacadeForCsv() {
new CsvReader()
.setHeader(((CsvAwareSchema) schema).getHeader()));

MeasurementConfiguration measurementConfiguration = new MeasurementConfiguration(true, true, true, false, true);
MeasurementConfiguration measurementConfiguration = new MeasurementConfiguration(true, false, true, true, false, true);
assertTrue((measurementConfiguration.isFieldExistenceMeasurementEnabled()));
CalculatorFacade calculatorFacade = new CalculatorFacade(measurementConfiguration);

Expand Down
Loading