From 3c1ad632298b4c66f2583eef1cf33dd08791520c Mon Sep 17 00:00:00 2001
From: Steven Winship <39765413+stevenwinship@users.noreply.github.com>
Date: Tue, 7 May 2024 13:38:59 -0400
Subject: [PATCH 01/11] Json dataset validation
---
doc/sphinx-guides/source/api/native-api.rst | 13 +-
scripts/search/tests/data/dataset-finch3.json | 102 ++++++
.../iq/dataverse/DataverseServiceBean.java | 6 +-
.../JsonSchemaConstraintException.java | 4 +
.../validation/JSONDataValidation.java | 207 +++++++++++
src/main/java/propertyFiles/Bundle.properties | 12 +
.../harvard/iq/dataverse/api/DatasetsIT.java | 71 ++++
.../validation/JSONDataValidationTest.java | 327 ++++++++++++++++++
8 files changed, 738 insertions(+), 4 deletions(-)
create mode 100644 scripts/search/tests/data/dataset-finch3.json
create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java
create mode 100644 src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java
create mode 100644 src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index bcc37d6db1c..c30f551685c 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -566,8 +566,17 @@ While it is recommended to download a copy of the JSON Schema from the collectio
Validate Dataset JSON File for a Collection
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting
-and the presence of required elements:
+Validates a dataset JSON file customized for a given collection prior to creating the dataset.
+The validation tests for:
+JSON formatting and the presence of required elements
+typeClass must follow these rules:
+- if multiple = true then value must be a list
+- if typeClass = ``primitive`` the value object is a String or a List of Strings depending on the multiple flag
+- if typeClass = ``compound`` the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag
+- if typeClass = ``controlledVocabulary`` the value(s) are checked against the list of known values
+typeName validations include:
+- dsDescription validation includes checks for typeName = ``dsDescriptionValue`` (required) and ``dsDescriptionDate`` (optional)
+- datasetContact validation includes checks for typeName = ``datasetContactName`` (required), ``datasetContactEmail`` (optional) and ``datasetContactAffiliation`` (optional)
.. code-block:: bash
diff --git a/scripts/search/tests/data/dataset-finch3.json b/scripts/search/tests/data/dataset-finch3.json
new file mode 100644
index 00000000000..903b0aa124d
--- /dev/null
+++ b/scripts/search/tests/data/dataset-finch3.json
@@ -0,0 +1,102 @@
+{
+ "datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
+ "metadataBlocks": {
+ "citation": {
+ "fields": [
+ {
+ "value": "HTML & More",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "title"
+ },
+ {
+ "value": [
+ {
+ "authorName": {
+ "value": "Markup, Marty",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorName"
+ },
+ "authorAffiliation": {
+ "value": "W4C",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorAffiliation"
+ }
+ }
+ ],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "author"
+ },
+ {
+ "value": [
+ {
+ "datasetContactEmail": {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactEmail",
+ "value": "markup@mailinator.com"
+ },
+ "datasetContactName": {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactName",
+ "value": "Markup, Marty"
+ }
+ }
+ ],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "datasetContact"
+ },
+ {
+ "value": [
+ {
+ "dsDescriptionValue": {
+ "value": "BEGIN<br></br>END",
+ "multiple": false,
+ "typeClass": "primitive",
+ "typeName": "dsDescriptionValue"
+ },
+ "dsDescriptionDate": {
+ "typeName": "dsDescriptionDate",
+ "multiple": false,
+ "typeClass": "primitive",
+ "value": "2021-07-13"
+ }
+ }
+ ],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "dsDescription"
+ },
+ {
+ "value": [
+ "Medicine, Health and Life Sciences"
+ ],
+ "typeClass": "controlledVocabulary",
+ "multiple": true,
+ "typeName": "subject"
+ },
+ {
+ "typeName": "language",
+ "multiple": true,
+ "typeClass": "controlledVocabulary",
+ "value": [
+ "English",
+ "Afar",
+ "aar"
+ ]
+ }
+ ],
+ "displayName": "Citation Metadata"
+ }
+ }
+ }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 10b5d800c21..3d9ff19a617 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -22,7 +22,7 @@
import edu.harvard.iq.dataverse.storageuse.StorageQuota;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
-import edu.harvard.iq.dataverse.util.json.JsonUtil;
+
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
@@ -34,6 +34,7 @@
import java.util.logging.Logger;
import java.util.Properties;
+import edu.harvard.iq.dataverse.validation.JSONDataValidation;
import jakarta.ejb.EJB;
import jakarta.ejb.Stateless;
import jakarta.inject.Inject;
@@ -1023,9 +1024,10 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req
public String isDatasetJsonValid(String dataverseAlias, String jsonInput) {
JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias)));
- try {
+ try {
Schema schema = SchemaLoader.load(rawSchema);
schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid
+ JSONDataValidation.validate(schema, jsonInput); // throws a ValidationException if any objects are invalid
} catch (ValidationException vx) {
logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage());
String accumulatedexceptions = "";
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java
new file mode 100644
index 00000000000..110a4460313
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java
@@ -0,0 +1,4 @@
+package edu.harvard.iq.dataverse.engine.command.exception;
+
+public class JsonSchemaConstraintException { // NOTE(review): empty placeholder - declares no members and extends no exception type despite its name
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java
new file mode 100644
index 00000000000..99b0fdd9edc
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java
@@ -0,0 +1,207 @@
+package edu.harvard.iq.dataverse.validation;
+
+import com.mashape.unirest.http.JsonNode;
+import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
+import edu.harvard.iq.dataverse.DatasetFieldType;
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import jakarta.enterprise.inject.spi.CDI;
+import org.everit.json.schema.Schema;
+import org.everit.json.schema.ValidationException;
+import org.json.JSONArray;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JSONDataValidation { // Static-only helper applying typeClass/typeName rules to dataset JSON after the JSON Schema check has run
+ private static DatasetFieldServiceBean datasetFieldService = null; // resolved through CDI on the first validate() call
+ private static Map> schemaDTOMap = new ConcurrentHashMap<>(); // "typeName.required" / "typeName.allowed" -> child typeNames. NOTE(review): type arguments look stripped in this listing ("Map>") - confirm against the real file
+
+ /**
+ * Walks the parsed JSON and validates every nested object that carries a "typeClass" attribute (only when the schema defines "datasetVersion").
+ * @param schema Schema file defining the JSON objects to be validated
+ * @param jsonInput JSON string to validate against the schema
+ * @throws ValidationException on the first rule violation; the message is looked up in the bundle under schema.validation.exception.*
+ */
+ public static void validate(final Schema schema, String jsonInput) throws ValidationException {
+ if (datasetFieldService == null) { // lazy lookup; skipped when the field has already been set (the unit test sets it via reflection)
+ datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get();
+ }
+ if (schemaDTOMap.isEmpty()) { // NOTE(review): isEmpty()/put sequence is not atomic; a concurrent caller could observe a partially filled map
+ // TODO: load from a config file
+ schemaDTOMap.put("datasetContact", Collections.EMPTY_LIST); // NOTE(review): the bare typeName keys are never read in this class; only ".required"/".allowed" are looked up
+ schemaDTOMap.put("datasetContact.required", List.of("datasetContactName"));
+ schemaDTOMap.put("datasetContact.allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation"));
+ schemaDTOMap.put("dsDescription", Collections.EMPTY_LIST);
+ schemaDTOMap.put("dsDescription.required", List.of("dsDescriptionValue"));
+ schemaDTOMap.put("dsDescription.allowed", List.of("dsDescriptionValue", "dsDescriptionDate"));
+ }
+ JsonNode node = new JsonNode(jsonInput); // com.mashape.unirest.http.JsonNode (see imports) is used here as the parser
+ if (node.isArray()) {
+ JSONArray arrayNode = node.getArray();
+ validateObject(schema, "root", arrayNode.toList()); // top-level array: elements are visited under the synthetic key "root"
+ } else {
+ node.getObject().toMap().forEach((k,v) -> {
+ validateObject(schema, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); // presumably toMap() already yields Lists for nested arrays, making the JSONArray branch unreachable - TODO confirm
+ });
+ }
+ }
+
+ /*
+ * Validate objects recursively: a Map is validated itself and then each entry is visited; a List has each element visited under the parent's key; other values are ignored
+ */
+ private static void validateObject(final Schema schema, String key, Object value) {
+ if (value instanceof Map,?>) {
+ validateSchemaObject(schema, key, (Map) value); // check this object first, then descend into its entries
+
+ ((Map, ?>) value).entrySet().forEach(e -> {
+ validateObject(schema, (String) e.getKey(), e.getValue());
+ });
+ } else if (value instanceof List) {
+ ((List>) value).listIterator().forEachRemaining(v -> {
+ validateObject(schema, key, v); // list elements keep the key of the enclosing list
+ });
+ }
+ }
+
+ /*
+ * Validate objects specific to a type. Currently only validating Datasets (recognised by the schema defining a "datasetVersion" property)
+ */
+ private static void validateSchemaObject(final Schema schema, String key, Map valueMap) {
+ if (schema.definesProperty("datasetVersion")) {
+ validateDatasetObject(schema, key, valueMap);
+ }
+ }
+
+ /*
+ * Specific validation for Dataset objects: only maps that contain a "typeClass" entry are treated as fields and checked
+ */
+ private static void validateDatasetObject(final Schema schema, String key, Map valueMap) {
+ if (valueMap != null && valueMap.containsKey("typeClass")) {
+ validateTypeClass(schema, key, valueMap, valueMap.get("value"), "dataset"); // "dataset" is the message-key prefix for top-level fields
+ }
+ }
+
+ /* Checks one field object (a map carrying "typeClass") against the rules below; throws ValidationException on the first violation.
+ * key: The name of the parent object
+ * valueMap: Map of all the metadata of the object
+ * value: The value field of the object
+ * messageType: Refers to the parent: if this is an object from a dataset the messageType would be 'dataset'
+ * This needs to match the Bundle.properties for mapping the error messages when an exception occurs
+ *
+ * Rules for typeClass:
+ * The contents of value depend on the field attributes
+ * if single/primitive, value is a String
+ * if multiple, value is a JsonArray
+ * multiple/primitive: each JsonArray element will contain String
+ * multiple/compound: each JsonArray element will contain Set of FieldDTOs
+ */
+ private static void validateTypeClass(Schema schema, String key, Map valueMap, Object value, String messageType) {
+
+ String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : "";
+ String typeName = valueMap.containsKey("typeName") ? valueMap.get("typeName").toString() : "";
+ boolean multiple = Boolean.valueOf(String.valueOf(valueMap.getOrDefault("multiple", "false"))); // a missing "multiple" is treated as false
+
+ // make sure there is a value since 'value' is required
+ if (value == null) {
+ throwValidationException("value.missing", List.of(key, typeName)); // always throws, so value is non-null below
+ }
+
+ if (multiple && !(value instanceof List>)) {
+ throwValidationException("notlist.multiple", List.of(key, typeName, typeClass));
+ }
+ if (!multiple && value instanceof List>) {
+ throwValidationException("list.notmultiple", List.of(key, typeName));
+ }
+ if ("primitive".equals(typeClass) && !multiple && !(value instanceof String)) {
+ throwValidationException("type", List.of(key, typeName, typeClass)); // NOTE(review): no schema.validation.exception.type entry is visible in this patch's Bundle.properties hunk - confirm it exists
+ }
+ if ("primitive".equals(typeClass) && multiple) {
+ ((List>) value).listIterator().forEachRemaining(primitive -> { // cast is safe: the notlist.multiple check above threw for non-List values
+ if (!(primitive instanceof String)) {
+ throwValidationException("type", List.of(key, typeName, typeClass));
+ }
+ });
+ }
+ if ("compound".equals(typeClass)) {
+ if (multiple && value instanceof List>) { // NOTE(review): a compound with multiple=false is not inspected any further by this method
+ ((List>) value).listIterator().forEachRemaining(item -> {
+ if (!(item instanceof Map, ?>)) {
+ throwValidationException("compound", List.of(key, typeName, typeClass));
+ } else {
+ ((Map) item).forEach((k,val) -> {
+ if (!(val instanceof Map, ?>)) {
+ throwValidationException("compound", List.of(key, typeName, typeClass)); // always throws, so val is a Map below
+ }
+ // validate mismatch between compound object key and typeName in value
+ String valTypeName = ((Map, ?>) val).containsKey("typeName") ? (String)((Map, ?>) val).get("typeName") : "";
+ if (!k.equals(valTypeName)) {
+ throwValidationException("compound.mismatch", List.of((String)k, valTypeName));
+ }
+ validateChildObject(schema, (String)k, val, messageType + "." + typeName, // child messages are keyed by the parent's typeName, e.g. dataset.dsDescription
+ schemaDTOMap.getOrDefault(typeName+".required", Collections.EMPTY_LIST), schemaDTOMap.getOrDefault(typeName+".allowed", Collections.EMPTY_LIST));
+ });
+ }
+ });
+ }
+ }
+
+ if ("controlledVocabulary".equals(typeClass)) {
+ DatasetFieldType dsft = datasetFieldService.findByName(typeName); // NOTE(review): dsft is passed on without a null check
+ if (value instanceof List>) {
+ ((List>) value).listIterator().forEachRemaining(cvv -> {
+ if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) cvv, true) == null) { // NOTE(review): cast assumes a String entry; a non-String would raise ClassCastException, not ValidationException
+ throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) cvv)); // uses the literal "dataset" prefix rather than messageType
+ }
+ });
+ } else {
+ if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) value, true) == null) { // NOTE(review): same unchecked String cast as in the list branch
+ throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) value));
+ }
+ }
+ }
+ }
+
+ // If value is another object or list of objects that need to be further validated then messageType refers to the parent
+ // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescription
+ // This needs to match the Bundle.properties for mapping the error messages when an exception occurs
+ private static void validateChildObject(Schema schema, String key, Object child, String messageType, List requiredFields, List allowedFields) {
+ if (child instanceof Map, ?>) { // non-Map children are silently accepted
+ Map childMap = (Map) child;
+
+ if (!childMap.containsKey("value")) { // if child is simple key/value where the value Map is what we really want to validate
+ requiredFields.forEach(field -> {
+ if (!childMap.containsKey(field)) {
+ throwValidationException(messageType, "required.missing", List.of(key, field));
+ }
+ });
+ childMap.forEach((k, v) -> {
+ if (!allowedFields.isEmpty() && !allowedFields.contains(k)) { // an empty allowed list means no restriction on child names
+ throwValidationException(messageType, "invalidType", List.of(key, (String) k));
+ }
+ });
+ childMap.forEach((k,v) -> {
+ Map, ?> valueMap = (v instanceof Map, ?>) ? (Map, ?>) v : null;
+ if (valueMap == null || !k.equals(valueMap.get("typeName"))) { // each entry must be a map whose typeName equals its own key
+ throwValidationException(messageType, "invalidType", List.of(key, (String) k));
+ }
+ validateChildObject(schema, (String)k, v, messageType, requiredFields, allowedFields);
+ });
+ } else { // this child is an object with a "value" and "typeName" attribute
+ String typeName = childMap.containsKey("typeName") ? childMap.get("typeName").toString() : "";
+ validateTypeClass(schema, typeName, childMap, childMap.get("value"), messageType);
+ }
+ }
+ }
+ private static void throwValidationException(String key, List argList) { // never returns normally
+ throw new ValidationException(BundleUtil.getStringFromBundle("schema.validation.exception." + key, argList));
+ }
+ private static void throwValidationException(String type, String message, List argList) { // builds the key as type.message, or just message when type is null
+ if (type != null) {
+ throwValidationException(type + "." + message, argList);
+ } else {
+ throwValidationException(message, argList);
+ }
+ }
+}
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 02d848df1e3..9a82d7569e5 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -3007,3 +3007,15 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste
api.errors.invalidApiToken=Invalid API token.
api.ldninbox.citation.alert={0},<br><br> The {1} has just been notified that the {2}, <a href=''{3}''>{3}</a>, cites "<a href=''{4}/dataset.xhtml?persistentId={5}''>{6}</a>" in this repository.
api.ldninbox.citation.subject={0}: A Dataset Citation has been reported!
+
+#Schema Validation
+schema.validation.exception.value.missing=Invalid data for key:{0} typeName:{1}. ''value'' missing.
+schema.validation.exception.list.notmultiple=Invalid data for key:{0} typeName:{1}. Found value as list but ''multiple'' is set to false.
+schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{1}. Fields with ''multiple'' set to true must be a list.
+schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true)
+schema.validation.exception.compound.mismatch=Compound value {0} must match typeName of the object. Found {1}
+schema.validation.exception.dataset.cvv.missing=Controlled vocabulary for key:{0} typeName:{1} value:''{2}'' is not valid.
+schema.validation.exception.dataset.dsDescription.required.missing=Invalid data for key:{0} typeName:{1}. dsDescriptionValue is required if field type is dsDescription.
+schema.validation.exception.dataset.dsDescription.invalidType=Invalid data for key:{0} typeName:{1}. Only dsDescriptionValue and dsDescriptionDate allowed.
+schema.validation.exception.dataset.datasetContact.required.missing=Invalid data for key:{0} typeName:{1}. datasetContactName is required if field type is datasetContact.
+schema.validation.exception.dataset.datasetContact.invalidType=Invalid data for key:{0} typeName:{1}. Only datasetContactName, datasetContactEmail and datasetContactAffiliation allowed.
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
index 5b603d88c6d..5d0bb6e2fad 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
@@ -182,6 +182,77 @@ public void testCollectionSchema(){
}
+ @Test
+ public void testDatasetSchemaValidation() { // Creates a user and collection, then submits four corrupted variants of dataset-finch3.json to the validate endpoint
+
+ Response createUser = UtilIT.createRandomUser();
+ createUser.prettyPrint();
+ String username = UtilIT.getUsernameFromResponse(createUser); // NOTE(review): username is not used again in this method
+ String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+ Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+ createDataverseResponse.prettyPrint();
+ String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+ Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken);
+ getCollectionSchemaResponse.prettyPrint();
+ getCollectionSchemaResponse.then().assertThat()
+ .statusCode(200);
+
+ JsonObject expectedSchema = null;
+ try {
+ expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json");
+ } catch (IOException ex) { // NOTE(review): IOException is silently ignored, leaving expectedSchema null
+ }
+
+ assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); // the new collection's schema must equal the documented default schema
+
+ // add a language that is not in the Controlled vocabulary
+ testDatasetSchemaValidationHelper(dataverseAlias, apiToken,
+ "\"aar\"",
+ "\"aar\",\"badlang\"",
+ BundleUtil.getStringFromBundle("schema.validation.exception.dataset.cvv.missing", List.of("fields", "language", "badlang"))
+ );
+
+ // change multiple to true on a value that is not a List
+ testDatasetSchemaValidationHelper(dataverseAlias, apiToken,
+ "multiple\": false,\n" +
+ " \"typeName\": \"title",
+ "multiple\": true,\n" +
+ " \"typeName\": \"title",
+ BundleUtil.getStringFromBundle("schema.validation.exception.notlist.multiple", List.of("fields", "title"))
+ );
+
+ // change multiple to false on value that is a List
+ testDatasetSchemaValidationHelper(dataverseAlias, apiToken,
+ "typeName\": \"language\",\n" +
+ " \"multiple\": true",
+ "typeName\": \"language\",\n" +
+ " \"multiple\": false",
+ BundleUtil.getStringFromBundle("schema.validation.exception.list.notmultiple", List.of("fields", "language"))
+ );
+
+ // add a mismatched typeName
+ testDatasetSchemaValidationHelper(dataverseAlias, apiToken,
+ "\"typeName\": \"datasetContactName\",",
+ "\"typeName\": \"datasetContactNme\",",
+ BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme"))
+ );
+
+ Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); // clean up the collection created above
+ deleteDataverseResponse.prettyPrint();
+ assertEquals(200, deleteDataverseResponse.getStatusCode());
+ }
+ private void testDatasetSchemaValidationHelper(String dataverseAlias, String apiToken, String origString, String replacementString, String expectedError) { // Loads dataset-finch3.json, applies one textual substitution and checks the validate response for expectedError
+ String json = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch3.json");
+ json = json.replace(origString, replacementString); // literal replacement; origString must match the fixture text exactly, including whitespace
+ Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, json, apiToken);
+ validateDatasetJsonResponse.prettyPrint();
+ validateDatasetJsonResponse.then().assertThat()
+ .statusCode(200) // the test expects HTTP 200 with the validation error text carried in the body
+ .body(containsString(expectedError));
+ }
+
@Test
public void testCreateDataset() {
diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java
new file mode 100644
index 00000000000..25bdc9fe3af
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java
@@ -0,0 +1,327 @@
+package edu.harvard.iq.dataverse.validation;
+
+import edu.harvard.iq.dataverse.ControlledVocabularyValue;
+import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
+import edu.harvard.iq.dataverse.DatasetFieldType;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import org.everit.json.schema.Schema;
+import org.everit.json.schema.ValidationException;
+import org.everit.json.schema.loader.SchemaLoader;
+import org.json.JSONObject;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+
+import java.lang.reflect.Field;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.ArgumentMatchers.any;
+
+public class JSONDataValidationTest { // Unit tests for JSONDataValidation run against a mocked DatasetFieldServiceBean
+
+ @Mock // NOTE(review): no Mockito extension or openMocks call is visible in this class; the field is assigned with Mockito.mock() in setup()
+ static DatasetFieldServiceBean datasetFieldServiceMock;
+ @Mock
+ static DatasetFieldType datasetFieldTypeMock;
+ static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); // stand-in returned for the vocabulary values stubbed as known
+ static String rawSchema() { // JSON Schema fixture: requires datasetVersion.metadataBlocks.citation.fields with at least 5 items including title, author, datasetContact, dsDescription and subject
+ return """
+ {
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "$defs": {
+ "field": {
+ "type": "object",
+ "required": ["typeClass", "multiple", "typeName"],
+ "properties": {
+ "value": {
+ "anyOf": [
+ {
+ "type": "array"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "$ref": "#/$defs/field"
+ }
+ ]
+ },
+ "typeClass": {
+ "type": "string"
+ },
+ "multiple": {
+ "type": "boolean"
+ },
+ "typeName": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "type": "object",
+ "properties": {
+ "datasetVersion": {
+ "type": "object",
+ "properties": {
+ "license": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "uri": {
+ "type": "string",
+ "format": "uri"
+ }
+ },
+ "required": ["name", "uri"]
+ },
+ "metadataBlocks": {
+ "type": "object",
+ "properties": {
+ "citation": {
+ "type": "object",
+ "properties": {
+ "fields": {
+ "type": "array",
+ "items": {
+ "$ref": "#/$defs/field"
+ },
+ "minItems": 5,
+ "allOf": [
+ {
+ "contains": {
+ "properties": {
+ "typeName": {
+ "const": "title"
+ }
+ }
+ }
+ },
+ {
+ "contains": {
+ "properties": {
+ "typeName": {
+ "const": "author"
+ }
+ }
+ }
+ },
+ {
+ "contains": {
+ "properties": {
+ "typeName": {
+ "const": "datasetContact"
+ }
+ }
+ }
+ },
+ {
+ "contains": {
+ "properties": {
+ "typeName": {
+ "const": "dsDescription"
+ }
+ }
+ }
+ },
+ {
+ "contains": {
+ "properties": {
+ "typeName": {
+ "const": "subject"
+ }
+ }
+ }
+ }
+ ]
+ }
+ },
+ "required": ["fields"]
+ }
+ },
+ "required": ["citation"]
+ }
+ },
+ "required": ["metadataBlocks"]
+ }
+ },
+ "required": ["datasetVersion"]
+ }
+ """;
+ }
+ static String jsonInput() { // Dataset JSON fixture expected to pass validation; the tests derive invalid variants from it by string replacement
+ return """
+ {
+ "datasetVersion": {
+ "license": {
+ "name": "CC0 1.0",
+ "uri": "http://creativecommons.org/publicdomain/zero/1.0"
+ },
+ "metadataBlocks": {
+ "citation": {
+ "fields": [
+ {
+ "value": "Darwin's Finches",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "title"
+ },
+ {
+ "value": [
+ {
+ "authorName": {
+ "value": "Finch, Fiona",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorName"
+ },
+ "authorAffiliation": {
+ "value": "Birds Inc.",
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "authorAffiliation"
+ }
+ }
+ ],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "author"
+ },
+ {
+ "value": [
+ { "datasetContactEmail" : {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactEmail",
+ "value" : "finch@mailinator.com"
+ },
+ "datasetContactName" : {
+ "typeClass": "primitive",
+ "multiple": false,
+ "typeName": "datasetContactName",
+ "value": "Finch, Fiona"
+ }
+ }],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "datasetContact"
+ },
+ {
+ "value": [{
+ "dsDescriptionValue":{
+ "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
+ "multiple": false,
+ "typeClass": "primitive",
+ "typeName": "dsDescriptionValue"
+ },
+ "dsDescriptionDate": {
+ "typeName": "dsDescriptionDate",
+ "multiple": false,
+ "typeClass": "primitive",
+ "value": "2021-07-13"
+ }
+ }],
+ "typeClass": "compound",
+ "multiple": true,
+ "typeName": "dsDescription"
+ },
+ {
+ "value": {
+ "dsDescriptionValue":{
+ "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
+ "multiple": false,
+ "typeClass": "primitive",
+ "typeName": "dsDescriptionValue"
+ }},
+ "typeClass": "compound",
+ "multiple": false,
+ "typeName": "dsDescription"
+ },
+ {
+ "value": [
+ "Medicine, Health and Life Sciences",
+ "Social Sciences"
+ ],
+ "typeClass": "controlledVocabulary",
+ "multiple": true,
+ "typeName": "subject"
+ }
+ ],
+ "displayName": "Citation Metadata"
+ }
+ }
+ }
+ }
+ """;
+ }
+
+ @BeforeAll
+ static void setup() throws NoSuchFieldException, IllegalAccessException { // Builds the mocks and injects the service mock into JSONDataValidation's private static field
+ datasetFieldServiceMock = Mockito.mock(DatasetFieldServiceBean.class);
+ datasetFieldTypeMock = Mockito.mock(DatasetFieldType.class);
+ Field datasetFieldServiceField = JSONDataValidation.class.getDeclaredField("datasetFieldService");
+ datasetFieldServiceField.setAccessible(true);
+ datasetFieldServiceField.set(JSONDataValidation.class, datasetFieldServiceMock); // with the field pre-set, validate() skips its CDI lookup
+
+ Mockito.when(datasetFieldServiceMock.findByName(any(String.class))).thenReturn(datasetFieldTypeMock);
+ List cvvList = List.of("Medicine, Health and Life Sciences", "Social Sciences"); // the two subject values treated as known vocabulary
+ cvvList.forEach(i -> {
+ Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv);
+ });
+ Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); // "Bad" is explicitly stubbed as unknown
+ }
+ @Test
+ public void testGoodJson() { // passes when validate() throws nothing for the unmodified fixture
+ Schema schema = SchemaLoader.load(new JSONObject(rawSchema()));
+ JSONDataValidation.validate(schema, jsonInput());
+ }
+ @Test
+ public void testBadJson() { // each try block feeds one corrupted variant and fails unless a ValidationException is thrown
+ Schema schema = SchemaLoader.load(new JSONObject(rawSchema()));
+ try {
+ JSONDataValidation.validate(schema, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); // appends the unknown vocabulary value "Bad" to subject
+ fail();
+ } catch (ValidationException e) {
+ System.out.println(e.getMessage());
+ System.out.println(e.getStackTrace()); // NOTE(review): prints the array reference, not the stack trace contents
+ }
+
+ try {
+ // test multiple = false but value is list
+ JSONDataValidation.validate(schema, jsonInput().replaceAll("true", "false")); // flips every occurrence of "true" in the fixture
+ fail();
+ } catch (ValidationException e) {
+ System.out.println(e.getMessage());
+ }
+
+ // verify that child objects are also validated
+ String childTest = "\"multiple\": false, \"typeName\": \"authorAffiliation\"";
+ try {
+ String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); // collapse whitespace runs so the single-line childTest pattern can match
+ // test child object with multiple set to true
+ JSONDataValidation.validate(schema, trimmedStr.replace(childTest, childTest.replace("false", "true")));
+ fail();
+ } catch (ValidationException e) {
+ System.out.println(e.getMessage());
+ }
+
+ try {
+ // test dsDescription but dsDescriptionValue missing
+ JSONDataValidation.validate(schema, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); // changes the inner typeName only, so key and typeName no longer match
+ fail();
+ } catch (ValidationException e) {
+ System.out.println(e.getMessage());
+ }
+
+ try {
+ // test dsDescription but child dsDescriptionValue missing
+ JSONDataValidation.validate(schema, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); // renames the child key instead of the typeName
+ fail();
+ } catch (ValidationException e) {
+ System.out.println(e.getMessage());
+ }
+ }
+}
From 33d6b56777f445e51b619ef91fa6186c0aa38fd6 Mon Sep 17 00:00:00 2001
From: Steven Winship <39765413+stevenwinship@users.noreply.github.com>
Date: Thu, 9 May 2024 15:40:54 -0400
Subject: [PATCH 02/11] rework
---
.../iq/dataverse/DataverseServiceBean.java | 23 ++++-
.../validation/JSONDataValidation.java | 93 ++++++++-----------
src/main/java/propertyFiles/Bundle.properties | 6 +-
.../harvard/iq/dataverse/api/DatasetsIT.java | 13 +++
.../validation/JSONDataValidationTest.java | 61 ++++++++----
5 files changed, 114 insertions(+), 82 deletions(-)
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 3d9ff19a617..00774bbd3bf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -889,14 +889,16 @@ public List