From 3c1ad632298b4c66f2583eef1cf33dd08791520c Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Tue, 7 May 2024 13:38:59 -0400 Subject: [PATCH 01/11] Json dataset validation --- doc/sphinx-guides/source/api/native-api.rst | 13 +- scripts/search/tests/data/dataset-finch3.json | 102 ++++++ .../iq/dataverse/DataverseServiceBean.java | 6 +- .../JsonSchemaConstraintException.java | 4 + .../validation/JSONDataValidation.java | 207 +++++++++++ src/main/java/propertyFiles/Bundle.properties | 12 + .../harvard/iq/dataverse/api/DatasetsIT.java | 71 ++++ .../validation/JSONDataValidationTest.java | 327 ++++++++++++++++++ 8 files changed, 738 insertions(+), 4 deletions(-) create mode 100644 scripts/search/tests/data/dataset-finch3.json create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index bcc37d6db1c..c30f551685c 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -566,8 +566,17 @@ While it is recommended to download a copy of the JSON Schema from the collectio Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting -and the presence of required elements: +Validates a dataset JSON file customized for a given collection prior to creating the dataset. +The validation tests for: +Json formatting and the presence of required elements +typeClass must follow these rules: +- if multiple = true then value must be a list +- if typeClass = ''primitive'' the value object is a String or a List of Strings depending on the multiple flag +- if typeClass = ''compound'' the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag +- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values +typeName validations include: +- dsDescription validation includes checks for typeName = ''dsDescriptionValue'' (required) and ''dsDescriptionDate'' (optional) +- datasetContact validation includes checks for typeName = ''datasetContactName'' (required) and ''datasetContactEmail''; ''datasetContactAffiliation'' (optional) .. 
code-block:: bash diff --git a/scripts/search/tests/data/dataset-finch3.json b/scripts/search/tests/data/dataset-finch3.json new file mode 100644 index 00000000000..903b0aa124d --- /dev/null +++ b/scripts/search/tests/data/dataset-finch3.json @@ -0,0 +1,102 @@ +{ + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "HTML & More", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Markup, Marty", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "W4C", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { + "datasetContactEmail": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value": "markup@mailinator.com" + }, + "datasetContactName": { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Markup, Marty" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ + { + "dsDescriptionValue": { + "value": "BEGIN
<br></br>
END", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + }, + { + "typeName": "language", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "English", + "Afar", + "aar" + ] + } + ], + "displayName": "Citation Metadata" + } + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 10b5d800c21..3d9ff19a617 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -22,7 +22,7 @@ import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.Properties; +import edu.harvard.iq.dataverse.validation.JSONDataValidation; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Inject; @@ -1023,9 +1024,10 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); - try { + try { Schema schema = SchemaLoader.load(rawSchema); schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + JSONDataValidation.validate(schema, jsonInput); // throws a ValidationException if any objects are invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); String accumulatedexceptions = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java new file mode 100644 index 00000000000..110a4460313 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java @@ -0,0 +1,4 @@ +package edu.harvard.iq.dataverse.engine.command.exception; + +public class JsonSchemaConstraintException { +} diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java new file mode 100644 index 00000000000..99b0fdd9edc --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java @@ -0,0 +1,207 @@ +package edu.harvard.iq.dataverse.validation; + +import com.mashape.unirest.http.JsonNode; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.enterprise.inject.spi.CDI; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.json.JSONArray; + +import java.util.Collections; +import java.util.List; 
+import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class JSONDataValidation { + private static DatasetFieldServiceBean datasetFieldService = null; + private static Map> schemaDTOMap = new ConcurrentHashMap<>(); + + /** + * + * @param schema Schema file defining the JSON objects to be validated + * @param jsonInput JSON string to validate against the schema + * @throws ValidationException + */ + public static void validate(final Schema schema, String jsonInput) throws ValidationException { + if (datasetFieldService == null) { + datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get(); + } + if (schemaDTOMap.isEmpty()) { + // TODO: load from a config file + schemaDTOMap.put("datasetContact", Collections.EMPTY_LIST); + schemaDTOMap.put("datasetContact.required", List.of("datasetContactName")); + schemaDTOMap.put("datasetContact.allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); + schemaDTOMap.put("dsDescription", Collections.EMPTY_LIST); + schemaDTOMap.put("dsDescription.required", List.of("dsDescriptionValue")); + schemaDTOMap.put("dsDescription.allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); + } + JsonNode node = new JsonNode(jsonInput); + if (node.isArray()) { + JSONArray arrayNode = node.getArray(); + validateObject(schema, "root", arrayNode.toList()); + } else { + node.getObject().toMap().forEach((k,v) -> { + validateObject(schema, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); + }); + } + } + + /* + * Validate objects recursively + */ + private static void validateObject(final Schema schema, String key, Object value) { + if (value instanceof Map) { + validateSchemaObject(schema, key, (Map) value); + + ((Map) value).entrySet().forEach(e -> { + validateObject(schema, (String) e.getKey(), e.getValue()); + }); + } else if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(v -> { + validateObject(schema, key, v); + }); + } + } + + /* + * Validate objects specific to a type. Currently only validating Datasets + */ + private static void validateSchemaObject(final Schema schema, String key, Map valueMap) { + if (schema.definesProperty("datasetVersion")) { + validateDatasetObject(schema, key, valueMap); + } + } + + /* + * Specific validation for Dataset objects + */ + private static void validateDatasetObject(final Schema schema, String key, Map valueMap) { + if (valueMap != null && valueMap.containsKey("typeClass")) { + validateTypeClass(schema, key, valueMap, valueMap.get("value"), "dataset"); + } + } + + /* + * key: The name of the parent object + * valueMap: Map of all the metadata of the object + * value: The value field of the object + * messageType: Refers to the parent: if this is an object from a dataset the messageType would be 'dataset' + * This needs to match the Bundle.properties for mapping the error messages when an exception occurs + * + * Rules for typeClass: + * The contents of value depend on the field attributes + * if single/primitive, value is a String + * if multiple, value is a JsonArray + * multiple/primitive: each JsonArray element will contain String + * multiple/compound: each JsonArray element will contain Set of FieldDTOs + */ + private static void validateTypeClass(Schema schema, String key, Map valueMap, Object value, String messageType) { + + String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : ""; + String typeName = valueMap.containsKey("typeName") ? 
valueMap.get("typeName").toString() : ""; + boolean multiple = Boolean.valueOf(String.valueOf(valueMap.getOrDefault("multiple", "false"))); + + // make sure there is a value since 'value' is required + if (value == null) { + throwValidationException("value.missing", List.of(key, typeName)); + } + + if (multiple && !(value instanceof List)) { + throwValidationException("notlist.multiple", List.of(key, typeName, typeClass)); + } + if (!multiple && value instanceof List) { + throwValidationException("list.notmultiple", List.of(key, typeName)); + } + if ("primitive".equals(typeClass) && !multiple && !(value instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + if ("primitive".equals(typeClass) && multiple) { + ((List) value).listIterator().forEachRemaining(primitive -> { + if (!(primitive instanceof String)) { + throwValidationException("type", List.of(key, typeName, typeClass)); + } + }); + } + if ("compound".equals(typeClass)) { + if (multiple && value instanceof List) { + ((List) value).listIterator().forEachRemaining(item -> { + if (!(item instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } else { + ((Map) item).forEach((k,val) -> { + if (!(val instanceof Map)) { + throwValidationException("compound", List.of(key, typeName, typeClass)); + } + // validate mismatch between compound object key and typeName in value + String valTypeName = ((Map) val).containsKey("typeName") ? (String)((Map) val).get("typeName") : ""; + if (!k.equals(valTypeName)) { + throwValidationException("compound.mismatch", List.of((String)k, valTypeName)); + } + validateChildObject(schema, (String)k, val, messageType + "." + typeName, + schemaDTOMap.getOrDefault(typeName+".required", Collections.EMPTY_LIST), schemaDTOMap.getOrDefault(typeName+".allowed", Collections.EMPTY_LIST)); + }); + } + }); + } + } + + if ("controlledVocabulary".equals(typeClass)) { + DatasetFieldType dsft = datasetFieldService.findByName(typeName); + if (value instanceof List) { + ((List) value).listIterator().forEachRemaining(cvv -> { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) cvv, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) cvv)); + } + }); + } else { + if (datasetFieldService.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(dsft, (String) value, true) == null) { + throwValidationException("dataset", "cvv.missing", List.of(key, typeName, (String) value)); + } + } + } + } + + // If value is another object or list of objects that need to be further validated then childType refers to the parent + // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescriptionValue + // This needs to match the Bundle.properties for mapping the error messages when an exception occurs + private static void validateChildObject(Schema schema, String key, Object child, String messageType, List requiredFields, List allowedFields) { + if (child instanceof Map) { + Map childMap = (Map) child; + + if (!childMap.containsKey("value")) { // if child is simple key/value where the value Map is what we really want to validate + requiredFields.forEach(field -> { + if (!childMap.containsKey(field)) { + throwValidationException(messageType, "required.missing", List.of(key, field)); + } + }); + childMap.forEach((k, v) -> { + if (!allowedFields.isEmpty() && !allowedFields.contains(k)) { + throwValidationException(messageType, 
"invalidType", List.of(key, (String) k)); + } + }); + childMap.forEach((k,v) -> { + Map valueMap = (v instanceof Map) ? (Map) v : null; + if (valueMap == null || !k.equals(valueMap.get("typeName"))) { + throwValidationException(messageType, "invalidType", List.of(key, (String) k)); + } + validateChildObject(schema, (String)k, v, messageType, requiredFields, allowedFields); + }); + } else { // this child is an object with a "value" and "typeName" attribute + String typeName = childMap.containsKey("typeName") ? childMap.get("typeName").toString() : ""; + validateTypeClass(schema, typeName, childMap, childMap.get("value"), messageType); + } + } + } + private static void throwValidationException(String key, List argList) { + throw new ValidationException(BundleUtil.getStringFromBundle("schema.validation.exception." + key, argList)); + } + private static void throwValidationException(String type, String message, List argList) { + if (type != null) { + throwValidationException(type + "." + message, argList); + } else { + throwValidationException(message, argList); + } + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 02d848df1e3..9a82d7569e5 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -3007,3 +3007,15 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste api.errors.invalidApiToken=Invalid API token. api.ldninbox.citation.alert={0},
<br><br>
The {1} has just been notified that the {2}, {3}, cites "{6}" in this repository. api.ldninbox.citation.subject={0}: A Dataset Citation has been reported! + +#Schema Validation +schema.validation.exception.value.missing=Invalid data for key:{0} typeName:{1}. 'value' missing. +schema.validation.exception.list.notmultiple=Invalid data for key:{0} typeName:{1}. Found value as list but ''multiple'' is set to false. +schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{1}. Fields with ''multiple'' set to true must be a list. +schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true) +schema.validation.exception.compound.mismatch=Compound value {0} must match typeName of the object. Found {1} +schema.validation.exception.dataset.cvv.missing=Controlled vocabulary for key:{0} typeName:{1} value:''{2}'' is not valid. +schema.validation.exception.dataset.dsDescription.required.missing=Invalid data for key:{0} typeName:{1}. dsDescriptionValue is required if field type is dsDescription. +schema.validation.exception.dataset.dsDescription.invalidType=Invalid data for key:{0} typeName:{1}. Only dsDescriptionValue and dsDescriptionDate allowed. +schema.validation.exception.dataset.datasetContact.required.missing=Invalid data for key:{0} typeName:{1}. datasetContactName is required if field type is datasetContact. +schema.validation.exception.dataset.datasetContact.invalidType=Invalid data for key:{0} typeName:{1}. Only datasetContactName, datasetContactEmail and datasetContactAffiliation allowed. \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 5b603d88c6d..5d0bb6e2fad 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -182,6 +182,77 @@ public void testCollectionSchema(){ } + @Test + public void testDatasetSchemaValidation() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); + + // add a language that is not in the Controlled vocabulary + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"aar\"", + "\"aar\",\"badlang\"", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.cvv.missing", List.of("fields", "language", "badlang")) + ); + + // change multiple to true on value that is a not a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "multiple\": false,\n" + + " \"typeName\": \"title", + "multiple\": true,\n" + + " \"typeName\": \"title", + 
BundleUtil.getStringFromBundle("schema.validation.exception.notlist.multiple", List.of("fields", "title")) + ); + + // change multiple to false on value that is a List + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "typeName\": \"language\",\n" + + " \"multiple\": true", + "typeName\": \"language\",\n" + + " \"multiple\": false", + BundleUtil.getStringFromBundle("schema.validation.exception.list.notmultiple", List.of("fields", "language")) + ); + + // add a mismatched typeName + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"typeName\": \"datasetContactName\",", + "\"typeName\": \"datasetContactNme\",", + BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme")) + ); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + } + private void testDatasetSchemaValidationHelper(String dataverseAlias, String apiToken, String origString, String replacementString, String expectedError) { + String json = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch3.json"); + json = json.replace(origString, replacementString); + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, json, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200) + .body(containsString(expectedError)); + } + @Test public void testCreateDataset() { diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java new file mode 100644 index 00000000000..25bdc9fe3af --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -0,0 +1,327 @@ +package edu.harvard.iq.dataverse.validation; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import java.lang.reflect.Field; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.fail; +import static org.mockito.ArgumentMatchers.any; + +public class JSONDataValidationTest { + + @Mock + static DatasetFieldServiceBean datasetFieldServiceMock; + @Mock + static DatasetFieldType datasetFieldTypeMock; + static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + static String rawSchema() { + return """ + { + "$schema": "http://json-schema.org/draft-04/schema#", + "$defs": { + "field": { + "type": "object", + "required": ["typeClass", "multiple", "typeName"], + "properties": { + "value": { + "anyOf": [ + { + "type": "array" + }, + { + "type": "string" + }, + { + "$ref": "#/$defs/field" + } + ] + }, + "typeClass": { + "type": "string" + }, + "multiple": { + "type": "boolean" + }, + "typeName": { + "type": "string" + } + } + } + }, + "type": "object", + "properties": { + "datasetVersion": { + "type": "object", + "properties": { + "license": { + "type": "object", + "properties": { + "name": { + "type": 
"string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["name", "uri"] + }, + "metadataBlocks": { + "type": "object", + "properties": { + "citation": { + "type": "object", + "properties": { + "fields": { + "type": "array", + "items": { + "$ref": "#/$defs/field" + }, + "minItems": 5, + "allOf": [ + { + "contains": { + "properties": { + "typeName": { + "const": "title" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "author" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "datasetContact" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "dsDescription" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "subject" + } + } + } + } + ] + } + }, + "required": ["fields"] + } + }, + "required": ["citation"] + } + }, + "required": ["metadataBlocks"] + } + }, + "required": ["datasetVersion"] + } + """; + } + static String jsonInput() { + return """ + { + "datasetVersion": { + "license": { + "name": "CC0 1.0", + "uri": "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Darwin's Finches", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { "datasetContactEmail" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value" : "finch@mailinator.com" + }, + "datasetContactName" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactName", + "value": "Finch, Fiona" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [{ + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }, + "dsDescriptionDate": { + "typeName": "dsDescriptionDate", + "multiple": false, + "typeClass": "primitive", + "value": "2021-07-13" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": { + "dsDescriptionValue":{ + "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", + "multiple": false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }}, + "typeClass": "compound", + "multiple": false, + "typeName": "dsDescription" + }, + { + "value": [ + "Medicine, Health and Life Sciences", + "Social Sciences" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + } + ], + "displayName": "Citation Metadata" + } + } + } + } + """; + } + + @BeforeAll + static void setup() throws NoSuchFieldException, IllegalAccessException { + datasetFieldServiceMock = Mockito.mock(DatasetFieldServiceBean.class); + datasetFieldTypeMock = Mockito.mock(DatasetFieldType.class); + Field datasetFieldServiceField = JSONDataValidation.class.getDeclaredField("datasetFieldService"); + datasetFieldServiceField.setAccessible(true); + 
datasetFieldServiceField.set(JSONDataValidation.class, datasetFieldServiceMock); + + Mockito.when(datasetFieldServiceMock.findByName(any(String.class))).thenReturn(datasetFieldTypeMock); + List cvvList = List.of("Medicine, Health and Life Sciences", "Social Sciences"); + cvvList.forEach(i -> { + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv); + }); + Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); + } + @Test + public void testGoodJson() { + Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + JSONDataValidation.validate(schema, jsonInput()); + } + @Test + public void testBadJson() { + Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + try { + JSONDataValidation.validate(schema, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + System.out.println(e.getStackTrace()); + } + + try { + // test multiple = false but value is list + JSONDataValidation.validate(schema, jsonInput().replaceAll("true", "false")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + // verify that child objects are also validated + String childTest = "\"multiple\": false, \"typeName\": \"authorAffiliation\""; + try { + String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); + // test child object with multiple set to true + JSONDataValidation.validate(schema, trimmedStr.replace(childTest, childTest.replace("false", "true"))); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but dsDescriptionValue missing + JSONDataValidation.validate(schema, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test dsDescription but child dsDescriptionValue missing + JSONDataValidation.validate(schema, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + } +} From 33d6b56777f445e51b619ef91fa6186c0aa38fd6 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Thu, 9 May 2024 15:40:54 -0400 Subject: [PATCH 02/11] rework --- .../iq/dataverse/DataverseServiceBean.java | 23 ++++- .../validation/JSONDataValidation.java | 93 ++++++++----------- src/main/java/propertyFiles/Bundle.properties | 6 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 13 +++ .../validation/JSONDataValidationTest.java | 61 ++++++++---- 5 files changed, 114 insertions(+), 82 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 3d9ff19a617..00774bbd3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -889,14 +889,16 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - public String getCollectionDatasetSchema(String dataverseAlias) { + return getCollectionDatasetSchema(dataverseAlias, null); + } + public String 
getCollectionDatasetSchema(String dataverseAlias, Map>> schemaChildMap) { Dataverse testDV = this.findByAlias(dataverseAlias); while (!testDV.isMetadataBlockRoot()) { if (testDV.getOwner() == null) { - break; // we are at the root; which by defintion is metadata blcok root, regarldess of the value + break; // we are at the root; which by definition is metadata block root, regardless of the value } testDV = testDV.getOwner(); } @@ -933,6 +935,8 @@ public String getCollectionDatasetSchema(String dataverseAlias) { dsft.setRequiredDV(dsft.isRequired()); dsft.setInclude(true); } + List childrenRequired = new ArrayList<>(); + List childrenAllowed = new ArrayList<>(); if (dsft.isHasChildren()) { for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); @@ -945,8 +949,18 @@ public String getCollectionDatasetSchema(String dataverseAlias) { child.setRequiredDV(child.isRequired() && dsft.isRequired()); child.setInclude(true); } + if (child.isRequired()) { + childrenRequired.add(child.getName()); + } + childrenAllowed.add(child.getName()); } } + if (schemaChildMap != null) { + Map> map = new HashMap<>(); + map.put("required", childrenRequired); + map.put("allowed", childrenAllowed); + schemaChildMap.put(dsft.getName(), map); + } if(dsft.isRequiredDV()){ requiredDSFT.add(dsft); } @@ -1022,12 +1036,13 @@ private String getCustomMDBSchema (MetadataBlock mdb, List req } public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { - JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + Map>> schemaChildMap = new HashMap<>(); + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias, schemaChildMap))); try { Schema schema = SchemaLoader.load(rawSchema); schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid - JSONDataValidation.validate(schema, jsonInput); // throws a ValidationException if any objects are invalid + JSONDataValidation.validate(schema, schemaChildMap, jsonInput); // throws a ValidationException if any objects are invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); String accumulatedexceptions = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java index 99b0fdd9edc..fb19a14e7de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java +++ b/src/main/java/edu/harvard/iq/dataverse/validation/JSONDataValidation.java @@ -9,14 +9,13 @@ import org.everit.json.schema.ValidationException; import org.json.JSONArray; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.*; +import java.util.logging.Logger; +import java.util.stream.Collectors; public class JSONDataValidation { + private static final Logger logger = Logger.getLogger(JSONDataValidation.class.getCanonicalName()); private static DatasetFieldServiceBean datasetFieldService = null; - private static Map> schemaDTOMap = new ConcurrentHashMap<>(); /** * @@ -24,26 +23,17 @@ public class JSONDataValidation { * @param jsonInput JSON string to validate against the schema * @throws ValidationException */ - public static 
void validate(final Schema schema, String jsonInput) throws ValidationException { + public static void validate(Schema schema, Map>> schemaChildMap, String jsonInput) throws ValidationException { if (datasetFieldService == null) { datasetFieldService = CDI.current().select(DatasetFieldServiceBean.class).get(); } - if (schemaDTOMap.isEmpty()) { - // TODO: load from a config file - schemaDTOMap.put("datasetContact", Collections.EMPTY_LIST); - schemaDTOMap.put("datasetContact.required", List.of("datasetContactName")); - schemaDTOMap.put("datasetContact.allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); - schemaDTOMap.put("dsDescription", Collections.EMPTY_LIST); - schemaDTOMap.put("dsDescription.required", List.of("dsDescriptionValue")); - schemaDTOMap.put("dsDescription.allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); - } JsonNode node = new JsonNode(jsonInput); if (node.isArray()) { JSONArray arrayNode = node.getArray(); - validateObject(schema, "root", arrayNode.toList()); + validateObject(schema, schemaChildMap, "root", arrayNode.toList()); } else { node.getObject().toMap().forEach((k,v) -> { - validateObject(schema, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); + validateObject(schema, schemaChildMap, k, (v instanceof JSONArray) ? ((JSONArray) v).toList() : v); }); } } @@ -51,16 +41,16 @@ public static void validate(final Schema schema, String jsonInput) throws Valida /* * Validate objects recursively */ - private static void validateObject(final Schema schema, String key, Object value) { + private static void validateObject(Schema schema, Map>> schemaChildMap, String key, Object value) { if (value instanceof Map) { - validateSchemaObject(schema, key, (Map) value); + validateSchemaObject(schema, schemaChildMap, key, (Map) value); ((Map) value).entrySet().forEach(e -> { - validateObject(schema, (String) e.getKey(), e.getValue()); + validateObject(schema, schemaChildMap, (String) e.getKey(), e.getValue()); }); } else if (value instanceof List) { ((List) value).listIterator().forEachRemaining(v -> { - validateObject(schema, key, v); + validateObject(schema, schemaChildMap, key, v); }); } } @@ -68,18 +58,18 @@ private static void validateObject(final Schema schema, String key, Object value /* * Validate objects specific to a type. 
Currently only validating Datasets */ - private static void validateSchemaObject(final Schema schema, String key, Map valueMap) { + private static void validateSchemaObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { if (schema.definesProperty("datasetVersion")) { - validateDatasetObject(schema, key, valueMap); + validateDatasetObject(schema, schemaChildMap, key, valueMap); } } /* * Specific validation for Dataset objects */ - private static void validateDatasetObject(final Schema schema, String key, Map valueMap) { + private static void validateDatasetObject(Schema schema, Map>> schemaChildMap, String key, Map valueMap) { if (valueMap != null && valueMap.containsKey("typeClass")) { - validateTypeClass(schema, key, valueMap, valueMap.get("value"), "dataset"); + validateTypeClass(schema, schemaChildMap, key, valueMap, valueMap.get("value"), "dataset"); } } @@ -97,7 +87,7 @@ private static void validateDatasetObject(final Schema schema, String key, Map v * multiple/primitive: each JsonArray element will contain String * multiple/compound: each JsonArray element will contain Set of FieldDTOs */ - private static void validateTypeClass(Schema schema, String key, Map valueMap, Object value, String messageType) { + private static void validateTypeClass(Schema schema, Map>> schemaChildMap, String key, Map valueMap, Object value, String messageType) { String typeClass = valueMap.containsKey("typeClass") ? valueMap.get("typeClass").toString() : ""; String typeName = valueMap.containsKey("typeName") ? valueMap.get("typeName").toString() : ""; @@ -135,13 +125,12 @@ private static void validateTypeClass(Schema schema, String key, Map valueMap, O throwValidationException("compound", List.of(key, typeName, typeClass)); } // validate mismatch between compound object key and typeName in value - String valTypeName = ((Map) val).containsKey("typeName") ? (String)((Map) val).get("typeName") : ""; + String valTypeName = ((Map) val).containsKey("typeName") ? (String) ((Map) val).get("typeName") : ""; if (!k.equals(valTypeName)) { - throwValidationException("compound.mismatch", List.of((String)k, valTypeName)); + throwValidationException("compound.mismatch", List.of((String) k, valTypeName)); } - validateChildObject(schema, (String)k, val, messageType + "." 
+ typeName, - schemaDTOMap.getOrDefault(typeName+".required", Collections.EMPTY_LIST), schemaDTOMap.getOrDefault(typeName+".allowed", Collections.EMPTY_LIST)); }); + validateChildren(schema, schemaChildMap, key, ((Map) item).values(), typeName, messageType); } }); } @@ -166,32 +155,26 @@ private static void validateTypeClass(Schema schema, String key, Map valueMap, O // If value is another object or list of objects that need to be further validated then childType refers to the parent // Example: If this is a dsDescriptionValue from a dataset the messageType would be dataset.dsDescriptionValue // This needs to match the Bundle.properties for mapping the error messages when an exception occurs - private static void validateChildObject(Schema schema, String key, Object child, String messageType, List requiredFields, List allowedFields) { - if (child instanceof Map) { - Map childMap = (Map) child; - - if (!childMap.containsKey("value")) { // if child is simple key/value where the value Map is what we really want to validate - requiredFields.forEach(field -> { - if (!childMap.containsKey(field)) { - throwValidationException(messageType, "required.missing", List.of(key, field)); - } - }); - childMap.forEach((k, v) -> { - if (!allowedFields.isEmpty() && !allowedFields.contains(k)) { - throwValidationException(messageType, "invalidType", List.of(key, (String) k)); - } - }); - childMap.forEach((k,v) -> { - Map valueMap = (v instanceof Map) ? (Map) v : null; - if (valueMap == null || !k.equals(valueMap.get("typeName"))) { - throwValidationException(messageType, "invalidType", List.of(key, (String) k)); - } - validateChildObject(schema, (String)k, v, messageType, requiredFields, allowedFields); - }); - } else { // this child is an object with a "value" and "typeName" attribute - String typeName = childMap.containsKey("typeName") ? childMap.get("typeName").toString() : ""; - validateTypeClass(schema, typeName, childMap, childMap.get("value"), messageType); + private static void validateChildren(Schema schema, Map>> schemaChildMap, String key, Collection children, String typeName, String messageType) { + if (children == null || children.isEmpty()) { + return; + } + List requiredFields = new ArrayList<>(); + requiredFields.addAll((List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("required", Collections.EMPTY_LIST)); + List allowedFields = (List)schemaChildMap.getOrDefault(typeName, Collections.EMPTY_MAP).getOrDefault("allowed", Collections.EMPTY_LIST); + children.forEach(child -> { + if (child instanceof Map) { + String childTypeName = ((Map) child).containsKey("typeName") ? 
(String)((Map) child).get("typeName") : ""; + if (!allowedFields.isEmpty() && !allowedFields.contains(childTypeName)) { + throwValidationException(messageType, "invalidType", List.of(typeName, childTypeName, allowedFields.stream().collect(Collectors.joining(", ")))); + } + if (!requiredFields.isEmpty() && requiredFields.contains(childTypeName)) { + requiredFields.remove(childTypeName); + } } + }); + if (!requiredFields.isEmpty()) { + throwValidationException(messageType, "required.missing", List.of(typeName, requiredFields.stream().collect(Collectors.joining(", ")), typeName)); } } private static void throwValidationException(String key, List argList) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 9a82d7569e5..6355e71cb36 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -3015,7 +3015,5 @@ schema.validation.exception.notlist.multiple=Invalid data for key:{0} typeName:{ schema.validation.exception.compound=Compound data type must be accompanied by a value that is either an object (multiple=false) or a list of objects (multiple=true) schema.validation.exception.compound.mismatch=Compound value {0} must match typeName of the object. Found {1} schema.validation.exception.dataset.cvv.missing=Controlled vocabulary for key:{0} typeName:{1} value:''{2}'' is not valid. -schema.validation.exception.dataset.dsDescription.required.missing=Invalid data for key:{0} typeName:{1}. dsDescriptionValue is required if field type is dsDescription. -schema.validation.exception.dataset.dsDescription.invalidType=Invalid data for key:{0} typeName:{1}. Only dsDescriptionValue and dsDescriptionDate allowed. -schema.validation.exception.dataset.datasetContact.required.missing=Invalid data for key:{0} typeName:{1}. datasetContactName is required if field type is datasetContact. -schema.validation.exception.dataset.datasetContact.invalidType=Invalid data for key:{0} typeName:{1}. Only datasetContactName, datasetContactEmail and datasetContactAffiliation allowed. \ No newline at end of file +schema.validation.exception.dataset.invalidType=Invalid data for key:{0} typeName:{1}. Only {2} allowed. +schema.validation.exception.dataset.required.missing=Invalid data for key:{0}. {1} is(are) required if field type is {2}. 
\ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 5d0bb6e2fad..1506fd8be90 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -239,6 +239,19 @@ public void testDatasetSchemaValidation() { BundleUtil.getStringFromBundle("schema.validation.exception.compound.mismatch", List.of("datasetContactName", "datasetContactNme")) ); + // add a typeName which is not allowed + testDatasetSchemaValidationHelper(dataverseAlias, apiToken, + "\"datasetContactEmail\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactEmail\",", + "\"datasetContactNotAllowed\": {\n" + + " \"typeClass\": \"primitive\",\n" + + " \"multiple\": false,\n" + + " \"typeName\": \"datasetContactNotAllowed\",", + BundleUtil.getStringFromBundle("schema.validation.exception.dataset.invalidType", List.of("datasetContact", "datasetContactNotAllowed", "datasetContactName, datasetContactAffiliation, datasetContactEmail")) + ); + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteDataverseResponse.prettyPrint(); assertEquals(200, deleteDataverseResponse.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index 25bdc9fe3af..4e60d013f87 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -3,18 +3,20 @@ import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.util.json.JsonUtil; import org.everit.json.schema.Schema; import org.everit.json.schema.ValidationException; import org.everit.json.schema.loader.SchemaLoader; import org.json.JSONObject; +import org.json.JSONTokener; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.Mockito; import java.lang.reflect.Field; +import java.util.HashMap; import java.util.List; +import java.util.Map; import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; @@ -26,6 +28,7 @@ public class JSONDataValidationTest { @Mock static DatasetFieldType datasetFieldTypeMock; static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + static Map>> schemaChildMap = new HashMap<>(); static String rawSchema() { return """ { @@ -228,18 +231,6 @@ static String jsonInput() { "multiple": true, "typeName": "dsDescription" }, - { - "value": { - "dsDescriptionValue":{ - "value": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", - "multiple": false, - "typeClass": "primitive", - "typeName": "dsDescriptionValue" - }}, - "typeClass": "compound", - "multiple": false, - "typeName": "dsDescription" - }, { "value": [ "Medicine, Health and Life Sciences", @@ -272,17 +263,33 @@ static void setup() throws NoSuchFieldException, IllegalAccessException { Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, i,true)).thenReturn(cvv); }); 
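        // A lookup that returns null is treated by JSONDataValidation as an unknown term,
        // triggering the "cvv.missing" validation error.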
Mockito.when(datasetFieldServiceMock.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(datasetFieldTypeMock, "Bad",true)).thenReturn(null); + + Map> datasetContact = new HashMap<>(); + datasetContact.put("required", List.of("datasetContactName")); + datasetContact.put("allowed", List.of("datasetContactName", "datasetContactEmail","datasetContactAffiliation")); + schemaChildMap.put("datasetContact",datasetContact); + Map> dsDescription = new HashMap<>(); + dsDescription.put("required", List.of("dsDescriptionValue")); + dsDescription.put("allowed", List.of("dsDescriptionValue", "dsDescriptionDate")); + schemaChildMap.put("dsDescription",dsDescription); + + } + @Test + public void testSchema() { + JSONObject rawSchema = new JSONObject(new JSONTokener(rawSchema())); + Schema schema = SchemaLoader.load(rawSchema); + schema.validate(new JSONObject(jsonInput())); } @Test public void testGoodJson() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); - JSONDataValidation.validate(schema, jsonInput()); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); } @Test public void testBadJson() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); try { - JSONDataValidation.validate(schema, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -291,7 +298,7 @@ public void testBadJson() { try { // test multiple = false but value is list - JSONDataValidation.validate(schema, jsonInput().replaceAll("true", "false")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("true", "false")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -302,7 +309,7 @@ public void testBadJson() { try { String trimmedStr = jsonInput().replaceAll("\\s{2,}", " "); // test child object with multiple set to true - JSONDataValidation.validate(schema, trimmedStr.replace(childTest, childTest.replace("false", "true"))); + JSONDataValidation.validate(schema, schemaChildMap, trimmedStr.replace(childTest, childTest.replace("false", "true"))); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -310,7 +317,7 @@ public void testBadJson() { try { // test dsDescription but dsDescriptionValue missing - JSONDataValidation.validate(schema, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("typeName\": \"dsDescriptionValue", "typeName\": \"notdsDescriptionValue")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); @@ -318,7 +325,23 @@ public void testBadJson() { try { // test dsDescription but child dsDescriptionValue missing - JSONDataValidation.validate(schema, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("dsDescriptionValue\":{", "notdsDescriptionValue\":{")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // test required dataType missing + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("\"datasetContactName\"", "\"datasetContactAffiliation\"")); + fail(); + } catch (ValidationException e) { + System.out.println(e.getMessage()); + } + + try { + // 
test dataType not allowed + JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replaceAll("\"datasetContactEmail\"", "\"datasetContactNotAllowed\"")); fail(); } catch (ValidationException e) { System.out.println(e.getMessage()); From ba8b01b44b18d776b636191cf3ad02a1ccace787 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 10 May 2024 10:04:51 -0400 Subject: [PATCH 03/11] doc changes --- doc/release-notes/10169-JSON-schema-validation.md | 4 ++++ doc/sphinx-guides/source/api/native-api.rst | 5 +++-- .../iq/dataverse/validation/JSONDataValidationTest.java | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 doc/release-notes/10169-JSON-schema-validation.md diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md new file mode 100644 index 00000000000..ae47f6a1401 --- /dev/null +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -0,0 +1,4 @@ +### JSON Schema for datasets + +Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. + diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index c30f551685c..c7447deb22a 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -573,10 +573,11 @@ typeClass must follow these rules: - if multiple = true then value must be a list - if typeClass = ''primitive'' the value object is a String or a List of Strings depending on the multiple flag - if typeClass = ''compound'' the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag -- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values -typeName validations include: +- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values stored in the database +typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: - dsDescription validation includes checks for typeName = ''dsDescriptionValue'' (required) and ''dsDescriptionDate'' (optional) - datasetContact validation includes checks for typeName = ''datasetContactName'' (required) and ''datasetContactEmail''; ''datasetContactAffiliation'' (optional) +- etc. .. 
code-block:: bash diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index 4e60d013f87..e88dc9f4bd9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -281,12 +281,12 @@ public void testSchema() { schema.validate(new JSONObject(jsonInput())); } @Test - public void testGoodJson() { + public void testValid() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); } @Test - public void testBadJson() { + public void testInvalid() { Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); try { JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); From fe297bad1e90c2c6d5071589acbea4732178bebb Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 10 May 2024 10:07:43 -0400 Subject: [PATCH 04/11] remove unused class --- .../command/exception/JsonSchemaConstraintException.java | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java deleted file mode 100644 index 110a4460313..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/JsonSchemaConstraintException.java +++ /dev/null @@ -1,4 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.exception; - -public class JsonSchemaConstraintException { -} From d40b2e5e37c2d0a48a4a6dc846cd50a4b5ac3cd4 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 3 Jun 2024 09:49:12 -0400 Subject: [PATCH 05/11] review comment mods --- doc/release-notes/10169-JSON-schema-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md index ae47f6a1401..ad89804f504 100644 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -1,4 +1,4 @@ ### JSON Schema for datasets -Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. +Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. 
See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. From 76d6ee45ffc1cf2b76a1cda6c9bbfc31fae819af Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 10 Jun 2024 17:07:26 -0400 Subject: [PATCH 06/11] doc and release note tweaks #10169 --- .../10169-JSON-schema-validation.md | 5 ++-- doc/sphinx-guides/source/api/native-api.rst | 28 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md index ad89804f504..92ff4a917d5 100644 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ b/doc/release-notes/10169-JSON-schema-validation.md @@ -1,4 +1,3 @@ -### JSON Schema for datasets - -Enhanced JSON schema validation with checks for required and allowed child objects, Type checking for field types including: ''primative''; ''compound''; and ''controlledVocabulary'' . More user-friendly error messages to help pinpoint the issues in the Dataset JSON. Rules are driven off the database schema, so no manual configuration is needed. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10169. +### Improved JSON Schema validation for datasets +Enhanced JSON schema validation with checks for required and allowed child objects, type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages to help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10543. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index a1c864fca4f..7bd1e9f9561 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -539,9 +539,7 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This -first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled -vocabulary and more robust dataset field format testing: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: .. code-block:: bash @@ -567,17 +565,21 @@ Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Validates a dataset JSON file customized for a given collection prior to creating the dataset. 
+ The validation tests for: -Json formatting and the presence of required elements -typeClass must follow these rules: -- if multiple = true then value must be a list -- if typeClass = ''primitive'' the value object is a String or a List of Strings depending on the multiple flag -- if typeClass = ''compound'' the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag -- if typeClass = ''controlledVocabulary'' the value(s) are checked against the list of known values stored in the database -typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: -- dsDescription validation includes checks for typeName = ''dsDescriptionValue'' (required) and ''dsDescriptionDate'' (optional) -- datasetContact validation includes checks for typeName = ''datasetContactName'' (required) and ''datasetContactEmail''; ''datasetContactAffiliation'' (optional) -- etc. + +- JSON formatting +- required fields +- typeClass must follow these rules: + + - if multiple = true then value must be a list + - if typeClass = ``primitive`` the value object is a String or a List of Strings depending on the multiple flag + - if typeClass = ``compound`` the value object is a FieldDTO or a List of FieldDTOs depending on the multiple flag + - if typeClass = ``controlledVocabulary`` the values are checked against the list of allowed values stored in the database + - typeName validations (child objects with their required and allowed typeNames are configured automatically by the database schema). Examples include: + + - dsDescription validation includes checks for typeName = ``dsDescriptionValue`` (required) and ``dsDescriptionDate`` (optional) + - datasetContact validation includes checks for typeName = ``datasetContactName`` (required) and ``datasetContactEmail``; ``datasetContactAffiliation`` (optional) .. code-block:: bash From fc067f8d757d6b7b8274ee26774a3d8597a5f406 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 17 Jun 2024 09:36:05 -0400 Subject: [PATCH 07/11] remove extra space in error message per review comment --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3fb60e5f8e2..0d5a9333a31 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2744,7 +2744,7 @@ dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial ve dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection. 
-dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: +dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: dataverses.api.validate.json.exception=Validation failed with following exception: #Access.java From a523093ca0057fdcfbcbefccf4646c8f90f9c1da Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 17 Jun 2024 10:25:55 -0400 Subject: [PATCH 08/11] remove hard coded datasetschema.json and load from file --- .../harvard/iq/dataverse/api/Datasets.java | 1 - .../validation/JSONDataValidationTest.java | 133 ++---------------- 2 files changed, 11 insertions(+), 123 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index fc0afc562fc..b27f9532cc3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.api; import com.amazonaws.services.s3.model.PartETag; - import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index e88dc9f4bd9..4ac27b66131 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import org.everit.json.schema.Schema; import org.everit.json.schema.ValidationException; import org.everit.json.schema.loader.SchemaLoader; @@ -13,6 +14,7 @@ import org.mockito.Mock; import org.mockito.Mockito; +import java.io.IOException; import java.lang.reflect.Field; import java.util.HashMap; import java.util.List; @@ -29,132 +31,19 @@ public class JSONDataValidationTest { static DatasetFieldType datasetFieldTypeMock; static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); static Map>> schemaChildMap = new HashMap<>(); + + static String rawSchemaJson = null; static String rawSchema() { - return """ - { - "$schema": "http://json-schema.org/draft-04/schema#", - "$defs": { - "field": { - "type": "object", - "required": ["typeClass", "multiple", "typeName"], - "properties": { - "value": { - "anyOf": [ - { - "type": "array" - }, - { - "type": "string" - }, - { - "$ref": "#/$defs/field" - } - ] - }, - "typeClass": { - "type": "string" - }, - "multiple": { - "type": "boolean" - }, - "typeName": { - "type": "string" - } - } + if (rawSchemaJson == null) { + try { + rawSchemaJson = JsonUtil.prettyPrint(JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json")); + } catch (IOException e) { + e.printStackTrace(); } - }, - "type": "object", - "properties": { - "datasetVersion": { - "type": "object", - "properties": { - "license": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "uri": { - "type": "string", - "format": "uri" - } - }, - "required": ["name", "uri"] - }, - "metadataBlocks": { - "type": "object", - "properties": { - "citation": { - "type": 
"object", - "properties": { - "fields": { - "type": "array", - "items": { - "$ref": "#/$defs/field" - }, - "minItems": 5, - "allOf": [ - { - "contains": { - "properties": { - "typeName": { - "const": "title" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "author" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "datasetContact" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "dsDescription" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "subject" - } - } - } - } - ] - } - }, - "required": ["fields"] - } - }, - "required": ["citation"] - } - }, - "required": ["metadataBlocks"] - } - }, - "required": ["datasetVersion"] } - """; + return rawSchemaJson; } + static String jsonInput() { return """ { From 6e41658e099cdda7177c87108a59a105bca6283b Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 17 Jun 2024 10:52:36 -0400 Subject: [PATCH 09/11] remove hard coded datasetschema.json and load from file --- .../validation/JSONDataValidationTest.java | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java index 4ac27b66131..107dcecba35 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/JSONDataValidationTest.java @@ -3,12 +3,10 @@ import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.util.json.JsonUtil; import org.everit.json.schema.Schema; import org.everit.json.schema.ValidationException; import org.everit.json.schema.loader.SchemaLoader; import org.json.JSONObject; -import org.json.JSONTokener; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.mockito.Mock; @@ -16,6 +14,9 @@ import java.io.IOException; import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -32,16 +33,14 @@ public class JSONDataValidationTest { static ControlledVocabularyValue cvv = new ControlledVocabularyValue(); static Map>> schemaChildMap = new HashMap<>(); - static String rawSchemaJson = null; - static String rawSchema() { - if (rawSchemaJson == null) { - try { - rawSchemaJson = JsonUtil.prettyPrint(JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json")); - } catch (IOException e) { - e.printStackTrace(); - } + static JSONObject rawSchema = null; + static JSONObject rawSchema() throws IOException { + if (rawSchema == null) { + Path file = Path.of("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + String schema = Files.readString(file, StandardCharsets.UTF_8); + rawSchema = new JSONObject(schema); } - return rawSchemaJson; + return rawSchema; } static String jsonInput() { @@ -164,19 +163,18 @@ static void setup() throws NoSuchFieldException, IllegalAccessException { } @Test - public void testSchema() { - JSONObject rawSchema = new JSONObject(new JSONTokener(rawSchema())); - Schema schema = SchemaLoader.load(rawSchema); + public void testSchema() throws IOException { + Schema schema = 
SchemaLoader.load(rawSchema()); schema.validate(new JSONObject(jsonInput())); } @Test - public void testValid() { - Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + public void testValid() throws IOException { + Schema schema = SchemaLoader.load(rawSchema()); JSONDataValidation.validate(schema, schemaChildMap, jsonInput()); } @Test - public void testInvalid() { - Schema schema = SchemaLoader.load(new JSONObject(rawSchema())); + public void testInvalid() throws IOException { + Schema schema = SchemaLoader.load(rawSchema()); try { JSONDataValidation.validate(schema, schemaChildMap, jsonInput().replace("\"Social Sciences\"", "\"Social Sciences\",\"Bad\"")); fail(); From 8205f1395e5d6daefc0c78c94fbfd0a54d305c06 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Thu, 15 Aug 2024 14:16:28 +0100 Subject: [PATCH 10/11] Extend the API for dataverse facets retrieval (#10727) * Added: list dataverse facets API endpoint extended to include details * Added: docs for listFacets dataverses API endpoint * Added: API endpoint for getting all facetable field types * Changed: API endpoint route for obtaining facetables dataset fields * Changed: DatasetFieldServiceApi format reset * Added: docs for #10726 * Added: release notes for #10726 * Added: IT test case for listing Dataverse facets --- .../10726-dataverse-facets-api-extension.md | 3 + doc/sphinx-guides/source/api/native-api.rst | 38 ++++++ .../iq/dataverse/api/DatasetFields.java | 29 +++++ .../harvard/iq/dataverse/api/Dataverses.java | 23 ++-- .../iq/dataverse/util/json/JsonPrinter.java | 115 ++++++++++-------- .../iq/dataverse/api/DatasetFieldsIT.java | 29 +++++ .../iq/dataverse/api/DataversesIT.java | 53 ++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 11 +- tests/integration-tests.txt | 2 +- 9 files changed, 244 insertions(+), 59 deletions(-) create mode 100644 doc/release-notes/10726-dataverse-facets-api-extension.md create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java diff --git a/doc/release-notes/10726-dataverse-facets-api-extension.md b/doc/release-notes/10726-dataverse-facets-api-extension.md new file mode 100644 index 00000000000..baf6f798e35 --- /dev/null +++ b/doc/release-notes/10726-dataverse-facets-api-extension.md @@ -0,0 +1,3 @@ +New optional query parameter "returnDetails" added to "dataverses/{identifier}/facets/" endpoint to include detailed information of each DataverseFacet. + +New endpoint "datasetfields/facetables" that lists all facetable dataset fields defined in the installation. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 369b5595f51..97cc1acc5c3 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -224,6 +224,22 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/facets" +By default, this endpoint will return an array including the facet names. If more detailed information is needed, we can set the query parameter ``returnDetails`` to ``true``, which will return the display name and id in addition to the name for each facet: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/facets?returnDetails=true" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/facets?returnDetails=true" + Set Facets for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -4717,6 +4733,28 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/metadatablocks/citation" +.. _dataset-fields-api: + +Dataset Fields +-------------- + +List All Facetable Dataset Fields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +List all facetable dataset fields defined in the installation. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/datasetfields/facetables" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl "https://demo.dataverse.org/api/datasetfields/facetables" + .. _Notifications: Notifications diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java new file mode 100644 index 00000000000..2ec35c896d9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFields.java @@ -0,0 +1,29 @@ +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldType; +import jakarta.ejb.EJB; +import jakarta.ws.rs.*; +import jakarta.ws.rs.core.Response; + +import java.util.List; + +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.jsonDatasetFieldTypes; + +/** + * Api bean for managing dataset fields. 
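 * (Editor's note, not part of the patch: the lone GET endpoint below is
 * unauthenticated; per DatasetFieldsIT further down, a call such as
 * GET /api/datasetfields/facetables returns an OK envelope whose "data"
 * array holds one JSON object per facetable DatasetFieldType.)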
+ */ +@Path("datasetfields") +@Produces("application/json") +public class DatasetFields extends AbstractApiBean { + + @EJB + DatasetFieldServiceBean datasetFieldService; + + @GET + @Path("facetables") + public Response listAllFacetableDatasetFields() { + List datasetFieldTypes = datasetFieldService.findAllFacetableFieldTypes(); + return ok(jsonDatasetFieldTypes(datasetFieldTypes)); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 3ea3c74c4a0..ed2a8db5e06 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -849,22 +849,29 @@ public Response setMetadataRoot(@Context ContainerRequestContext crc, @PathParam /** * return list of facets for the dataverse with alias `dvIdtf` */ - public Response listFacets(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) { + public Response listFacets(@Context ContainerRequestContext crc, + @PathParam("identifier") String dvIdtf, + @QueryParam("returnDetails") boolean returnDetails) { try { - User u = getRequestUser(crc); - DataverseRequest r = createDataverseRequest(u); + User user = getRequestUser(crc); + DataverseRequest request = createDataverseRequest(user); Dataverse dataverse = findDataverseOrDie(dvIdtf); - JsonArrayBuilder fs = Json.createArrayBuilder(); - for (DataverseFacet f : execCommand(new ListFacetsCommand(r, dataverse))) { - fs.add(f.getDatasetFieldType().getName()); + List dataverseFacets = execCommand(new ListFacetsCommand(request, dataverse)); + + if (returnDetails) { + return ok(jsonDataverseFacets(dataverseFacets)); + } else { + JsonArrayBuilder facetsBuilder = Json.createArrayBuilder(); + for (DataverseFacet facet : dataverseFacets) { + facetsBuilder.add(facet.getDatasetFieldType().getName()); + } + return ok(facetsBuilder); } - return ok(fs); } catch (WrappedResponse e) { return e.getResponse(); } } - @GET @AuthRequired @Path("{identifier}/featured") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index c908a4d2bce..0b1643186a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -79,7 +79,7 @@ public static void injectSettingsService(SettingsServiceBean ssb, DatasetFieldSe } public JsonPrinter() { - + } public static final BriefJsonPrinter brief = new BriefJsonPrinter(); @@ -122,7 +122,7 @@ public static JsonObjectBuilder json(AuthenticatedUser authenticatedUser) { .add("authenticationProviderId", authenticatedUser.getAuthenticatedUserLookup().getAuthenticationProviderId()); return builder; } - + public static JsonObjectBuilder json(RoleAssignment ra) { return jsonObjectBuilder() .add("id", ra.getId()) @@ -147,7 +147,7 @@ public static JsonObjectBuilder json(DatasetLock lock) { .add("dataset", lock.getDataset().getGlobalId().asString()) .add("message", lock.getInfo()); } - + public static JsonObjectBuilder json( RoleAssigneeDisplayInfo d ) { return jsonObjectBuilder() .add("title", d.getTitle()) @@ -171,17 +171,17 @@ public static JsonObjectBuilder json(IpGroup grp) { .add("id", grp.getId() ) .add("name", grp.getDisplayName() ) .add("description", grp.getDescription() ); - + if ( ! singles.isEmpty() ) { bld.add("addresses", asJsonArray(singles) ); } - + if ( ! 
ranges.isEmpty() ) { JsonArrayBuilder rangesBld = Json.createArrayBuilder(); ranges.forEach( r -> rangesBld.add( Json.createArrayBuilder().add(r.get(0)).add(r.get(1))) ); bld.add("ranges", rangesBld ); } - + return bld; } @@ -192,7 +192,7 @@ public static JsonObjectBuilder json(ShibGroup grp) { .add("pattern", grp.getPattern()) .add("id", grp.getId()); } - + public static JsonObjectBuilder json(MailDomainGroup grp) { JsonObjectBuilder bld = jsonObjectBuilder() .add("alias", grp.getPersistedGroupAlias() ) @@ -235,14 +235,14 @@ public static JsonObjectBuilder json(DataverseRole role) { return bld; } - + public static JsonObjectBuilder json(Workflow wf){ JsonObjectBuilder bld = jsonObjectBuilder(); bld.add("name", wf.getName()); if ( wf.getId() != null ) { bld.add("id", wf.getId()); } - + if ( wf.getSteps()!=null && !wf.getSteps().isEmpty()) { JsonArrayBuilder arr = Json.createArrayBuilder(); for ( WorkflowStepData stp : wf.getSteps() ) { @@ -253,10 +253,10 @@ public static JsonObjectBuilder json(Workflow wf){ } bld.add("steps", arr ); } - + return bld; } - + public static JsonObjectBuilder json(Dataverse dv) { return json(dv, false, false); } @@ -268,7 +268,7 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean re .add("alias", dv.getAlias()) .add("name", dv.getName()) .add("affiliation", dv.getAffiliation()); - if(!hideEmail) { + if(!hideEmail) { bld.add("dataverseContacts", JsonPrinter.json(dv.getDataverseContacts())); } if (returnOwners){ @@ -312,11 +312,11 @@ public static JsonArrayBuilder json(List dataverseContacts) { } return jsonArrayOfContacts; } - + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject){ return getOwnersFromDvObject(dvObject, null); } - + public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, DatasetVersion dsv) { List ownerList = new ArrayList(); dvObject = dvObject.getOwner(); // We're going to ignore the object itself @@ -324,7 +324,7 @@ public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, Dataset while (dvObject != null) { ownerList.add(0, dvObject); dvObject = dvObject.getOwner(); - } + } //then work "inside out" JsonObjectBuilder saved = null; for (DvObject dvo : ownerList) { @@ -332,7 +332,7 @@ public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, Dataset } return saved; } - + private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObjectBuilder isPartOf, DatasetVersion dsv ) { JsonObjectBuilder ownerObject = jsonObjectBuilder(); @@ -353,16 +353,16 @@ private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObject ownerObject.add("version", versionString); } } - + ownerObject.add("displayName", dvo.getDisplayName()); - + if (isPartOf != null) { ownerObject.add("isPartOf", isPartOf); } - + return ownerObject; } - + public static JsonObjectBuilder json( DataverseTheme theme ) { final NullSafeJsonBuilder baseObject = jsonObjectBuilder() .add("id", theme.getId() ) @@ -385,7 +385,7 @@ public static JsonObjectBuilder json(BuiltinUser user) { .add("id", user.getId()) .add("userName", user.getUserName()); } - + public static JsonObjectBuilder json(Dataset ds){ return json(ds, false); } @@ -421,7 +421,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) { return json(dsv, null, includeFiles, false); } - public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean 
includeFiles, boolean returnOwners) { Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() @@ -471,7 +471,7 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ? jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) : jsonByBlocks(dsv.getDatasetFields()) - ); + ); if(returnOwners){ bld.add("isPartOf", getOwnersFromDvObject(dataset)); } @@ -483,19 +483,19 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized } public static JsonObjectBuilder jsonDataFileList(List dataFiles){ - + if (dataFiles==null){ throw new NullPointerException("dataFiles cannot be null"); } - + JsonObjectBuilder bld = jsonObjectBuilder(); - - + + List dataFileList = dataFiles.stream() .map(x -> x.getFileMetadata()) .collect(Collectors.toList()); - + bld.add("files", jsonFileMetadatas(dataFileList)); return bld; @@ -584,7 +584,7 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie blockBld.add("displayName", block.getDisplayName()); blockBld.add("name", block.getName()); - + final JsonArrayBuilder fieldsArray = Json.createArrayBuilder(); Map cvocMap = (datasetFieldService==null) ? new HashMap() :datasetFieldService.getCVocConf(true); DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray, anonymizedFieldTypeNamesList)); @@ -663,6 +663,14 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO return jsonObjectBuilder; } + public static JsonArrayBuilder jsonDatasetFieldTypes(List fields) { + JsonArrayBuilder fieldsJson = Json.createArrayBuilder(); + for (DatasetFieldType field : fields) { + fieldsJson.add(JsonPrinter.json(field)); + } + return fieldsJson; + } + public static JsonObjectBuilder json(DatasetFieldType fld) { return json(fld, null); } @@ -705,7 +713,7 @@ public static JsonObjectBuilder json(DatasetFieldType fld, Dataverse ownerDatave return fieldsBld; } - + public static JsonObjectBuilder json(FileMetadata fmd){ return json(fmd, false, false); } @@ -751,11 +759,11 @@ public static JsonObjectBuilder json(AuxiliaryFile auxFile) { public static JsonObjectBuilder json(DataFile df) { return JsonPrinter.json(df, null, false); } - + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider){ return json(df, fileMetadata, forExportDataProvider, false); } - + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) { // File names are no longer stored in the DataFile entity; // (they are instead in the FileMetadata (as "labels") - this way @@ -766,13 +774,13 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo // *correct* file name - i.e., that it comes from the right version. // (TODO...? L.A. 4.5, Aug 7 2016) String fileName = null; - + if (fileMetadata == null){ // Note that this may not necessarily grab the file metadata from the // version *you want*! (L.A.) fileMetadata = df.getFileMetadata(); } - + fileName = fileMetadata.getLabel(); GlobalId filePid = df.getGlobalId(); String pidURL = (filePid!=null)? filePid.asURL(): null; @@ -839,7 +847,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo } return builder; } - + //Started from https://github.com/RENCI-NRIG/dataverse/, i.e. 
https://github.com/RENCI-NRIG/dataverse/commit/2b5a1225b42cf1caba85e18abfeb952171c6754a public static JsonArrayBuilder jsonDT(List ldt) { JsonArrayBuilder ldtArr = Json.createArrayBuilder(); @@ -880,8 +888,8 @@ public static JsonObjectBuilder json(DataVariable dv) { .add("variableFormatType", dv.getType().name()) // varFormat .add("formatCategory", dv.getFormatCategory()) .add("format", dv.getFormat()) - .add("isOrderedCategorical", dv.isOrderedCategorical()) - .add("fileOrder", dv.getFileOrder()) + .add("isOrderedCategorical", dv.isOrderedCategorical()) + .add("fileOrder", dv.getFileOrder()) .add("UNF",dv.getUnf()) .add("fileStartPosition", dv.getFileStartPosition()) .add("fileEndPosition", dv.getFileEndPosition()) @@ -909,7 +917,7 @@ private static JsonArrayBuilder jsonInvalidRanges(Collection inva .add("hasEndValueType", vr.getEndValueType()!=null) .add("endValueTypeMax", vr.isEndValueTypeMax()) .add("endValueTypeMaxExcl", vr.isEndValueTypeMaxExcl()); - + invRanges.add(job); } return invRanges; @@ -941,7 +949,7 @@ private static JsonArrayBuilder jsonCatStat(Collection catStat } return catArr; } - + private static JsonArrayBuilder jsonVarGroup(List varGroups) { JsonArrayBuilder vgArr = Json.createArrayBuilder(); for (VarGroup vg : varGroups) { @@ -955,7 +963,7 @@ private static JsonArrayBuilder jsonVarGroup(List varGroups) { } return vgArr; } - + private static JsonArrayBuilder jsonVarMetadata(Collection varMetadatas) { JsonArrayBuilder vmArr = Json.createArrayBuilder(); for (VariableMetadata vm : varMetadatas) { @@ -976,7 +984,7 @@ private static JsonArrayBuilder jsonVarMetadata(Collection var } return vmArr; } - + private static JsonArrayBuilder json(Collection categoriesMetadata) { JsonArrayBuilder cmArr = Json.createArrayBuilder(); for(CategoryMetadata cm: categoriesMetadata) { @@ -990,9 +998,9 @@ private static JsonArrayBuilder json(Collection categoriesMeta public static JsonObjectBuilder json(HarvestingClient harvestingClient) { if (harvestingClient == null) { - return null; + return null; } - + return jsonObjectBuilder().add("nickName", harvestingClient.getName()). add("dataverseAlias", harvestingClient.getDataverse().getAlias()). add("type", harvestingClient.getHarvestType()). @@ -1014,7 +1022,7 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) { add("lastDatasetsDeleted", harvestingClient.getLastDeletedDatasetCount()). // == null ? "N/A" : harvestingClient.getLastDeletedDatasetCount().toString()). add("lastDatasetsFailed", harvestingClient.getLastFailedDatasetCount()); // == null ? "N/A" : harvestingClient.getLastFailedDatasetCount().toString()); } - + public static String format(Date d) { return (d == null) ? 
null : Util.getDateTimeFormat().format(d); } @@ -1051,7 +1059,7 @@ public static JsonArrayBuilder getTabularFileTags(DataFile df) { } return tabularTags; } - + private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener { Deque objectStack = new LinkedList<>(); @@ -1187,11 +1195,20 @@ public static JsonObjectBuilder json( ExplicitGroup eg ) { .add("displayName", eg.getDisplayName()) .add("containedRoleAssignees", ras); } - - public static JsonObjectBuilder json( DataverseFacet aFacet ) { + + public static JsonArrayBuilder jsonDataverseFacets(List dataverseFacets) { + JsonArrayBuilder dataverseFacetsJson = Json.createArrayBuilder(); + for(DataverseFacet facet: dataverseFacets) { + dataverseFacetsJson.add(json(facet)); + } + return dataverseFacetsJson; + } + + public static JsonObjectBuilder json(DataverseFacet aFacet) { return jsonObjectBuilder() .add("id", String.valueOf(aFacet.getId())) // TODO should just be id I think - .add("name", aFacet.getDatasetFieldType().getDisplayName()); + .add("displayName", aFacet.getDatasetFieldType().getDisplayName()) + .add("name", aFacet.getDatasetFieldType().getName()); } public static JsonObjectBuilder json(Embargo embargo) { @@ -1329,7 +1346,7 @@ public static JsonObjectBuilder getChecksumTypeAndValue(DataFile.ChecksumType ch return null; } } - + /** * Takes a map, returns a Json object for this map. * If map is {@code null}, returns {@code null}. diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java new file mode 100644 index 00000000000..ae90ddf0b4c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetFieldsIT.java @@ -0,0 +1,29 @@ +package edu.harvard.iq.dataverse.api; + +import io.restassured.RestAssured; +import io.restassured.response.Response; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.equalTo; + +public class DatasetFieldsIT { + + @BeforeAll + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + void testListAllFacetableDatasetFields() { + Response listAllFacetableDatasetFieldsResponse = UtilIT.listAllFacetableDatasetFields(); + listAllFacetableDatasetFieldsResponse.then().assertThat().statusCode(OK.getStatusCode()); + int expectedNumberOfFacetableDatasetFields = 59; + listAllFacetableDatasetFieldsResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].name", equalTo("authorName")) + .body("data[0].displayName", equalTo("Author Name")) + .body("data.size()", equalTo(expectedNumberOfFacetableDatasetFields)); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index c0b762df2ab..1b7440465ec 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -1046,6 +1046,59 @@ public void testAddDataverse() { .body("message", equalTo("Invalid metadata block name: \"" + invalidMetadataBlockName + "\"")); } + @Test + public void testListFacets() { + Response createUserResponse = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias 
= UtilIT.getAliasFromResponse(createDataverseResponse); + + String[] expectedFacetNames = {"authorName", "subject", "keywordValue", "dateOfDeposit"}; + + // returnDetails is false + Response listFacetsResponse = UtilIT.listDataverseFacets(dataverseAlias, false, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + String actualFacetName = listFacetsResponse.then().extract().path("data[0]"); + assertThat(expectedFacetNames, hasItemInArray(actualFacetName)); + + // returnDetails is true + String[] expectedDisplayNames = {"Author Name", "Subject", "Keyword Term", "Deposit Date"}; + listFacetsResponse = UtilIT.listDataverseFacets(dataverseAlias, true, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + actualFacetName = listFacetsResponse.then().extract().path("data[0].name"); + assertThat(expectedFacetNames, hasItemInArray(actualFacetName)); + String actualDisplayName = listFacetsResponse.then().extract().path("data[0].displayName"); + assertThat(expectedDisplayNames, hasItemInArray(actualDisplayName)); + String actualId = listFacetsResponse.then().extract().path("data[0].id"); + assertNotNull(actualId); + + // Dataverse with custom facets + String dataverseWithCustomFacetsAlias = UtilIT.getRandomDvAlias() + "customFacets"; + + String[] testFacetNames = {"authorName", "authorAffiliation"}; + String[] testMetadataBlockNames = {"citation", "geospatial"}; + + Response createSubDataverseResponse = UtilIT.createSubDataverse(dataverseWithCustomFacetsAlias, null, apiToken, "root", null, testFacetNames, testMetadataBlockNames); + createSubDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + + listFacetsResponse = UtilIT.listDataverseFacets(dataverseWithCustomFacetsAlias, true, apiToken); + listFacetsResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String actualFacetName1 = listFacetsResponse.then().extract().path("data[0].name"); + String actualFacetName2 = listFacetsResponse.then().extract().path("data[1].name"); + assertNotEquals(actualFacetName1, actualFacetName2); + assertThat(testFacetNames, hasItemInArray(actualFacetName1)); + assertThat(testFacetNames, hasItemInArray(actualFacetName2)); + + String[] testFacetExpectedDisplayNames = {"Author Name", "Author Affiliation"}; + String actualFacetDisplayName1 = listFacetsResponse.then().extract().path("data[0].displayName"); + String actualFacetDisplayName2 = listFacetsResponse.then().extract().path("data[1].displayName"); + assertNotEquals(actualFacetDisplayName1, actualFacetDisplayName2); + assertThat(testFacetExpectedDisplayNames, hasItemInArray(actualFacetDisplayName1)); + assertThat(testFacetExpectedDisplayNames, hasItemInArray(actualFacetDisplayName2)); + } + @Test public void testGetUserPermissionsOnDataverse() { Response createUserResponse = UtilIT.createRandomUser(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 5a5d5e1c29b..8f1fcdf57eb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -4031,9 +4031,13 @@ public static Response getOpenAPI(String accept, String format) { } static Response listDataverseFacets(String dataverseAlias, String apiToken) { + return listDataverseFacets(dataverseAlias, false, apiToken); + } + + static Response listDataverseFacets(String dataverseAlias, boolean returnDetails, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) - 
.contentType("application/json") + .queryParam("returnDetails", returnDetails) .get("/api/dataverses/" + dataverseAlias + "/facets"); } @@ -4043,4 +4047,9 @@ static Response listDataverseInputLevels(String dataverseAlias, String apiToken) .contentType("application/json") .get("/api/dataverses/" + dataverseAlias + "/inputLevels"); } + + static Response listAllFacetableDatasetFields() { + return given() + .get("/api/datasetfields/facetables"); + } } diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 44bbfdcceb7..64610d07e50 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT From cf174b218b79073bb52ddf2e3d8eb557662403e4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 15 Aug 2024 14:49:43 -0400 Subject: [PATCH 11/11] IQSS/7068 Reserve File Pids (#7334) * file pid reservation * add file pid reservation step to publish (analogous to dataset pid register if needed) Conflicts: src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java src/main/java/propertyFiles/Bundle.properties * comment change * check if file PIDs used once, use constants - per comments * adding release note * release notes, API doc update * reflecting datasets and files for the PID endpoint * removing release note about pre-reg for file PIDs as this is not supported * file pid pre-reservation Conflicts: src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java src/main/java/propertyFiles/Bundle.properties * avoid problem when GlobalIDServiceBean implicitly merges @kcondon sees a DB error persisting a file with null createtime during the GlobalIDServiceBean.getBean call which uses a set namedQuery to find the :DoiProvider. Create times for files are set above, but not merged prior to calling registerFilePidsIfNeeded. Assuming the namedQuery is forcing the file (without a merge) to persist which triggers the error. In #7337, the code is reworked so there is a merge prior to registerFilePidsIfNeeded. This commit adds one temporarily so this PR works indepdently of the other. 
* update theDataset * noting that PID reservation can cause old timeouts to be too short * more specifics * release note update * cleanup reformatting * further cleanup * set createTime earlier --------- Co-authored-by: Danny Brooke --- doc/release-notes/7068-reserve-file-pids.md | 9 +++ doc/sphinx-guides/source/api/native-api.rst | 2 +- .../iq/dataverse/DataFileServiceBean.java | 8 -- .../command/impl/AbstractDatasetCommand.java | 80 ++++++++++++++++++- .../FinalizeDatasetPublicationCommand.java | 22 ++--- .../command/impl/PublishDatasetCommand.java | 7 -- .../command/impl/ReservePidCommand.java | 30 ++----- .../impl/UpdateDatasetVersionCommand.java | 3 +- .../iq/dataverse/util/SystemConfig.java | 4 +- src/main/java/propertyFiles/Bundle.properties | 7 +- 10 files changed, 110 insertions(+), 62 deletions(-) create mode 100644 doc/release-notes/7068-reserve-file-pids.md diff --git a/doc/release-notes/7068-reserve-file-pids.md b/doc/release-notes/7068-reserve-file-pids.md new file mode 100644 index 00000000000..182a0d7f67b --- /dev/null +++ b/doc/release-notes/7068-reserve-file-pids.md @@ -0,0 +1,9 @@ +## Release Highlights + +### Pre-Publish File DOI Reservation with DataCite + +Dataverse installations using DataCite (or other persistent identifier (PID) Providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). + +## Major Use Cases + +- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time. (Issue #7068, PR #7334) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 97cc1acc5c3..bf790c47778 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5171,7 +5171,7 @@ The fully expanded example above (without environment variables) looks like this Reserve a PID ~~~~~~~~~~~~~ -Reserved a PID for a dataset. A superuser API token is required. +Reserve a PID for a dataset if not yet registered, and, if FilePIDs are enabled, reserve any file PIDs that are not yet registered. A superuser API token is required. .. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of export below. diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 21f925f8981..9331ec67d12 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1248,14 +1248,6 @@ public List selectFilesWithMissingOriginalSizes() { } - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network). Also check for duplicate - * in the remote PID service if needed - * @param datafileId - * @param storageLocation - * @return {@code true} iff the global identifier is unique. 
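 // (Editor's note: the javadoc deleted in this hunk was orphaned -- it
 // documents an identifier-uniqueness check, yet it sat on
 // finalizeFileDelete below, which is why the commit removes it outright.)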
- */ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { // Verify that the DataFile no longer exists: if (find(dataFileId) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 1a1f4f9318b..bd38245d334 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; @@ -18,9 +19,11 @@ import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; +import java.util.Arrays; import java.util.Date; import java.util.Set; import java.util.logging.Level; @@ -169,13 +172,12 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); } if(!retry) { - logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed."); - throw new IllegalCommandException(BundleUtil.getStringFromBundle("publishDatasetCommand.pidNotReserved"), this); + logger.warning("Reserving PID for: " + getDataset().getId() + " failed."); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidNotReserved", Arrays.asList(theDataset.getIdentifier())), this); } if(attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) { //Didn't work - we existed the loop with too many tries - throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; " - + "gave up after " + attempts + " attempts. 
Current (last requested) identifier: " + theDataset.getIdentifier(), this); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded", Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())), this); } } // Invariant: Dataset identifier does not exist in the remote registry @@ -188,6 +190,9 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } } catch (Throwable e) { + if (e instanceof CommandException) { + throw (CommandException) e; + } throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this); } } else { @@ -217,6 +222,73 @@ protected Timestamp getTimestamp() { return timestamp; } + protected void registerFilePidsIfNeeded(Dataset theDataset, CommandContext ctxt, boolean b) throws CommandException { + // Register file PIDs if needed + PidProvider pidGenerator = ctxt.dvObjects().getEffectivePidGenerator(getDataset()); + boolean shouldRegister = !pidGenerator.registerWhenPublished() && + ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) && + pidGenerator.canCreatePidsLike(getDataset().getGlobalId()); + if (shouldRegister) { + for (DataFile dataFile : theDataset.getFiles()) { + logger.fine(dataFile.getId() + " is registered?: " + dataFile.isIdentifierRegistered()); + if (!dataFile.isIdentifierRegistered()) { + // pre-register a persistent id + registerFileExternalIdentifier(dataFile, pidGenerator, ctxt, true); + } + } + } + } + + private void registerFileExternalIdentifier(DataFile dataFile, PidProvider pidProvider, CommandContext ctxt, boolean retry) throws CommandException { + + if (!dataFile.isIdentifierRegistered()) { + + if (pidProvider instanceof FakeDOIProvider) { + retry = false; // No reason to allow a retry with the FakeProvider (even if it allows + // pre-registration someday), so set false for efficiency + } + try { + if (pidProvider.alreadyRegistered(dataFile)) { + int attempts = 0; + if (retry) { + do { + pidProvider.generatePid(dataFile); + logger.log(Level.INFO, "Attempting to register external identifier for datafile {0} (trying: {1}).", + new Object[] { dataFile.getId(), dataFile.getIdentifier() }); + attempts++; + } while (pidProvider.alreadyRegistered(dataFile) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); + } + if (!retry) { + logger.warning("Reserving File PID for: " + getDataset().getId() + ", fileId: " + dataFile.getId() + ", during publication failed."); + throw new CommandExecutionException(BundleUtil.getStringFromBundle("abstractDatasetCommand.filePidNotReserved", Arrays.asList(getDataset().getIdentifier())), this); + } + if (attempts > FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT) { + // Didn't work - we existed the loop with too many tries + throw new CommandExecutionException("This dataset may not be published because its identifier is already in use by another dataset; " + + "gave up after " + attempts + " attempts. 
Current (last requested) identifier: " + dataFile.getIdentifier(), this); + } + } + // Invariant: DataFile identifier does not exist in the remote registry + try { + pidProvider.createIdentifier(dataFile); + dataFile.setGlobalIdCreateTime(getTimestamp()); + dataFile.setIdentifierRegistered(true); + } catch (Throwable ex) { + logger.info("Call to globalIdServiceBean.createIdentifier failed: " + ex); + } + + } catch (Throwable e) { + if (e instanceof CommandException) { + throw (CommandException) e; + } + throw new CommandException(BundleUtil.getStringFromBundle("file.register.error", pidProvider.getProviderInformation()), this); + } + } else { + throw new IllegalCommandException("This datafile may not have a PID because its id registry service is not supported.", this); + } + + } + protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, DatasetVersion persistedVersion) throws IllegalCommandException { Set changedMDBs = DatasetVersionDifference.getBlocksWithChanges(newVersion, persistedVersion); for (MetadataBlock mdb : changedMDBs) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 287e877f6e0..69ebe6feed8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -102,13 +102,13 @@ public Dataset execute(CommandContext ctxt) throws CommandException { try { // This can potentially throw a CommandException, so let's make // sure we exit cleanly: - - registerExternalIdentifier(theDataset, ctxt, false); + registerExternalIdentifier(theDataset, ctxt, false); + registerFilePidsIfNeeded(theDataset, ctxt, false); } catch (CommandException comEx) { - logger.warning("Failed to reserve the identifier "+theDataset.getGlobalId().asString()+"; notifying the user(s), unlocking the dataset"); - // Send failure notification to the user: + logger.warning("Failed to reserve the identifier " + theDataset.getGlobalId().asString() + "; notifying the user(s), unlocking the dataset"); + // Send failure notification to the user: notifyUsersDatasetPublishStatus(ctxt, theDataset, UserNotification.Type.PUBLISHFAILED_PIDREG); - // Remove the dataset lock: + // Remove the dataset lock: ctxt.datasets().removeDatasetLocks(theDataset, DatasetLock.Reason.finalizePublication); // re-throw the exception: throw comEx; @@ -395,8 +395,7 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t // we can't get "dependent" DOIs assigned to files in a dataset // with the registered id that is a handle; or even a DOI, but in // an authority that's different from what's currently configured. - // Additionaly in 4.9.3 we have added a system variable to disable - // registering file PIDs on the installation level. + // File PIDs may be enabled/disabled per collection. boolean registerGlobalIdsForFiles = ctxt.systemConfig().isFilePIDsEnabledForCollection( getDataset().getOwner()) && pidProvider.canCreatePidsLike(dataset.getGlobalId()); @@ -422,8 +421,8 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t // pidProvider. 
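 // (Editor's note: the flag below is only set once the provider call above
 // has succeeded; on failure the catch block sends the user a
 // PUBLISHFAILED_PIDREG notification instead, leaving the flag unset.)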
dataset.setIdentifierRegistered(true); } catch (Throwable e) { - logger.warning("Failed to register the identifier " + dataset.getGlobalId().asString() - + ", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); + logger.warning("Failed to publicize the identifier " + dataset.getGlobalId().asString() + + ", or to publicize a file in the dataset; notifying the user(s), unlocking the dataset"); // Send failure notification to the user: notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); @@ -440,8 +439,9 @@ private void updateFiles(Timestamp updateTime, CommandContext ctxt) throws Comma if (dataFile.getPublicationDate() == null) { // this is a new, previously unpublished file, so publish by setting date dataFile.setPublicationDate(updateTime); - - // check if any prexisting roleassignments have file download and send notifications + + // check if any pre-existing role assignments have file download and send + // notifications notifyUsersFileDownload(ctxt, dataFile); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index 6b95f3b6de1..1ac41105237 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -4,20 +4,13 @@ import edu.harvard.iq.dataverse.DatasetLock; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; -import java.util.Date; -import java.util.List; import java.util.Optional; import java.util.logging.Logger; import static java.util.stream.Collectors.joining; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java index b7e3ddd8ce6..77b06e4e152 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java @@ -3,27 +3,21 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import 
edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.pidproviders.PidProvider; -import edu.harvard.iq.dataverse.pidproviders.PidUtil; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; -import java.util.Arrays; import java.util.Collections; -import java.util.Date; import java.util.logging.Logger; /** * No required permissions because we check for superuser status. + * @param */ @RequiredPermissions({}) -public class ReservePidCommand extends AbstractVoidCommand { +public class ReservePidCommand extends AbstractDatasetCommand { private static final Logger logger = Logger.getLogger(ReservePidCommand.class.getCanonicalName()); @@ -35,27 +29,15 @@ public ReservePidCommand(DataverseRequest request, Dataset dataset) { } @Override - protected void executeImpl(CommandContext ctxt) throws CommandException { + public Dataset execute(CommandContext ctxt) throws CommandException { if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) { throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), this, Collections.singleton(Permission.EditDataset), dataset); } - - PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); - - try { - String returnString = pidProvider.createIdentifier(dataset); - logger.fine(returnString); - // No errors caught, so mark PID as reserved. - dataset.setGlobalIdCreateTime(new Date()); - // We don't setIdentifierRegistered(true) yet. - ctxt.datasets().merge(dataset); - } catch (Throwable ex) { - String message = BundleUtil.getStringFromBundle("pids.commands.reservePid.failure", Arrays.asList(dataset.getId().toString(), ex.getLocalizedMessage())); - logger.info(message); - throw new IllegalCommandException(message, this); - } + registerExternalIdentifier(getDataset(), ctxt, true); + registerFilePidsIfNeeded(getDataset(), ctxt, true); + return dataset; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 994f4c7dfb6..768bb88fd43 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -154,7 +154,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { throw e; } } - + //Set creator and create date for files if needed for (DataFile dataFile : theDataset.getFiles()) { if (dataFile.getCreateDate() == null) { dataFile.setCreateDate(getTimestamp()); @@ -259,6 +259,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { for(FileMetadata fmd: theDataset.getOrCreateEditVersion().getFileMetadatas()) { logger.fine("FMD: " + fmd.getId() + " for file: " + fmd.getDataFile().getId() + "is in final draft version"); } + registerFilePidsIfNeeded(theDataset, ctxt, true); if (recalculateUNF) { ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index f9801419e47..5cc28e4b225 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -986,7 
+986,7 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) { Dataverse thisCollection = collection; // If neither enabled nor disabled specifically for this collection, - // the parent collection setting is inhereted (recursively): + // the parent collection setting is inherited (recursively): while (thisCollection.getFilePIDsEnabled() == null) { if (thisCollection.getOwner() == null) { // We've reached the root collection, and file PIDs registration @@ -1002,8 +1002,6 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) { // takes precedent: return thisCollection.getFilePIDsEnabled(); } - - public String getMDCLogPath() { String mDCLogPath = settingsService.getValueForKey(SettingsServiceBean.Key.MDCLogPath, null); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index edf0206ab5d..a092b0f456b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -3008,12 +3008,13 @@ pids.api.reservePid.success=PID reserved for {0} pids.api.deletePid.success=PID deleted for {0} pids.deletePid.failureExpected=Unable to delete PID {0}. Status code: {1}. pids.deletePid.failureOther=Problem deleting PID {0}: {1} -pids.commands.reservePid.failure=Problem reserving PID for dataset id {0}: {1}. pids.datacite.errors.noResponseCode=Problem getting HTTP status code from {0}. Is it in DNS? Is doi.dataciterestapiurlstring configured properly? pids.datacite.errors.DoiOnly=Only doi: is supported. -#PublishDatasetCommand -publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persistent identifier has not been reserved. +#AbstractDatasetCommand +abstractDatasetCommand.pidNotReserved=Unable to reserve a persistent identifier for the dataset: {0}. +abstractDatasetCommand.filePidNotReserved=Unable to reserve a persistent identifier for one or more files in the dataset: {0}. +abstractDatasetCommand.pidReservationRetryExceeded=This dataset may not be registered because its identifier is already in use by another dataset: gave up after {0} attempts. Current (last requested) identifier: {1} # APIs api.errors.invalidApiToken=Invalid API token.
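(Editor's aside, not part of the patches: a short sketch of how the reworked keys above are consumed in AbstractDatasetCommand from PATCH 11; `attempts` and `theDataset` are locals of that class:

    // Format the retry-exhausted message with the attempt count and the last
    // identifier tried, then abort the command; the API surfaces the text as-is.
    throw new CommandExecutionException(
            BundleUtil.getStringFromBundle("abstractDatasetCommand.pidReservationRetryExceeded",
                    Arrays.asList(Integer.toString(attempts), theDataset.getIdentifier())),
            this);

)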