From 385174065ed4b095bbcc6ab36873c9d036bf56b7 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 31 Jan 2019 18:31:18 -0500 Subject: [PATCH 01/85] IQSS-5505 --- pom.xml | 5 +++ .../AbstractGlobalIdServiceBean.java | 4 +++ .../dataverse/DOIDataCiteRegisterService.java | 35 +++++++++++++++++++ .../iq/dataverse/GlobalIdServiceBean.java | 2 ++ .../harvard/iq/dataverse/api/Datasets.java | 12 +++++-- .../UpdateDvObjectPIDMetadataCommand.java | 4 +-- 6 files changed, 58 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 380fbbbe402..224ab957fd7 100644 --- a/pom.xml +++ b/pom.xml @@ -585,6 +585,11 @@ tika-parsers 1.19 + + org.xmlunit + xmlunit-core + 2.6.2 + diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java index 98310a136b5..4e632dce27f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java @@ -428,5 +428,9 @@ public String getMetadataFromDvObject(String identifier, Map met logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } + + public boolean updateIdentifier(DvObject dvObject) { + return publicizeIdentifier(dvObject); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index 50f92f81fb5..99cf4f3694d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -23,10 +23,17 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.TypedQuery; +import javax.xml.transform.Source; + import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.xmlunit.builder.DiffBuilder; +import org.xmlunit.builder.Input; +import org.xmlunit.builder.Input.Builder; +import org.xmlunit.diff.Diff; +import org.xmlunit.diff.Difference; /** * @@ -114,6 +121,34 @@ public String registerIdentifier(String identifier, Map metadata } return retString; } + + public String reRegisterIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String numericIdentifier = identifier.substring(identifier.indexOf(":") + 1); + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + String target = metadata.get("_target"); + DataCiteRESTfullClient client = getClient(); + String currentMetadata = client.getMetadata(numericIdentifier); + Diff myDiff = DiffBuilder.compare(xmlMetadata) + .withTest(currentMetadata).ignoreWhitespace().checkForSimilar() + .build(); + + if (myDiff.hasDifferences()) { + for(Difference d : myDiff.getDifferences()) { + + logger.fine(d.toString()); + } + retString = "metadata:\\r" + client.postMetadata(xmlMetadata) + "\\r"; + } + if (!target.equals(client.getUrl(numericIdentifier))) { + logger.fine("updating target URl to " + target); + client.postUrl(numericIdentifier, target); + retString = retString + "url:\\r" + target; + + } + + return retString; + } public String deactivateIdentifier(String identifier, HashMap metadata, DvObject dvObject) { String retString = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java index 
0d64c1050b8..cd0c2c04c73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java @@ -50,6 +50,8 @@ public interface GlobalIdServiceBean { boolean publicizeIdentifier(DvObject studyIn); + boolean updateIdentifier(DvObject dvObject); + static GlobalIdServiceBean getBean(String protocol, CommandContext ctxt) { final Function protocolHandler = BeanDispatcher.DISPATCHER.get(protocol); if ( protocolHandler != null ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4f868d90ae7..42f01f67ace 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -386,8 +386,9 @@ public Response updateDatasetPIDMetadata(@PathParam("id") String id) { } return response(req -> { - execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(id), req)); - List args = Arrays.asList(id); + Dataset dataset = findDatasetOrDie(id); + execCommand(new UpdateDvObjectPIDMetadataCommand(dataset, req)); + List args = Arrays.asList(dataset.getIdentifier()); return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.single.dataset", args)); }); } @@ -398,7 +399,14 @@ public Response updateDatasetPIDMetadataAll() { return response( req -> { datasetService.findAll().forEach( ds -> { try { + logger.fine("ReRegistering: " + ds.getId() + " : " + ds.getIdentifier()); + if (!ds.isReleased() || (!ds.isIdentifierRegistered() || (ds.getIdentifier() == null))) { + if (ds.isReleased()) { + logger.warning("Dataset id=" + ds.getId() + " is in an inconsistent state (publicationdate but no identifier/identifier not registered"); + } + } else { execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(ds.getId().toString()), req)); + } } catch (WrappedResponse ex) { Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index e36fe06b863..99d0a183c9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -47,13 +47,13 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(target.getProtocol(), ctxt); try { - Boolean doiRetString = idServiceBean.publicizeIdentifier(target); + Boolean doiRetString = idServiceBean.updateIdentifier(target); if (doiRetString) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(target); ctxt.em().flush(); for (DataFile df : target.getFiles()) { - doiRetString = idServiceBean.publicizeIdentifier(df); + doiRetString = idServiceBean.updateIdentifier(df); if (doiRetString) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(df); From d5dec010424435bdcea65c76dd2b1e524473d8a9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 22 Jun 2020 17:56:28 -0400 Subject: [PATCH 02/85] xml escape dataset description --- .../edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java 
b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java index f9107d8565b..fa04b710819 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java @@ -228,7 +228,8 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well + String description = StringEscapeUtils.escapeXml(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractGlobalIdServiceBean.UNAVAILABLE; } From 72fca2c7e30cce95c8c4446399d32c6a418630f0 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Mon, 27 Mar 2023 13:26:21 -0400 Subject: [PATCH 03/85] Pdf export function --- pom.xml | 6 +++ .../dataverse/export/ddi/DdiExportUtil.java | 46 ++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e028771d24b..780bb7bdc0c 100644 --- a/pom.xml +++ b/pom.xml @@ -429,6 +429,12 @@ commons-compress + + + org.apache.xmlgraphics + fop + 2.8 + org.duracloud common diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index c78bb02d5c5..ee9231a9d53 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -74,7 +74,19 @@ import javax.xml.transform.stream.StreamResult; import java.io.File; import java.io.InputStream; -import java.io.InputStreamReader; + +import java.io.OutputStream; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.sax.SAXResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.fop.apps.FOUserAgent; +import org.apache.fop.apps.Fop; +import org.apache.fop.apps.FopFactory; +import org.apache.fop.apps.MimeConstants; public class DdiExportUtil { @@ -1948,6 +1960,38 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa return true; } + public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { + try { + File xsltfile = new File("ddi-to-fo.xsl"); + + final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI)); + FOUserAgent foUserAgent = fopFactory.newFOUserAgent(); + + try { + Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, outputStream); + // Setup XSLT + TransformerFactory factory = TransformerFactory.newInstance(); + Transformer transformer = factory.newTransformer(new StreamSource(xsltfile)); + + // Set the value of a in the stylesheet + transformer.setParameter("versionParam", "2.0"); + + // Setup input for XSLT transformation + Source src = new StreamSource(datafile); + + // Resulting SAX events (the generated FO) must be piped through to FOP + Result res = new SAXResult(fop.getDefaultHandler()); + + // Start XSLT transformation and FOP processing + transformer.transform(src, res); + } finally { + outputStream.close(); + } + } catch (Exception e) { + logger.severe(e.getMessage()); + } + } + public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); From a5821b9fd0d53137918c329fb79590bc6431dbe1 Mon Sep 17 
00:00:00 2001 From: lubitchv Date: Mon, 3 Apr 2023 17:35:28 -0400 Subject: [PATCH 04/85] pdf codebook --- .../harvard/iq/dataverse/api/Datasets.java | 2 +- .../dataverse/export/PdfCodeBookExporter.java | 91 + .../dataverse/export/ddi/DdiExportUtil.java | 7 +- src/main/java/propertyFiles/Bundle.properties | 1 + .../from-ddi-2.5/ddi-pdf/i18n.inc.xslt | 5 + .../ddi-pdf/messages_en.properties.xml | 174 + .../ddi-pdf/messages_es.properties.xml | 170 + .../ddi-pdf/messages_fr.properties.xml | 173 + .../ddi-pdf/messages_ja.properties.xml | 161 + .../ddi-pdf/messages_nn.properties.xml | 174 + .../ddi-pdf/messages_no.properties.xml | 174 + .../ddi-pdf/messages_ru.properties.xml | 169 + .../iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl | 4473 +++++++++++++++++ 13 files changed, 5771 insertions(+), 3 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d40bc153141..3b3326611dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -272,7 +272,7 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id" @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html" }) + @Produces({"application/xml", "application/json", "application/html", "*/*" }) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java new file mode 100644 index 00000000000..8f3bb78f8d3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java @@ -0,0 +1,91 @@ +package edu.harvard.iq.dataverse.export; + +import com.google.auto.service.AutoService; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil; +import edu.harvard.iq.dataverse.export.spi.Exporter; +import edu.harvard.iq.dataverse.util.BundleUtil; + +import javax.json.JsonObject; +import javax.ws.rs.core.MediaType; +import javax.xml.stream.XMLStreamException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import 
java.nio.file.Path; +import java.nio.file.Paths; + +@AutoService(Exporter.class) +public class PdfCodeBookExporter implements Exporter { + + @Override + public String getProviderName() { + return "pdf"; + } + + @Override + public String getDisplayName() { + return BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf") != null ? BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf") : "DDI PDF Codebook"; + } + + @Override + public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException { + try { + InputStream ddiInputStream; + try { + ddiInputStream = ExportService.getInstance().getExport(version.getDataset(), "ddi"); + } catch(ExportException | IOException e) { + throw new ExportException ("Cannot open export_ddi cached file"); + } + DdiExportUtil.datasetPdfDDI(ddiInputStream, outputStream); + } catch (XMLStreamException xse) { + throw new ExportException ("Caught XMLStreamException performing DDI export"); + } + } + + @Override + public Boolean isXMLFormat() { + return false; + } + + @Override + public Boolean isHarvestable() { + // No, we don't want this format to be harvested! + // For datasets with tabular data the portions of the DDIs + // become huge and expensive to parse; even as they don't contain any + // metadata useful to remote harvesters. -- L.A. 4.5 + return false; + } + + @Override + public Boolean isAvailableToUsers() { + return true; + } + + @Override + public String getXMLNameSpace() throws ExportException { + return null; + } + + @Override + public String getXMLSchemaLocation() throws ExportException { + return null; + } + + @Override + public String getXMLSchemaVersion() throws ExportException { + return null; + } + + @Override + public void setParam(String name, Object value) { + // this exporter does not uses or supports any parameters as of now. 
+ } + + @Override + public String getMediaType() { + return MediaType.WILDCARD; + }; +} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index ee9231a9d53..3912b28a886 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1962,9 +1962,12 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { try { - File xsltfile = new File("ddi-to-fo.xsl"); + File xsltfile = new File("/home/victoria/ddi-to-fo.xsl"); + logger.info("start datasetPdfDDI"); + //InputStream xsltfile = DdiExportUtil.class.getClassLoader().getResourceAsStream( + // "edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl"); - final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI)); + final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI()); FOUserAgent foUserAgent = fopFactory.newFOUserAgent(); try { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 45807dc7cde..3ba8d3fc8dd 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1395,6 +1395,7 @@ dataset.exportBtn.itemLabel.json=JSON dataset.exportBtn.itemLabel.oai_ore=OAI_ORE dataset.exportBtn.itemLabel.dataciteOpenAIRE=OpenAIRE dataset.exportBtn.itemLabel.html=DDI HTML Codebook +dataset.exportBtn.itemLabel.pdf=DDI PDF Codebook license.custom=Custom Dataset Terms license.custom.description=Custom terms specific to this dataset metrics.title=Metrics diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt new file mode 100644 index 00000000000..edf876f3b04 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt @@ -0,0 +1,5 @@ + + + + + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml new file mode 100644 index 00000000000..d8e98dfd3c6 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Valid +Frequency table not shown +Derivation +discrete +Data Collection Mode +Other Processing +Other Acknowledgment(s) +Untitled +Identification +Click here to access/export data files from Nesstar format +Value +Percentage +Weighting +Primary Investigator(s) +This document was generated using the +Sampling +Cases +Access Conditions +Source +Modalities +Rights & Disclaimer +Definition +Estimates of Sampling Error +Data Files +Geographic Coverage +April +Mean +Metadata Production +Data Collection +Missing Data +Scripts and programs +Variable(s) +Interviewer instructions +Funding Agency/ies +November +Missing +Version +Universe +Contributor(s) +Access Authority +Data Processing & Appraisal +Scope +Administrative documents +StdDev +Contact(s) +Label +Technical documents +Decimal(s) +Type +Literal question +Concepts +Range +Abstract +June +Supervision +Other Forms of Data Appraisal +References +Accessibility +Data Collection Dates +Data Editing 
+Questionnaires +Valid case(s) +Reports and analytical documents +Copyright +Documentation +Deviations from Sample Design +Publisher(s) +February +Dataset contains +Acknowledgment(s) +Continuous +Standard deviation +Variables Description +Producer +Production Date + +The Explorer allows you to view data files and export them to common statistical formats +Discrete +Group +July +Filename +Cases +Name +Warning: these figures indicate the number of cases found in the data file. They cannot be interpreted as summary statistics of the population of interest. +Statistical tables +December +Subjects +Processing Checks +software +Interviewer's instructions +Table of Contents +Document Information +Subgroup(s) +Keywords +group(s) +W +Weight +Files Description +Notes +Data Collection Notes +file(s) +continuous +Disclaimer +Content +variable(s) +Other Producer(s) +Producers & Sponsors +Data Cleaning Notes +Distributor(s) +Overview +Citation Requirements +September +Category +Confidentiality +Statistics +May +Undetermined +Structure +file +Pre-question +Response Rate +Width +Recoding and Derivation +Series +October +Unit of Analysis +Data Processing Notes +Kind of Data +File +Time Period(s) +File Content +Invalid +Vars +cont. +Key(s) +Question +Source of information +Imputation +Security +To open this file, you will need the free +Other resources +Data Dictionnary +Information +January +Other documents +Minimum +Scope & Coverage +Metadata Producer(s) +Show more info +Data Collector(s) +Post-question +Topics +Sampling Procedure +File Structure +Variables List +Format +Sampling Notes +Variables Group(s) +Description +Categories +Maximum +Depositor(s) +August +NW +Cover Page +Weighted +March + total - showing a subset of +Countries +question details + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml new file mode 100644 index 00000000000..9cfcdaf6e7e --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml @@ -0,0 +1,170 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Válido +No se presentan las tablas de frecuencias +Derivación +discreta +Método de Recolección +Otros relacionados al procesamiento +Otros Reconocimientos +Sin título +Identificación +Presione aquí para acceder/exportar al archivo(s) de datos +Valor +Porcentaje +Ponderando +Investigadores Principales +Este documento fue producido utilizando el +Muestreo +Casos +Condiciones de uso +Fuente +Modalidades +Derechos y Notas Legales +Definición +Estimaciones del Error Muestral +Archivo de Datos +Cobertura Geográfica +Abril +Media +Producción de los Metadatos +Recolección de Datos +Datos perdidos +Programas informáticos +Variable(s) +Manual del encuestador +Agencias Auspiciadoras +Noviembre +Valores perdidos +Versión +Universo +Contribuidor(es) +Institución Propietaria +Tratamiento y Validación de Datos +Dominio Temático +Documentos Administrativos +Desviación +Contacto(s) +Etiqueta +Documentos Técnicos +Decimal(es) +Tipo +Pregunta textual +Conceptos +Rango +Resumen +Junio +Supervisión +Otras Formas de Validación de los Datos +Referencias +Accesibilidad +Fechas de Recolección de Datos +Procesamiento de Datos +Cuestionarios +Casos válidos +Reportes y documentos analíticos +Derechos de Autor +Documentación +Modificaciones al Diseño Muestral +Editor(es) +Febrero +Contenido de la Base de Datos +Reconocimiento(s) +Contínua 
+Desviación estándar +Descripción de la variable +Productor +Fecha de Producción +El Explorador NESSTAR permite visualizar los archivos de datos y exportarlos a diferentes formatos estadísticos +Discreta +Grupo +Julio +Nombre del Archivo +Casos +Nombre +Cuadros estadísticos +Diciembre +Temas +Controles de Tratamiento +software +Manual del encuestador +Indice +Información acerca de la Documentación +Subgrupo(s) +Palabra Clave +grupo(s) +P +Ponderador +Descripción de los Archivos +Notas +Notas sobre la Recolección de Datos +archivo(s) +continua +Nota Legal +Contenido +variable(s) +Otros Productores +Productores y Auspiciadores +Notas acerca de la Depuración de los Datos +Distribuidor(es) +Resumen General +Forma de citar +Septiembre +Categoría +Confidencialidad +Estadística + +Mayo +Indeterminado +Estructura +archivo +Pre-pregunta +Tasa de Respuesta +Ancho +Recodificación y Derivación +Series +Octubre +Unidad de Análisis +Notas sobre el Procesamiento de Datos +Tipo de Datos +Archivo +Periodo de Referencia +Contenido del Archivo +Inválido +Vars. +cont. +Clave(s) +Pregunta +Fuente de información +Imputación +Seguridad +Para abrir este archivo se necesita el software gratuito +Otros recursos +Diccionario de Datos +Información +Enero +Otros documentos +Mínimo +Cobertura y Dominio Temático +Productor de los Metadatos +Mostrar más información +Entrevistador(es) +Pos-pregunta +Temas +Procedimiento de Muestreo +Estructura del Archivo +Lista de variables +Formato +Notas sobre el Muestreo +Grupo(s) de Variables +Descripción +Categorías +Máximo +Depositante(s) +Agosto +NP +Carátula +Ponderado +Marzo + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml new file mode 100644 index 00000000000..9fa4d2178b1 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml @@ -0,0 +1,173 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Valide +Tableau de fréquences non-affiché +Mode de calcul +discrète +Méthode de collecte +Autre traitement +Autre(s) remerciement(s) +Sans titre +Identification +Cliquer ici pour accéder/exporter les fichiers de données du format Nesstar +Valeur +Pourcentage +Pondération +Enquêteur(s) principal/aux +Ce document a été généré à l'aide du +Echantillonage +Enregistrements +Conditions d'accès +Source +Modalités +Responsabilité et droits d'auteurs +Définition +Estimation des erreurs d'échantillonage +Fichiers de données +Couverture géographique +Avril +Moyenne +Production des méta-données +Collecte des données +Valeures manquantes +Programmes informatiques +Variable(s) +Instructions aux enquêteurs +Financement +Novembre +Manquant +Version +Univers +Contributeur(s) +Agence propriétaire +Traitement et évaluation des données +Domaines thématiques +Documents administratifs +Ecart type +Contact(s) +Libellé +Documents techniques +Décimale(s) +Type +Formulation de la question +Concepts +Gamme +Résumé +Juin +Supervision +Autres formes d'évaluation des données +Références +Accessibilité +Dates de collecte +Edition des données +Questionnaires +Cas valide(s) +Rapports et documents analytiques +Droits d'auteurs +Documentation +Déviations par rapport à l'échantillon initial +Editeur(s) +Février +Le jeu de données contient +Remerciement(s) +Continue +Ecart type +Description des variables +Producteur +Date de production + +L'Explorer vous permet d'accéder aux données et de les exporter 
vers les formats statistiques les plus courants +Discrète +Groupe +Juillet +Nom du fichier +Enreg. +Nom +Avertissement: Ces chiffres indiquent le nombre de cas identifiés dans le fichier de données. Ils ne peuvent pas être interpretés comme étant représentatifs de la population concernée. +Tableaux statistiques +Décembre +Sujets +Contrôles de traitement + +Instructions aux enquêteurs +Table des matières +Informations sur le document +Sous-groupe(s) +Mots-clé +groupe(s) +P +Pondération +Description des fichiers +Notes +Notes sur la collecte +fichier(s) +continue +Responsabilité(s) +Contenu +variable(s) +Autre(s) producteur(s) +Producteurs et sponsors +Notes sur l'apurement des données +Distributeur(s) +Aperçu +Citation +Septembre +Catégorie +Confidentialité +Statistiques +Mai +Indéterminé +Structure +fichier +Pré-question +Taux de réponse +Taille +Formulation de la question +Recodage et dérivation +Série +Octobre +Unité d'analyse +Notes sur le traitement des données +Type d'étude +Fichier +Période(s) de référence +Contenu du fichier +Non-valide +Vars +suite +Clé(s) +Question +Source d'information +Imputation +Sécurité +Pour ouvrir ce fichier, vous avez besoin du logiciel gratuit +Autres resources +Dictionnaire des variables +Information +Janvier +Autres documents +Minimum +Domaines thématiques et couverture +Producteur(s) des méta-données +Information complémentaire +Enquêteurs +Post-question +Thèmes +Procédure d'échantillonage +Structure du fichier +Liste des variables +Format +Notes sur l'échantillonage +Groupe(s) de variables +Description +Catégories +Maximum +Dépositaire(s) +Août +NP +Couverture +Pondéré +Mars +question details + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml new file mode 100644 index 00000000000..bc5dbb06154 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml @@ -0,0 +1,161 @@ + + +Generated by Properties2Xml on Tue Feb 13 13:55:43 EST 2007 +有効な +度数表(Frequency table)は表示されません +由来 +不連続な +データ収集モード +その他の確認事項 +識別番号 +データファイルにアクセスするにはここをクリックしてください +無題 + +割合 +ウェイティング +第一次調査官 +この文書はToolkitを使用して作られました +サンプリング +ケース +アクセス条件 +情報源 +様相 +権利及び声明文 +定義 +サンプルエラーの見積もり +データファイル +地理的な適用範囲 +4月 +平均 +メタデータ製作 +データ収集 +損失データ +スクリプトおよびプログラム +可変的 +面接者の指示 +出資機関 +11月 +バージョン +共通の +貢献者 +アクセス権限 +データ処理、評価 +範囲, 領域 +管理用文章 +連絡先 +ラベル +技術的な文書 +小数点 +タイプ +文字の質問 +概念 +範囲 +要約 +6月 +監督 +その他ファーマットのデータ評価 +参照 +アクセス、入手法 +データ収集日 +データ編集 +質問 +レポートおよび分析的な文書 +有効な場合 +コピーライト +書類 +サンプルデザインによる偏差 +発行者 +2月 +データセットに含まれる +確認事項 +連続的な +標準偏差 +変数の記述 +製作者 +製作日 +” Explorer”によってデータファイルを参照することも一般的に使えわれている統計データフォーマットに変換。抽出することも可能です +不連続性 +グループ +7月 +ファイルの名前 +ケース +名前 +統計表 +12月 +主題, 内容 +工程監査 +ソフト +面接者への指示 +目録 +書類の情報 +サブグループ +キーワード + +グループ +ウェイト +ファイルの詳細 +メモ +データ収集メモ +ファイル +継続的な +声明文 +内容 +変数 +その他の製作者 +製作者とスポンサー +データクリーニングメモ +分配者 +概略 +引用する場合の必要条件 +9月 +カテゴリー +機密性、コンフィデンシャリティー +5月 +未定 +構造 +ファイル +調査前の質問 +回答比率 + +記録と誘導 +シリー +10月 +分析の単位 +データ処理メモ +データの種類 + +ファイル +期間 +ファイルの内容 +無効 +キー +情報源 +非難 +セキュリティー +このファイルを開けるには、無料で配布されているNesstar Explorer が必要です。 +その他の資料 +データ辞典 +情報 +1月 +その他の書類 +最小値 +規模及び適用範囲 +メタデータ製作者 +さらにインフォメーションを表示 +データ収集者 +調査後の質問 +サンプリングの手順 +ファイルの構造 +変数のリスト +フォーマット +サンプリングメモ +変数のグループ +詳細 +カテゴリー +最大値 +デポジター、提供者、供託者 +8月 +表紙 +ウェイトされた +3月 + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml new 
file mode 100644 index 00000000000..fdf14f5dfcd --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Gyldige +Frekvenstabell ikke vist +Avledning +diskret +Type datainnsamling +Annen prosessering +Andre identifikatorer og krediteringer +Uten tittel +Identifisering +Click here to access/export data files from Nesstar format +Verdi +Prosent +Vekting +Primary Investigator(s) +Dette dokumentet ble generert av +Utvalg +Enheter +Tilgangsbetingelser +Kilde +Modaliteter +Rights & Disclaimer +Definisjon +Estimert utvalgsfeil +Datafiler +Geografisk omfang +April +Mean +Metadata-produksjon +Datainnsamling +Manglende data +Script og programmer +Variable(r) +Instruksjoner til intervjueren +Sponsor/finansierende institusjon(er) +November +Missing +Versjon +Univers +Bidragsyter(e) +Tilgangskontrollør +Dataprosessering og -evaluering +Omfang +Administrative dokumenter +Standardavvik +Kontaktperson(er) +Merkelapp +Tekniske dokumenter +Desimal(er) +Type +Spørsmålstekst +Begrep(er) +Rekkevidde +Sammendrag +Juni +Supervision +Andre former for dataevaluering +Referanser +Tilgjengelighet +Datainnsamlingsdatoer +Dataredigering +Spørreskjema +Gyldige enheter +Rapporter og analysedokumenter +Copyright +Dokumentasjon +Avvik fra utvalgsdesign +Utgiver(e) +Februar +Datasettet inneholder +Krediteringer +Kontinuerlig +Standardavvik +Variabelbeskrivelse +Produsent +Produksjonsdato + +The Explorer allows you to view data files and export them to common statistical formats +Diskret +Gruppe +Juli +Filnavn +Enheter +Navn +Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. +Statistiske tabeller +Desember +Emner +Prosesseringssjekk +programvare +Instruksjoner til intervjueren +Innholdsfortegnelse +Dokumentinformasjon +Undergruppe(r) +Nøkkelord +gruppe(r) +W +Vekt +Filbeskrivelse +Kommentarer +Datainnsamlingskommentarer +file(r) +kontinuerlig +Fraskrivelse +Innhold +variable(r) +Andre produsenter +Produsenter og sponsorer +Kommentarer om datarensing +Distributør(er) +Oversikt +Sitatkrav +September +Kategori +Konfidensialitet +Statistikk +Mai +Uavklart +Struktur +fil +Tekst før spørsmål +Responsrate +Bredde +Omkodinger og utledninger +Serie +Oktober +Analyseenhet +Dataprosesseringskommentarer +Datatype +Fil +Tidsperiode(r) +Filinnhold +Ugyldig +Variabler +kont. 
+Nøkler +Spørsmål +Kilde for informasjon +Imputasjon +Sikkerhet +For å åpne denne filen trenger du følgende gratisverktøy +Andre ressurser +Dataordbok +Informasjon +Januar +Andre dokumenter +Minimum +Omfang +Metadataprodusenter +Vis mer informasjon +Datainnsamler(e) +Tekst etter spørsmål +Emner +Utvalgsprosedyre +Filstruktur +Variabelliste +Format +Utvalgskommentarer +Variabelgrupper +Beskrivelse +Kategorier +Maksimum +Utgiver(e) +August +NW +Forside +Vektet +Mars + total - viser et utvalg av +Land +spørsmålsdetaljer + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml new file mode 100644 index 00000000000..fdf14f5dfcd --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Gyldige +Frekvenstabell ikke vist +Avledning +diskret +Type datainnsamling +Annen prosessering +Andre identifikatorer og krediteringer +Uten tittel +Identifisering +Click here to access/export data files from Nesstar format +Verdi +Prosent +Vekting +Primary Investigator(s) +Dette dokumentet ble generert av +Utvalg +Enheter +Tilgangsbetingelser +Kilde +Modaliteter +Rights & Disclaimer +Definisjon +Estimert utvalgsfeil +Datafiler +Geografisk omfang +April +Mean +Metadata-produksjon +Datainnsamling +Manglende data +Script og programmer +Variable(r) +Instruksjoner til intervjueren +Sponsor/finansierende institusjon(er) +November +Missing +Versjon +Univers +Bidragsyter(e) +Tilgangskontrollør +Dataprosessering og -evaluering +Omfang +Administrative dokumenter +Standardavvik +Kontaktperson(er) +Merkelapp +Tekniske dokumenter +Desimal(er) +Type +Spørsmålstekst +Begrep(er) +Rekkevidde +Sammendrag +Juni +Supervision +Andre former for dataevaluering +Referanser +Tilgjengelighet +Datainnsamlingsdatoer +Dataredigering +Spørreskjema +Gyldige enheter +Rapporter og analysedokumenter +Copyright +Dokumentasjon +Avvik fra utvalgsdesign +Utgiver(e) +Februar +Datasettet inneholder +Krediteringer +Kontinuerlig +Standardavvik +Variabelbeskrivelse +Produsent +Produksjonsdato + +The Explorer allows you to view data files and export them to common statistical formats +Diskret +Gruppe +Juli +Filnavn +Enheter +Navn +Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. +Statistiske tabeller +Desember +Emner +Prosesseringssjekk +programvare +Instruksjoner til intervjueren +Innholdsfortegnelse +Dokumentinformasjon +Undergruppe(r) +Nøkkelord +gruppe(r) +W +Vekt +Filbeskrivelse +Kommentarer +Datainnsamlingskommentarer +file(r) +kontinuerlig +Fraskrivelse +Innhold +variable(r) +Andre produsenter +Produsenter og sponsorer +Kommentarer om datarensing +Distributør(er) +Oversikt +Sitatkrav +September +Kategori +Konfidensialitet +Statistikk +Mai +Uavklart +Struktur +fil +Tekst før spørsmål +Responsrate +Bredde +Omkodinger og utledninger +Serie +Oktober +Analyseenhet +Dataprosesseringskommentarer +Datatype +Fil +Tidsperiode(r) +Filinnhold +Ugyldig +Variabler +kont. 
+Nøkler +Spørsmål +Kilde for informasjon +Imputasjon +Sikkerhet +For å åpne denne filen trenger du følgende gratisverktøy +Andre ressurser +Dataordbok +Informasjon +Januar +Andre dokumenter +Minimum +Omfang +Metadataprodusenter +Vis mer informasjon +Datainnsamler(e) +Tekst etter spørsmål +Emner +Utvalgsprosedyre +Filstruktur +Variabelliste +Format +Utvalgskommentarer +Variabelgrupper +Beskrivelse +Kategorier +Maksimum +Utgiver(e) +August +NW +Forside +Vektet +Mars + total - viser et utvalg av +Land +spørsmålsdetaljer + diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml new file mode 100644 index 00000000000..06fde85af5e --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml @@ -0,0 +1,169 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Валидный +Частотная таблица не выводится +Расчет +дискретная +Способ сбора данных +Прочая обработка +Другие участники +Безымянный +Индентификация +Щелкните здесь, чтобы получить доступ к файлам или экспортировать их +Значение +Процент +Взвешивание +Первичный(е) исследователь(и) +Документ был сгенерирован с использованием +Выборка +Наблюдения +Условия доступа +Источник +Модальности +Авторские права и ограничения ответственности +Определение +Оценки ошибок выборки +Файлы данных +Географический охват +Апрель +Среднее +Разработка метаданных +Сбор данных +Пропущенные данные +Скрипты и программы +Переменная(ые) +Инструкции интервьюеру +Кто финансировал +Ноябрь +Пропущенные +Версия +Генеральная совокупность +Участник(и) +Права доступа +Обработка и инспекция данных +Охват +Административные документы +СтдОткл +Контак(ы) +Метка +Технические документы +Десятичные +Тип +Текст вопроса +Концепции +Диапазон +Резюме +Июнь +Контроль +Другие формы инспекции данных +Установки +Доступность +Даты сбора данных +Редактирование данных +Вопросники +Валидное(ые) наблюдение(я) +Отчеты и аналитические документы +Авторские права +Документация +Отклонения от дизайна выборки +Издатель(и) +Февраль +Набор данных содержит +Участник(и) +Непрерывная +Стандартное отклонение +Описание переменных +Разработчик +Дата разработки +Проводник позволяет просматривать файлы данных и экспортировать их в распространенные статистические форматы +Дикретная +Группа +Июль +Имя файла +Наблюдения +Имя +Статистичсекие таблицы +Декабрь +Темы +Контроль обработки +программное обеспечение +Инструкции интервьюеру +Оглавление +Информация о документе +Подгруппа(ы) +Ключевые слова +группа(ы) +B +Вес +Описание файла +Примечания +Примечания по сбору данных +файл(ы) +непрерывная +Ограничения ответственности +Содержание +переменная(ые) +Другие разработчики +Разработчики и спонсоры +Примечания по чистке данных +Дистрибьютор(ы) +Обзор +Требования по цитированию +Сентябрь +Категория +Конфиденциальность +Статистики +Май +Неопределенный +Структура +файл +Текст, предваряющий вопрос +Доля ответов +Ширина +Перекодировка и расчеты +Серия +Октябрь +Единица анализа +Примечания по обработке данных +Тип данных +Файл +Период(ы) времени +Содержание файла +Некорректный +Переменные +непр. 
+Ключ(и)
+Вопрос
+Источник информации
+Импутация
+Безопасность
+Чтобы открыть этот файл, необходимо иметь свободным
+Прочие источники
+Словарь данных
+Информация
+Январь
+Прочие документы
+Минимум
+Охват и покрытие
+Разработчик(и) метаданных
+Показать дополнительную информацию
+Кто собирал данные
+Текст после вопроса
+Разделы
+Процедура выборки
+Структура файла
+Список переменных
+Формат
+Примечания по выборке
+Группа(ы) переменных
+Описание
+Категории
+Максимум
+Депозитор(ы)
+Август
+HB
+Титульный лист
+взвешенные
+Март
+

diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl
new file mode 100644
index 00000000000..64849846481
--- /dev/null
+++ b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl
@@ -0,0 +1,4473 @@
[4,473 added lines: the XSL-FO stylesheet that renders a DDI 2.5 codebook as PDF through Apache FOP, using i18n lookups into the ddi-pdf message catalogs above; the file's XML markup did not survive extraction, so the hunk body is not reproduced here.]

From 84caa2a85e2f86d1c6f82725673bb4a60ec19f3d Mon Sep 17 00:00:00 2001
From: lubitchv
Date: Mon, 12 Jun 2023 17:52:04 -0400
Subject: [PATCH 05/85] q

---
 .../dataverse/export/PdfCodeBookExporter.java | 88 +++++++++----------
 1 file changed, 40 insertions(+), 48 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
index 8f3bb78f8d3..c4f1d3ff2cc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
@@ -1,55 +1,58 @@
+
+
 package edu.harvard.iq.dataverse.export;
 
 import com.google.auto.service.AutoService;
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil;
-import edu.harvard.iq.dataverse.export.spi.Exporter;
-import edu.harvard.iq.dataverse.util.BundleUtil;
+ import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil;
+ import io.gdcc.spi.export.ExportDataProvider;
+ import io.gdcc.spi.export.ExportException;
+ import io.gdcc.spi.export.Exporter;
+ import edu.harvard.iq.dataverse.util.BundleUtil;
 
-import javax.json.JsonObject;
-import javax.ws.rs.core.MediaType;
-import javax.xml.stream.XMLStreamException;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.file.Path;
-import java.nio.file.Paths;
+ import javax.json.JsonObject;
+ import javax.ws.rs.core.MediaType;
+ import javax.xml.stream.XMLStreamException;
+ import java.io.File;
+ import java.io.IOException;
+ import java.io.InputStream;
+ import java.io.OutputStream;
+ import java.nio.file.Path;
+ import java.nio.file.Paths;
+ import java.util.Locale;
+ import java.util.Optional;
 
 @AutoService(Exporter.class)
 public class PdfCodeBookExporter implements Exporter {
 
     @Override
-    public String getProviderName() {
+    public String getFormatName() {
         return "pdf";
     }
 
     @Override
-    public String getDisplayName() {
-        return BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf") != null ? BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf") : "DDI PDF Codebook";
+    public String getDisplayName(Locale locale) {
+        String displayName = BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf", locale);
+        return Optional.ofNullable(displayName).orElse("DDI pdf codebook");
     }
 
     @Override
-    public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException {
-        try {
-            InputStream ddiInputStream;
-            try {
-                ddiInputStream = ExportService.getInstance().getExport(version.getDataset(), "ddi");
-            } catch(ExportException | IOException e) {
-                throw new ExportException ("Cannot open export_ddi cached file");
+    public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException {
+        Optional<InputStream> ddiInputStreamOptional = dataProvider.getPrerequisiteInputStream();
+        if (ddiInputStreamOptional.isPresent()) {
+            try (InputStream ddiInputStream = ddiInputStreamOptional.get()) {
+                DdiExportUtil.datasetPdfDDI(ddiInputStream, outputStream);
+            } catch (IOException e) {
+                throw new ExportException("Cannot open export_ddi cached file");
+            } catch (XMLStreamException xse) {
+                throw new ExportException("Caught XMLStreamException performing DDI export");
             }
-            DdiExportUtil.datasetPdfDDI(ddiInputStream, outputStream);
-        } catch (XMLStreamException xse) {
-            throw new ExportException ("Caught XMLStreamException performing DDI export");
+        } else {
+            throw new ExportException("No prerequisite input stream found");
         }
     }
 
-    @Override
-    public Boolean isXMLFormat() {
-        return false;
-    }
-
     @Override
     public Boolean isHarvestable() {
         // No, we don't want this format to be harvested!
         // For datasets with tabular data the portions of the DDIs
         // become huge and expensive to parse; even as they don't contain any
         // metadata useful to remote harvesters. -- L.A. 4.5
         return false;
     }
 
     @Override
     public Boolean isAvailableToUsers() {
         return true;
     }
 
     @Override
-    public String getXMLNameSpace() throws ExportException {
-        return null;
-    }
-
-    @Override
-    public String getXMLSchemaLocation() throws ExportException {
-        return null;
-    }
-
-    @Override
-    public String getXMLSchemaVersion() throws ExportException {
-        return null;
-    }
-
-    @Override
-    public void setParam(String name, Object value) {
-        // this exporter does not uses or supports any parameters as of now.
+    }
+
+    @Override
+    public Optional<String> getPrerequisiteFormatName() {
+        //This exporter relies on being able to get the output of the ddi exporter
+        return Optional.of("ddi");
     }
 
     @Override
     public String getMediaType() {
-        return MediaType.WILDCARD;
+        return MediaType.TEXT_HTML;
     };
 }
+
+
+
From cb85fd678945ed206cbcca7fc378bbb7d1771394 Mon Sep 17 00:00:00 2001
From: lubitchv
Date: Thu, 15 Jun 2023 17:54:09 -0400
Subject: [PATCH 06/85] english

---
 .../dataverse/export/PdfCodeBookExporter.java |    2 +-
 .../dataverse/export/ddi/DdiExportUtil.java   |   15 +-
 .../{from-ddi-2.5 => }/ddi-to-fo.xsl          |  444 +++++++++---------
 .../from-ddi-2.5/ddi-pdf/i18n.inc.xslt        |    5 -
 .../ddi-pdf/messages_en.properties.xml        |  174 -------
 .../ddi-pdf/messages_es.properties.xml        |  170 -------
 .../ddi-pdf/messages_fr.properties.xml        |  173 -------
 .../ddi-pdf/messages_ja.properties.xml        |  161 -------
 .../ddi-pdf/messages_nn.properties.xml        |  174 -------
 .../ddi-pdf/messages_no.properties.xml        |  174 -------
 .../ddi-pdf/messages_ru.properties.xml        |  169 -------
 11 files changed, 234 insertions(+), 1427 deletions(-)
 rename src/main/resources/edu/harvard/iq/dataverse/{from-ddi-2.5 => }/ddi-to-fo.xsl (91%)
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml
 delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml

diff --git a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
index c4f1d3ff2cc..e0d5171e30c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java
@@ -75,7 +75,7 @@ public Optional<String> getPrerequisiteFormatName() {
 
     @Override
     public String getMediaType() {
-        return MediaType.TEXT_HTML;
+        return MediaType.WILDCARD;
     };
 }

diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
index 00ec3c4d5c9..c77be3515f7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java
@@ -43,6 +43,7 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.net.URL;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -2110,7 +2111,10 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa
     public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException {
         try {
-            File xsltfile = new File("/home/victoria/ddi-to-fo.xsl");
+            //File xsltfile = new File("/home/victoria/fop-2.8/fop/ddi-to-fo.xsl");
+            //URL resource = DdiExportUtil.class.getResource("edu/harvard/iq/dataverse/ddi-to-fo.xsl");
+            //File xsltfile = new File(resource.toURI());
+            InputStream styleSheetInput = DdiExportUtil.class.getClassLoader().getResourceAsStream("edu/harvard/iq/dataverse/ddi-to-fo.xsl");
             logger.info("start datasetPdfDDI");
             //InputStream xsltfile = DdiExportUtil.class.getClassLoader().getResourceAsStream(
             //        "edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl");
@@ -2122,7 +2126,7 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream
             Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, outputStream);
             // Setup XSLT
             TransformerFactory factory = TransformerFactory.newInstance();
-            Transformer transformer = factory.newTransformer(new StreamSource(xsltfile));
+            Transformer transformer = factory.newTransformer(new StreamSource(styleSheetInput));
 
             // Set the value of a <param> in the stylesheet
             transformer.setParameter("versionParam", "2.0");
@@ -2135,10 +2139,13 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream
             // Start XSLT transformation and FOP processing
             transformer.transform(src, res);
-        } finally {
-            outputStream.close();
+
+            } catch (Exception e) {
+                logger.info("First try");
+                logger.severe(e.getMessage());
             }
         } catch (Exception e) {
+            logger.info("Second try");
             logger.severe(e.getMessage());
         }
     }

diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl
similarity index 91%
rename from src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl
rename to src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl
index 64849846481..1c03d5caf34 100644
--- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl
+++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl
[444 changed lines across many hunks: the stylesheet's i18n message lookups are rewritten to inline English key literals (select="'Data_Processing_and_Appraisal'", select="'Metadata_Producers'", select="'Production_Date'", and similar), so the external message catalogs are no longer needed; the XML markup of these hunks did not survive extraction and they are not reproduced here.]

diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt
deleted file mode 100644
index edf876f3b04..00000000000
--- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/i18n.inc.xslt
+++ /dev/null
@@ -1,5 +0,0 @@
[the five removed lines are the i18n include wrapper; their markup did not survive extraction]

diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml
deleted file mode 100644
index d8e98dfd3c6..00000000000
--- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_en.properties.xml
+++ /dev/null
@@ -1,174 +0,0 @@
[removes the 174 English label strings added in PATCH 04; the excerpt is truncated partway through this hunk, before the remaining messages_*.properties.xml deletions]
-Key(s) -Question -Source of information -Imputation -Security -To open this file, you will need the free -Other resources -Data Dictionnary -Information -January -Other documents -Minimum -Scope & Coverage -Metadata Producer(s) -Show more info -Data Collector(s) -Post-question -Topics -Sampling Procedure -File Structure -Variables List -Format -Sampling Notes -Variables Group(s) -Description -Categories -Maximum -Depositor(s) -August -NW -Cover Page -Weighted -March - total - showing a subset of -Countries -question details - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml deleted file mode 100644 index 9cfcdaf6e7e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_es.properties.xml +++ /dev/null @@ -1,170 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Válido -No se presentan las tablas de frecuencias -Derivación -discreta -Método de Recolección -Otros relacionados al procesamiento -Otros Reconocimientos -Sin título -Identificación -Presione aquí para acceder/exportar al archivo(s) de datos -Valor -Porcentaje -Ponderando -Investigadores Principales -Este documento fue producido utilizando el -Muestreo -Casos -Condiciones de uso -Fuente -Modalidades -Derechos y Notas Legales -Definición -Estimaciones del Error Muestral -Archivo de Datos -Cobertura Geográfica -Abril -Media -Producción de los Metadatos -Recolección de Datos -Datos perdidos -Programas informáticos -Variable(s) -Manual del encuestador -Agencias Auspiciadoras -Noviembre -Valores perdidos -Versión -Universo -Contribuidor(es) -Institución Propietaria -Tratamiento y Validación de Datos -Dominio Temático -Documentos Administrativos -Desviación -Contacto(s) -Etiqueta -Documentos Técnicos -Decimal(es) -Tipo -Pregunta textual -Conceptos -Rango -Resumen -Junio -Supervisión -Otras Formas de Validación de los Datos -Referencias -Accesibilidad -Fechas de Recolección de Datos -Procesamiento de Datos -Cuestionarios -Casos válidos -Reportes y documentos analíticos -Derechos de Autor -Documentación -Modificaciones al Diseño Muestral -Editor(es) -Febrero -Contenido de la Base de Datos -Reconocimiento(s) -Contínua -Desviación estándar -Descripción de la variable -Productor -Fecha de Producción -El Explorador NESSTAR permite visualizar los archivos de datos y exportarlos a diferentes formatos estadísticos -Discreta -Grupo -Julio -Nombre del Archivo -Casos -Nombre -Cuadros estadísticos -Diciembre -Temas -Controles de Tratamiento -software -Manual del encuestador -Indice -Información acerca de la Documentación -Subgrupo(s) -Palabra Clave -grupo(s) -P -Ponderador -Descripción de los Archivos -Notas -Notas sobre la Recolección de Datos -archivo(s) -continua -Nota Legal -Contenido -variable(s) -Otros Productores -Productores y Auspiciadores -Notas acerca de la Depuración de los Datos -Distribuidor(es) -Resumen General -Forma de citar -Septiembre -Categoría -Confidencialidad -Estadística - -Mayo -Indeterminado -Estructura -archivo -Pre-pregunta -Tasa de Respuesta -Ancho -Recodificación y Derivación -Series -Octubre -Unidad de Análisis -Notas sobre el Procesamiento de Datos -Tipo de Datos -Archivo -Periodo de Referencia -Contenido del Archivo -Inválido -Vars. -cont. 
-Clave(s) -Pregunta -Fuente de información -Imputación -Seguridad -Para abrir este archivo se necesita el software gratuito -Otros recursos -Diccionario de Datos -Información -Enero -Otros documentos -Mínimo -Cobertura y Dominio Temático -Productor de los Metadatos -Mostrar más información -Entrevistador(es) -Pos-pregunta -Temas -Procedimiento de Muestreo -Estructura del Archivo -Lista de variables -Formato -Notas sobre el Muestreo -Grupo(s) de Variables -Descripción -Categorías -Máximo -Depositante(s) -Agosto -NP -Carátula -Ponderado -Marzo - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml deleted file mode 100644 index 9fa4d2178b1..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_fr.properties.xml +++ /dev/null @@ -1,173 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Valide -Tableau de fréquences non-affiché -Mode de calcul -discrète -Méthode de collecte -Autre traitement -Autre(s) remerciement(s) -Sans titre -Identification -Cliquer ici pour accéder/exporter les fichiers de données du format Nesstar -Valeur -Pourcentage -Pondération -Enquêteur(s) principal/aux -Ce document a été généré à l'aide du -Echantillonage -Enregistrements -Conditions d'accès -Source -Modalités -Responsabilité et droits d'auteurs -Définition -Estimation des erreurs d'échantillonage -Fichiers de données -Couverture géographique -Avril -Moyenne -Production des méta-données -Collecte des données -Valeures manquantes -Programmes informatiques -Variable(s) -Instructions aux enquêteurs -Financement -Novembre -Manquant -Version -Univers -Contributeur(s) -Agence propriétaire -Traitement et évaluation des données -Domaines thématiques -Documents administratifs -Ecart type -Contact(s) -Libellé -Documents techniques -Décimale(s) -Type -Formulation de la question -Concepts -Gamme -Résumé -Juin -Supervision -Autres formes d'évaluation des données -Références -Accessibilité -Dates de collecte -Edition des données -Questionnaires -Cas valide(s) -Rapports et documents analytiques -Droits d'auteurs -Documentation -Déviations par rapport à l'échantillon initial -Editeur(s) -Février -Le jeu de données contient -Remerciement(s) -Continue -Ecart type -Description des variables -Producteur -Date de production - -L'Explorer vous permet d'accéder aux données et de les exporter vers les formats statistiques les plus courants -Discrète -Groupe -Juillet -Nom du fichier -Enreg. -Nom -Avertissement: Ces chiffres indiquent le nombre de cas identifiés dans le fichier de données. Ils ne peuvent pas être interpretés comme étant représentatifs de la population concernée. 
-Tableaux statistiques -Décembre -Sujets -Contrôles de traitement - -Instructions aux enquêteurs -Table des matières -Informations sur le document -Sous-groupe(s) -Mots-clé -groupe(s) -P -Pondération -Description des fichiers -Notes -Notes sur la collecte -fichier(s) -continue -Responsabilité(s) -Contenu -variable(s) -Autre(s) producteur(s) -Producteurs et sponsors -Notes sur l'apurement des données -Distributeur(s) -Aperçu -Citation -Septembre -Catégorie -Confidentialité -Statistiques -Mai -Indéterminé -Structure -fichier -Pré-question -Taux de réponse -Taille -Formulation de la question -Recodage et dérivation -Série -Octobre -Unité d'analyse -Notes sur le traitement des données -Type d'étude -Fichier -Période(s) de référence -Contenu du fichier -Non-valide -Vars -suite -Clé(s) -Question -Source d'information -Imputation -Sécurité -Pour ouvrir ce fichier, vous avez besoin du logiciel gratuit -Autres resources -Dictionnaire des variables -Information -Janvier -Autres documents -Minimum -Domaines thématiques et couverture -Producteur(s) des méta-données -Information complémentaire -Enquêteurs -Post-question -Thèmes -Procédure d'échantillonage -Structure du fichier -Liste des variables -Format -Notes sur l'échantillonage -Groupe(s) de variables -Description -Catégories -Maximum -Dépositaire(s) -Août -NP -Couverture -Pondéré -Mars -question details - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml deleted file mode 100644 index bc5dbb06154..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ja.properties.xml +++ /dev/null @@ -1,161 +0,0 @@ - - -Generated by Properties2Xml on Tue Feb 13 13:55:43 EST 2007 -有効な -度数表(Frequency table)は表示されません -由来 -不連続な -データ収集モード -その他の確認事項 -識別番号 -データファイルにアクセスするにはここをクリックしてください -無題 - -割合 -ウェイティング -第一次調査官 -この文書はToolkitを使用して作られました -サンプリング -ケース -アクセス条件 -情報源 -様相 -権利及び声明文 -定義 -サンプルエラーの見積もり -データファイル -地理的な適用範囲 -4月 -平均 -メタデータ製作 -データ収集 -損失データ -スクリプトおよびプログラム -可変的 -面接者の指示 -出資機関 -11月 -バージョン -共通の -貢献者 -アクセス権限 -データ処理、評価 -範囲, 領域 -管理用文章 -連絡先 -ラベル -技術的な文書 -小数点 -タイプ -文字の質問 -概念 -範囲 -要約 -6月 -監督 -その他ファーマットのデータ評価 -参照 -アクセス、入手法 -データ収集日 -データ編集 -質問 -レポートおよび分析的な文書 -有効な場合 -コピーライト -書類 -サンプルデザインによる偏差 -発行者 -2月 -データセットに含まれる -確認事項 -連続的な -標準偏差 -変数の記述 -製作者 -製作日 -” Explorer”によってデータファイルを参照することも一般的に使えわれている統計データフォーマットに変換。抽出することも可能です -不連続性 -グループ -7月 -ファイルの名前 -ケース -名前 -統計表 -12月 -主題, 内容 -工程監査 -ソフト -面接者への指示 -目録 -書類の情報 -サブグループ -キーワード - -グループ -ウェイト -ファイルの詳細 -メモ -データ収集メモ -ファイル -継続的な -声明文 -内容 -変数 -その他の製作者 -製作者とスポンサー -データクリーニングメモ -分配者 -概略 -引用する場合の必要条件 -9月 -カテゴリー -機密性、コンフィデンシャリティー -5月 -未定 -構造 -ファイル -調査前の質問 -回答比率 - -記録と誘導 -シリー -10月 -分析の単位 -データ処理メモ -データの種類 - -ファイル -期間 -ファイルの内容 -無効 -キー -情報源 -非難 -セキュリティー -このファイルを開けるには、無料で配布されているNesstar Explorer が必要です。 -その他の資料 -データ辞典 -情報 -1月 -その他の書類 -最小値 -規模及び適用範囲 -メタデータ製作者 -さらにインフォメーションを表示 -データ収集者 -調査後の質問 -サンプリングの手順 -ファイルの構造 -変数のリスト -フォーマット -サンプリングメモ -変数のグループ -詳細 -カテゴリー -最大値 -デポジター、提供者、供託者 -8月 -表紙 -ウェイトされた -3月 - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml deleted file mode 100644 index fdf14f5dfcd..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_nn.properties.xml +++ /dev/null @@ -1,174 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 -Gyldige -Frekvenstabell ikke vist 
-Avledning -diskret -Type datainnsamling -Annen prosessering -Andre identifikatorer og krediteringer -Uten tittel -Identifisering -Click here to access/export data files from Nesstar format -Verdi -Prosent -Vekting -Primary Investigator(s) -Dette dokumentet ble generert av -Utvalg -Enheter -Tilgangsbetingelser -Kilde -Modaliteter -Rights & Disclaimer -Definisjon -Estimert utvalgsfeil -Datafiler -Geografisk omfang -April -Mean -Metadata-produksjon -Datainnsamling -Manglende data -Script og programmer -Variable(r) -Instruksjoner til intervjueren -Sponsor/finansierende institusjon(er) -November -Missing -Versjon -Univers -Bidragsyter(e) -Tilgangskontrollør -Dataprosessering og -evaluering -Omfang -Administrative dokumenter -Standardavvik -Kontaktperson(er) -Merkelapp -Tekniske dokumenter -Desimal(er) -Type -Spørsmålstekst -Begrep(er) -Rekkevidde -Sammendrag -Juni -Supervision -Andre former for dataevaluering -Referanser -Tilgjengelighet -Datainnsamlingsdatoer -Dataredigering -Spørreskjema -Gyldige enheter -Rapporter og analysedokumenter -Copyright -Dokumentasjon -Avvik fra utvalgsdesign -Utgiver(e) -Februar -Datasettet inneholder -Krediteringer -Kontinuerlig -Standardavvik -Variabelbeskrivelse -Produsent -Produksjonsdato - -The Explorer allows you to view data files and export them to common statistical formats -Diskret -Gruppe -Juli -Filnavn -Enheter -Navn -Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. -Statistiske tabeller -Desember -Emner -Prosesseringssjekk -programvare -Instruksjoner til intervjueren -Innholdsfortegnelse -Dokumentinformasjon -Undergruppe(r) -Nøkkelord -gruppe(r) -W -Vekt -Filbeskrivelse -Kommentarer -Datainnsamlingskommentarer -file(r) -kontinuerlig -Fraskrivelse -Innhold -variable(r) -Andre produsenter -Produsenter og sponsorer -Kommentarer om datarensing -Distributør(er) -Oversikt -Sitatkrav -September -Kategori -Konfidensialitet -Statistikk -Mai -Uavklart -Struktur -fil -Tekst før spørsmål -Responsrate -Bredde -Omkodinger og utledninger -Serie -Oktober -Analyseenhet -Dataprosesseringskommentarer -Datatype -Fil -Tidsperiode(r) -Filinnhold -Ugyldig -Variabler -kont. 
-Nøkler -Spørsmål -Kilde for informasjon -Imputasjon -Sikkerhet -For å åpne denne filen trenger du følgende gratisverktøy -Andre ressurser -Dataordbok -Informasjon -Januar -Andre dokumenter -Minimum -Omfang -Metadataprodusenter -Vis mer informasjon -Datainnsamler(e) -Tekst etter spørsmål -Emner -Utvalgsprosedyre -Filstruktur -Variabelliste -Format -Utvalgskommentarer -Variabelgrupper -Beskrivelse -Kategorier -Maksimum -Utgiver(e) -August -NW -Forside -Vektet -Mars - total - viser et utvalg av -Land -spørsmålsdetaljer - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml deleted file mode 100644 index fdf14f5dfcd..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_no.properties.xml +++ /dev/null @@ -1,174 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 -Gyldige -Frekvenstabell ikke vist -Avledning -diskret -Type datainnsamling -Annen prosessering -Andre identifikatorer og krediteringer -Uten tittel -Identifisering -Click here to access/export data files from Nesstar format -Verdi -Prosent -Vekting -Primary Investigator(s) -Dette dokumentet ble generert av -Utvalg -Enheter -Tilgangsbetingelser -Kilde -Modaliteter -Rights & Disclaimer -Definisjon -Estimert utvalgsfeil -Datafiler -Geografisk omfang -April -Mean -Metadata-produksjon -Datainnsamling -Manglende data -Script og programmer -Variable(r) -Instruksjoner til intervjueren -Sponsor/finansierende institusjon(er) -November -Missing -Versjon -Univers -Bidragsyter(e) -Tilgangskontrollør -Dataprosessering og -evaluering -Omfang -Administrative dokumenter -Standardavvik -Kontaktperson(er) -Merkelapp -Tekniske dokumenter -Desimal(er) -Type -Spørsmålstekst -Begrep(er) -Rekkevidde -Sammendrag -Juni -Supervision -Andre former for dataevaluering -Referanser -Tilgjengelighet -Datainnsamlingsdatoer -Dataredigering -Spørreskjema -Gyldige enheter -Rapporter og analysedokumenter -Copyright -Dokumentasjon -Avvik fra utvalgsdesign -Utgiver(e) -Februar -Datasettet inneholder -Krediteringer -Kontinuerlig -Standardavvik -Variabelbeskrivelse -Produsent -Produksjonsdato - -The Explorer allows you to view data files and export them to common statistical formats -Diskret -Gruppe -Juli -Filnavn -Enheter -Navn -Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. -Statistiske tabeller -Desember -Emner -Prosesseringssjekk -programvare -Instruksjoner til intervjueren -Innholdsfortegnelse -Dokumentinformasjon -Undergruppe(r) -Nøkkelord -gruppe(r) -W -Vekt -Filbeskrivelse -Kommentarer -Datainnsamlingskommentarer -file(r) -kontinuerlig -Fraskrivelse -Innhold -variable(r) -Andre produsenter -Produsenter og sponsorer -Kommentarer om datarensing -Distributør(er) -Oversikt -Sitatkrav -September -Kategori -Konfidensialitet -Statistikk -Mai -Uavklart -Struktur -fil -Tekst før spørsmål -Responsrate -Bredde -Omkodinger og utledninger -Serie -Oktober -Analyseenhet -Dataprosesseringskommentarer -Datatype -Fil -Tidsperiode(r) -Filinnhold -Ugyldig -Variabler -kont. 
-Nøkler -Spørsmål -Kilde for informasjon -Imputasjon -Sikkerhet -For å åpne denne filen trenger du følgende gratisverktøy -Andre ressurser -Dataordbok -Informasjon -Januar -Andre dokumenter -Minimum -Omfang -Metadataprodusenter -Vis mer informasjon -Datainnsamler(e) -Tekst etter spørsmål -Emner -Utvalgsprosedyre -Filstruktur -Variabelliste -Format -Utvalgskommentarer -Variabelgrupper -Beskrivelse -Kategorier -Maksimum -Utgiver(e) -August -NW -Forside -Vektet -Mars - total - viser et utvalg av -Land -spørsmålsdetaljer - diff --git a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml deleted file mode 100644 index 06fde85af5e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/from-ddi-2.5/ddi-pdf/messages_ru.properties.xml +++ /dev/null @@ -1,169 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Валидный -Частотная таблица не выводится -Расчет -дискретная -Способ сбора данных -Прочая обработка -Другие участники -Безымянный -Индентификация -Щелкните здесь, чтобы получить доступ к файлам или экспортировать их -Значение -Процент -Взвешивание -Первичный(е) исследователь(и) -Документ был сгенерирован с использованием -Выборка -Наблюдения -Условия доступа -Источник -Модальности -Авторские права и ограничения ответственности -Определение -Оценки ошибок выборки -Файлы данных -Географический охват -Апрель -Среднее -Разработка метаданных -Сбор данных -Пропущенные данные -Скрипты и программы -Переменная(ые) -Инструкции интервьюеру -Кто финансировал -Ноябрь -Пропущенные -Версия -Генеральная совокупность -Участник(и) -Права доступа -Обработка и инспекция данных -Охват -Административные документы -СтдОткл -Контак(ы) -Метка -Технические документы -Десятичные -Тип -Текст вопроса -Концепции -Диапазон -Резюме -Июнь -Контроль -Другие формы инспекции данных -Установки -Доступность -Даты сбора данных -Редактирование данных -Вопросники -Валидное(ые) наблюдение(я) -Отчеты и аналитические документы -Авторские права -Документация -Отклонения от дизайна выборки -Издатель(и) -Февраль -Набор данных содержит -Участник(и) -Непрерывная -Стандартное отклонение -Описание переменных -Разработчик -Дата разработки -Проводник позволяет просматривать файлы данных и экспортировать их в распространенные статистические форматы -Дикретная -Группа -Июль -Имя файла -Наблюдения -Имя -Статистичсекие таблицы -Декабрь -Темы -Контроль обработки -программное обеспечение -Инструкции интервьюеру -Оглавление -Информация о документе -Подгруппа(ы) -Ключевые слова -группа(ы) -B -Вес -Описание файла -Примечания -Примечания по сбору данных -файл(ы) -непрерывная -Ограничения ответственности -Содержание -переменная(ые) -Другие разработчики -Разработчики и спонсоры -Примечания по чистке данных -Дистрибьютор(ы) -Обзор -Требования по цитированию -Сентябрь -Категория -Конфиденциальность -Статистики -Май -Неопределенный -Структура -файл -Текст, предваряющий вопрос -Доля ответов -Ширина -Перекодировка и расчеты -Серия -Октябрь -Единица анализа -Примечания по обработке данных -Тип данных -Файл -Период(ы) времени -Содержание файла -Некорректный -Переменные -непр. 
-Ключ(и) -Вопрос -Источник информации -Импутация -Безопасность -Чтобы открыть этот файл, необходимо иметь свободным -Прочие источники -Словарь данных -Информация -Январь -Прочие документы -Минимум -Охват и покрытие -Разработчик(и) метаданных -Показать дополнительную информацию -Кто собирал данные -Текст после вопроса -Разделы -Процедура выборки -Структура файла -Список переменных -Формат -Примечания по выборке -Группа(ы) переменных -Описание -Категории -Максимум -Депозитор(ы) -Август -HB -Титульный лист -взвешенные -Март - From 685bc8be429e6f65765e66668de372ee2c8bb2b5 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 1 Sep 2023 14:09:06 -0400 Subject: [PATCH 07/85] xslt --- .../edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 7739408e408..9076680754e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -2123,13 +2123,7 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { try { - //File xsltfile = new File("/home/victoria/fop-2.8/fop/ddi-to-fo.xsl"); - //URL resource = DdiExportUtil.class.getResource("edu/harvard/iq/dataverse/ddi-to-fo.xsl"); - //File xsltfile = new File(resource.toURI()); InputStream styleSheetInput = DdiExportUtil.class.getClassLoader().getResourceAsStream("edu/harvard/iq/dataverse/ddi-to-fo.xsl"); - logger.info("start datasetPdfDDI"); - //InputStream xsltfile = DdiExportUtil.class.getClassLoader().getResourceAsStream( - // "edu/harvard/iq/dataverse/from-ddi-2.5/ddi-to-fo.xsl"); final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI()); FOUserAgent foUserAgent = fopFactory.newFOUserAgent(); @@ -2153,7 +2147,6 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream transformer.transform(src, res); } catch (Exception e) { - logger.info("First try"); logger.severe(e.getMessage()); } } catch (Exception e) { From 12c463a8fa41c44ee3a94926299ceffaa7d6a114 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Tue, 5 Sep 2023 12:19:17 -0400 Subject: [PATCH 08/85] inc --- .../dataverse/export/PdfCodeBookExporter.java | 36 +- .../dataverse/export/ddi/DdiExportUtil.java | 7 +- .../harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl | 5 + .../ddi-pdf/messages_en.properties.xml | 174 +++++++ .../ddi-pdf/messages_es.properties.xml | 170 +++++++ .../ddi-pdf/messages_fr.properties.xml | 173 +++++++ .../ddi-pdf/messages_ja.properties.xml | 161 +++++++ .../ddi-pdf/messages_nn.properties.xml | 174 +++++++ .../ddi-pdf/messages_no.properties.xml | 174 +++++++ .../ddi-pdf/messages_ru.properties.xml | 169 +++++++ .../edu/harvard/iq/dataverse/ddi-to-fo.xsl | 448 +++++++++--------- 11 files changed, 1446 insertions(+), 245 deletions(-) create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml create mode 100644 
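One note on the error handling retained by PATCH 07 above: the surviving catch blocks log only e.getMessage(), which can be null and in any case drops the stack trace. A hedged alternative using java.util.logging, shown purely as a sketch rather than a proposed change to the patch:

    import java.util.logging.Level;
    import java.util.logging.Logger;

    public class TransformErrorLoggingSketch {
        private static final Logger logger =
                Logger.getLogger(TransformErrorLoggingSketch.class.getName());

        // Passing the throwable itself preserves the full stack trace in the
        // log, instead of a possibly-empty message string.
        static void runLogged(Runnable transformStep) {
            try {
                transformStep.run();
            } catch (RuntimeException e) {
                logger.log(Level.SEVERE, "DDI-to-PDF transformation failed", e);
            }
        }
    }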
src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml create mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml diff --git a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java index e0d5171e30c..4772c09ffd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java @@ -3,25 +3,23 @@ package edu.harvard.iq.dataverse.export; import com.google.auto.service.AutoService; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; - import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil; - import io.gdcc.spi.export.ExportDataProvider; - import io.gdcc.spi.export.ExportException; - import io.gdcc.spi.export.Exporter; - import edu.harvard.iq.dataverse.util.BundleUtil; - - import javax.json.JsonObject; - import javax.ws.rs.core.MediaType; - import javax.xml.stream.XMLStreamException; - import java.io.File; - import java.io.IOException; - import java.io.InputStream; - import java.io.OutputStream; - import java.nio.file.Path; - import java.nio.file.Paths; - import java.util.Locale; - import java.util.Optional; + +import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil; +import io.gdcc.spi.export.ExportDataProvider; +import io.gdcc.spi.export.ExportException; +import io.gdcc.spi.export.Exporter; +import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.ws.rs.core.MediaType; + +import javax.xml.stream.XMLStreamException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Locale; +import java.util.Optional; @AutoService(Exporter.class) public class PdfCodeBookExporter implements Exporter { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 9ed89a6fe94..aab728c7773 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -76,6 +76,7 @@ import org.apache.fop.apps.Fop; import org.apache.fop.apps.FopFactory; import org.apache.fop.apps.MimeConstants; +import java.io.File; public class DdiExportUtil { @@ -2108,6 +2109,7 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { try { + String sysId = DdiExportUtil.class.getClassLoader().getResource("edu/harvard/iq/dataverse/ddi-to-fo.xsl").toURI().toString(); InputStream styleSheetInput = DdiExportUtil.class.getClassLoader().getResourceAsStream("edu/harvard/iq/dataverse/ddi-to-fo.xsl"); final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI()); @@ -2117,7 +2119,9 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, outputStream); // Setup XSLT TransformerFactory factory = TransformerFactory.newInstance(); - Transformer transformer = factory.newTransformer(new StreamSource(styleSheetInput)); + Source mySrc = new StreamSource(styleSheetInput); + mySrc.setSystemId(sysId); + Transformer 
transformer = factory.newTransformer(mySrc); // Set the value of a in the stylesheet transformer.setParameter("versionParam", "2.0"); @@ -2135,7 +2139,6 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream logger.severe(e.getMessage()); } } catch (Exception e) { - logger.info("Second try"); logger.severe(e.getMessage()); } } diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl new file mode 100644 index 00000000000..edf876f3b04 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl @@ -0,0 +1,5 @@ + + + + + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml new file mode 100644 index 00000000000..d8e98dfd3c6 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Valid +Frequency table not shown +Derivation +discrete +Data Collection Mode +Other Processing +Other Acknowledgment(s) +Untitled +Identification +Click here to access/export data files from Nesstar format +Value +Percentage +Weighting +Primary Investigator(s) +This document was generated using the +Sampling +Cases +Access Conditions +Source +Modalities +Rights & Disclaimer +Definition +Estimates of Sampling Error +Data Files +Geographic Coverage +April +Mean +Metadata Production +Data Collection +Missing Data +Scripts and programs +Variable(s) +Interviewer instructions +Funding Agency/ies +November +Missing +Version +Universe +Contributor(s) +Access Authority +Data Processing & Appraisal +Scope +Administrative documents +StdDev +Contact(s) +Label +Technical documents +Decimal(s) +Type +Literal question +Concepts +Range +Abstract +June +Supervision +Other Forms of Data Appraisal +References +Accessibility +Data Collection Dates +Data Editing +Questionnaires +Valid case(s) +Reports and analytical documents +Copyright +Documentation +Deviations from Sample Design +Publisher(s) +February +Dataset contains +Acknowledgment(s) +Continuous +Standard deviation +Variables Description +Producer +Production Date + +The Explorer allows you to view data files and export them to common statistical formats +Discrete +Group +July +Filename +Cases +Name +Warning: these figures indicate the number of cases found in the data file. They cannot be interpreted as summary statistics of the population of interest. +Statistical tables +December +Subjects +Processing Checks +software +Interviewer's instructions +Table of Contents +Document Information +Subgroup(s) +Keywords +group(s) +W +Weight +Files Description +Notes +Data Collection Notes +file(s) +continuous +Disclaimer +Content +variable(s) +Other Producer(s) +Producers & Sponsors +Data Cleaning Notes +Distributor(s) +Overview +Citation Requirements +September +Category +Confidentiality +Statistics +May +Undetermined +Structure +file +Pre-question +Response Rate +Width +Recoding and Derivation +Series +October +Unit of Analysis +Data Processing Notes +Kind of Data +File +Time Period(s) +File Content +Invalid +Vars +cont. 
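The setSystemId call added above is the substantive fix in this hunk: a StreamSource built from getResourceAsStream has no base URI, so relative references inside the stylesheet (presumably the include of i18n.inc.xsl and the loads of ddi-pdf/messages_*.properties.xml) cannot be resolved. Setting the system id from the resource URL supplies that base. A condensed sketch of the same pattern, error handling omitted for brevity:

    import java.io.InputStream;
    import javax.xml.transform.Source;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.stream.StreamSource;

    public class SystemIdSketch {
        public static Transformer newDdiToFoTransformer() throws Exception {
            ClassLoader cl = SystemIdSketch.class.getClassLoader();
            String path = "edu/harvard/iq/dataverse/ddi-to-fo.xsl";
            String sysId = cl.getResource(path).toURI().toString();
            InputStream in = cl.getResourceAsStream(path);
            Source src = new StreamSource(in);
            // Without this, stylesheet compilation fails because relative
            // URIs inside ddi-to-fo.xsl have nothing to resolve against.
            src.setSystemId(sysId);
            return TransformerFactory.newInstance().newTransformer(src);
        }
    }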
+Key(s) +Question +Source of information +Imputation +Security +To open this file, you will need the free +Other resources +Data Dictionnary +Information +January +Other documents +Minimum +Scope & Coverage +Metadata Producer(s) +Show more info +Data Collector(s) +Post-question +Topics +Sampling Procedure +File Structure +Variables List +Format +Sampling Notes +Variables Group(s) +Description +Categories +Maximum +Depositor(s) +August +NW +Cover Page +Weighted +March + total - showing a subset of +Countries +question details + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml new file mode 100644 index 00000000000..9cfcdaf6e7e --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml @@ -0,0 +1,170 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Válido +No se presentan las tablas de frecuencias +Derivación +discreta +Método de Recolección +Otros relacionados al procesamiento +Otros Reconocimientos +Sin título +Identificación +Presione aquí para acceder/exportar al archivo(s) de datos +Valor +Porcentaje +Ponderando +Investigadores Principales +Este documento fue producido utilizando el +Muestreo +Casos +Condiciones de uso +Fuente +Modalidades +Derechos y Notas Legales +Definición +Estimaciones del Error Muestral +Archivo de Datos +Cobertura Geográfica +Abril +Media +Producción de los Metadatos +Recolección de Datos +Datos perdidos +Programas informáticos +Variable(s) +Manual del encuestador +Agencias Auspiciadoras +Noviembre +Valores perdidos +Versión +Universo +Contribuidor(es) +Institución Propietaria +Tratamiento y Validación de Datos +Dominio Temático +Documentos Administrativos +Desviación +Contacto(s) +Etiqueta +Documentos Técnicos +Decimal(es) +Tipo +Pregunta textual +Conceptos +Rango +Resumen +Junio +Supervisión +Otras Formas de Validación de los Datos +Referencias +Accesibilidad +Fechas de Recolección de Datos +Procesamiento de Datos +Cuestionarios +Casos válidos +Reportes y documentos analíticos +Derechos de Autor +Documentación +Modificaciones al Diseño Muestral +Editor(es) +Febrero +Contenido de la Base de Datos +Reconocimiento(s) +Contínua +Desviación estándar +Descripción de la variable +Productor +Fecha de Producción +El Explorador NESSTAR permite visualizar los archivos de datos y exportarlos a diferentes formatos estadísticos +Discreta +Grupo +Julio +Nombre del Archivo +Casos +Nombre +Cuadros estadísticos +Diciembre +Temas +Controles de Tratamiento +software +Manual del encuestador +Indice +Información acerca de la Documentación +Subgrupo(s) +Palabra Clave +grupo(s) +P +Ponderador +Descripción de los Archivos +Notas +Notas sobre la Recolección de Datos +archivo(s) +continua +Nota Legal +Contenido +variable(s) +Otros Productores +Productores y Auspiciadores +Notas acerca de la Depuración de los Datos +Distribuidor(es) +Resumen General +Forma de citar +Septiembre +Categoría +Confidencialidad +Estadística + +Mayo +Indeterminado +Estructura +archivo +Pre-pregunta +Tasa de Respuesta +Ancho +Recodificación y Derivación +Series +Octubre +Unidad de Análisis +Notas sobre el Procesamiento de Datos +Tipo de Datos +Archivo +Periodo de Referencia +Contenido del Archivo +Inválido +Vars. +cont. 
+Clave(s) +Pregunta +Fuente de información +Imputación +Seguridad +Para abrir este archivo se necesita el software gratuito +Otros recursos +Diccionario de Datos +Información +Enero +Otros documentos +Mínimo +Cobertura y Dominio Temático +Productor de los Metadatos +Mostrar más información +Entrevistador(es) +Pos-pregunta +Temas +Procedimiento de Muestreo +Estructura del Archivo +Lista de variables +Formato +Notas sobre el Muestreo +Grupo(s) de Variables +Descripción +Categorías +Máximo +Depositante(s) +Agosto +NP +Carátula +Ponderado +Marzo + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml new file mode 100644 index 00000000000..9fa4d2178b1 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml @@ -0,0 +1,173 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Valide +Tableau de fréquences non-affiché +Mode de calcul +discrète +Méthode de collecte +Autre traitement +Autre(s) remerciement(s) +Sans titre +Identification +Cliquer ici pour accéder/exporter les fichiers de données du format Nesstar +Valeur +Pourcentage +Pondération +Enquêteur(s) principal/aux +Ce document a été généré à l'aide du +Echantillonage +Enregistrements +Conditions d'accès +Source +Modalités +Responsabilité et droits d'auteurs +Définition +Estimation des erreurs d'échantillonage +Fichiers de données +Couverture géographique +Avril +Moyenne +Production des méta-données +Collecte des données +Valeures manquantes +Programmes informatiques +Variable(s) +Instructions aux enquêteurs +Financement +Novembre +Manquant +Version +Univers +Contributeur(s) +Agence propriétaire +Traitement et évaluation des données +Domaines thématiques +Documents administratifs +Ecart type +Contact(s) +Libellé +Documents techniques +Décimale(s) +Type +Formulation de la question +Concepts +Gamme +Résumé +Juin +Supervision +Autres formes d'évaluation des données +Références +Accessibilité +Dates de collecte +Edition des données +Questionnaires +Cas valide(s) +Rapports et documents analytiques +Droits d'auteurs +Documentation +Déviations par rapport à l'échantillon initial +Editeur(s) +Février +Le jeu de données contient +Remerciement(s) +Continue +Ecart type +Description des variables +Producteur +Date de production + +L'Explorer vous permet d'accéder aux données et de les exporter vers les formats statistiques les plus courants +Discrète +Groupe +Juillet +Nom du fichier +Enreg. +Nom +Avertissement: Ces chiffres indiquent le nombre de cas identifiés dans le fichier de données. Ils ne peuvent pas être interpretés comme étant représentatifs de la population concernée. 
+Tableaux statistiques +Décembre +Sujets +Contrôles de traitement + +Instructions aux enquêteurs +Table des matières +Informations sur le document +Sous-groupe(s) +Mots-clé +groupe(s) +P +Pondération +Description des fichiers +Notes +Notes sur la collecte +fichier(s) +continue +Responsabilité(s) +Contenu +variable(s) +Autre(s) producteur(s) +Producteurs et sponsors +Notes sur l'apurement des données +Distributeur(s) +Aperçu +Citation +Septembre +Catégorie +Confidentialité +Statistiques +Mai +Indéterminé +Structure +fichier +Pré-question +Taux de réponse +Taille +Formulation de la question +Recodage et dérivation +Série +Octobre +Unité d'analyse +Notes sur le traitement des données +Type d'étude +Fichier +Période(s) de référence +Contenu du fichier +Non-valide +Vars +suite +Clé(s) +Question +Source d'information +Imputation +Sécurité +Pour ouvrir ce fichier, vous avez besoin du logiciel gratuit +Autres resources +Dictionnaire des variables +Information +Janvier +Autres documents +Minimum +Domaines thématiques et couverture +Producteur(s) des méta-données +Information complémentaire +Enquêteurs +Post-question +Thèmes +Procédure d'échantillonage +Structure du fichier +Liste des variables +Format +Notes sur l'échantillonage +Groupe(s) de variables +Description +Catégories +Maximum +Dépositaire(s) +Août +NP +Couverture +Pondéré +Mars +question details + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml new file mode 100644 index 00000000000..bc5dbb06154 --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml @@ -0,0 +1,161 @@ + + +Generated by Properties2Xml on Tue Feb 13 13:55:43 EST 2007 +有効な +度数表(Frequency table)は表示されません +由来 +不連続な +データ収集モード +その他の確認事項 +識別番号 +データファイルにアクセスするにはここをクリックしてください +無題 + +割合 +ウェイティング +第一次調査官 +この文書はToolkitを使用して作られました +サンプリング +ケース +アクセス条件 +情報源 +様相 +権利及び声明文 +定義 +サンプルエラーの見積もり +データファイル +地理的な適用範囲 +4月 +平均 +メタデータ製作 +データ収集 +損失データ +スクリプトおよびプログラム +可変的 +面接者の指示 +出資機関 +11月 +バージョン +共通の +貢献者 +アクセス権限 +データ処理、評価 +範囲, 領域 +管理用文章 +連絡先 +ラベル +技術的な文書 +小数点 +タイプ +文字の質問 +概念 +範囲 +要約 +6月 +監督 +その他ファーマットのデータ評価 +参照 +アクセス、入手法 +データ収集日 +データ編集 +質問 +レポートおよび分析的な文書 +有効な場合 +コピーライト +書類 +サンプルデザインによる偏差 +発行者 +2月 +データセットに含まれる +確認事項 +連続的な +標準偏差 +変数の記述 +製作者 +製作日 +” Explorer”によってデータファイルを参照することも一般的に使えわれている統計データフォーマットに変換。抽出することも可能です +不連続性 +グループ +7月 +ファイルの名前 +ケース +名前 +統計表 +12月 +主題, 内容 +工程監査 +ソフト +面接者への指示 +目録 +書類の情報 +サブグループ +キーワード + +グループ +ウェイト +ファイルの詳細 +メモ +データ収集メモ +ファイル +継続的な +声明文 +内容 +変数 +その他の製作者 +製作者とスポンサー +データクリーニングメモ +分配者 +概略 +引用する場合の必要条件 +9月 +カテゴリー +機密性、コンフィデンシャリティー +5月 +未定 +構造 +ファイル +調査前の質問 +回答比率 + +記録と誘導 +シリー +10月 +分析の単位 +データ処理メモ +データの種類 + +ファイル +期間 +ファイルの内容 +無効 +キー +情報源 +非難 +セキュリティー +このファイルを開けるには、無料で配布されているNesstar Explorer が必要です。 +その他の資料 +データ辞典 +情報 +1月 +その他の書類 +最小値 +規模及び適用範囲 +メタデータ製作者 +さらにインフォメーションを表示 +データ収集者 +調査後の質問 +サンプリングの手順 +ファイルの構造 +変数のリスト +フォーマット +サンプリングメモ +変数のグループ +詳細 +カテゴリー +最大値 +デポジター、提供者、供託者 +8月 +表紙 +ウェイトされた +3月 + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml new file mode 100644 index 00000000000..fdf14f5dfcd --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Gyldige +Frekvenstabell ikke vist +Avledning +diskret +Type datainnsamling +Annen prosessering +Andre identifikatorer og 
krediteringer +Uten tittel +Identifisering +Click here to access/export data files from Nesstar format +Verdi +Prosent +Vekting +Primary Investigator(s) +Dette dokumentet ble generert av +Utvalg +Enheter +Tilgangsbetingelser +Kilde +Modaliteter +Rights & Disclaimer +Definisjon +Estimert utvalgsfeil +Datafiler +Geografisk omfang +April +Mean +Metadata-produksjon +Datainnsamling +Manglende data +Script og programmer +Variable(r) +Instruksjoner til intervjueren +Sponsor/finansierende institusjon(er) +November +Missing +Versjon +Univers +Bidragsyter(e) +Tilgangskontrollør +Dataprosessering og -evaluering +Omfang +Administrative dokumenter +Standardavvik +Kontaktperson(er) +Merkelapp +Tekniske dokumenter +Desimal(er) +Type +Spørsmålstekst +Begrep(er) +Rekkevidde +Sammendrag +Juni +Supervision +Andre former for dataevaluering +Referanser +Tilgjengelighet +Datainnsamlingsdatoer +Dataredigering +Spørreskjema +Gyldige enheter +Rapporter og analysedokumenter +Copyright +Dokumentasjon +Avvik fra utvalgsdesign +Utgiver(e) +Februar +Datasettet inneholder +Krediteringer +Kontinuerlig +Standardavvik +Variabelbeskrivelse +Produsent +Produksjonsdato + +The Explorer allows you to view data files and export them to common statistical formats +Diskret +Gruppe +Juli +Filnavn +Enheter +Navn +Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. +Statistiske tabeller +Desember +Emner +Prosesseringssjekk +programvare +Instruksjoner til intervjueren +Innholdsfortegnelse +Dokumentinformasjon +Undergruppe(r) +Nøkkelord +gruppe(r) +W +Vekt +Filbeskrivelse +Kommentarer +Datainnsamlingskommentarer +file(r) +kontinuerlig +Fraskrivelse +Innhold +variable(r) +Andre produsenter +Produsenter og sponsorer +Kommentarer om datarensing +Distributør(er) +Oversikt +Sitatkrav +September +Kategori +Konfidensialitet +Statistikk +Mai +Uavklart +Struktur +fil +Tekst før spørsmål +Responsrate +Bredde +Omkodinger og utledninger +Serie +Oktober +Analyseenhet +Dataprosesseringskommentarer +Datatype +Fil +Tidsperiode(r) +Filinnhold +Ugyldig +Variabler +kont. 
+Nøkler +Spørsmål +Kilde for informasjon +Imputasjon +Sikkerhet +For å åpne denne filen trenger du følgende gratisverktøy +Andre ressurser +Dataordbok +Informasjon +Januar +Andre dokumenter +Minimum +Omfang +Metadataprodusenter +Vis mer informasjon +Datainnsamler(e) +Tekst etter spørsmål +Emner +Utvalgsprosedyre +Filstruktur +Variabelliste +Format +Utvalgskommentarer +Variabelgrupper +Beskrivelse +Kategorier +Maksimum +Utgiver(e) +August +NW +Forside +Vektet +Mars + total - viser et utvalg av +Land +spørsmålsdetaljer + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml new file mode 100644 index 00000000000..fdf14f5dfcd --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml @@ -0,0 +1,174 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 +Gyldige +Frekvenstabell ikke vist +Avledning +diskret +Type datainnsamling +Annen prosessering +Andre identifikatorer og krediteringer +Uten tittel +Identifisering +Click here to access/export data files from Nesstar format +Verdi +Prosent +Vekting +Primary Investigator(s) +Dette dokumentet ble generert av +Utvalg +Enheter +Tilgangsbetingelser +Kilde +Modaliteter +Rights & Disclaimer +Definisjon +Estimert utvalgsfeil +Datafiler +Geografisk omfang +April +Mean +Metadata-produksjon +Datainnsamling +Manglende data +Script og programmer +Variable(r) +Instruksjoner til intervjueren +Sponsor/finansierende institusjon(er) +November +Missing +Versjon +Univers +Bidragsyter(e) +Tilgangskontrollør +Dataprosessering og -evaluering +Omfang +Administrative dokumenter +Standardavvik +Kontaktperson(er) +Merkelapp +Tekniske dokumenter +Desimal(er) +Type +Spørsmålstekst +Begrep(er) +Rekkevidde +Sammendrag +Juni +Supervision +Andre former for dataevaluering +Referanser +Tilgjengelighet +Datainnsamlingsdatoer +Dataredigering +Spørreskjema +Gyldige enheter +Rapporter og analysedokumenter +Copyright +Dokumentasjon +Avvik fra utvalgsdesign +Utgiver(e) +Februar +Datasettet inneholder +Krediteringer +Kontinuerlig +Standardavvik +Variabelbeskrivelse +Produsent +Produksjonsdato + +The Explorer allows you to view data files and export them to common statistical formats +Diskret +Gruppe +Juli +Filnavn +Enheter +Navn +Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. +Statistiske tabeller +Desember +Emner +Prosesseringssjekk +programvare +Instruksjoner til intervjueren +Innholdsfortegnelse +Dokumentinformasjon +Undergruppe(r) +Nøkkelord +gruppe(r) +W +Vekt +Filbeskrivelse +Kommentarer +Datainnsamlingskommentarer +file(r) +kontinuerlig +Fraskrivelse +Innhold +variable(r) +Andre produsenter +Produsenter og sponsorer +Kommentarer om datarensing +Distributør(er) +Oversikt +Sitatkrav +September +Kategori +Konfidensialitet +Statistikk +Mai +Uavklart +Struktur +fil +Tekst før spørsmål +Responsrate +Bredde +Omkodinger og utledninger +Serie +Oktober +Analyseenhet +Dataprosesseringskommentarer +Datatype +Fil +Tidsperiode(r) +Filinnhold +Ugyldig +Variabler +kont. 
+Nøkler +Spørsmål +Kilde for informasjon +Imputasjon +Sikkerhet +For å åpne denne filen trenger du følgende gratisverktøy +Andre ressurser +Dataordbok +Informasjon +Januar +Andre dokumenter +Minimum +Omfang +Metadataprodusenter +Vis mer informasjon +Datainnsamler(e) +Tekst etter spørsmål +Emner +Utvalgsprosedyre +Filstruktur +Variabelliste +Format +Utvalgskommentarer +Variabelgrupper +Beskrivelse +Kategorier +Maksimum +Utgiver(e) +August +NW +Forside +Vektet +Mars + total - viser et utvalg av +Land +spørsmålsdetaljer + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml new file mode 100644 index 00000000000..06fde85af5e --- /dev/null +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml @@ -0,0 +1,169 @@ + + +Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 +Валидный +Частотная таблица не выводится +Расчет +дискретная +Способ сбора данных +Прочая обработка +Другие участники +Безымянный +Индентификация +Щелкните здесь, чтобы получить доступ к файлам или экспортировать их +Значение +Процент +Взвешивание +Первичный(е) исследователь(и) +Документ был сгенерирован с использованием +Выборка +Наблюдения +Условия доступа +Источник +Модальности +Авторские права и ограничения ответственности +Определение +Оценки ошибок выборки +Файлы данных +Географический охват +Апрель +Среднее +Разработка метаданных +Сбор данных +Пропущенные данные +Скрипты и программы +Переменная(ые) +Инструкции интервьюеру +Кто финансировал +Ноябрь +Пропущенные +Версия +Генеральная совокупность +Участник(и) +Права доступа +Обработка и инспекция данных +Охват +Административные документы +СтдОткл +Контак(ы) +Метка +Технические документы +Десятичные +Тип +Текст вопроса +Концепции +Диапазон +Резюме +Июнь +Контроль +Другие формы инспекции данных +Установки +Доступность +Даты сбора данных +Редактирование данных +Вопросники +Валидное(ые) наблюдение(я) +Отчеты и аналитические документы +Авторские права +Документация +Отклонения от дизайна выборки +Издатель(и) +Февраль +Набор данных содержит +Участник(и) +Непрерывная +Стандартное отклонение +Описание переменных +Разработчик +Дата разработки +Проводник позволяет просматривать файлы данных и экспортировать их в распространенные статистические форматы +Дикретная +Группа +Июль +Имя файла +Наблюдения +Имя +Статистичсекие таблицы +Декабрь +Темы +Контроль обработки +программное обеспечение +Инструкции интервьюеру +Оглавление +Информация о документе +Подгруппа(ы) +Ключевые слова +группа(ы) +B +Вес +Описание файла +Примечания +Примечания по сбору данных +файл(ы) +непрерывная +Ограничения ответственности +Содержание +переменная(ые) +Другие разработчики +Разработчики и спонсоры +Примечания по чистке данных +Дистрибьютор(ы) +Обзор +Требования по цитированию +Сентябрь +Категория +Конфиденциальность +Статистики +Май +Неопределенный +Структура +файл +Текст, предваряющий вопрос +Доля ответов +Ширина +Перекодировка и расчеты +Серия +Октябрь +Единица анализа +Примечания по обработке данных +Тип данных +Файл +Период(ы) времени +Содержание файла +Некорректный +Переменные +непр. 
+Ключ(и) +Вопрос +Источник информации +Импутация +Безопасность +Чтобы открыть этот файл, необходимо иметь свободным +Прочие источники +Словарь данных +Информация +Январь +Прочие документы +Минимум +Охват и покрытие +Разработчик(и) метаданных +Показать дополнительную информацию +Кто собирал данные +Текст после вопроса +Разделы +Процедура выборки +Структура файла +Список переменных +Формат +Примечания по выборке +Группа(ы) переменных +Описание +Категории +Максимум +Депозитор(ы) +Август +HB +Титульный лист +взвешенные +Март + diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl index 1c03d5caf34..c25c76246d6 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl @@ -66,8 +66,7 @@ Report optional text --> - - + @@ -110,7 +109,7 @@ - + @@ -124,6 +123,7 @@ Times + - 1 + 1 0 @@ -381,21 +381,21 @@ - + - + - + @@ -403,33 +403,33 @@ - + - + - + - + - + @@ -437,21 +437,21 @@ + select="$msg/*/entry[@key='Data_Processing_and_Appraisal']"/> - + - + @@ -461,7 +461,7 @@ - + @@ -486,7 +486,7 @@ - + @@ -503,7 +503,7 @@ - + @@ -528,7 +528,7 @@ - + @@ -569,7 +569,7 @@ - + @@ -642,7 +642,7 @@ - + @@ -657,7 +657,7 @@ border="{$default-border}" padding="{$cell-padding}"> + select="$msg/*/entry[@key='Metadata_Producers']"/> + select="$msg/*/entry[@key='Production_Date']"/> + select="$msg/*/entry[@key='Version']"/> + select="$msg/*/entry[@key='Identification']"/> - + @@ -745,7 +745,7 @@ - + @@ -764,14 +764,14 @@ - + - + @@ -782,7 +782,7 @@ + select="$msg/*/entry[@key='Scope_and_Coverage']"/> @@ -793,7 +793,7 @@ + select="$msg/*/entry[@key='Producers_and_Sponsors']"/> @@ -803,7 +803,7 @@ - + @@ -813,7 +813,7 @@ - + @@ -825,7 +825,7 @@ internal-destination="data-processing-and-appraisal" text-decoration="underline" color="blue"> + select="$msg/*/entry[@key='Data_Processing_and_Appraisal']"/> @@ -836,7 +836,7 @@ - + @@ -847,7 +847,7 @@ + select="$msg/*/entry[@key='Rights_and_Disclaimer']"/> @@ -858,7 +858,7 @@ + select="$msg/*/entry[@key='Files_Description']"/> @@ -891,7 +891,7 @@ - + @@ -924,7 +924,7 @@ - + @@ -949,7 +949,7 @@ + select="$msg/*/entry[@key='Variables_Description']"/> @@ -982,7 +982,7 @@ - + @@ -1002,7 +1002,7 @@ - + @@ -1065,7 +1065,7 @@ - + @@ -1078,7 +1078,7 @@ - + + select="$msg/*/entry[@key='Identification']"/> - @@ -1130,7 +1130,7 @@ select="/ddi:codeBook/ddi:stdyDscr/ddi:citation/ddi:verStmt/ddi:version"> : + select="$msg/*/entry[@key='Production_Date']"/>: @@ -1139,7 +1139,7 @@ - @@ -1156,7 +1156,7 @@ - + - + select="$msg/*/entry[@key='Kind_of_Data']"/> + select="$msg/*/entry[@key='Unit_of_Analysis']"/> + select="$msg/*/entry[@key='Scope_and_Coverage']"/> @@ -1257,7 +1257,7 @@ - + - @@ -1299,7 +1299,7 @@ - + - + @@ -1344,7 +1344,7 @@ - + @@ -1361,7 +1361,7 @@ border="{$default-border}" padding="{$cell-padding}"> - @@ -1422,7 +1422,7 @@ padding="{$cell-padding}"> @@ -1444,7 +1444,7 @@ padding="{$cell-padding}"> + select="$msg/*/entry[@key='Other_Producers']"/> + select="$msg/*/entry[@key='Funding_Agencies']"/> @@ -1514,7 +1514,7 @@ - @@ -1534,7 +1534,7 @@ padding="{$cell-padding}"> + select="$msg/*/entry[@key='Sampling']"/> + select="$msg/*/entry[@key='Sampling_Notes']"/> + select="$msg/*/entry[@key='Sampling_Procedure']"/> + select="$msg/*/entry[@key='Response_Rate']"/> + select="$msg/*/entry[@key='Weighting']"/> + select="$msg/*/entry[@key='Data_Collection']"/> @@ -1672,7 +1672,7 @@ padding="{$cell-padding}"> @@ -1694,7 +1694,7 @@ 
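The stylesheet hunks above restore lookups of the form $msg/*/entry[@key='...'] against the messages_*.properties.xml catalogs re-added in this patch. For illustration, an equivalent lookup done from Java; the class and method are hypothetical, and the catalogs are assumed to follow the java.util.Properties XML format with entry/@key:

    import java.io.InputStream;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.xpath.XPathFactory;
    import org.w3c.dom.Document;

    public class MessageLookupSketch {
        public static String lookup(String lang, String key) throws Exception {
            String path = "edu/harvard/iq/dataverse/ddi-pdf/messages_"
                    + lang + ".properties.xml";
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            // Avoid fetching the java.util.Properties DTD over the network,
            // assuming the catalogs declare one.
            dbf.setFeature(
                    "http://apache.org/xml/features/nonvalidating/load-external-dtd",
                    false);
            try (InputStream in = MessageLookupSketch.class.getClassLoader()
                    .getResourceAsStream(path)) {
                Document doc = dbf.newDocumentBuilder().parse(in);
                // Same shape as the stylesheet's $msg/*/entry[@key=...] select.
                return XPathFactory.newInstance().newXPath()
                        .evaluate("/*/entry[@key='" + key + "']", doc);
            }
        }
    }

Under those assumptions, lookup("en", "Weighted") would be expected to return the same English label the XSL-FO templates emit.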
padding="{$cell-padding}"> + select="$msg/*/entry[@key='Time_Periods']"/> @@ -1738,7 +1738,7 @@ border="{$default-border}" padding="{$cell-padding}"> + select="$msg/*/entry[@key='Questionnaires']"/> + select="$msg/*/entry[@key='Data_Collectors']"/> + select="$msg/*/entry[@key='Supervision']"/> - + @@ -1885,7 +1885,7 @@ border="{$default-border}" padding="{$cell-padding}"> + select="$msg/*/entry[@key='Data_Editing']"/> + select="$msg/*/entry[@key='Other_Processing']"/> + select="$msg/*/entry[@key='Accessibility']"/> @@ -1980,7 +1980,7 @@ padding="{$cell-padding}"> + select="$msg/*/entry[@key='Access_Authority']"/> - @@ -2022,7 +2022,7 @@ padding="{$cell-padding}"> + select="$msg/*/entry[@key='Distributors']"/> + select="$msg/*/entry[@key='Depositors']"/> + select="$msg/*/entry[@key='Confidentiality']"/> + select="$msg/*/entry[@key='Access_Conditions']"/> @@ -2142,7 +2142,7 @@ border="{$default-border}" padding="{$cell-padding}"> + select="$msg/*/entry[@key='Disclaimer']"/> + select="$msg/*/entry[@key='Copyright']"/> - + @@ -2194,15 +2194,15 @@ - + - + - + @@ -2218,7 +2218,7 @@ - + @@ -2227,16 +2227,16 @@ - + - + - - + + - + @@ -2262,17 +2262,17 @@ - + - + - + - + @@ -2290,7 +2290,7 @@ - + @@ -2299,17 +2299,17 @@ - + - + - + - + @@ -2327,7 +2327,7 @@ - + @@ -2336,7 +2336,7 @@ - + @@ -2372,7 +2372,7 @@ + select="$msg/*/entry[@key='Reports_and_analytical_documents']"/> @@ -2389,7 +2389,7 @@ space-after="0.03in"> - + @@ -2407,7 +2407,7 @@ + select="$msg/*/entry[@key='Technical_documents']"/> @@ -2425,7 +2425,7 @@ + select="$msg/*/entry[@key='Administrative_documents']"/> @@ -2442,7 +2442,7 @@ space-after="0.03in"> - + @@ -2459,7 +2459,7 @@ space-after="0.03in"> - + @@ -2477,7 +2477,7 @@ + select="$msg/*/entry[@key='Statistical_tables']"/> @@ -2495,7 +2495,7 @@ + select="$msg/*/entry[@key='Scripts_and_programs']"/> @@ -2512,7 +2512,7 @@ space-after="0.03in"> - + @@ -2532,7 +2532,7 @@ + select="$msg/*/entry[@key='Reports_and_analytical_documents']"/> - + - + - - + - + - + @@ -2602,7 +2602,7 @@ - + @@ -2611,7 +2611,7 @@ - + *** - + **** @@ -2749,7 +2749,7 @@ - # # @@ -2762,7 +2762,7 @@ - # # @@ -2776,18 +2776,18 @@ - + - : + : - :  + @@ -2811,7 +2811,7 @@ - + @@ -2823,7 +2823,7 @@ - + @@ -2835,7 +2835,7 @@ - + @@ -2847,7 +2847,7 @@ - + @@ -2859,7 +2859,7 @@ - + @@ -2871,7 +2871,7 @@ - + @@ -2905,10 +2905,10 @@ - + - + @@ -2955,7 +2955,7 @@ - + : @@ -3059,7 +3059,7 @@ - + @@ -3080,7 +3080,7 @@ - + @@ -3092,7 +3092,7 @@ - + @@ -3104,7 +3104,7 @@ - + @@ -3116,7 +3116,7 @@ - + @@ -3239,13 +3239,13 @@ - + - + - + @@ -3322,7 +3322,7 @@   - + @@ -3369,7 +3369,7 @@ : - + @@ -3382,7 +3382,7 @@ - + [= + select="$msg/*/entry[@key='Type']"/>= + select="$msg/*/entry[@key='discrete']"/> + select="$msg/*/entry[@key='continuous']"/> ] @@ -3404,7 +3404,7 @@ [== - @@ -3419,13 +3419,13 @@ [= = - ] [ - + =* / @@ -3437,7 +3437,7 @@ @@ -3452,12 +3452,12 @@ - + [ + select="$msg/*/entry[@key='Abbrev_NotWeighted']"/> / - + ] @@ -3470,18 +3470,18 @@ - + select="$msg/*/entry[@key='Invalid']"/> - + - @@ -3517,7 +3517,7 @@ - + - + - + - + - + - + @@ -3638,7 +3638,7 @@ - + + select="$msg/*/entry[@key='Recoding_and_Derivation']"/> - + - + - + - - + + - @@ -3798,7 +3798,7 @@ padding="{$cell-padding}" text-align="center"> + select="$msg/*/entry[@key='Cases_Abbreviation']"/> @@ -3806,7 +3806,7 @@ padding="{$cell-padding}" text-align="center"> + select="$msg/*/entry[@key='Weighted']"/> @@ -3814,9 +3814,9 @@ padding="{$cell-padding}" text-align="center"> + select="$msg/*/entry[@key='Percentage']"/> () + 
select="$msg/*/entry[@key='Weighted']"/>) @@ -3950,16 +3950,16 @@ + select="$msg/*/entry[@key='SumStat_Warning']"/> - + ( - ) + ) @@ -4003,7 +4003,7 @@ - *** + *** *** @@ -4055,7 +4055,7 @@ - + @@ -4070,7 +4070,7 @@ - + @@ -4085,7 +4085,7 @@ - + @@ -4100,7 +4100,7 @@ - + @@ -4162,38 +4162,38 @@ - + - + - + - + - + - + - + @@ -4257,40 +4257,40 @@ - + - + - + - + - + - + - + - + - + - + - + - + From 3764bb2358aac756ed7bf97e052441836d5512e8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 8 Nov 2023 14:34:48 -0500 Subject: [PATCH 09/85] fix guestbook preview stop refresh of datasetForm --- src/main/webapp/dataset-license-terms.xhtml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index c54d94442ea..6d5b0a5fe4f 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -482,7 +482,7 @@
- #{bundle['file.dataFilesTab.terms.list.guestbook']}   + #{bundle['file.dataFilesTab.terms.list.guestbook']}  
@@ -524,7 +524,7 @@ + update=":datasetForm:previewGuestbook" oncomplete="PF('viewGuestbook').show();"/>
@@ -563,7 +563,7 @@ + update=":datasetForm:previewGuestbook" oncomplete="PF('viewGuestbook').show();"/> From 528e25a50c74976449ad306c7e61b39906671ea1 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 2 Feb 2024 14:22:58 -0500 Subject: [PATCH 10/85] xsl --- .../edu/harvard/iq/dataverse/ddi-to-fo.xsl | 52 +++---------------- 1 file changed, 7 insertions(+), 45 deletions(-) diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl index c25c76246d6..cb622912960 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl @@ -109,7 +109,7 @@ - + @@ -123,7 +123,7 @@ Times - + - 1 + 1 0 @@ -3884,50 +3884,17 @@ select="$catgry-freq div $catgry-sum-freq"/> - - - - - - - - - - - - - - - - - - - - 0 - - - - - + + + test="not(@missing='Y')"> - - - - - - + @@ -3947,11 +3914,6 @@ - - - From 60a2795951aa665819cfecea92c4b9e4e0a63b07 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 2 Feb 2024 17:09:24 -0500 Subject: [PATCH 11/85] xsl instead xslt --- src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl index cb622912960..26fd7c23479 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl +++ b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl @@ -123,7 +123,7 @@ Times - + - ${payara.version} From 56e2f9c7b0d8d437a9aed465ab1ef0cdbc98365b Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:30:46 +0200 Subject: [PATCH 15/85] docs: fix link to Payara docs to not include a version We should avoid hardcoding the version into the link to avoid stale information. --- doc/sphinx-guides/source/admin/monitoring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/monitoring.rst b/doc/sphinx-guides/source/admin/monitoring.rst index ef306c88c6f..16bb18b7ad2 100644 --- a/doc/sphinx-guides/source/admin/monitoring.rst +++ b/doc/sphinx-guides/source/admin/monitoring.rst @@ -154,7 +154,7 @@ Tips: MicroProfile Metrics endpoint ----------------------------- -Payara provides the metrics endpoint: _ +Payara provides the metrics endpoint: _ The metrics you can retrieve that way: - `index_permit_wait_time_seconds_mean` displays how long does it take to receive a permit to index a dataset. - `index_time_seconds` displays how long does it take to index a dataset. From f7b6d94cedbe7b3d9f71279e99b546cb631f933d Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:32:08 +0200 Subject: [PATCH 16/85] docs: update to reference Payara 6.2024.4 as requirement --- doc/sphinx-guides/source/developers/classic-dev-env.rst | 6 +++--- doc/sphinx-guides/source/installation/prerequisites.rst | 6 +++--- doc/sphinx-guides/source/qa/test-automation.md | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/developers/classic-dev-env.rst b/doc/sphinx-guides/source/developers/classic-dev-env.rst index 82e10b727ef..a2f35acfe70 100755 --- a/doc/sphinx-guides/source/developers/classic-dev-env.rst +++ b/doc/sphinx-guides/source/developers/classic-dev-env.rst @@ -93,15 +93,15 @@ On Linux, install ``jq`` from your package manager or download a binary from htt Install Payara ~~~~~~~~~~~~~~ -Payara 6.2023.8 or higher is required. +Payara 6.2024.4 or higher is required. 
To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip`` +``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip`` -``sudo unzip payara-6.2023.8.zip`` +``sudo unzip payara-6.2024.4.zip`` ``sudo chown -R $USER /usr/local/payara6`` diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index a56f4811ace..9f88dd7ecfb 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 17 the default with the ``alternatives`` co Payara ------ -Payara 6.2023.8 is recommended. Newer versions might work fine. Regular updates are recommended. +Payara 6.2024.4 is recommended. Newer versions might work fine. Regular updates are recommended. Installing Payara ================= @@ -55,8 +55,8 @@ Installing Payara - Download and install Payara (installed in ``/usr/local/payara6`` in the example commands below):: - # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip - # unzip payara-6.2023.8.zip + # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip + # unzip payara-6.2024.4.zip # mv payara6 /usr/local If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index 708d0f88e23..d2de33b48a5 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -52,7 +52,7 @@ Go to the end of the log and then scroll up, looking for the failure. A failed A ``` TASK [dataverse : download payara zip] ***************************************** -fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip"} ``` In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! 
From b061b91ec711511e1be5b0d97ef9b08a56e59367 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:32:50 +0200 Subject: [PATCH 17/85] build: upgrade to Payara 6.2024.4 #10494 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 612902b47a4..f857c105a33 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -148,7 +148,7 @@ -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} - 6.2023.8 + 6.2024.4 42.7.2 9.3.0 1.12.290 From 76a9ef3f485dc8d70ec8bc0afcdfedd1daa2755e Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 14:36:08 +0200 Subject: [PATCH 18/85] build: remove phased out Payara dependency repo on GitHub --- modules/dataverse-parent/pom.xml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index f857c105a33..b2ce8c2069c 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -386,18 +386,6 @@ false - - - payara-patched-externals - Payara Patched Externals - https://raw.github.com/payara/Payara_PatchedProjects/master - - true - - - false - - central-repo Central Repository From 77a5c41939856ff7a91f611d32b88e7b65c3efc1 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 15:21:17 +0200 Subject: [PATCH 19/85] refactor(install): remove workaround for FISH-7722 No longer necessary as the required add-opens has been included in upstream --- scripts/installer/as-setup.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index 34deddf51a3..eb8c72973c8 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -117,9 +117,6 @@ function preliminary_setup() ./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.timerServer=true" - # Workaround for FISH-7722: Failed to deploy war with @Stateless https://github.com/payara/Payara/issues/6337 - ./asadmin $ASADMIN_OPTS create-jvm-options --add-opens=java.base/java.io=ALL-UNNAMED - # enable comet support ./asadmin $ASADMIN_OPTS set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled="true" From 222b326aab6be19be9d7bc8907504801bc362343 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Mon, 15 Apr 2024 15:46:57 +0200 Subject: [PATCH 20/85] doc(deps): add release note about Payara upgrade #10494 --- doc/release-notes/10494-payara-upgrade.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 doc/release-notes/10494-payara-upgrade.md diff --git a/doc/release-notes/10494-payara-upgrade.md b/doc/release-notes/10494-payara-upgrade.md new file mode 100644 index 00000000000..050faee1f3e --- /dev/null +++ b/doc/release-notes/10494-payara-upgrade.md @@ -0,0 +1,6 @@ +# Upgrade Payara to v6.2024.4 + +With this version of Dataverse, we encourage you to upgrade to version 6.2024.4. +This will address security issues accumulated since the release of 6.2023.8, which was required since Dataverse release 6.0. + +If you are using GDCC containers, this upgrade is included when pulling new release images. 
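As a quick post-upgrade smoke test, the MicroProfile metrics endpoint referenced in the monitoring guide above can be polled to confirm that the upgraded Payara is serving the application and exposing the indexing metrics. The sketch below is illustrative and not part of the patch series; it assumes a local instance serving metrics at http://localhost:8080/metrics (the default MicroProfile path) and filters for the two metric names given in the guide.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Hedged sketch: host, port, and path are assumptions for a local install.
public class PayaraMetricsSmokeCheck {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/metrics"))
                .build();
        HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println("HTTP " + response.statusCode());
        // Depending on the MicroProfile version, metric names may carry an
        // "application_" prefix in the Prometheus output, so match by substring.
        response.body().lines()
                .filter(line -> line.contains("index_time_seconds")
                        || line.contains("index_permit_wait_time_seconds"))
                .forEach(System.out::println);
    }
}
```

A 200 response that includes these lines indicates the upgraded server deployed the application and is exposing its metrics as expected.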
From e9bf15effbd1ea8d985b10aa46934d0f03303d98 Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 3 Apr 2024 14:24:25 +0200 Subject: [PATCH 21/85] 9276 - CVOC : allow customized mapping of indexed fields of cvoc configuration + handle ontoportal json formats from externalvocabularyvalue --- doc/release-notes/9276-doc-cvoc-index-in.md | 8 ++ .../iq/dataverse/DatasetFieldServiceBean.java | 93 ++++++++++++------- .../iq/dataverse/search/IndexServiceBean.java | 40 +++++++- 3 files changed, 107 insertions(+), 34 deletions(-) create mode 100644 doc/release-notes/9276-doc-cvoc-index-in.md diff --git a/doc/release-notes/9276-doc-cvoc-index-in.md b/doc/release-notes/9276-doc-cvoc-index-in.md new file mode 100644 index 00000000000..5c4dd4ca10f --- /dev/null +++ b/doc/release-notes/9276-doc-cvoc-index-in.md @@ -0,0 +1,8 @@ +## Release Highlights + +### Updates on Support for External Vocabulary Services + +#### Indexed field accuracy + +For more relevant indexing, you can now map external vocabulary values to a `managed-fields` of a [:CVocConf setting](https://guides.dataverse.org/en/6.3/installation/config.html#cvocconf) by adding the key `indexIn` in `retrieval-filtering`. +For more information, please check [GDCC/dataverse-external-vocab-support documentation](https://github.com/gdcc/dataverse-external-vocab-support/tree/main/docs). \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 6223cd83773..b1717431e41 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -41,6 +41,7 @@ import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.httpclient.HttpException; +import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpResponse; import org.apache.http.HttpResponseInterceptor; import org.apache.http.client.methods.HttpGet; @@ -321,14 +322,15 @@ public Map getCVocConf(boolean byTermUriField){ + jo.getString("term-uri-field")); } } - if (jo.containsKey("child-fields")) { - JsonArray childFields = jo.getJsonArray("child-fields"); - for (JsonString elm : childFields.getValuesAs(JsonString.class)) { - dft = findByNameOpt(elm.getString()); - logger.info("Found: " + dft.getName()); + if (jo.containsKey("managed-fields")) { + JsonObject managedFields = jo.getJsonObject("managed-fields"); + for (String s : managedFields.keySet()) { + dft = findByNameOpt(managedFields.getString(s)); if (dft == null) { logger.warning("Ignoring External Vocabulary setting for non-existent child field: " - + elm.getString()); + + managedFields.getString(s)); + } else { + logger.info("Found: " + dft.getName()); } } } @@ -345,7 +347,7 @@ public Map getCVocConf(boolean byTermUriField){ * @param df - the primitive/parent compound field containing a newly saved value */ public void registerExternalVocabValues(DatasetField df) { - DatasetFieldType dft =df.getDatasetFieldType(); + DatasetFieldType dft = df.getDatasetFieldType(); logger.fine("Registering for field: " + dft.getName()); JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); if (dft.isPrimitive()) { @@ -366,38 +368,48 @@ public void registerExternalVocabValues(DatasetField df) { } } } - + /** - * Retrieves indexable strings from a cached externalvocabularyvalue entry. 
- * - * This method assumes externalvocabularyvalue entries have been filtered and - * the externalvocabularyvalue entry contain a single JsonObject whose "personName" or "termName" values - * are either Strings or an array of objects with "lang" and ("value" or "content") keys. The - * string, or the "value/content"s for each language are added to the set. - * + * Retrieves indexable strings from a cached externalvocabularyvalue entry filtered through retrieval-filtering configuration. + *

+ * This method externalvocabularyvalue entries have been filtered and contains a single JsonObject. + * Is handled : Strings, Array of Objects with "lang" and ("value" or "content") keys, Object with Strings as value or Object with Array of Strings as value. + * The string, or the "value/content"s for each language are added to the set. + * This method can retrieve string values to be indexed in term-uri-field (parameter defined in CVOC configuration) or in "indexIn" field (optional parameter of retrieval-filtering defined in CVOC configuration). + *

* Any parsing error results in no entries (there can be unfiltered entries with * unknown structure - getting some strings from such an entry could give fairly * random info that would be bad to addd for searches, etc.) - * - * @param termUri + * + * @param termUri unique identifier to search in database + * @param cvocEntry related cvoc configuration + * @param indexingField name of solr field that will be filled with getStringsFor while indexing * @return - a set of indexable strings */ - public Set getStringsFor(String termUri) { - Set strings = new HashSet(); + public Set getIndexableStringsByTermUri(String termUri, JsonObject cvocEntry, String indexingField) { + Set strings = new HashSet<>(); JsonObject jo = getExternalVocabularyValue(termUri); + JsonObject filtering = cvocEntry.getJsonObject("retrieval-filtering"); + String termUriField = cvocEntry.getJsonString("term-uri-field").getString(); if (jo != null) { try { for (String key : jo.keySet()) { - if (key.equals("termName") || key.equals("personName")) { + String indexIn = filtering.getJsonObject(key).getString("indexIn", null); + // Either we are in mapping mode so indexingField (solr field) equals indexIn (cvoc config) + // Or we are in default mode indexingField is termUriField, indexIn is not defined then only termName and personName keys are used + if (indexingField.equals(indexIn) || + (indexIn == null && termUriField.equals(indexingField) && (key.equals("termName")) || key.equals("personName"))) { JsonValue jv = jo.get(key); if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { logger.fine("adding " + jo.getString(key) + " for " + termUri); strings.add(jo.getString(key)); - } else { - if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { - JsonArray jarr = jv.asJsonArray(); - for (int i = 0; i < jarr.size(); i++) { + } else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { + JsonArray jarr = jv.asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + strings.add(jarr.getString(i)); + } else if (jarr.get(i).getValueType().equals(ValueType.OBJECT)) { // This condition handles SKOMOS format like [{"lang": "en","value": "non-apis bee"},{"lang": "fr","value": "abeille non apis"}] JsonObject entry = jarr.getJsonObject(i); if (entry.containsKey("value")) { logger.fine("adding " + entry.getString("value") + " for " + termUri); @@ -409,6 +421,22 @@ public Set getStringsFor(String termUri) { } } } + } else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) { + JsonObject joo = jv.asJsonObject(); + for (Map.Entry entry : joo.entrySet()) { + if (entry.getValue().getValueType().equals(JsonValue.ValueType.STRING)) { // This condition handles format like { "fr": "association de quartier", "en": "neighborhood associations"} + logger.fine("adding " + joo.getString(entry.getKey()) + " for " + termUri); + strings.add(joo.getString(entry.getKey())); + } else if (entry.getValue().getValueType().equals(ValueType.ARRAY)) { // This condition handles format like {"en": ["neighbourhood societies"]} + JsonArray jarr = entry.getValue().asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + logger.fine("adding " + jarr.getString(i) + " for " + termUri); + strings.add(jarr.getString(i)); + } + } + } + } } } } @@ -420,7 +448,7 @@ public Set getStringsFor(String termUri) { } logger.fine("Returning " + String.join(",", strings) + " for " + termUri); return strings; - } + } /** * 
Perform a query to retrieve a cached value from the externalvocabularvalue table @@ -454,10 +482,11 @@ public JsonObject getExternalVocabularyValue(String termUri) { public void registerExternalTerm(JsonObject cvocEntry, String term) { String retrievalUri = cvocEntry.getString("retrieval-uri"); String prefix = cvocEntry.getString("prefix", null); - if(term.isBlank()) { - logger.fine("Ingoring blank term"); + if(StringUtils.isBlank(term)) { + logger.fine("Ignoring blank term"); return; } + boolean isExternal = false; JsonObject vocabs = cvocEntry.getJsonObject("vocabs"); for (String key: vocabs.keySet()) { @@ -512,7 +541,7 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep if (statusCode == 200) { logger.fine("Returned data: " + data); try (JsonReader jsonReader = Json.createReader(new StringReader(data))) { - String dataObj =filterResponse(cvocEntry, jsonReader.readObject(), term).toString(); + String dataObj = filterResponse(cvocEntry, jsonReader.readObject(), term).toString(); evv.setValue(dataObj); evv.setLastUpdateDate(Timestamp.from(Instant.now())); logger.fine("JsonObject: " + dataObj); @@ -543,7 +572,7 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep * Parse the raw value returned by an external service for a give term uri and * filter it according to the 'retrieval-filtering' configuration for this * DatasetFieldType, creating a Json value with the specified structure - * + * * @param cvocEntry - the config for this DatasetFieldType * @param readObject - the raw response from the service * @param termUri - the term uri @@ -602,6 +631,8 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S if (pattern.equals("{0}")) { if (vals.get(0) instanceof JsonArray) { job.add(filterKey, (JsonArray) vals.get(0)); + } else if (vals.get(0) instanceof JsonObject) { + job.add(filterKey, (JsonObject) vals.get(0)); } else { job.add(filterKey, (String) vals.get(0)); } @@ -639,7 +670,7 @@ Object processPathSegment(int index, String[] pathParts, JsonValue curPath, Stri String[] keyVal = pathParts[index].split("="); logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]); String expected = keyVal[1]; - + if (!expected.equals("*")) { if (expected.equals("@id")) { expected = termUri; @@ -668,7 +699,7 @@ Object processPathSegment(int index, String[] pathParts, JsonValue curPath, Stri } return parts.build(); } - + } else { curPath = ((JsonObject) curPath).get(pathParts[index]); logger.fine("Found next Path object " + curPath.toString()); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6b3fd8c339..cf8a37e0a80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -922,6 +922,20 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set langs = settingsService.getConfiguredLanguages(); Map cvocMap = datasetFieldService.getCVocConf(true); + Map> cvocManagedFieldMap = new HashMap<>(); + for (Map.Entry cvocEntry : cvocMap.entrySet()) { + if(cvocEntry.getValue().containsKey("managed-fields")) { + JsonObject managedFields = cvocEntry.getValue().getJsonObject("managed-fields"); + Set managedFieldValues = new HashSet<>(); + for (String s : managedFields.keySet()) { + managedFieldValues.add(managedFields.getString(s)); + } + cvocManagedFieldMap.put(cvocEntry.getKey(), 
managedFieldValues); + } + } + + + Set metadataBlocksWithValue = new HashSet<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { @@ -996,19 +1010,39 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set vals = dsf.getValues_nondisplay(); - Set searchStrings = new HashSet(); + Set searchStrings = new HashSet<>(); for (String val: vals) { searchStrings.add(val); - searchStrings.addAll(datasetFieldService.getStringsFor(val)); + // Try to get string values from externalvocabularyvalue using val as termUri + searchStrings.addAll(datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), dsfType.getName())); + + if(dsfType.getParentDatasetFieldType()!=null) { + List childDatasetFields = dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields(); + for (DatasetField df : childDatasetFields) { + if(cvocManagedFieldMap.get(dsfType.getId()).contains(df.getDatasetFieldType().getName())) { + String solrManagedFieldSearchable = df.getDatasetFieldType().getSolrField().getNameSearchable(); + // Try to get string values from externalvocabularyvalue but for a managed fields of the CVOCConf + Set stringsForManagedField = datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), df.getDatasetFieldType().getName()); + logger.fine(solrManagedFieldSearchable + " filled with externalvocabularyvalue : " + stringsForManagedField); + //.addField works as addition of value not a replace of value + // it allows to add mapped values by CVOCConf before or after indexing real DatasetField value(s) of solrManagedFieldSearchable + solrInputDocument.addField(solrManagedFieldSearchable, stringsForManagedField); + } + } + } } + logger.fine(solrFieldSearchable + " filled with externalvocabularyvalue : " + searchStrings); solrInputDocument.addField(solrFieldSearchable, searchStrings); if (dsfType.getSolrField().isFacetable()) { + logger.fine(solrFieldFacetable + " gets " + vals); solrInputDocument.addField(solrFieldFacetable, vals); } } + if (dsfType.isControlledVocabulary()) { /** If the cvv list is empty but the dfv list is not then it is assumed this was harvested * from an installation that had controlled vocabulary entries that don't exist in our this db From 6b2e111142a39083db578d429394375dc061ce60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Tue, 23 Apr 2024 12:08:42 +0200 Subject: [PATCH 22/85] Add unit tests for "getIndexableStringsByTermUri" method --- .../DatasetFieldServiceBeanTest.java | 179 ++++++++++++++++++ src/test/resources/json/cvoc-agroportal.json | 76 ++++++++ src/test/resources/json/cvoc-orcid.json | 43 +++++ src/test/resources/json/cvoc-skosmos.json | 69 +++++++ 4 files changed, 367 insertions(+) create mode 100644 src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java create mode 100644 src/test/resources/json/cvoc-agroportal.json create mode 100644 src/test/resources/json/cvoc-orcid.json create mode 100644 src/test/resources/json/cvoc-skosmos.json diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java new file mode 100644 index 00000000000..873d417131d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java @@ -0,0 +1,179 @@ +package edu.harvard.iq.dataverse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import 
java.nio.file.Paths; +import java.util.Collections; +import java.util.Set; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.AdditionalMatchers; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import jakarta.json.Json; +import jakarta.json.JsonObject; + +public class DatasetFieldServiceBeanTest { + + private DatasetFieldServiceBean datasetFieldServiceBean; + + static String getCvocJson(String pathToJsonFile) throws IOException { + final File datasetVersionJson = new File(pathToJsonFile); + return new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + } + + @BeforeEach + void setUp() { + this.datasetFieldServiceBean = Mockito.spy(new DatasetFieldServiceBean()); + } + + @AfterEach + void tearDown() { + this.datasetFieldServiceBean = null; + } + + @Test + void getIndexableStringsByTermUriSkosmos() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_2389"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-skosmos.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "faux bourdon")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone (insects)"))) + .add("vocabularyUri", "http://aims.fao.org/aos/agrovoc") + .add("synonyms", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Abeille mâle")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone honey bees"))) + .add("genericTerm", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Colonie d'abeilles")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "bee colonies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordTermURL + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Set.of("faux bourdon", "drone (insects)"), result); + + // keywordValue + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result, "Only 'keywordTermURL' must return values for Skosmos"); + + // Any others field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, "Only 'keywordTermURL' must return values for Skosmos"); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriAgroportal() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_50265"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-agroportal.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createObjectBuilder() + .add("fr", 
"association de quartier") + .add("en", "neighborhood associations")) + .add("vocabularyName", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("vocabularyUri", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("synonyms", Json.createObjectBuilder() + .add("en", Json.createArrayBuilder().add("neighborhood societies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordValue + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Set.of("association de quartier", "neighborhood associations", "neighborhood societies"), result); + + // keywordTermURL + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Any others field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriOrcid() throws IOException { + String fieldName = "creator"; + String termURI = "https://orcid.org/0000-0003-4217-153X"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-orcid.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("scheme", "ORCID") + .add("@type", "https://schema.org/Person") + .add("personName", "Doe, John") + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // ORCID match with "personName" field into "getIndexableStringsByTermUri" method + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Set.of("Doe, John"), result); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, fieldName); + assertEquals(Collections.emptySet(), result); + } + + /** + * Prepare unit tests with mock methods. + * + * @param fieldName "field-name" into cvoc configuration file + * @param jsonPath path of the JSON configuration file: src/test/resources/json/... 
+ * @return {@link JsonObject} representing the configuration file + * @throws IOException in case on read error on configuration file + */ + JsonObject prepare(String fieldName, String jsonPath) throws IOException { + Long dftId = Long.parseLong("1"); + // DatasetFieldType name corresponding to "field-name" into cvoc configuration file + DatasetFieldType dft = new DatasetFieldType(fieldName, DatasetFieldType.FieldType.NONE, true); + dft.setId(dftId); + + Mockito.doReturn(dft).when(datasetFieldServiceBean).findByNameOpt(fieldName); + Mockito.doReturn(null).when(datasetFieldServiceBean).findByNameOpt(AdditionalMatchers.not(Mockito.eq(fieldName))); + + SettingsServiceBean settingsService = Mockito.mock(SettingsServiceBean.class); + Mockito.when(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)).thenReturn(getCvocJson(jsonPath)); + datasetFieldServiceBean.settingsService = settingsService; + + return datasetFieldServiceBean.getCVocConf(false).get(dftId); + } + +} diff --git a/src/test/resources/json/cvoc-agroportal.json b/src/test/resources/json/cvoc-agroportal.json new file mode 100644 index 00000000000..03c9e2f4d07 --- /dev/null +++ b/src/test/resources/json/cvoc-agroportal.json @@ -0,0 +1,76 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://data.agroportal.lirmm.fr/", + "js-url": "https://domain.tld/assets/cvoc/ontoportal.js", + "headers": { + "Authorization": "apikey token=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" + }, + "protocol": "ontoportal", + "retrieval-uri": "https://data.agroportal.lirmm.fr/ontologies/{keywordVocabulary}/classes/{encodeUrl:keywordTermURL}?language=en,fr", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "AGROVOC": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/AGROVOC", + "uriSpace": "http" + }, + "ONTOBIOTOPE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/ONTOBIOTOPE", + "uriSpace": "http" + }, + "CROPUSAGE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/CROPUSAGE", + "uriSpace": "http" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/prefLabel" + ], + "indexIn": "keywordValue" + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/synonym" + ], + "indexIn": "keywordValue" + } + } + } +] diff --git a/src/test/resources/json/cvoc-orcid.json b/src/test/resources/json/cvoc-orcid.json new file mode 100644 index 00000000000..6b904aefc3f --- /dev/null +++ b/src/test/resources/json/cvoc-orcid.json @@ -0,0 +1,43 @@ +[ + { + "field-name": "creator", + "term-uri-field": "creator", + "js-url": "https://gdcc.github.io/dataverse-external-vocab-support/scripts/people.js", + "protocol": "orcid", + "retrieval-uri": "https://pub.orcid.org/v3.0/{0}/person", + "allow-free-text": true, + "prefix": "https://orcid.org/", + "managed-fields": {}, + "languages": "", + "vocabs": { + 
"orcid": { + "uriSpace": "https://orcid.org/" + } + }, + "retrieval-filtering": { + "@context": { + "personName": "https://schema.org/name", + "scheme": "http://www.w3.org/2004/02/skos/core#inScheme" + }, + "personName": { + "pattern": "{0}, {1}", + "params": [ + "/name/family-name/value", + "/name/given-names/value" + ] + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "scheme": { + "pattern": "ORCID" + }, + "@type": { + "pattern": "https://schema.org/Person" + } + } + } +] diff --git a/src/test/resources/json/cvoc-skosmos.json b/src/test/resources/json/cvoc-skosmos.json new file mode 100644 index 00000000000..6d32b29f054 --- /dev/null +++ b/src/test/resources/json/cvoc-skosmos.json @@ -0,0 +1,69 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://demo.skosmos.org/", + "js-url": "https://github.com/gdcc/dataverse-external-vocab-support/blob/main/scripts/skosmos.js", + "protocol": "skosmos", + "retrieval-uri": "https://demo.skosmos.org/rest/v1/data?uri={0}", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "agrovoc": { + "vocabularyUri": "http://aims.fao.org/vest-registry/kos/agrovoc", + "uriSpace": "http://aims.fao.org/aos/agrovoc/" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/prefLabel" + ] + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/prefLabel" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/uri" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/altLabel" + ] + }, + "genericTerm": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:Concept/prefLabel" + ] + } + } + } +] From adf50744e42e44ad5f4f259e43c490859b7e8e0e Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Thu, 2 May 2024 11:36:28 +0200 Subject: [PATCH 23/85] Update documentations related to PR 'CVOC : Indexed field accuracy (Ontoportal integration) #10505' --- doc/release-notes/9276-doc-cvoc-index-in.md | 16 +++++++++++++--- .../source/admin/metadatacustomization.rst | 6 ++++-- .../iq/dataverse/DatasetFieldServiceBean.java | 10 +++++----- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/9276-doc-cvoc-index-in.md b/doc/release-notes/9276-doc-cvoc-index-in.md index 5c4dd4ca10f..78289201511 100644 --- a/doc/release-notes/9276-doc-cvoc-index-in.md +++ b/doc/release-notes/9276-doc-cvoc-index-in.md @@ -2,7 +2,17 @@ ### Updates on Support for External Vocabulary Services -#### Indexed field accuracy +Multiple extensions of the External Vocabulary mechanism have been added. These extensions allow interaction with services based on the Ontoportal software and are expected to be generally useful for other service types. -For more relevant indexing, you can now map external vocabulary values to a `managed-fields` of a [:CVocConf setting](https://guides.dataverse.org/en/6.3/installation/config.html#cvocconf) by adding the key `indexIn` in `retrieval-filtering`. 
-For more information, please check [GDCC/dataverse-external-vocab-support documentation](https://github.com/gdcc/dataverse-external-vocab-support/tree/main/docs). \ No newline at end of file +These changes include: + +#### Improved Indexing with Compound Fields + +When using an external vocabulary service with compound fields, you can now specify which field(s) will include additional indexed information, such as translations of an entry into other languages. This is done by adding the `indexIn` key in `retrieval-filtering`. (#10505) +For more information, please check the [GDCC/dataverse-external-vocab-support documentation](https://github.com/gdcc/dataverse-external-vocab-support/tree/main/docs). + +#### Broader Support for Indexing Service Responses + +Indexing of the results from `retrieval-filtering` responses can now handle additional formats, including JSON Arrays of Strings and values from arbitrary keys within a JSON Object. (#10505) + +**** This documentation must be merged with 9276-allow-flexible-params-in-retrievaluri-cvoc.md (#10404) \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 66911aa0ad1..e70cf0e0897 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -552,6 +552,8 @@ Great care must be taken when reloading a metadata block. Matching is done on fi The ability to reload metadata blocks means that SQL update scripts don't need to be written for these changes. See also the :doc:`/developers/sql-upgrade-scripts` section of the Developer Guide. +.. _using-external-vocabulary-services: + Using External Vocabulary Services ---------------------------------- @@ -577,9 +579,9 @@ In general, the external vocabulary support mechanism may be a better choice for The specifics of the user interface for entering/selecting a vocabulary term and how that term is then displayed are managed by third-party Javascripts. The initial Javascripts that have been created provide auto-completion, displaying a list of choices that match what the user has typed so far, but other interfaces, such as displaying a tree of options for a hierarchical vocabulary, are possible. Similarly, existing scripts do relatively simple things for displaying a term - showing the term's name in the appropriate language and providing a link to an external URL with more information, but more sophisticated displays are possible. -Scripts supporting use of vocabularies from services supporting the SKOMOS protocol (see https://skosmos.org) and retrieving ORCIDs (from https://orcid.org) are available https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) +Scripts supporting use of vocabularies from services implementing the Skosmos protocol (see https://skosmos.org), retrieving ORCIDs (from https://orcid.org), services based on the OntoPortal product (see https://ontoportal.org/), and using ROR (https://ror.org/) are available at https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used, and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.)
-Configuration involves specifying which fields are to be mapped, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters. +Configuration involves specifying which fields are to be mapped, to which Solr field they should be indexed, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters, including the ability to send HTTP headers on calls to the service. These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing. The scripts required can be hosted locally or retrieved dynamically from https://gdcc.github.io/ (similar to how dataverse-previewers work). diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index b1717431e41..43648fa3b6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -330,7 +330,7 @@ public Map getCVocConf(boolean byTermUriField){ logger.warning("Ignoring External Vocabulary setting for non-existent child field: " + managedFields.getString(s)); } else { - logger.info("Found: " + dft.getName()); + logger.fine("Found: " + dft.getName()); } } } @@ -372,10 +372,10 @@ public void registerExternalVocabValues(DatasetField df) { /** * Retrieves indexable strings from a cached externalvocabularyvalue entry filtered through retrieval-filtering configuration. *

- * This method externalvocabularyvalue entries have been filtered and contains a single JsonObject. - * Is handled : Strings, Array of Objects with "lang" and ("value" or "content") keys, Object with Strings as value or Object with Array of Strings as value. - * The string, or the "value/content"s for each language are added to the set. - * This method can retrieve string values to be indexed in term-uri-field (parameter defined in CVOC configuration) or in "indexIn" field (optional parameter of retrieval-filtering defined in CVOC configuration). + * This method assumes externalvocabularyvalue entries have been filtered and that they contain a single JsonObject. + * Cases handled: a String, an Array of Strings, an Array of Objects with "value" or "content" keys, and an Object whose entries have String values or Arrays of String values. + * The string(s), or the "value/content"s for each language, are added to the set. + * Retrieved string values are indexed in the term-uri-field (parameter defined in the CVOC configuration) by default, or in the field specified by an optional "indexIn" parameter in the retrieval-filtering section of the CVOC configuration. *

* Any parsing error results in no entries (there can be unfiltered entries with * unknown structure - getting some strings from such an entry could give fairly From d441361fbca99190260eca70a9aa082f5d9d2a80 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 14 May 2024 14:15:17 -0400 Subject: [PATCH 24/85] add default-provider --- scripts/installer/as-setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index 34deddf51a3..8007b86a392 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -111,6 +111,7 @@ function preliminary_setup() ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.label=Fake DOI Provider" ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.authority=10.5072" ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.shoulder=FK2/" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.default-provider=fake" # jvm-options use colons as separators, escape as literal #DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` #./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.testDC.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" From 1da716f817def0d1d26f1a8c090c2e04fc11a6aa Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 14 May 2024 14:32:15 -0400 Subject: [PATCH 25/85] add check for default pid --- .../harvard/iq/dataverse/settings/ConfigCheckService.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java index 29a9d8956a3..daeff7b6cb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.settings; import edu.harvard.iq.dataverse.MailServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.FileUtil; @@ -32,6 +33,8 @@ public class ConfigCheckService { MailSessionProducer mailSessionProducer; @Inject MailServiceBean mailService; + @Inject + PidProviderFactoryBean pidProviderFactoryBean; public static class ConfigurationError extends RuntimeException { public ConfigurationError(String message) { @@ -132,6 +135,6 @@ public void checkSystemMailSetup() { * @return True if all checks successful, false otherwise. 
*/ private boolean checkPidProviders() { - return PidUtil.getManagedProviderIds().size() > 0; + return (PidUtil.getManagedProviderIds().size() > 0) && (pidProviderFactoryBean.getDefaultPidGenerator()!=null); } } From 6cf286d5606965a694d375e5cdb35509b230b5ed Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 14 May 2024 14:37:40 -0400 Subject: [PATCH 26/85] add logging to distinguish failure cases --- .../iq/dataverse/settings/ConfigCheckService.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java index daeff7b6cb9..96222f40daf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java @@ -135,6 +135,16 @@ public void checkSystemMailSetup() { * @return True if all checks successful, false otherwise. */ private boolean checkPidProviders() { - return (PidUtil.getManagedProviderIds().size() > 0) && (pidProviderFactoryBean.getDefaultPidGenerator()!=null); + // Check if at least one PidProvider capable of editing/minting PIDs is configured. + boolean valid=true; + if(!(PidUtil.getManagedProviderIds().size() > 0)) { + valid = false; + logger.warning("No PID providers configured"); + } + if (pidProviderFactoryBean.getDefaultPidGenerator()==null){ + valid=false; + logger.warning("No default PID provider configured"); + } + return valid; } } From d0674f08979632d8d512f517a4d4f7e669179f4a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 20 May 2024 10:42:07 -0400 Subject: [PATCH 27/85] This commit adds mechanisms that give the admin more granular options for limiting search-related extras, such as the facets and the object counts for the types left unchecked in the object-types facet. The default behavior is left intact in all cases.
#10570 --- .../edu/harvard/iq/dataverse/DatasetPage.java | 21 ++++++++ .../search/SearchIncludeFragment.java | 52 ++++++++++++++++--- .../settings/SettingsServiceBean.java | 3 ++ src/main/webapp/search-include-fragment.xhtml | 9 ++-- 4 files changed, 76 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index d9cb10026a3..69ce66caa64 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -138,6 +139,7 @@ import jakarta.faces.event.AjaxBehaviorEvent; import jakarta.servlet.ServletOutputStream; import jakarta.servlet.http.HttpServletResponse; +import jakarta.servlet.http.HttpServletRequest; import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.mutable.MutableBoolean; @@ -787,6 +789,25 @@ public boolean isIndexedVersion() { return isIndexedVersion = false; } + // plus we have mechanisms for disabling the facets selectively, just for + // the guests, or anonymous users: + if (session.getUser() instanceof GuestUser) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForGuestUsers, false)) { + return isIndexedVersion = false; + } + + // An even lower grade of user than Guest is a truly anonymous user - + // a guest user who came without the session cookie: + Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); + if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForAnonymousUsers, false)) { + return isIndexedVersion = false; + } + } + + } + + // The version is SUPPOSED to be indexed if it's the latest published version, or a // draft. So if none of the above is true, we can return false right away. 
if (!(workingVersion.isDraft() || isThisLatestReleasedVersion())) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 9be6c34aa8b..1e2e4764869 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.ThumbnailServiceWrapper; import edu.harvard.iq.dataverse.WidgetWrapper; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -395,7 +396,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - if (!wasSolrErrorEncountered() && selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable() && !isFacetsDisabled()) { + if (!wasSolrErrorEncountered() && selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable() && !isFacetsDisabled() && !isUncheckedTypesFacetDisabled()) { // If some types are NOT currently selected, we will need to // run a second search to obtain the numbers of the unselected types: @@ -1086,20 +1087,59 @@ public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; } + Boolean solrFacetsDisabled = null; /** * Indicates that the fragment should not be requesting facets in Solr * searches and rendering them on the page. * @return true if disabled; false by default */ public boolean isFacetsDisabled() { - // The method is used in rendered="..." logic. So we are using - // SettingsWrapper to make sure we are not looking it up repeatedly - // (settings are not expensive to look up, but - // still). + if (this.solrFacetsDisabled != null) { + return this.solrFacetsDisabled; + } + + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return this.solrFacetsDisabled = true; + } + + // We also have mechanisms for disabling the facets selectively, just for + // the guests, or anonymous users: + if (session.getUser() instanceof GuestUser) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForGuestUsers, false)) { + return this.solrFacetsDisabled = true; + } + + // An even lower grade of user than Guest is a truly anonymous user - + // a guest user who came without the session cookie: + Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); + if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForAnonymousUsers, false)) { + return this.solrFacetsDisabled = true; + } + } + } - return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false); + return this.solrFacetsDisabled = false; } + Boolean disableSecondPassSearch = null; + + /** + * Indicates that we do not need to run the second search query to populate + * the counts for *unchecked* type facets. 
+ * @return true if disabled; false by default + */ + public boolean isUncheckedTypesFacetDisabled() { + if (this.disableSecondPassSearch != null) { + return this.disableSecondPassSearch; + } + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableUncheckedTypesFacet, false)) { + return this.disableSecondPassSearch = true; + } + return this.disableSecondPassSearch = false; + } + + public boolean isRootDv() { return rootDv; } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 35d70498c3f..c1d3c6bee74 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -663,6 +663,9 @@ Whether Harvesting (OAI) service is enabled * and dataset pages instantly */ DisableSolrFacets, + DisableSolrFacetsForGuestUsers, + DisableSolrFacetsForAnonymousUsers, + DisableUncheckedTypesFacet, /** * When ingesting tabular data files, store the generated tab-delimited * files *with* the variable names line up top. diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 1b1b8594928..505fe681363 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -132,9 +132,10 @@ - + +
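<!-- Editorial note on the three hunks in this xhtml file: the markup itself was
     lost in text extraction (the tags were stripped, leaving only the bare -/+
     markers). Judging by the diff shape (one line replaced by two in each of
     the three search-type facet blocks) and the new
     isUncheckedTypesFacetDisabled() bean method above, each hunk plausibly
     swaps a single count output for a pair of outputs: one rendered when the
     second-pass counts are available, and a count-less fallback when they are
     disabled. A purely hypothetical sketch, not the recovered lines:

     <h:outputText value="#{typeLabel} (#{typeCount})"
                   rendered="#{!SearchIncludeFragment.uncheckedTypesFacetDisabled}"/>
     <h:outputText value="#{typeLabel}"
                   rendered="#{SearchIncludeFragment.uncheckedTypesFacetDisabled}"/>
-->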

@@ -164,9 +165,10 @@ - + + @@ -196,9 +198,10 @@ - + + From d338caec0310d1466fc3dbcf83d672af8b9c6e6c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 9 May 2024 15:57:11 -0400 Subject: [PATCH 28/85] only delete possible orphans --- .../iq/dataverse/search/IndexServiceBean.java | 61 +++++++++++++------ 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index e61b93a741f..6b7a74cdeef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -486,9 +486,40 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); + DatasetVersion latestVersion = dataset.getLatestVersion(); + String latestVersionStateString = latestVersion.getVersionState().name(); + DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); + DatasetVersion releasedVersion = dataset.getReleasedVersion(); + boolean atLeastOnePublishedVersion = false; + if (releasedVersion != null) { + atLeastOnePublishedVersion = true; + } else { + atLeastOnePublishedVersion = false; + } + List solrIdsOfFilesToDelete = null; + + try { + solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId()); + List fileMetadatas = latestVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); + } + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + fileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); + } + } + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); + } int numPublishedVersions = 0; List versions = dataset.getVersions(); - List solrIdsOfFilesToDelete = new ArrayList<>(); + //List solrIdsOfFilesToDelete = new ArrayList<>(); + //Debugging loop for (DatasetVersion datasetVersion : versions) { Long versionDatabaseId = datasetVersion.getId(); String versionTitle = datasetVersion.getTitle(); @@ -500,10 +531,10 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr debug.append("version found with database id " + versionDatabaseId + "\n"); debug.append("- title: " + versionTitle + "\n"); debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); - List fileMetadatas = datasetVersion.getFileMetadatas(); List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); /** * It sounds weird but the first thing we'll do is preemptively * delete the Solr documents of all published files. Don't @@ -515,10 +546,9 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr * searchable. 
See also * https://github.com/IQSS/dataverse/issues/762 */ - solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); } - try { +// try { /** * Preemptively delete *all* Solr documents for files associated * with the dataset based on a Solr query. @@ -539,11 +569,11 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr * @todo We should also delete the corresponding Solr * "permission" documents for the files. */ - List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); - solrIdsOfFilesToDelete.addAll(allFilesForDataset); - } catch (SearchException | NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); - } + //List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); + //solrIdsOfFilesToDelete.addAll(allFilesForDataset); +// } catch (SearchException | NullPointerException ex) { +// logger.fine("could not run search of files to delete: " + ex); +// } int numFiles = 0; if (fileMetadatas != null) { numFiles = fileMetadatas.size(); @@ -555,16 +585,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } - DatasetVersion latestVersion = dataset.getLatestVersion(); - String latestVersionStateString = latestVersion.getVersionState().name(); - DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - boolean atLeastOnePublishedVersion = false; - if (releasedVersion != null) { - atLeastOnePublishedVersion = true; - } else { - atLeastOnePublishedVersion = false; - } + Map desiredCards = new LinkedHashMap<>(); /** * @todo refactor all of this below and have a single method that takes From 5684020068e15904365b20030d09080ace72dff2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 9 May 2024 16:28:40 -0400 Subject: [PATCH 29/85] more redundant deletes --- .../iq/dataverse/search/IndexServiceBean.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 6b7a74cdeef..71bd7fd9bb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1935,9 +1935,9 @@ private String removeDeaccessioned(Dataset dataset) { StringBuilder result = new StringBuilder(); String deleteDeaccessionedResult = removeSolrDocFromIndex(determineDeaccessionedDatasetId(dataset)); result.append(deleteDeaccessionedResult); - List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); - String deleteFilesResult = removeMultipleSolrDocs(docIds); - result.append(deleteFilesResult); +// List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); +// String deleteFilesResult = removeMultipleSolrDocs(docIds); +// result.append(deleteFilesResult); return result.toString(); } @@ -1945,9 +1945,9 @@ private String removePublished(Dataset dataset) { StringBuilder result = new StringBuilder(); String deletePublishedResult = 
removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); result.append(deletePublishedResult); - List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); - String deleteFilesResult = removeMultipleSolrDocs(docIds); - result.append(deleteFilesResult); +// List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); +// String deleteFilesResult = removeMultipleSolrDocs(docIds); +// result.append(deleteFilesResult); return result.toString(); } From 67dc2b06e4274abb7a0c491e4fc59049d450e45b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 22 May 2024 15:05:20 -0400 Subject: [PATCH 30/85] add suffix to checks, shuffle logging --- .../iq/dataverse/search/IndexServiceBean.java | 119 +++++++++--------- 1 file changed, 62 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 71bd7fd9bb1..18d6d77a686 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -37,6 +37,7 @@ import java.util.concurrent.Future; import java.util.concurrent.Semaphore; import java.util.function.Function; +import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import jakarta.annotation.PostConstruct; @@ -500,15 +501,18 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr try { solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId()); - List fileMetadatas = latestVersion.getFileMetadatas(); - - for (FileMetadata fileMetadata : fileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete)); + if(latestVersion.isDeaccessioned() && !atLeastOnePublishedVersion) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); } + } if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - fileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : fileMetadatas) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); } @@ -516,74 +520,75 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } catch (SearchException | NullPointerException ex) { logger.fine("could not run search of files to delete: " + ex); } + logger.fine("File docs to delete: " + String.join(", ", solrIdsOfFilesToDelete)); int numPublishedVersions = 0; List versions = dataset.getVersions(); //List solrIdsOfFilesToDelete = new ArrayList<>(); - //Debugging loop - for (DatasetVersion datasetVersion : versions) { - Long versionDatabaseId = datasetVersion.getId(); - String versionTitle = datasetVersion.getTitle(); - String semanticVersion = datasetVersion.getSemanticVersion(); - DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); - if 
(versionState.equals(DatasetVersion.VersionState.RELEASED)) { - numPublishedVersions += 1; - } - debug.append("version found with database id " + versionDatabaseId + "\n"); - debug.append("- title: " + versionTitle + "\n"); - debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); - List fileInfo = new ArrayList<>(); - List fileMetadatas = datasetVersion.getFileMetadatas(); + if (logger.isLoggable(Level.FINE)) { + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : fileMetadatas) { - /** - * It sounds weird but the first thing we'll do is preemptively - * delete the Solr documents of all published files. Don't - * worry, published files will be re-indexed later along with - * the dataset. We do this so users can delete files from - * published versions of datasets and then re-publish a new - * version without fear that their old published files (now - * deleted from the latest published version) will be - * searchable. See also - * https://github.com/IQSS/dataverse/issues/762 - */ - fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); - } + for (FileMetadata fileMetadata : fileMetadatas) { + /** + * It sounds weird but the first thing we'll do is preemptively delete the Solr + * documents of all published files. Don't worry, published files will be + * re-indexed later along with the dataset. We do this so users can delete files + * from published versions of datasets and then re-publish a new version without + * fear that their old published files (now deleted from the latest published + * version) will be searchable. See also + * https://github.com/IQSS/dataverse/issues/762 + */ + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } // try { /** - * Preemptively delete *all* Solr documents for files associated - * with the dataset based on a Solr query. + * Preemptively delete *all* Solr documents for files associated with the + * dataset based on a Solr query. * - * We must query Solr for this information because the file has - * been deleted from the database ( perhaps when Solr was down, - * as reported in https://github.com/IQSS/dataverse/issues/2086 - * ) so the database doesn't even know about the file. It's an - * orphan. + * We must query Solr for this information because the file has been deleted + * from the database ( perhaps when Solr was down, as reported in + * https://github.com/IQSS/dataverse/issues/2086 ) so the database doesn't even + * know about the file. It's an orphan. * - * @todo This Solr query should make the iteration above based - * on the database unnecessary because it the Solr query should - * find all files for the dataset. We can probably remove the - * iteration above after an "index all" has been performed. 
- * Without an "index all" we won't be able to find files based - * on parentId because that field wasn't searchable in 4.0. + * @todo This Solr query should make the iteration above based on the database + * unnecessary because it the Solr query should find all files for the + * dataset. We can probably remove the iteration above after an "index + * all" has been performed. Without an "index all" we won't be able to + * find files based on parentId because that field wasn't searchable in + * 4.0. * - * @todo We should also delete the corresponding Solr - * "permission" documents for the files. + * @todo We should also delete the corresponding Solr "permission" documents for + * the files. */ - //List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); - //solrIdsOfFilesToDelete.addAll(allFilesForDataset); + // List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); + // solrIdsOfFilesToDelete.addAll(allFilesForDataset); // } catch (SearchException | NullPointerException ex) { // logger.fine("could not run search of files to delete: " + ex); // } - int numFiles = 0; - if (fileMetadatas != null) { - numFiles = fileMetadatas.size(); + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); } - debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); } debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); if (doNormalSolrDocCleanUp) { - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); - debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); + if(!solrIdsOfFilesToDelete.isEmpty()) { + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); + } } Map desiredCards = new LinkedHashMap<>(); From 2ad902c65eccfa8d955996965a135b14cef4cc87 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 22 May 2024 16:26:43 -0400 Subject: [PATCH 31/85] fix delete logic --- .../iq/dataverse/search/IndexServiceBean.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 18d6d77a686..437287aa755 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -502,13 +502,15 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr try { solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId()); logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete)); - if(latestVersion.isDeaccessioned() && !atLeastOnePublishedVersion) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; - solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); - } + //We keep the latest version's docs unless it is 
deaccessioned and there is no published/released version + //So skip the loop removing those docs from the delete list in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; + solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); + } } if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { List releasedFileMetadatas = releasedVersion.getFileMetadatas(); From 34b03fed73933c6687069f680ad2eac23c36164b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 22 May 2024 16:27:47 -0400 Subject: [PATCH 32/85] drafts already deleted --- .../iq/dataverse/search/IndexServiceBean.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 437287aa755..10c9b9bbe07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -673,11 +673,11 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr desiredCards.put(DatasetVersion.VersionState.DRAFT, false); if (doNormalSolrDocCleanUp) { - List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - results.append("Attempting to delete traces of drafts. Result: ") - .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + //results.append("Attempting to delete traces of drafts. Result: ") + // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); } /** @@ -721,11 +721,11 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr desiredCards.put(DatasetVersion.VersionState.DRAFT, false); if (doNormalSolrDocCleanUp) { - List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - results.append("The latest version is published. Attempting to delete drafts. Result: ") - .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + //results.append("The latest version is published. Attempting to delete drafts. 
Result: ") + // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); } desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); @@ -822,13 +822,13 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } - private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { +/* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { String deleteDraftFilesResults = ""; IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); deleteDraftFilesResults = indexResponse.toString(); return deleteDraftFilesResults; } - +*/ private IndexResponse indexDatasetPermissions(Dataset dataset) { boolean disabledForDebugging = false; if (disabledForDebugging) { From 90bfcf4234b56d8aef5f18c7a19df86483c2cb8b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 22 May 2024 16:28:49 -0400 Subject: [PATCH 33/85] don't run though file doc creation if not using it --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 10c9b9bbe07..985712da158 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1224,8 +1224,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Thu, 23 May 2024 16:27:27 -0400 Subject: [PATCH 34/85] release note/guide entries for the new settings in the pr. #10570 --- .../10570-extra-facet-settings.md | 4 ++++ .../source/installation/config.rst | 19 ++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 doc/release-notes/10570-extra-facet-settings.md diff --git a/doc/release-notes/10570-extra-facet-settings.md b/doc/release-notes/10570-extra-facet-settings.md new file mode 100644 index 00000000000..9d68defc9a3 --- /dev/null +++ b/doc/release-notes/10570-extra-facet-settings.md @@ -0,0 +1,4 @@ +Extra settings have been added giving an instance admin more choices in +selectively limiting the availability of search facets on the Collection and Dataset pages. +See the [Disable Solr Facets](https://guides.dataverse.org/en/6.3/installation/config.html#DisableSolrFacets) sections of the Config Guide for more info. + diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 907631e6236..1d76313a92c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3851,7 +3851,7 @@ If ``:SolrFullTextIndexing`` is set to true, the content of files of any size wi :DisableSolrFacets ++++++++++++++++++ -Setting this to ``true`` will make the collection ("dataverse") page start showing search results without the usual search facets on the left side of the page. A message will be shown in that column informing the users that facets are temporarily unavailable. Generating the facets is more resource-intensive for Solr than the main search results themselves, so applying this measure will significantly reduce the load on the search engine when its performance becomes an issue. +Setting this to ``true`` will make the collection ("dataverse") page start showing search results without the usual search facets on the left side of the page. 
A message will be shown in that column informing the users that facets are temporarily unavailable. Generating the facets is more resource-intensive for Solr than the main search results themselves, so applying this measure will significantly reduce the load on the search engine when its performance becomes an issue. +Setting this to ``true`` will make the collection ("dataverse") page start showing search results without the usual search facets on the left side of the page. A message will be shown in that column informing the users that facets are temporarily unavailable. Generating the facets may in some cases be more resource-intensive for Solr than the main search results themselves, so applying this measure will significantly reduce the load on the search engine when its performance becomes an issue. This setting can be used in combination with the "circuit breaker" mechanism on the Solr side (see the "Installing Solr" section of the Installation Prerequisites guide). An admin can choose to enable it, or even create an automated system for enabling it in response to Solr beginning to drop incoming requests with the HTTP code 503. @@ -3860,6 +3860,23 @@ To enable the setting:: curl -X PUT -d true "http://localhost:8080/api/admin/settings/:DisableSolrFacets" +:DisableSolrFacetsForGuestUsers ++++++++++++++++++++++++++++++++ + +Similar to the above, but will disable the facets for Guest users only. + +:DisableSolrFacetsForAnonymousUsers ++++++++++++++++++++++++++++++++++++ + +Same idea as with the two settings above. For the purposes of this setting, a request is considered "anonymous" if it came in without the JSESSIONID cookie supplied. A UI user who is browsing the holdings without logging in will have a valid JSESSIONID cookie, tied to a guest session. The main purpose of this setting is to hide the facets from bots, scripted crawlers and such (most of which - though not all - do not use cookies). Not letting the bots anywhere near the facets can serve a dual purpose on a busy instance experiencing problems with such abuse - some CPU cycles and resources can be saved by not having to generate the facets. And, even more importantly, it can prevent bots from attempting to crawl the facet trees, which has a potential for multiplying the service load. + +.. _:DisableUncheckedTypesFacet: + +:DisableUncheckedTypesFacet ++++++++++++++++++++++++++++ + +Another option for reducing the load on Solr on a busy instance. Rather than disabling all the search facets, this setting affects only one - the facet on the upper left of the collection page, where users can select the type of objects to search - Collections ("Dataverses"), Datasets and/or Files. With this option set to true, the numbers of results will only be shown for the types actually selected (i.e. only for the search results currently shown to the user). This minor feature - being able to tell the user how many files (for example) they *would* find, *if* they chose to search for files, by clicking the Files facet - essentially doubles the expense of running the search. That may still be negligible on an instance with lighter holdings, but can make a significant difference for a large and heavily used archive. + .. _:SignUpUrl: :SignUpUrl From 47f126469d6513e8164b7993761f2981c979e170 Mon Sep 17 00:00:00 2001 From: landreev Date: Fri, 24 May 2024 16:02:15 -0400 Subject: [PATCH 35/85] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 1d76313a92c..d58199f2b21 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3863,7 +3863,7 @@ To enable the setting:: :DisableSolrFacetsForGuestUsers +++++++++++++++++++++++++++++++ -Similar to the above, but will disable the facets for Guest users only.
+Similar to the above, but will disable the facets for Guest (unauthenticated) users only. :DisableSolrFacetsForAnonymousUsers +++++++++++++++++++++++++++++++++++ From a7839b50f8875a4b3e5d700c9d12a627a65f0e2a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 3 Jun 2024 10:48:10 -0400 Subject: [PATCH 36/85] add permission doc deletes, check/delete per-version perm docs via api --- .../iq/dataverse/DataFileServiceBean.java | 8 +++ .../iq/dataverse/search/IndexServiceBean.java | 49 ++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 41ea6ae39f0..7f38107af6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -759,6 +760,13 @@ public List findAll() { return em.createQuery("select object(o) from DataFile as o order by o.id", DataFile.class).getResultList(); } + public List findVersionStates(Long fileId) { + Query query = em.createQuery( + "select distinct dv.versionState from DatasetVersion dv where dv.id in (select fm.datasetVersion.id from FileMetadata fm where fm.dataFile.id=:fileId)"); + query.setParameter("fileId", fileId); + return query.getResultList(); + } + public DataFile save(DataFile dataFile) { if (dataFile.isMergeable()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 985712da158..bab7b196f2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.search; import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -503,7 +504,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId()); logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete)); //We keep the latest version's docs unless it is deaccessioned and there is no published/released version - //So skip the loop removing those docs from the delete list in that case + //So skip the loop removing those docs from the delete list except in that case if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { List latestFileMetadatas = latestVersion.getFileMetadatas(); String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); @@ -588,6 +589,10 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); if (doNormalSolrDocCleanUp) { if(!solrIdsOfFilesToDelete.isEmpty()) { + for(String file: solrIdsOfFilesToDelete) { + //Also remove associated permission docs + solrIdsOfFilesToDelete.add(file+"_permission"); + } IndexResponse 
resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } @@ -2088,8 +2093,48 @@ public List findPermissionsInSolrOnly() throws SearchException { SolrDocumentList list = rsp.getResults(); for (SolrDocument doc: list) { long id = Long.parseLong((String) doc.getFieldValue(SearchFields.DEFINITION_POINT_DVOBJECT_ID)); + String docId = (String)doc.getFieldValue(SearchFields.ID); if(!dvObjectService.checkExists(id)) { - permissionInSolrOnly.add((String)doc.getFieldValue(SearchFields.ID)); + permissionInSolrOnly.add(docId); + } else { + DvObject obj = dvObjectService.findDvObject(id); + if (obj instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersion(); + if (docId.endsWith("draft_permission")) { + if (!dv.isDraft()) { + permissionInSolrOnly.add(docId); + } + } else if (docId.endsWith("deaccessioned_permission")) { + if (!dv.isDeaccessioned()) { + permissionInSolrOnly.add(docId); + } + } else { + if (d.getReleasedVersion() != null) { + permissionInSolrOnly.add(docId); + } + } + } else if (obj instanceof DataFile f) { + List states = dataFileService.findVersionStates(f.getId()); + Set strings = states.stream().map(VersionState::toString).collect(Collectors.toSet()); + logger.info("for " + docId + " states: " + String.join(", ", strings)); + if (docId.endsWith("draft_permission")) { + if (!states.contains(VersionState.DRAFT)) { + permissionInSolrOnly.add(docId); + } + } else if (docId.endsWith("deaccessioned_permission")) { + if (!states.contains(VersionState.DEACCESSIONED) && states.size() == 1) { + permissionInSolrOnly.add(docId); + } + } else { + if (!states.contains(VersionState.RELEASED)) { + permissionInSolrOnly.add(docId); + } else if (!dataFileService.findMostRecentVersionFileIsIn(f).getDatasetVersion() + .equals(f.getOwner().getReleasedVersion())) { + permissionInSolrOnly.add(docId); + } + + } + } } } if (cursorMark.equals(nextCursorMark)) { From da5b10b40ab3b364218c914ca9586eae888ce809 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Jun 2024 17:29:44 -0400 Subject: [PATCH 37/85] typo - released logic backwards --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d2aa52ce395..98bcdf385fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -2099,7 +2099,7 @@ public List findPermissionsInSolrOnly() throws SearchException { permissionInSolrOnly.add(docId); } } else { - if (d.getReleasedVersion() != null) { + if (d.getReleasedVersion() == null) { permissionInSolrOnly.add(docId); } } From 04580a3f4403d1a7dbf98e54cae411ff9ff5f9eb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Jun 2024 17:30:10 -0400 Subject: [PATCH 38/85] add dataset doc cleanup, fix looping error --- .../iq/dataverse/search/IndexServiceBean.java | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 98bcdf385fc..01a6039b91b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java 
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -492,14 +492,12 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr boolean atLeastOnePublishedVersion = false; if (releasedVersion != null) { atLeastOnePublishedVersion = true; - } else { - atLeastOnePublishedVersion = false; } - List solrIdsOfFilesToDelete = null; + List solrIdsOfDocsToDelete = null; try { - solrIdsOfFilesToDelete = findFilesOfParentDataset(dataset.getId()); - logger.fine("Existing file docs: " + String.join(", ", solrIdsOfFilesToDelete)); + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); //We keep the latest version's docs unless it is deaccessioned and there is no published/released version //So skip the loop removing those docs from the delete list except in that case if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { @@ -507,20 +505,36 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); for (FileMetadata fileMetadata : latestFileMetadatas) { String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; - solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); } } if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { List releasedFileMetadatas = releasedVersion.getFileMetadatas(); for (FileMetadata fileMetadata : releasedFileMetadatas) { String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfFilesToDelete.remove(solrIdOfPublishedFile); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); } } + //Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + draftSuffix); + } + if (!atLeastOnePublishedVersion) { + // There's no released version, so should delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId()); + } + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + deaccessionedSuffix); + } } catch (SearchException | NullPointerException ex) { logger.fine("could not run search of files to delete: " + ex); } - logger.fine("File docs to delete: " + String.join(", ", solrIdsOfFilesToDelete)); + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); int numPublishedVersions = 0; List versions = dataset.getVersions(); //List solrIdsOfFilesToDelete = new ArrayList<>(); @@ -585,12 +599,17 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); if (doNormalSolrDocCleanUp) { - if(!solrIdsOfFilesToDelete.isEmpty()) { - for(String file: solrIdsOfFilesToDelete) { + + if(!solrIdsOfDocsToDelete.isEmpty()) { + List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); + for(String file: solrIdsOfDocsToDelete) { //Also remove associated permission docs - solrIdsOfFilesToDelete.add(file+"_permission"); + 
solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); } - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); + logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocsToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } } @@ -2118,9 +2137,11 @@ public List findPermissionsInSolrOnly() throws SearchException { } else { if (!states.contains(VersionState.RELEASED)) { permissionInSolrOnly.add(docId); - } else if (!dataFileService.findMostRecentVersionFileIsIn(f).getDatasetVersion() - .equals(f.getOwner().getReleasedVersion())) { - permissionInSolrOnly.add(docId); + } else { + if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) { + logger.info("Adding doc " + docId + " to list of permissions in Solr only"); + permissionInSolrOnly.add(docId); + } } } From c16fdd5396fab0f7890d221965a632889046467f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 12 Jun 2024 17:44:48 -0400 Subject: [PATCH 39/85] Fix to version check This is only used in determining the most recent version a dataset is in on the file page, e.g. for https://demo.dataverse.org/file.xhtml ?persistentId=doi:10.70122/FK2/FO0MPQ/KNG6PA&version=3.0 I confirmed that demo shows version 1 in this example whereas it should show version 2 (which this commit fixes). --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 7f38107af6b..21f925f8981 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -384,7 +384,8 @@ public FileMetadata findMostRecentVersionFileIsIn(DataFile file) { if (fileMetadatas == null || fileMetadatas.isEmpty()) { return null; } else { - return fileMetadatas.get(0); + // This assumes the order of filemetadatas is from first to most recent, which is true as of v6.3 + return fileMetadatas.get(fileMetadatas.size() - 1); } } From 7f56478d2f61161bb797cecdbc52669b67992cbe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 14:02:22 -0400 Subject: [PATCH 40/85] minor simplification --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 01a6039b91b..bd4aa27ba68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -486,8 +486,8 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); DatasetVersion latestVersion = dataset.getLatestVersion(); - String latestVersionStateString = latestVersion.getVersionState().name(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); + 
String latestVersionStateString = latestVersionState.name(); DatasetVersion releasedVersion = dataset.getReleasedVersion(); boolean atLeastOnePublishedVersion = false; if (releasedVersion != null) { From 57b7ed92428bf24079f9fe969c4e0fedbb2cfa4e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 14:40:04 -0400 Subject: [PATCH 41/85] cleanup --- .../iq/dataverse/search/IndexServiceBean.java | 260 ++++++------------ 1 file changed, 88 insertions(+), 172 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index bd4aa27ba68..2afb5d26082 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -13,6 +13,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.search.IndexableDataset.DatasetState; import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -476,12 +477,8 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr * @todo should we use solrDocIdentifierDataset or * IndexableObject.IndexableTypes.DATASET.getName() + "_" ? */ - // String solrIdPublished = solrDocIdentifierDataset + dataset.getId(); String solrIdPublished = determinePublishedDatasetSolrDocId(dataset); String solrIdDraftDataset = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.WORKING_COPY.getSuffix(); - // String solrIdDeaccessioned = IndexableObject.IndexableTypes.DATASET.getName() - // + "_" + dataset.getId() + - // IndexableDataset.DatasetState.DEACCESSIONED.getSuffix(); String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); @@ -494,112 +491,53 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr atLeastOnePublishedVersion = true; } List solrIdsOfDocsToDelete = null; - - try { - solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); - logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - //We keep the latest version's docs unless it is deaccessioned and there is no published/released version - //So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (logger.isLoggable(Level.FINE)) { + writeDebugInfo(debug, dataset); + } + if (doNormalSolrDocCleanUp) { + try { + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if 
((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - //Clear any unused dataset docs - if (!latestVersion.isDraft()) { - // The latest version is released, so should delete any draft docs for the - // dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + draftSuffix); - } - if (!atLeastOnePublishedVersion) { - // There's no released version, so should delete any normal state docs for the - // dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId()); - } - if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { - // There's a released version or a draft, so should delete any deaccessioned - // state docs for the dataset - solrIdsOfDocsToDelete.add(solrDocIdentifierDataset + dataset.getId() + deaccessionedSuffix); - } - } catch (SearchException | NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); - } - logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - int numPublishedVersions = 0; - List versions = dataset.getVersions(); - //List solrIdsOfFilesToDelete = new ArrayList<>(); - if (logger.isLoggable(Level.FINE)) { - for (DatasetVersion datasetVersion : versions) { - Long versionDatabaseId = datasetVersion.getId(); - String versionTitle = datasetVersion.getTitle(); - String semanticVersion = datasetVersion.getSemanticVersion(); - DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); - if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { - numPublishedVersions += 1; + // Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdDraftDataset); } - debug.append("version found with database id " + versionDatabaseId + "\n"); - debug.append("- title: " + versionTitle + "\n"); - debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); - List fileInfo = new ArrayList<>(); - List fileMetadatas = datasetVersion.getFileMetadatas(); - - for (FileMetadata fileMetadata : fileMetadatas) { - /** - * It sounds weird but the first thing we'll do is preemptively delete the Solr - * documents of all published files. Don't worry, published files will be - * re-indexed later along with the dataset. 
We do this so users can delete files - * from published versions of datasets and then re-publish a new version without - * fear that their old published files (now deleted from the latest published - * version) will be searchable. See also - * https://github.com/IQSS/dataverse/issues/762 - */ - fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + if (!atLeastOnePublishedVersion) { + // There's no released version, so should delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdPublished); } -// try { - /** - * Preemptively delete *all* Solr documents for files associated with the - * dataset based on a Solr query. - * - * We must query Solr for this information because the file has been deleted - * from the database ( perhaps when Solr was down, as reported in - * https://github.com/IQSS/dataverse/issues/2086 ) so the database doesn't even - * know about the file. It's an orphan. - * - * @todo This Solr query should make the iteration above based on the database - * unnecessary because it the Solr query should find all files for the - * dataset. We can probably remove the iteration above after an "index - * all" has been performed. Without an "index all" we won't be able to - * find files based on parentId because that field wasn't searchable in - * 4.0. - * - * @todo We should also delete the corresponding Solr "permission" documents for - * the files. - */ - // List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); - // solrIdsOfFilesToDelete.addAll(allFilesForDataset); -// } catch (SearchException | NullPointerException ex) { -// logger.fine("could not run search of files to delete: " + ex); -// } - int numFiles = 0; - if (fileMetadatas != null) { - numFiles = fileMetadatas.size(); + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrIdDeaccessioned); } - debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); } - } - debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); - if (doNormalSolrDocCleanUp) { - + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + if(!solrIdsOfDocsToDelete.isEmpty()) { List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); for(String file: solrIdsOfDocsToDelete) { @@ -636,19 +574,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(" (and files). Result: ") - .append(deleteDeaccessionedResult).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { - String deletePublishedResults = removePublished(dataset); - results.append("No published version. Attempting to delete traces of published version from index. 
Result: ") - .append(deletePublishedResults).append("\n"); - } /** * Desired state for existence of cards: {DRAFT=true, @@ -687,19 +613,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { - String deletePublishedResults = removePublished(dataset); - results.append("No published version. Attempting to delete traces of published version from index. Result: ").append(deletePublishedResults).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { - //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); - String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - //results.append("Attempting to delete traces of drafts. Result: ") - // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); - } /** * Desired state for existence of cards: {DEACCESSIONED=true, @@ -741,20 +655,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { - //List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); - String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); - //String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); - //results.append("The latest version is published. Attempting to delete drafts. Result: ") - // .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); - } - desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {RELEASED=true, @@ -801,11 +702,6 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { - String deleteDeaccessionedResult = removeDeaccessioned(dataset); - results.append("No need for deaccessioned version. Deletion attempted for ") - .append(solrIdDeaccessioned).append(". 
Result: ").append(deleteDeaccessionedResult); - } /** * Desired state for existence of cards: {DRAFT=true, @@ -843,7 +739,45 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } -/* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + private void writeDebugInfo(StringBuilder debug, Dataset dataset) { + List versions = dataset.getVersions(); + int numPublishedVersions = 0; + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + /** + * It sounds weird but the first thing we'll do is preemptively delete the Solr + * documents of all published files. Don't worry, published files will be + * re-indexed later along with the dataset. We do this so users can delete files + * from published versions of datasets and then re-publish a new version without + * fear that their old published files (now deleted from the latest published + * version) will be searchable. See also + * https://github.com/IQSS/dataverse/issues/762 + */ + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + } + + /* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { String deleteDraftFilesResults = ""; IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); deleteDraftFilesResults = indexResponse.toString(); @@ -925,15 +859,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); -// String deleteFilesResult = removeMultipleSolrDocs(docIds); -// result.append(deleteFilesResult); - return result.toString(); - } - - private String removePublished(Dataset dataset) { - StringBuilder result = new StringBuilder(); - String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); - result.append(deletePublishedResult); -// List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); -// String deleteFilesResult = removeMultipleSolrDocs(docIds); -// result.append(deleteFilesResult); - return result.toString(); - } - private Dataverse findRootDataverseCached() { if (true) { /** From a52a8384ffebb2244e687aef1cbe479aa06f5b76 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 16:03:33 -0400 Subject: [PATCH 42/85] docs --- doc/release-notes/10579-avoid-solr-deletes.md | 9 +++++++++ doc/sphinx-guides/source/developers/performance.rst | 1 + doc/sphinx-guides/source/installation/config.rst | 3 +++ 3 files changed, 13 insertions(+) create mode 
100644 doc/release-notes/10579-avoid-solr-deletes.md diff --git a/doc/release-notes/10579-avoid-solr-deletes.md b/doc/release-notes/10579-avoid-solr-deletes.md new file mode 100644 index 00000000000..1062a2fb78f --- /dev/null +++ b/doc/release-notes/10579-avoid-solr-deletes.md @@ -0,0 +1,9 @@ +A feature flag called "reduce-solr-deletes" has been added to improve how datafiles are indexed. When the flag is enabled, +Dataverse will avoid preemptively deleting existing Solr documents for the files prior to sending updated information. This +should improve performance and will allow additional optimizations going forward. Like any other feature flag, it can be enabled e.g. by setting the environment variable `DATAVERSE_FEATURE_REDUCE_SOLR_DELETES=1`. + +The /api/admin/index/status and /api/admin/index/clear-orphans calls +(see https://guides.dataverse.org/en/latest/admin/solr-search-index.html#index-and-database-consistency) +will now find and remove (respectively) additional permissions-related Solr documents that were not being detected before. +Reducing the overall number of documents will improve Solr performance, and large sites may wish to periodically call the +clear-orphans API. \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index 562fa330d75..0044899a581 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -121,6 +121,7 @@ While in the past Solr performance hasn't been much of a concern, in recent year We are tracking performance problems in `#10469 `_. In a meeting with a Solr expert on 2024-05-10 we were advised to avoid joins as much as possible. (It was acknowledged that many Solr users make use of joins because they have to, like we do, to keep some documents private.) Toward that end we have added two feature flags called ``avoid-expensive-solr-join`` and ``add-publicobject-solr-field`` as explained under :ref:`feature-flags`. It was confirmed experimentally that performing the join on all the public objects (published collections, datasets and files), i.e., the bulk of the content in the search index, was indeed very expensive, especially on a large instance the size of the IQSS prod. archive, especially under indexing load. We confirmed that it was in fact unnecessary and were able to replace it with a boolean field directly in the indexed documents, which is achieved by the two feature flags above. However, as of writing this, this mechanism should still be considered experimental. +Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change). Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8fb9460892b..f6e33a2678d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3274,6 +3274,9 @@ please find all known feature flags below. Any of these flags can be activated u * - add-publicobject-solr-field - Adds an extra boolean field `PublicObject_b:true` for public content (published Collections, Datasets and Files).
Once reindexed with these fields, we can rely on it to remove a very expensive Solr join on all such documents in Solr queries, significantly improving overall performance (by enabling the feature flag above, `avoid-expensive-solr-join`). These two flags are separate so that an instance can reindex their holdings before enabling the optimization in searches, thus avoiding having their public objects temporarily disappear from search results while the reindexing is in progress. - ``Off`` + * - reduce-solr-deletes + - Avoids deleting and recreating solr documents for dataset files when reindexing. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. From 1150ff4fa718eb3665eeddad420b33589d9c574d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 16:03:45 -0400 Subject: [PATCH 43/85] feature flag --- .../iq/dataverse/search/IndexServiceBean.java | 269 +++++++++++++----- .../iq/dataverse/settings/FeatureFlags.java | 13 + 2 files changed, 217 insertions(+), 65 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2afb5d26082..1d1098a33d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -482,6 +482,78 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); + boolean reduceSolrDeletes = FeatureFlags.REDUCE_SOLR_DELETES.enabled(); + if (!reduceSolrDeletes) { + int numPublishedVersions = 0; + List versions = dataset.getVersions(); + List solrIdsOfFilesToDelete = new ArrayList<>(); + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileMetadatas = datasetVersion.getFileMetadatas(); + List fileInfo = new ArrayList<>(); + for (FileMetadata fileMetadata : fileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + /** + * It sounds weird but the first thing we'll do is preemptively + * delete the Solr documents of all published files. Don't + * worry, published files will be re-indexed later along with + * the dataset. We do this so users can delete files from + * published versions of datasets and then re-publish a new + * version without fear that their old published files (now + * deleted from the latest published version) will be + * searchable. 
See also + * https://github.com/IQSS/dataverse/issues/762 + */ + solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + try { + /** + * Preemptively delete *all* Solr documents for files associated + * with the dataset based on a Solr query. + * + * We must query Solr for this information because the file has + * been deleted from the database ( perhaps when Solr was down, + * as reported in https://github.com/IQSS/dataverse/issues/2086 + * ) so the database doesn't even know about the file. It's an + * orphan. + * + * @todo This Solr query should make the iteration above based + * on the database unnecessary because it the Solr query should + * find all files for the dataset. We can probably remove the + * iteration above after an "index all" has been performed. + * Without an "index all" we won't be able to find files based + * on parentId because that field wasn't searchable in 4.0. + * + * @todo We should also delete the corresponding Solr + * "permission" documents for the files. + */ + List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); + solrIdsOfFilesToDelete.addAll(allFilesForDataset); + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + if (doNormalSolrDocCleanUp) { + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); + } + } DatasetVersion latestVersion = dataset.getLatestVersion(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); String latestVersionStateString = latestVersionState.name(); @@ -490,65 +562,69 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr if (releasedVersion != null) { atLeastOnePublishedVersion = true; } - List solrIdsOfDocsToDelete = null; - if (logger.isLoggable(Level.FINE)) { - writeDebugInfo(debug, dataset); - } - if (doNormalSolrDocCleanUp) { - try { - solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); - logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - // We keep the latest version's docs unless it is deaccessioned and there is no - // published/released version - // So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() - + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (reduceSolrDeletes) { + List solrIdsOfDocsToDelete = null; + if (logger.isLoggable(Level.FINE)) { + writeDebugInfo(debug, dataset); + } + if (doNormalSolrDocCleanUp) { + try { + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); 
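+                    // Prune rather than wipe: start from every file doc Solr currently holds for this dataset, then remove the ids that the latest (and, if different, the released) version still needs, so only stale docs get deleted below.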
+ // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() + + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } + // Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdDraftDataset); + } + if (!atLeastOnePublishedVersion) { + // There's no released version, so should delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdPublished); + } + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrIdDeaccessioned); + } + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); } - // Clear any unused dataset docs - if (!latestVersion.isDraft()) { - // The latest version is released, so should delete any draft docs for the - // dataset - solrIdsOfDocsToDelete.add(solrIdDraftDataset); - } - if (!atLeastOnePublishedVersion) { - // There's no released version, so should delete any normal state docs for the - // dataset - solrIdsOfDocsToDelete.add(solrIdPublished); - } - if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { - // There's a released version or a draft, so should delete any deaccessioned - // state docs for the dataset - solrIdsOfDocsToDelete.add(solrIdDeaccessioned); - } - } catch (SearchException | NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); - } - logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - if(!solrIdsOfDocsToDelete.isEmpty()) { - List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); - for(String file: solrIdsOfDocsToDelete) { - //Also remove associated permission docs - solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); + if (!solrIdsOfDocsToDelete.isEmpty()) { + List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); + for (String file : solrIdsOfDocsToDelete) { + // Also remove associated permission docs + solrIdsOfPermissionDocsToDelete.add(file + 
discoverabilityPermissionSuffix); + } + solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); + logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService + .deleteMultipleSolrIds(solrIdsOfDocsToDelete); + debug.append("result of attempt to premptively deleted published files before reindexing: " + + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } - solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); - logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); - - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocsToDelete); - debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } } @@ -574,7 +650,19 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(" (and files). Result: ") + .append(deleteDeaccessionedResult).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.RELEASED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deletePublishedResults = removePublished(dataset); + results.append("No published version. Attempting to delete traces of published version from index. Result: ") + .append(deletePublishedResults).append("\n"); + } /** * Desired state for existence of cards: {DRAFT=true, @@ -613,7 +701,19 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.RELEASED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deletePublishedResults = removePublished(dataset); + results.append("No published version. Attempting to delete traces of published version from index. Result: ").append(deletePublishedResults).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.DRAFT, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); + String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + results.append("Attempting to delete traces of drafts. Result: ") + .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + } /** * Desired state for existence of cards: {DEACCESSIONED=true, @@ -655,7 +755,20 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("Attempted to index " + solrIdPublished).append(". 
Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); + String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); + String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); + results.append("The latest version is published. Attempting to delete drafts. Result: ") + .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); + } + desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("No need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); + } /** * Desired state for existence of cards: {RELEASED=true, @@ -702,6 +815,11 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { + String deleteDeaccessionedResult = removeDeaccessioned(dataset); + results.append("No need for deaccessioned version. Deletion attempted for ") + .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); + } /** * Desired state for existence of cards: {DRAFT=true, @@ -777,13 +895,6 @@ private void writeDebugInfo(StringBuilder debug, Dataset dataset) { debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); } - /* private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { - String deleteDraftFilesResults = ""; - IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); - deleteDraftFilesResults = indexResponse.toString(); - return deleteDraftFilesResults; - } -*/ private IndexResponse indexDatasetPermissions(Dataset dataset) { boolean disabledForDebugging = false; if (disabledForDebugging) { @@ -866,10 +977,8 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); + String deleteFilesResult = removeMultipleSolrDocs(docIds); + result.append(deleteFilesResult); + return result.toString(); + } + + //Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled + private String removePublished(Dataset dataset) { + StringBuilder result = new StringBuilder(); + String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); + result.append(deletePublishedResult); + List docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); + String deleteFilesResult = removeMultipleSolrDocs(docIds); + result.append(deleteFilesResult); + return result.toString(); + } + + // Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled + private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + String deleteDraftFilesResults = ""; + IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); + deleteDraftFilesResults = indexResponse.toString(); + return deleteDraftFilesResults; + } + private Dataverse findRootDataverseCached() { 
if (true) { /** diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 14a7ab86f22..d523bf92e63 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -58,6 +58,19 @@ public enum FeatureFlags { * @since Dataverse 6.3 */ ADD_PUBLICOBJECT_SOLR_FIELD("add-publicobject-solr-field"), + /** + * Dataverse normally deletes all solr documents related to a dataset's files + * when the dataset is reindexed. With this flag enabled, additional logic is + * added to the reindex process to delete only the solr documents that are no + * longer needed. (Required docs will be updated rather than deleted and + * replaced.) Enabling this feature flag should make the reindex process + * faster without impacting the search results. + * + * @apiNote Raise flag by setting + * "dataverse.feature.reduce-solr-deletes" + * @since Dataverse 6.3 + */ + REDUCE_SOLR_DELETES("reduce-solr-deletes"), ; final String flag; From 058c28b21e500455d4dd51c13594b1073e316274 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 16:06:43 -0400 Subject: [PATCH 44/85] info -> fine --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 1d1098a33d3..567ef1ecbd8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -2180,7 +2180,7 @@ public List findPermissionsInSolrOnly() throws SearchException { } else if (obj instanceof DataFile f) { List states = dataFileService.findVersionStates(f.getId()); Set strings = states.stream().map(VersionState::toString).collect(Collectors.toSet()); - logger.info("for " + docId + " states: " + String.join(", ", strings)); + logger.fine("States for " + docId + ": " + String.join(", ", strings)); if (docId.endsWith("draft_permission")) { if (!states.contains(VersionState.DRAFT)) { permissionInSolrOnly.add(docId); @@ -2194,7 +2194,7 @@ public List findPermissionsInSolrOnly() throws SearchException { permissionInSolrOnly.add(docId); } else { if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) { - logger.info("Adding doc " + docId + " to list of permissions in Solr only"); + logger.fine("Adding doc " + docId + " to list of permissions in Solr only"); permissionInSolrOnly.add(docId); } } From 1b0d3a1cc20438c3d3236b9cf190cedc5903a82b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 13 Jun 2024 17:27:03 -0400 Subject: [PATCH 45/85] note eliminating more orphan perm docs --- doc/sphinx-guides/source/developers/performance.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index 0044899a581..6c864bec257 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -121,7 +121,7 @@ While in the past Solr performance hasn't been much of a concern, in recent year We are tracking performance problems in `#10469 `_. In a meeting with a Solr expert on 2024-05-10 we were advised to avoid joins as much as possible. 
(It was acknowledged that many Solr users make use of joins because they have to, like we do, to keep some documents private.) Toward that end we have added two feature flags called ``avoid-expensive-solr-join`` and ``add-publicobject-solr-field`` as explained under :ref:`feature-flags`. It was confirmed experimentally that performing the join on all the public objects (published collections, datasets and files), i.e., the bulk of the content in the search index, was indeed very expensive, especially on a large instance the size of the IQSS prod. archive, especially under indexing load. We confirmed that it was in fact unnecessary and were able to replace it with a boolean field directly in the indexed documents, which is achieved by the two feature flags above. However, as of writing this, this mechanism should still be considered experimental. -Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change). +Another flag, ``reduce-solr-deletes``, avoids deleting solr documents for files in a dataset prior to sending updates. It also eliminates several causes of orphan permission documents. This is expected to improve indexing performance to some extent and is a step towards avoiding unnecessary updates (i.e. when a doc would not change). Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 00020e2e14be599d49bbd7400d37454cf91717b7 Mon Sep 17 00:00:00 2001 From: Steven Ferey Date: Fri, 14 Jun 2024 17:39:06 +0200 Subject: [PATCH 46/85] 10288 Add keywordTermURI metadata in keyword block (#10371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * New keywordTermURI Metadata in keyword Metadata Block * update of the keywordVocabularyURI metadata to make it consistent with its name Controlled Vocabulary URL * fix description and watermark properties * 10288 adding documentation * 10288 - adaptation of the SolR schema and dataset exports * 10288 - Adjustment of typo and sql * Adaptations for Dataverse 6.2 * 10288 - rollback keywordVocabularyURL to keywordVocabularyURI * 10288 - removing obsolete SQL script * 10288 - Label modification to follow Dataverse recommendations * 10288 - Added valueURI attribute for OpenAire export * Fix NoResultException on DatasetServiceBean.findDeep (.getSingleResult():L137) --------- Co-authored-by: Ludovic DANIEL Co-authored-by: Jérôme ROUCOU --- conf/solr/9.3.0/schema.xml | 2 + ...-add-term_uri-metadata-in-keyword-block.md | 53 ++++++++ ...dataset-create-new-all-default-fields.json | 12 ++ scripts/api/data/metadatablocks/citation.tsv | 113 +++++++++--------- .../iq/dataverse/DatasetFieldConstant.java | 5 +- .../harvard/iq/dataverse/DatasetKeyword.java | 68 ----------- .../export/openaire/OpenAireExportUtil.java | 22 ++-- .../java/propertyFiles/citation.properties | 3 + .../export/OpenAireExportUtilTest.java | 2 + .../export/SchemaDotOrgExporterTest.java | 1 + .../dataverse/export/dataset-all-defaults.txt | 12 ++ .../dataset-create-new-all-ddi-fields.json | 12 ++ .../dataverse/export/ddi/dataset-finch1.json | 12 ++ src/test/resources/json/dataset-finch2.json | 12 ++ .../json/dataset-long-description.json | 12 ++ 15 files changed, 208 insertions(+), 133 deletions(-) create mode 100644 doc/release-notes/10288-add-term_uri-metadata-in-keyword-block.md delete mode 100644 
src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/9.3.0/schema.xml index 521e7a7db72..5dde750573d 100644 --- a/conf/solr/9.3.0/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -326,6 +326,7 @@ + @@ -565,6 +566,7 @@ + diff --git a/doc/release-notes/10288-add-term_uri-metadata-in-keyword-block.md b/doc/release-notes/10288-add-term_uri-metadata-in-keyword-block.md new file mode 100644 index 00000000000..eb3a79dbf25 --- /dev/null +++ b/doc/release-notes/10288-add-term_uri-metadata-in-keyword-block.md @@ -0,0 +1,53 @@ +### New keywordTermURI Metadata in keyword Metadata Block + +Adding a new metadata `keywordTermURI` to the `keyword` metadata block to facilitate the integration of controlled vocabulary services, in particular by adding the possibility of saving the "term" and its associated URI. For more information, see #10288 and PR #10371. + +## Upgrade Instructions + +1\. Update the Citation metadata block + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.3/citation.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` + +2\. Update your Solr `schema.xml` to include the new field. + + For details, please see https://guides.dataverse.org/en/latest/admin/metadatacustomization.html#updating-the-solr-schema + + +3\. Reindex Solr. + + Once the schema.xml is updated, Solr must be restarted and a reindex initiated. + For details, see https://guides.dataverse.org/en/latest/admin/solr-search-index.html but here is the reindex command: + + `curl http://localhost:8080/api/admin/index` + + +4\. Run ReExportAll to update dataset metadata exports. Follow the instructions in the [Metadata Export of Admin Guide](https://guides.dataverse.org/en/latest/admin/metadataexport.html#batch-exports-through-the-api). + + +## Notes for Dataverse Installation Administrators + +### Data migration to the new `keywordTermURI` field + +You can migrate your `keywordValue` data containing URIs to the new `keywordTermURI` field. +In case of data migration, view the affected data with the following database query: + +``` +SELECT value FROM datasetfieldvalue dfv +INNER JOIN datasetfield df ON df.id = dfv.datasetfield_id +WHERE df.datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordValue') +AND value ILIKE 'http%'; +``` + +If you wish to migrate your data, a database update is then necessary: + +``` +UPDATE datasetfield df +SET datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordTermURI') +FROM datasetfieldvalue dfv +WHERE dfv.datasetfield_id = df.id +AND df.datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordValue') +AND dfv.value ILIKE 'http%'; +``` + +A ['Reindex in Place'](https://guides.dataverse.org/en/latest/admin/solr-search-index.html#reindex-in-place) will be required and ReExportAll will need to be run to update the metadata exports of the dataset. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/latest/admin/metadataexport.html#batch-exports-through-the-api). 
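+ +As an optional sanity check after the migration, you can count how many values are now stored under the new field (a suggested query built from the same tables as the migration above, not an official upgrade step): + +``` +SELECT COUNT(*) FROM datasetfield df +WHERE df.datasetfieldtype_id = (SELECT id FROM datasetfieldtype WHERE name = 'keywordTermURI'); +```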
\ No newline at end of file diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index 3bcf134bc76..cc856c6372f 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -231,6 +231,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -251,6 +257,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 82da5a12eaf..18354f2b1f7 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -23,62 +23,63 @@ subject Subject The area of study relevant to the Dataset text 19 TRUE TRUE TRUE TRUE TRUE TRUE citation http://purl.org/dc/terms/subject keyword Keyword A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation keywordValue Term A key term that describes important aspects of the Dataset text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation - keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation - topicClassValue Term A topic or subject term text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation - topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 32 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 46 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 49 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 58 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 59 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 61 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + keywordTermURI Term URI A URI that points to the web presence of the Keyword Term https:// url 22 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 23 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 24 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 25 FALSE FALSE TRUE FALSE FALSE FALSE citation + topicClassValue Term A topic or subject term text 26 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation + topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy + publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 22bad42df96..d91aa101eb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -91,8 +91,9 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String datasetVersionValue="datasetVersionValue"; public final static String versionDate="versionDate"; public final static String keywordValue="keywordValue"; - public final static String keywordVocab="keywordVocabulary"; //SEK 6/10/2016 to match what is in the db - public final static String keywordVocabURI="keywordVocabularyURI"; //SEK 6/10/2016 to match what is in the db + public final static String keywordTermURI="keywordTermURI"; + public final static String keywordVocab="keywordVocabulary"; + public final static String keywordVocabURI="keywordVocabularyURI"; public final static String topicClassValue="topicClassValue"; public final static String topicClassVocab="topicClassVocab"; public final static String topicClassVocabURI="topicClassVocabURI"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java b/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java deleted file mode 100644 index 747e3c068f1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java +++ /dev/null @@ -1,68 +0,0 @@ -package edu.harvard.iq.dataverse; - -/** - * - * @author skraffmiller - */ - -public class DatasetKeyword { - - private int displayOrder; - public int getDisplayOrder() { - return this.displayOrder; - } - public void setDisplayOrder(int displayOrder) { - 
this.displayOrder = displayOrder; - } - - private DatasetField value; - public DatasetField getValue() { - return this.value; - } - public void setValue(DatasetField value) { - this.value = value; - } - - private DatasetVersion datasetVersion; - public DatasetVersion getDatasetVersion() { - return datasetVersion; - } - public void setDatasetVersion(DatasetVersion metadata) { - this.datasetVersion = metadata; - } - /* - @Version - private Long version; - public Long getVersion() { - return this.version; - } - public void setVersion(Long version) { - this.version = version; - } */ - - private DatasetField vocab; - public DatasetField getVocab() { - return this.vocab; - } - public void setVocab(DatasetField vocab) { - this.vocab = vocab; - } - - private DatasetField vocabURI; - public DatasetField getVocabURI() { - return this.vocabURI; - } - public void setVocabURI(DatasetField vocabURI) { - this.vocabURI = vocabURI; - } - - - public boolean isEmpty() { - /*return ((value==null || value.getValue().trim().equals("")) - && (vocab==null || vocab.getValue().trim().equals("")) - && (vocabURI==null || vocabURI.getValue().trim().equals("")));*/ - return false; - } - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49ceabc5900..4b8822e8b66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -437,7 +437,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO for (String subject : fieldDTO.getMultipleVocab()) { if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, null, null, subject, language); + writeSubjectElement(xmlw, null, null, null, subject, language); } } } @@ -446,7 +446,8 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO for (HashSet fieldDTOs : fieldDTO.getMultipleCompound()) { String subject = null; String subjectScheme = null; - String schemeURI = null; + String keywordTermURI = null; + String keywordVocabURI = null; for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -454,18 +455,22 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO subject = next.getSinglePrimitive(); } + if (DatasetFieldConstant.keywordTermURI.equals(next.getTypeName())) { + keywordTermURI = next.getSinglePrimitive(); + } + if (DatasetFieldConstant.keywordVocab.equals(next.getTypeName())) { subjectScheme = next.getSinglePrimitive(); } - + if (DatasetFieldConstant.keywordVocabURI.equals(next.getTypeName())) { - schemeURI = next.getSinglePrimitive(); + keywordVocabURI = next.getSinglePrimitive(); } } if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, subjectScheme, schemeURI, subject, language); + writeSubjectElement(xmlw, subjectScheme, keywordTermURI, keywordVocabURI, subject, language); } } } @@ -493,7 +498,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, subjectScheme, schemeURI, subject, language); + writeSubjectElement(xmlw, subjectScheme, null, schemeURI, subject, language); } } } @@ -513,7 
+518,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO * @param language * @throws XMLStreamException */ - private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectScheme, String schemeURI, String value, String language) throws XMLStreamException { + private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectScheme, String valueURI, String schemeURI, String value, String language) throws XMLStreamException { // write a subject Map subject_map = new HashMap(); @@ -524,6 +529,9 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectSche if (StringUtils.isNotBlank(subjectScheme)) { subject_map.put("subjectScheme", subjectScheme); } + if (StringUtils.isNotBlank(valueURI)) { + subject_map.put("valueURI", valueURI); + } if (StringUtils.isNotBlank(schemeURI)) { subject_map.put("schemeURI", schemeURI); } diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index f35ede79b50..cb864eb78e9 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -22,6 +22,7 @@ datasetfieldtype.dsDescriptionValue.title=Text datasetfieldtype.dsDescriptionDate.title=Date datasetfieldtype.subject.title=Subject datasetfieldtype.keyword.title=Keyword +datasetfieldtype.keywordTermURI.title=Term URI datasetfieldtype.keywordValue.title=Term datasetfieldtype.keywordVocabulary.title=Controlled Vocabulary Name datasetfieldtype.keywordVocabularyURI.title=Controlled Vocabulary URL @@ -100,6 +101,7 @@ datasetfieldtype.dsDescriptionValue.description=A summary describing the purpose datasetfieldtype.dsDescriptionDate.description=The date when the description was added to the Dataset. If the Dataset contains more than one description, e.g. the data producer supplied one description and the data repository supplied another, this date is used to distinguish between the descriptions datasetfieldtype.subject.description=The area of study relevant to the Dataset datasetfieldtype.keyword.description=A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used +datasetfieldtype.keywordTermURI.description=A URI that points to the web presence of the Keyword Term datasetfieldtype.keywordValue.description=A key term that describes important aspects of the Dataset datasetfieldtype.keywordVocabulary.description=The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) datasetfieldtype.keywordVocabularyURI.description=The URL where one can access information about the term's controlled vocabulary @@ -178,6 +180,7 @@ datasetfieldtype.dsDescriptionValue.watermark= datasetfieldtype.dsDescriptionDate.watermark=YYYY-MM-DD datasetfieldtype.subject.watermark= datasetfieldtype.keyword.watermark= +datasetfieldtype.keywordTermURI.watermark=https:// datasetfieldtype.keywordValue.watermark= datasetfieldtype.keywordVocabulary.watermark= datasetfieldtype.keywordVocabularyURI.watermark=https:// diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 8ebdeea6243..2da15147255 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -305,8 +305,10 @@ public void testSubjectsElement() throws XMLStreamException, IOException { + "Engineering" + "Law" + "KeywordTerm1" + "KeywordTerm2" + "", stringWriter.toString()); diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index d600ccac53c..2139589b4c3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -322,6 +322,7 @@ private static void mockDatasetFieldSvc() { DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true)); Set keywordChildTypes = new HashSet<>(); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordValue", DatasetFieldType.FieldType.TEXT, false))); + keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordTermURI", DatasetFieldType.FieldType.TEXT, false))); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordVocabulary", DatasetFieldType.FieldType.TEXT, false))); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordVocabularyURI", DatasetFieldType.FieldType.TEXT, false))); keywordType.setChildDatasetFieldTypes(keywordChildTypes); diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index 342d4b6fabf..431f069cb03 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -271,6 +271,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -291,6 +297,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index b23741517c9..9cf04bd0e05 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ 
b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -230,6 +230,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -250,6 +256,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json index f9882aed3dd..2d4ca078962 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json @@ -238,6 +238,12 @@ "typeClass": "primitive", "value": "Keyword Value 1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -258,6 +264,12 @@ "typeClass": "primitive", "value": "Keyword Value Two" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/resources/json/dataset-finch2.json b/src/test/resources/json/dataset-finch2.json index 4bd6f33eb42..b214eacfa3c 100644 --- a/src/test/resources/json/dataset-finch2.json +++ b/src/test/resources/json/dataset-finch2.json @@ -100,6 +100,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -120,6 +126,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/resources/json/dataset-long-description.json b/src/test/resources/json/dataset-long-description.json index a6e5c291322..4d5478b0f63 100644 --- a/src/test/resources/json/dataset-long-description.json +++ b/src/test/resources/json/dataset-long-description.json @@ -92,6 +92,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -112,6 +118,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, From d113d7c358147b9caa7e4b148807dd28eb32c9d9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 18 Jun 2024 09:02:30 -0400 Subject: [PATCH 47/85] Deprecate rsync: remove from guides, mark code as deprecated 
(#10620) * remove rsync docs #8985 * add deprecation notices to Java code for rsync #8985 * delete deprecated config options from guides #8985 * whoops, put :UploadMethods doc ref back #8985 * restore :PublicInstall to non-deprecated #8985 --- doc/release-notes/8985-deprecate-rsync.md | 8 + .../checksumValidationSuccess.json | 5 - doc/sphinx-guides/source/api/native-api.rst | 6 +- .../source/developers/big-data-support.rst | 178 +----------------- .../source/installation/config.rst | 24 +-- .../source/user/dataset-management.rst | 34 +--- .../source/user/find-use-data.rst | 13 -- .../data/storageSites/add-storage-site.json | 6 - .../harvard/iq/dataverse/SettingsWrapper.java | 11 +- .../harvard/iq/dataverse/api/Datasets.java | 1 + .../DataCaptureModuleException.java | 2 + .../DataCaptureModuleUtil.java | 7 + .../impl/RequestRsyncScriptCommand.java | 1 + ...RepositoryStorageAbstractionLayerPage.java | 4 + ...RepositoryStorageAbstractionLayerUtil.java | 6 + .../settings/SettingsServiceBean.java | 8 + .../iq/dataverse/util/SystemConfig.java | 14 +- 17 files changed, 61 insertions(+), 267 deletions(-) create mode 100644 doc/release-notes/8985-deprecate-rsync.md delete mode 100644 doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json delete mode 100644 scripts/api/data/storageSites/add-storage-site.json diff --git a/doc/release-notes/8985-deprecate-rsync.md b/doc/release-notes/8985-deprecate-rsync.md new file mode 100644 index 00000000000..44563f292fd --- /dev/null +++ b/doc/release-notes/8985-deprecate-rsync.md @@ -0,0 +1,8 @@ +Support for rsync has been deprecated. Information has been removed from the guides for rsync and related software such as Data Capture Module (DCM) and Repository Storage Abstraction Layer (RSAL). You can still find this information in [older versions](https://guides.dataverse.org/en/6.2/developers/big-data-support.html#data-capture-module-dcm) of the guides. + +The following related database settings have been deprecated as well: + +- :DataCaptureModuleUrl +- :DownloadMethods +- :LocalDataAccessPath +- :RepositoryStorageAbstractionLayerUrl diff --git a/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json b/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json deleted file mode 100644 index 6b609c4c65e..00000000000 --- a/doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "status": "validation passed", - "uploadFolder": "OS7O8Y", - "totalSize": 72 -} diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 8c54a937353..04123321e54 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2315,7 +2315,7 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/locks?type=Ingest" -Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. +Currently implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload`` (deprecated), ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. 
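As an aside on the ``DcmUpload`` deprecation above, a hedged sketch of how an admin could check a dataset for leftover locks of the deprecated type, reusing the ``type`` filter shown earlier in this section (the dataset id ``24`` is illustrative, not from this patch)::

    curl "https://demo.dataverse.org/api/datasets/24/locks?type=DcmUpload"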
The API will output the list of locks, for example:: @@ -2406,7 +2406,7 @@ Use the following API to list ALL the locks on all the datasets in your installa The listing can be filtered by specific lock type **and/or** user, using the following *optional* query parameters: * ``userIdentifier`` - To list the locks owned by a specific user -* ``type`` - To list the locks of the type specified. If the supplied value does not match a known lock type, the API will return an error and a list of valid lock types. As of writing this, the implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload``, ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. +* ``type`` - To list the locks of the type specified. If the supplied value does not match a known lock type, the API will return an error and a list of valid lock types. As of writing this, the implemented lock types are ``Ingest``, ``Workflow``, ``InReview``, ``DcmUpload`` (deprecated), ``finalizePublication``, ``EditInProgress`` and ``FileValidationFailed``. For example: @@ -3192,7 +3192,7 @@ Note: you can use the combination of cURL's ``-J`` (``--remote-header-name``) an Restrict Files ~~~~~~~~~~~~~~ -Restrict or unrestrict an existing file where ``id`` is the database id of the file or ``pid`` is the persistent id (DOI or Handle) of the file to restrict. Note that some Dataverse installations do not allow the ability to restrict files. +Restrict or unrestrict an existing file where ``id`` is the database id of the file or ``pid`` is the persistent id (DOI or Handle) of the file to restrict. Note that some Dataverse installations do not allow the ability to restrict files (see :ref:`:PublicInstall`). A curl example using an ``id`` diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 5ea97029271..4aaed10512e 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -1,7 +1,7 @@ Big Data Support ================ -Big data support includes some highly experimental options. Eventually more of this content will move to the Installation Guide. +Big data support includes some experimental options. Eventually more of this content will move to the Installation Guide. .. contents:: |toctitle| :local: @@ -187,179 +187,3 @@ As described in that document, Globus transfers can be initiated by choosing the An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. See also :ref:`Globus settings <:GlobusSettings>`. - -Data Capture Module (DCM) -------------------------- - -Please note: The DCM feature is deprecated. - -Data Capture Module (DCM) is an experimental component that allows users to upload large datasets via rsync over ssh. - -DCM was developed and tested using Glassfish but these docs have been updated with references to Payara. - -Install a DCM -~~~~~~~~~~~~~ - -Installation instructions can be found at https://github.com/sbgrid/data-capture-module/blob/master/doc/installation.md. Note that shared storage (posix or AWS S3) between your Dataverse installation and your DCM is required. You cannot use a DCM with Swift at this point in time. - -.. FIXME: Explain what ``dataverse.files.dcm-s3-bucket-name`` is for and what it has to do with ``dataverse.files.s3.bucket-name``. 
- -Once you have installed a DCM, you will need to configure two database settings on the Dataverse installation side. These settings are documented in the :doc:`/installation/config` section of the Installation Guide: - -- ``:DataCaptureModuleUrl`` should be set to the URL of a DCM you installed. -- ``:UploadMethods`` should include ``dcm/rsync+ssh``. - -This will allow your Dataverse installation to communicate with your DCM, so that your Dataverse installation can download rsync scripts for your users. - -Downloading rsync scripts via Your Dataverse Installation's API -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The rsync script can be downloaded from your Dataverse installation via API using an authorized API token. In the curl example below, substitute ``$PERSISTENT_ID`` with a DOI or Handle: - -``curl -H "X-Dataverse-key: $API_TOKEN" $DV_BASE_URL/api/datasets/:persistentId/dataCaptureModule/rsync?persistentId=$PERSISTENT_ID`` - -How a DCM reports checksum success or failure to your Dataverse Installation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Once the user uploads files to a DCM, that DCM will perform checksum validation and report to your Dataverse installation the results of that validation. The DCM must be configured to pass the API token of a superuser. The implementation details, which are subject to change, are below. - -The JSON that a DCM sends to your Dataverse installation on successful checksum validation looks something like the contents of :download:`checksumValidationSuccess.json <../_static/installation/files/root/big-data-support/checksumValidationSuccess.json>` below: - -.. literalinclude:: ../_static/installation/files/root/big-data-support/checksumValidationSuccess.json - :language: json - -- ``status`` - The valid strings to send are ``validation passed`` and ``validation failed``. -- ``uploadFolder`` - This is the directory on disk where your Dataverse installation should attempt to find the files that a DCM has moved into place. There should always be a ``files.sha`` file and a least one data file. ``files.sha`` is a manifest of all the data files and their checksums. The ``uploadFolder`` directory is inside the directory where data is stored for the dataset and may have the same name as the "identifier" of the persistent id (DOI or Handle). For example, you would send ``"uploadFolder": "DNXV2H"`` in the JSON file when the absolute path to this directory is ``/usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/DNXV2H/DNXV2H``. -- ``totalSize`` - Your Dataverse installation will use this value to represent the total size in bytes of all the files in the "package" that's created. If 360 data files and one ``files.sha`` manifest file are in the ``uploadFolder``, this value is the sum of the 360 data files. - - -Here's the syntax for sending the JSON. 
- -``curl -H "X-Dataverse-key: $API_TOKEN" -X POST -H 'Content-type: application/json' --upload-file checksumValidationSuccess.json $DV_BASE_URL/api/datasets/:persistentId/dataCaptureModule/checksumValidation?persistentId=$PERSISTENT_ID`` - - -Steps to set up a DCM mock for Development -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See instructions at https://github.com/sbgrid/data-capture-module/blob/master/doc/mock.md - - -Add Dataverse Installation settings to use mock (same as using DCM, noted above): - -- ``curl http://localhost:8080/api/admin/settings/:DataCaptureModuleUrl -X PUT -d "http://localhost:5000"`` -- ``curl http://localhost:8080/api/admin/settings/:UploadMethods -X PUT -d "dcm/rsync+ssh"`` - -At this point you should be able to download a placeholder rsync script. Your Dataverse installation is then waiting for news from the DCM about if checksum validation has succeeded or not. First, you have to put files in place, which is usually the job of the DCM. You should substitute "X1METO" for the "identifier" of the dataset you create. You must also use the proper path for where you store files in your dev environment. - -- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO`` -- ``mkdir /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` -- ``cd /usr/local/payara6/glassfish/domains/domain1/files/10.5072/FK2/X1METO/X1METO`` -- ``echo "hello" > file1.txt`` -- ``shasum file1.txt > files.sha`` - - - -Now the files are in place and you need to send JSON to your Dataverse installation with a success or failure message as described above. Make a copy of ``doc/sphinx-guides/source/_static/installation/files/root/big-data-support/checksumValidationSuccess.json`` and put the identifier in place such as "X1METO" under "uploadFolder"). Then use curl as described above to send the JSON. - -Troubleshooting -~~~~~~~~~~~~~~~ - -The following low level command should only be used when troubleshooting the "import" code a DCM uses but is documented here for completeness. - -``curl -H "X-Dataverse-key: $API_TOKEN" -X POST "$DV_BASE_URL/api/batch/jobs/import/datasets/files/$DATASET_DB_ID?uploadFolder=$UPLOAD_FOLDER&totalSize=$TOTAL_SIZE"`` - -Repository Storage Abstraction Layer (RSAL) -------------------------------------------- - -Please note: The RSAL feature is deprecated. - -Steps to set up a DCM via Docker for Development -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -See https://github.com/IQSS/dataverse/blob/develop/conf/docker-dcm/readme.md - -Using the RSAL Docker Containers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Create a dataset (either with the procedure mentioned in DCM Docker Containers, or another process) -- Publish the dataset (from the client container): ``cd /mnt; ./publish_major.bash ${database_id}`` -- Run the RSAL component of the workflow (from the host): ``docker exec -it rsalsrv /opt/rsal/scn/pub.py`` -- If desired, from the client container you can download the dataset following the instructions in the dataset access section of the dataset page. - -Configuring the RSAL Mock -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Info for configuring the RSAL Mock: https://github.com/sbgrid/rsal/tree/master/mocks - -Also, to configure your Dataverse installation to use the new workflow you must do the following (see also the :doc:`workflows` section): - -1. Configure the RSAL URL: - -``curl -X PUT -d 'http://:5050' http://localhost:8080/api/admin/settings/:RepositoryStorageAbstractionLayerUrl`` - -2. 
Update workflow json with correct URL information: - -Edit internal-httpSR-workflow.json and replace url and rollbackUrl to be the url of your RSAL mock. - -3. Create the workflow: - -``curl http://localhost:8080/api/admin/workflows -X POST --data-binary @internal-httpSR-workflow.json -H "Content-type: application/json"`` - -4. List available workflows: - -``curl http://localhost:8080/api/admin/workflows`` - -5. Set the workflow (id) as the default workflow for the appropriate trigger: - -``curl http://localhost:8080/api/admin/workflows/default/PrePublishDataset -X PUT -d 2`` - -6. Check that the trigger has the appropriate default workflow set: - -``curl http://localhost:8080/api/admin/workflows/default/PrePublishDataset`` - -7. Add RSAL to whitelist - -8. When finished testing, unset the workflow: - -``curl -X DELETE http://localhost:8080/api/admin/workflows/default/PrePublishDataset`` - -Configuring download via rsync -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In order to see the rsync URLs, you must run this command: - -``curl -X PUT -d 'rsal/rsync' http://localhost:8080/api/admin/settings/:DownloadMethods`` - -.. TODO: Document these in the Installation Guide once they're final. - -To specify replication sites that appear in rsync URLs: - -Download :download:`add-storage-site.json <../../../../scripts/api/data/storageSites/add-storage-site.json>` and adjust it to meet your needs. The file should look something like this: - -.. literalinclude:: ../../../../scripts/api/data/storageSites/add-storage-site.json - -Then add the storage site using curl: - -``curl -H "Content-type:application/json" -X POST http://localhost:8080/api/admin/storageSites --upload-file add-storage-site.json`` - -You make a storage site the primary site by passing "true". Pass "false" to make it not the primary site. (id "1" in the example): - -``curl -X PUT -d true http://localhost:8080/api/admin/storageSites/1/primaryStorage`` - -You can delete a storage site like this (id "1" in the example): - -``curl -X DELETE http://localhost:8080/api/admin/storageSites/1`` - -You can view a single storage site like this: (id "1" in the example): - -``curl http://localhost:8080/api/admin/storageSites/1`` - -You can view all storage site like this: - -``curl http://localhost:8080/api/admin/storageSites`` - -In the GUI, this is called "Local Access". It's where you can compute on files on your cluster. - -``curl http://localhost:8080/api/admin/settings/:LocalDataAccessPath -X PUT -d "/programs/datagrid"`` - - diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8fb9460892b..213ac827819 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4218,20 +4218,6 @@ This is useful for specific cases where an installation's files are stored in pu ``curl -X PUT -d true http://localhost:8080/api/admin/settings/:PublicInstall`` -:DataCaptureModuleUrl -+++++++++++++++++++++ - -The URL for your Data Capture Module (DCM) installation. This component is experimental and can be downloaded from https://github.com/sbgrid/data-capture-module . - -``curl -X PUT -d 'https://dcm.example.edu' http://localhost:8080/api/admin/settings/:DataCaptureModuleUrl`` - -:RepositoryStorageAbstractionLayerUrl -+++++++++++++++++++++++++++++++++++++ - -The URL for your Repository Storage Abstraction Layer (RSAL) installation. This component is experimental and can be downloaded from https://github.com/sbgrid/rsal . 
- -``curl -X PUT -d 'https://rsal.example.edu' http://localhost:8080/api/admin/settings/:RepositoryStorageAbstractionLayerUrl`` - .. _:UploadMethods: :UploadMethods @@ -4241,23 +4227,15 @@ This setting controls which upload methods are available to users of your Datave - ``native/http``: Corresponds to "Upload with HTTP via your browser" and APIs that use HTTP (SWORD and native). - ``dvwebloader``: Corresponds to :ref:`folder-upload`. Note that ``dataverse.files..upload-redirect`` must be set to "true" on an S3 store for this method to show up in the UI. In addition, :ref:`:WebloaderUrl` must be set. CORS allowed on the S3 bucket. See :ref:`cors-s3-bucket`. -- ``dcm/rsync+ssh``: Corresponds to "Upload with rsync+ssh via Data Capture Module (DCM)". A lot of setup is required, as explained in the :doc:`/developers/big-data-support` section of the Developer Guide. Out of the box only ``native/http`` is enabled and will work without further configuration. To add multiple upload method, separate them using a comma like this: -``curl -X PUT -d 'native/http,dcm/rsync+ssh' http://localhost:8080/api/admin/settings/:UploadMethods`` +``curl -X PUT -d 'native/http,dvwebloader' http://localhost:8080/api/admin/settings/:UploadMethods`` You'll always want at least one upload method, so the easiest way to remove one of them is to simply ``PUT`` just the one you want, like this: ``curl -X PUT -d 'native/http' http://localhost:8080/api/admin/settings/:UploadMethods`` -:DownloadMethods -++++++++++++++++ - -This setting is experimental and related to Repository Storage Abstraction Layer (RSAL). - -``curl -X PUT -d 'rsal/rsync' http://localhost:8080/api/admin/settings/:DownloadMethods`` - :GuestbookResponsesPageDisplayLimit +++++++++++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index d803aae6d19..a1e214589e3 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -84,7 +84,7 @@ HTTP Upload is a common browser-based file upload tool you may be familiar with Once you have uploaded files, you will be able to edit file metadata, restrict access to files [#f1]_ , and/or add tags. Click "Save Changes" to complete the upload. If you uploaded a file by mistake, you can delete it before saving by clicking the checkbox to select the file, and then clicking the "Delete" button above the Files Table. -File upload limit size varies based on Dataverse installation. The file upload size limit can be found in the text above the HTTP upload widget. If you need to upload a very large file or a very large *number* of files, consider using rsync + SSH upload if your Dataverse installation offers it. +File upload limit size varies based on Dataverse installation. The file upload size limit can be found in the text above the HTTP upload widget. If you need to upload a very large file or a very large *number* of files, consider using DVUploader (see :ref:`DVUploader`). .. [#f1] Some Dataverse installations do not allow this feature. @@ -100,37 +100,7 @@ Folder Upload Some Dataverse installations support the ability to upload files from a local folder and subfolders. To do this, click the "Upload from Folder" button, select the folder you wish to upload, select/unselect specific files, and click "Start Uploads". More detailed instructions are available in the `DVWebloader wiki `_. -.. 
_rsync_upload: - -rsync + SSH Upload ------------------- - -rsync is typically used for synchronizing files and directories between two different systems, using SSH to connect rather than HTTP. Some Dataverse installations allow uploads using rsync, to facilitate large file transfers in a reliable and secure manner. - -File Upload Script -~~~~~~~~~~~~~~~~~~ - -An rsync-enabled Dataverse installation has a file upload process that differs from the traditional browser-based upload process you may be used to. In order to transfer your data to the Dataverse installation's storage, you will need to complete the following steps: - -1. Create your dataset. In rsync-enabled Dataverse installations, you cannot upload files until the dataset creation process is complete. After you hit "Save Dataset" on the Dataset Creation page, you will be taken to the page for your dataset. - -2. On the dataset page, click the "+ Upload Files" button. This will open a box with instructions and a link to the file upload script. - -3. Make sure your files are ready for upload. You will need to have one directory that you can point the upload script to. All files in this directory and in any subdirectories will be uploaded. The directory structure will be preserved, and will be reproduced when your dataset is downloaded from the Dataverse installation. Note that your data will be uploaded in the form of a data package, and each dataset can only host one such package. Be sure that all files you want to include are present before you upload. - -4. Download the rsync file upload script by clicking the "Download Script" button in the Upload Files instruction box. There are no requirements for where you save the script; put it somewhere you can find it. Downloading the upload script will put a temporary lock on your dataset to prepare it for upload. While your dataset is locked, you will not be able to delete or publish your dataset, or edit its metadata. Once you upload your files and Dataverse installation processes them, your dataset will be automatically unlocked and these disabled functions will be enabled again. If you have downloaded the script and locked your dataset, but you have then changed your mind and decided *not* to upload files, please contact Support about unlocking your dataset. - -5. To begin the upload process, you will need to run the script you downloaded. For this, you will have to go outside your browser and open a terminal (AKA command line) window on your computer. Use the terminal to navigate to the directory where you saved the upload script, and run the command that the Upload Files instruction box provides. This will begin the upload script. Please note that this upload script will expire 7 days after you downloaded it. If it expires and you still need to use it, simply download the script from the Dataverse installation again. - -**Note:** Unlike other operating systems, Windows does not come with rsync supported by default. We have not optimized this feature for Windows users, but you may be able to get it working if you install the right Unix utilities. (If you have found a way to get this feature working for you on Windows, you can contribute it to our project. Please reference our `Contributing to the Dataverse Project `_ document in the root of the source tree.) - -6. Follow the instructions provided by the upload script running in your terminal. It will direct you to enter the full path of the directory where your dataset files are located, and then it will start the upload process. 
Once you've initiated the upload, if you need to cancel it then you can do so by canceling the script running in your terminal window. If your upload gets interrupted, you can resume it from the same point later. - -7. Once the upload script completes its job, the Dataverse installation will begin processing your data upload and running a checksum validation. This may take some time depending on the file size of your upload. During processing, you will see a blue bar at the bottom of the dataset page that reads "Upload in progress..." - -8. Once processing is complete, you will be notified. At this point you can publish your dataset and your data will be available for download on the dataset page. - -**Note:** A dataset can only hold one data package. If you need to replace the data package in your dataset, contact Support. +.. _DVUploader: Command-line DVUploader ----------------------- diff --git a/doc/sphinx-guides/source/user/find-use-data.rst b/doc/sphinx-guides/source/user/find-use-data.rst index bea23cbcd0e..4bf45774b53 100755 --- a/doc/sphinx-guides/source/user/find-use-data.rst +++ b/doc/sphinx-guides/source/user/find-use-data.rst @@ -142,19 +142,6 @@ Downloading a Dataverse File Package via URL Dataverse File Packages are typically used to represent extremely large files or bundles containing a large number of files. Dataverse File Packages are often too large to be reliably downloaded using a web browser. When you click to download a Dataverse File Package, instead of automatically initiating the download in your web browser, the Dataverse installation displays a plaintext URL for the location of the file. To ensure a reliable, resumable download, we recommend using `GNU Wget `_ in a command line terminal or using a download manager software of your choice. If you try to simply paste the URL into your web browser then the download may overwhelm your browser, resulting in an interrupted, timed out, or otherwise failed download. -.. _rsync_download: - -Downloading a Dataverse File Package via rsync -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -rsync is typically used for synchronizing files and directories between two different systems. Some Dataverse installations allow downloads using rsync, to facilitate large file transfers in a reliable and secure manner. - -rsync-enabled Dataverse installations offer a new file download process that differs from traditional browser-based downloading. Instead of multiple files, each dataset uploaded via rsync contains a single "Dataverse File Package". When you download this package you will receive a folder that contains all files from the dataset, arranged in the exact folder structure in which they were originally uploaded. - -In a dataset containing a Dataverse File Package, the information to download and/or access is in outlined the **Data Access** listed under the Access File button. If the data is locally available to you (on a shared drive, for example) you will find the folder path to access the data locally. To download, use one of the rsync commands provided. There may be multiple commands, each corresponding to a different mirror that hosts the Dataverse File Package. Go outside your browser and open a terminal (AKA command line) window on your computer. Use the terminal to run the command that corresponds with the mirror of your choice. It’s usually best to choose the mirror that is geographically closest to you. Running this command will initiate the download process. 
- -After you've downloaded the Dataverse File Package, you may want to double-check that your download went perfectly. Under **Verify Data**, you'll find a command that you can run in your terminal that will initiate a checksum to ensure that the data you downloaded matches the data in the Dataverse installation precisely. This way, you can ensure the integrity of the data you're working with. - Explore Data ------------ diff --git a/scripts/api/data/storageSites/add-storage-site.json b/scripts/api/data/storageSites/add-storage-site.json deleted file mode 100644 index d13ec2f165d..00000000000 --- a/scripts/api/data/storageSites/add-storage-site.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "hostname": "dataverse.librascholar.edu", - "name": "LibraScholar, USA", - "primaryStorage": true, - "transferProtocols": "rsync,posix,globus" -} diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 7854f5adfd8..48196591b19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -305,14 +305,16 @@ public boolean isPublicInstall(){ } return publicInstall; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload() { if (rsyncUpload == null) { rsyncUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); } return rsyncUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { if (rsyncDownload == null) { rsyncDownload = systemConfig.isRsyncDownload(); @@ -379,7 +381,8 @@ public boolean isWebloaderUpload() { } return webloaderUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); @@ -398,7 +401,7 @@ public boolean isRsyncOnly() { } return rsyncOnly; } - + public boolean isHTTPUpload(){ if (httpUpload == null) { httpUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index fc0afc562fc..9e9f89c8140 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2297,6 +2297,7 @@ public Response removeDatasetLogo(@Context ContainerRequestContext crc, @PathPar } } + @Deprecated(forRemoval = true, since = "2024-07-07") @GET @AuthRequired @Path("{identifier}/dataCaptureModule/rsync") diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java index 3329d92b7a9..474674bda73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java @@ -1,7 +1,9 @@ package edu.harvard.iq.dataverse.datacapturemodule; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleException extends Exception { + @Deprecated(forRemoval = true, since = "2024-07-07") public DataCaptureModuleException(String message, Throwable cause) { super(message, cause); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java 
b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java index 460e4727afc..094d3976133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java @@ -12,10 +12,12 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleUtil { private static final Logger logger = Logger.getLogger(DataCaptureModuleUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { logger.fine("uploadMethodsSettings: " + uploadMethodsSettings);; if (uploadMethodsSettings==null){ @@ -28,6 +30,7 @@ public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { /** * generate JSON to send to DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Dataset dataset) { JsonObjectBuilder jab = Json.createObjectBuilder(); // The general rule should be to always pass the user id and dataset identifier to the DCM. @@ -39,6 +42,7 @@ public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Da /** * transfer script from DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static ScriptRequestResponse getScriptFromRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); JsonNode body = uploadRequest.getBody(); @@ -54,6 +58,7 @@ public static ScriptRequestResponse getScriptFromRequest(HttpResponse return scriptRequestResponse; } + @Deprecated(forRemoval = true, since = "2024-07-07") static UploadRequestResponse makeUploadRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); String body = uploadRequest.getBody(); @@ -61,6 +66,7 @@ static UploadRequestResponse makeUploadRequest(HttpResponse uploadReques return new UploadRequestResponse(uploadRequest.getStatus(), body); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getMessageFromException(DataCaptureModuleException ex) { if (ex == null) { return "DataCaptureModuleException was null!"; @@ -76,6 +82,7 @@ public static String getMessageFromException(DataCaptureModuleException ex) { return message + " was caused by " + cause.getMessage(); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getScriptName(DatasetVersion datasetVersion) { return "upload-" + datasetVersion.getDataset().getIdentifier().replace("/", "_") + ".bash"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java index a29e7fdd59c..6b7baa7d01b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java @@ -27,6 +27,7 @@ * "actiontype" in the actionlogrecord rather than "InternalError" if you throw * a CommandExecutionException. 
*/ +@Deprecated(forRemoval = true, since = "2024-07-07") @RequiredPermissions(Permission.EditDataset) public class RequestRsyncScriptCommand extends AbstractCommand { diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java index c252d2e3330..9edb536eda2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java @@ -11,6 +11,7 @@ import jakarta.inject.Named; import jakarta.json.JsonArray; +@Deprecated(forRemoval = true, since = "2024-07-07") @Stateless @Named public class RepositoryStorageAbstractionLayerPage { @@ -22,17 +23,20 @@ public class RepositoryStorageAbstractionLayerPage { @EJB StorageSiteServiceBean storageSiteServiceBean; + @Deprecated(forRemoval = true, since = "2024-07-07") public String getLocalDataAccessDirectory(DatasetVersion datasetVersion) { String localDataAccessParentDir = settingsService.getValueForKey(SettingsServiceBean.Key.LocalDataAccessPath); return RepositoryStorageAbstractionLayerUtil.getLocalDataAccessDirectory(localDataAccessParentDir, datasetVersion.getDataset()); } + @Deprecated(forRemoval = true, since = "2024-07-07") public List getRsyncSites(DatasetVersion datasetVersion) { List storageSites = storageSiteServiceBean.findAll(); JsonArray storageSitesAsJson = RepositoryStorageAbstractionLayerUtil.getStorageSitesAsJson(storageSites); return RepositoryStorageAbstractionLayerUtil.getRsyncSites(datasetVersion.getDataset(), storageSitesAsJson); } + @Deprecated(forRemoval = true, since = "2024-07-07") public String getVerifyDataCommand(DatasetVersion datasetVersion) { return RepositoryStorageAbstractionLayerUtil.getVerifyDataCommand(datasetVersion.getDataset()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java index 8501fba3ce0..0d547402676 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java @@ -13,10 +13,12 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +@Deprecated(forRemoval = true, since = "2024-07-07") public class RepositoryStorageAbstractionLayerUtil { private static final Logger logger = Logger.getLogger(RepositoryStorageAbstractionLayerUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static List getRsyncSites(Dataset dataset, JsonArray rsalSitesAsJson) { List rsalSites = new ArrayList<>(); boolean leafDirectoryOnly = false; @@ -30,6 +32,7 @@ public static List getRsyncSites(Dataset dataset, JsonArray rsalSites return rsalSites; } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getLocalDataAccessDirectory(String localDataAccessParentDir, Dataset dataset) { if (localDataAccessParentDir == null) { localDataAccessParentDir = File.separator + "UNCONFIGURED ( " + SettingsServiceBean.Key.LocalDataAccessPath + " )"; @@ -38,6 +41,7 @@ static String getLocalDataAccessDirectory(String 
localDataAccessParentDir, Datas return localDataAccessParentDir + File.separator + getDirectoryContainingTheData(dataset, leafDirectoryOnly); } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getVerifyDataCommand(Dataset dataset) { boolean leafDirectoryOnly = true; // TODO: if "files.sha" is defined somewhere, use it. @@ -51,6 +55,7 @@ static String getVerifyDataCommand(Dataset dataset) { * leafDirectoryOnly. See also * http://www.gnu.org/software/coreutils/manual/html_node/basename-invocation.html */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getDirectoryContainingTheData(Dataset dataset, boolean leafDirectoryOnly) { /** * FIXME: What if there is more than one package in the dataset? @@ -81,6 +86,7 @@ public static String getDirectoryContainingTheData(Dataset dataset, boolean leaf * RSAL or some other "big data" component live for a list of remotes sites * to which a particular dataset is replicated to. */ + @Deprecated(forRemoval = true, since = "2024-07-07") static JsonArray getStorageSitesAsJson(List storageSites) { JsonArrayBuilder arraybuilder = Json.createArrayBuilder(); if (storageSites == null || storageSites.isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 35d70498c3f..a1975b0b975 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -55,6 +55,10 @@ public enum Key { CustomDatasetSummaryFields, /** * Defines a public installation -- all datafiles are unrestricted + * + * This was added along with CloudEnvironmentName and ComputeBaseUrl. + * See https://github.com/IQSS/dataverse/issues/3776 and + * https://github.com/IQSS/dataverse/pull/3967 */ PublicInstall, /** @@ -75,9 +79,12 @@ public enum Key { /** * For example, https://datacapture.example.org */ + @Deprecated(forRemoval = true, since = "2024-07-07") DataCaptureModuleUrl, + @Deprecated(forRemoval = true, since = "2024-07-07") RepositoryStorageAbstractionLayerUrl, UploadMethods, + @Deprecated(forRemoval = true, since = "2024-07-07") DownloadMethods, /** * If the data replicated around the world using RSAL (Repository @@ -87,6 +94,7 @@ public enum Key { * TODO: Think about if it makes sense to make this a column in the * StorageSite database table. */ + @Deprecated(forRemoval = true, since = "2024-07-07") LocalDataAccessPath, /** * The algorithm used to generate PIDs, randomString (default) or diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 3f2f36ea36a..f9801419e47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -752,6 +752,7 @@ public enum FileUploadMethods { * DCM stands for Data Capture Module. Right now it supports upload over * rsync+ssh but DCM may support additional methods in the future. */ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("dcm/rsync+ssh"), /** * Traditional Dataverse file handling, which tends to involve users @@ -809,6 +810,7 @@ public enum FileDownloadMethods { * RSAL stands for Repository Storage Abstraction Layer. Downloads don't * go through Glassfish. 
*/ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsal/rsync"), NATIVE("native/http"), GLOBUS("globus") @@ -862,6 +864,7 @@ public String toString() { */ public enum TransferProtocols { + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsync"), /** * POSIX includes NFS. This is related to Key.LocalDataAccessPath in @@ -898,7 +901,8 @@ public boolean isPublicInstall(){ boolean saneDefault = false; return settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, saneDefault); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), true); } @@ -915,7 +919,8 @@ public boolean isWebloaderUpload(){ public boolean isHTTPUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), true); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly(){ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); if(downloadMethods == null){ @@ -931,11 +936,12 @@ public boolean isRsyncOnly(){ return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size() == 1 && uploadMethods.toLowerCase().equals(SystemConfig.FileUploadMethods.RSYNC.toString()); } } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), false); } - + public boolean isHTTPDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), false); } From 853965ec6058ddb988e09ef4a8fcb38c1a151619 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:19:44 -0400 Subject: [PATCH 48/85] Banner IT improvements after resolving issues on rocky. 
(#10565) * Rocky IX * Logging pretty print * Removed use of function with hardcoded value * Clearing banners * Added codeblock with JSON * Banners size * Trace added * Import fix * Pull test * lOG TEST * Log test * Log test * ASD * Debug commit * Debug commit * Dbug commit * Dbug commit * Bbug commit * Dbug cmmt * dbg cmmt * dbg cmmit * Dbg cmmt * reset bannermessage * Reset Admin * Reset AdminIT * Reset admin IT * Clear console output * Last changes * Last changes for this test * Last update * Locale check * Docs and makefile to build guides * Makefile changes * Value of sphinx version loaded from requirements.txt * Last changes to makefile and docs * Correction to the docs --- .../10565-banner-test-improvements.md | 1 + doc/sphinx-guides/source/api/native-api.rst | 2 + .../source/developers/documentation.rst | 15 ++++- makefile | 14 ++++ .../harvard/iq/dataverse/BannerMessage.java | 2 +- .../dataverse/BannerMessageServiceBean.java | 2 + .../edu/harvard/iq/dataverse/api/Admin.java | 24 +++++-- .../edu/harvard/iq/dataverse/api/AdminIT.java | 66 +++++++++++-------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 14 ---- 9 files changed, 90 insertions(+), 50 deletions(-) create mode 100644 doc/release-notes/10565-banner-test-improvements.md create mode 100644 makefile diff --git a/doc/release-notes/10565-banner-test-improvements.md b/doc/release-notes/10565-banner-test-improvements.md new file mode 100644 index 00000000000..d9030f2a0c3 --- /dev/null +++ b/doc/release-notes/10565-banner-test-improvements.md @@ -0,0 +1 @@ +The endpoint `api/admin/bannerMessage` has been extended so that the ID of the newly created banner message is returned \ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 04123321e54..894f84d2aac 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5194,6 +5194,8 @@ Delete the setting under ``name``:: Manage Banner Messages ~~~~~~~~~~~~~~~~~~~~~~ +.. warning:: Adding a banner message with a language that is not supported by the installation will result in a 500 Internal Server Error response when trying to access the /bannerMessage endpoint. + Communications to users can be handled via banner messages that are displayed at the top of all pages within your Dataverse installation. Two types of banners can be configured: - A banner message where dismissibleByUser is set to false will be displayed to anyone viewing the application. These messages will be dismissible for a given session but will be displayed in any subsequent session until they are deleted by the Admin. This type of banner message is useful for situations such as upcoming maintenance windows and other downtime.
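To make the extended endpoint concrete, here is a hedged sketch of the round trip the release note above describes. The ``banner.json`` file name and request shape are illustrative assumptions (mirroring the ``bannerMessageTest.json`` fixture used by the tests below); the response shape follows the ``data.message`` and ``data.id`` fields that ``addBannerMessage`` now returns and that ``AdminIT`` asserts::

    # banner.json -- assumed shape of a banner message request:
    # {
    #   "dismissibleByUser": "true",
    #   "messageTexts": [
    #     { "lang": "en", "message": "Maintenance window on Tuesday" }
    #   ]
    # }
    curl -H 'Content-type: application/json' -X POST \
      http://localhost:8080/api/admin/bannerMessage --upload-file banner.json

    # With this patch the response carries the id of the new message:
    # {"status":"OK","data":{"message":"Banner Message added successfully.","id":1}}

The returned ``id`` lets the improved test (and admins) delete exactly the banner that was just created instead of scraping an id out of the full listing.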
diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index a4b8c027445..5ec74249f4d 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -92,9 +92,20 @@ Open a terminal, change directories to ``doc/sphinx-guides``, activate (or react Building the Guides with a Sphinx Docker Container ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +**Building with Docker and Makefile:** + +We have added a Makefile to simplify the process of building the guides using a Docker container. You can run the following targets from the repository root: + +- `make docs-html` +- `make docs-pdf` +- `make docs-epub` +- `make docs-all` + +**Building with Docker and CLI:** + If you want to build the guides using a Docker container, execute the following command in the repository root: -``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx:3.5.4 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make html"`` +``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx:7.2.6 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make html"`` Previewing the Guides ^^^^^^^^^^^^^^^^^^^^^ @@ -148,7 +159,7 @@ The HTML version of the guides is the official one. Any other formats are mainta If you would like to build a PDF version of the guides and have Docker installed, please try the command below from the root of the git repo: -``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx-latexpdf:3.5.4 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make latexpdf LATEXMKOPTS=\"-interaction=nonstopmode\"; cd ../.. && ls -1 doc/sphinx-guides/build/latex/Dataverse.pdf"`` +``docker run -it --rm -v $(pwd):/docs sphinxdoc/sphinx-latexpdf:7.2.6 bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make latexpdf LATEXMKOPTS=\"-interaction=nonstopmode\"; cd ../.. && ls -1 doc/sphinx-guides/build/latex/Dataverse.pdf"`` A few notes about the command above: diff --git a/makefile b/makefile new file mode 100644 index 00000000000..315ff9c508c --- /dev/null +++ b/makefile @@ -0,0 +1,14 @@ + +SPHINX_VERSION = $(shell grep "Sphinx" ./doc/sphinx-guides/requirements.txt | awk -F'==' '{print $$2}') docs-html: docker run -it --rm -v $$(pwd):/docs sphinxdoc/sphinx:$(SPHINX_VERSION) bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make clean && make html" docs-pdf: docker run -it --rm -v $$(pwd):/docs sphinxdoc/sphinx-latexpdf:$(SPHINX_VERSION) bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make clean && make latexpdf LATEXMKOPTS=\"-interaction=nonstopmode\"; cd ../.. && ls -1 doc/sphinx-guides/build/latex/Dataverse.pdf" docs-epub: docker run -it --rm -v $$(pwd):/docs sphinxdoc/sphinx:$(SPHINX_VERSION) bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make clean && make epub" docs-all: docker run -it --rm -v $$(pwd):/docs sphinxdoc/sphinx:$(SPHINX_VERSION) bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make clean && make html && make epub" docker run -it --rm -v $$(pwd):/docs sphinxdoc/sphinx-latexpdf:$(SPHINX_VERSION) bash -c "cd doc/sphinx-guides && pip3 install -r requirements.txt && make latexpdf LATEXMKOPTS=\"-interaction=nonstopmode\"; cd ../..
&& ls -1 doc/sphinx-guides/build/latex/Dataverse.pdf" \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java index 214e26965fa..003d1057972 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java @@ -46,7 +46,7 @@ public void setBannerMessageTexts(Collection bannerMessageTex public String getDisplayValue(){ - String retVal = ""; + String retVal = null; for (BannerMessageText msgTxt : this.getBannerMessageTexts()) { if (msgTxt.getLang().equals(BundleUtil.getCurrentLocale().getLanguage())) { retVal = msgTxt.getMessage(); diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java index 0e757998d58..3961bd064db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java @@ -46,8 +46,10 @@ public List findAllBannerMessages() { public void save( BannerMessage message ) { em.persist(message); + em.flush(); } + public void deleteBannerMessage(Object pk) { BannerMessage message = em.find(BannerMessage.class, pk); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 154fa2350bd..46bffb4a7f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2357,8 +2357,13 @@ public Response addBannerMessage(JsonObject jsonObject) throws WrappedResponse { messageText.setBannerMessage(toAdd); toAdd.getBannerMessageTexts().add(messageText); } - bannerMessageService.save(toAdd); - return ok("Banner Message added successfully."); + bannerMessageService.save(toAdd); + + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder() + .add("message", "Banner Message added successfully.") + .add("id", toAdd.getId()); + + return ok(jsonObjectBuilder); } catch (Exception e) { logger.warning("Unexpected Exception: " + e.getMessage()); @@ -2398,10 +2403,19 @@ public Response deactivateBannerMessage(@PathParam("id") Long id) throws Wrapped @Path("/bannerMessage") public Response getBannerMessages(@PathParam("id") Long id) throws WrappedResponse { - return ok(bannerMessageService.findAllBannerMessages().stream() - .map(m -> jsonObjectBuilder().add("id", m.getId()).add("displayValue", m.getDisplayValue())) - .collect(toJsonArray())); + List messagesList = bannerMessageService.findAllBannerMessages(); + + for (BannerMessage message : messagesList) { + // getDisplayValue() now returns null when no text matches the current locale + if (message.getDisplayValue() == null) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "No banner messages found for this locale."); + } + } + JsonArrayBuilder messages = messagesList.stream() + .map(m -> jsonObjectBuilder().add("id", m.getId()).add("displayValue", m.getDisplayValue())) + .collect(toJsonArray()); + + return ok(messages); } @POST diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 44f062e8254..6d7dd2eae29 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -15,22 +15,21 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; + + + import java.util.Map; import java.util.UUID; +import java.util.logging.Level; import java.util.logging.Logger; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; +import static jakarta.ws.rs.core.Response.Status.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; @@ -832,36 +831,47 @@ public void testClearThumbnailFailureFlag(){ @Test public void testBannerMessages(){ - - String pathToJsonFile = "scripts/api/data/bannerMessageError.json"; - Response addBannerMessageErrorResponse = UtilIT.addBannerMessage(pathToJsonFile); + + //We check for existing banner messages and get the number of existing messages + Response getBannerMessageResponse = UtilIT.getBannerMessages(); + getBannerMessageResponse.prettyPrint(); + getBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + Integer numBannerMessages = + JsonPath.from(getBannerMessageResponse.getBody().asString()).getInt("data.size()"); + + //We add a banner message with an error in the json file + String pathToJsonFile = "scripts/api/data/bannerMessageError.json"; + Response addBannerMessageErrorResponse = UtilIT.addBannerMessage(pathToJsonFile); addBannerMessageErrorResponse.prettyPrint(); - String body = addBannerMessageErrorResponse.getBody().asString(); - String status = JsonPath.from(body).getString("status"); - assertEquals("ERROR", status); + addBannerMessageErrorResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")); + //We add a banner message with a correct json file pathToJsonFile = "scripts/api/data/bannerMessageTest.json"; - Response addBannerMessageResponse = UtilIT.addBannerMessage(pathToJsonFile); addBannerMessageResponse.prettyPrint(); - body = addBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); + addBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("status", equalTo("OK")) + .body("data.message", equalTo("Banner Message added successfully.")); + Long addedBanner = Long.valueOf( + JsonPath.from(addBannerMessageResponse.getBody().asString()).getLong("data.id")); - Response getBannerMessageResponse = UtilIT.getBannerMessages(); + //We get the banner messages and check that the number of messages has increased by 1 + getBannerMessageResponse = UtilIT.getBannerMessages(); getBannerMessageResponse.prettyPrint(); - body = getBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); - String deleteId = UtilIT.getBannerMessageIdFromResponse(getBannerMessageResponse.getBody().asString()); - - System.out.print("delete id: " + deleteId); - - Response deleteBannerMessageResponse = UtilIT.deleteBannerMessage(new Long (deleteId)); + getBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(numBannerMessages + 1)); + + //We delete the banner message + Response deleteBannerMessageResponse = 
UtilIT.deleteBannerMessage(addedBanner); deleteBannerMessageResponse.prettyPrint(); - body = deleteBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); + deleteBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("status", equalTo("OK")); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c107ea97b51..b9ae97649a9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3387,20 +3387,6 @@ static Response deleteBannerMessage(Long id) { return deleteBannerMessageResponse; } - static String getBannerMessageIdFromResponse(String getBannerMessagesResponse) { - StringReader rdr = new StringReader(getBannerMessagesResponse); - JsonObject json = Json.createReader(rdr).readObject(); - - for (JsonObject obj : json.getJsonArray("data").getValuesAs(JsonObject.class)) { - String message = obj.getString("displayValue"); - if (message.equals("Banner Message For Deletion")) { - return obj.getJsonNumber("id").toString(); - } - } - - return "0"; - } - static Response getDatasetJsonLDMetadata(Integer datasetId, String apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) From c141941b0b1363176f994b224e27b70f0d735109 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:33:00 -0400 Subject: [PATCH 49/85] Null banner JSON message fix (#10640) * Null banner JSON message fix * message change --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 46bffb4a7f6..d60884bad2f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2337,6 +2337,7 @@ public Response addBannerMessage(JsonObject jsonObject) throws WrappedResponse { BannerMessage toAdd = new BannerMessage(); try { + String dismissible = jsonObject.getString("dismissibleByUser"); boolean dismissibleByUser = false; @@ -2367,7 +2368,7 @@ public Response addBannerMessage(JsonObject jsonObject) throws WrappedResponse { } catch (Exception e) { logger.warning("Unexpected Exception: " + e.getMessage()); - return error(Status.BAD_REQUEST, "Add Banner Message unexpected exception: " + e.getMessage()); + return error(Status.BAD_REQUEST, "Add Banner Message unexpected exception: invalid or missing JSON object."); } } From 83a904a5604af2699f27b43cfbf5d0c152272c31 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 20 Jun 2024 12:54:18 -0400 Subject: [PATCH 50/85] renamed the "anonymous"/"no jsession" setting, per review #10570 --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- .../harvard/iq/dataverse/settings/SettingsServiceBean.java | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index cb8161a6f69..009e964f27e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3871,8 +3871,8 @@ To enable the setting:: Similar to the above, 
but will disable the facets for Guest (unauthenticated) users only. -:DisableSolrFacetsForAnonymousUsers -+++++++++++++++++++++++++++++++++++ +:DisableSolrFacetsWithoutJsession ++++++++++++++++++++++++++++++++++ Same idea as with the 2 settings above. For the purposes of this setting, a request is considered "anonymous", if it came in without the JSESSION cookie supplied. A UI user who is browsing the holdings without logging in will have a valid JSESSION cookie, tied to a guest session. The main purpose of this setting is to hide the facets from bots, scripted crawlers and such (most of which - though not all - do not use cookies). Not letting the bots anywhere near the facets can serve a dual purpose on a busy instance experiencing problems with such abuse - some CPU cycles and resources can be saved by not having to generate the facets. And, even more importantly, it can prevent bots from attempting to crawl the facet trees, which has a potential for multiplying the service load. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index f160b2409ee..eae4a9f2977 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -800,7 +800,7 @@ public boolean isIndexedVersion() { // a guest user who came without the session cookie: Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { - if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForAnonymousUsers, false)) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsWithoutJsession, false)) { return isIndexedVersion = false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 542c392eb94..4f3f6e46e48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1113,7 +1113,7 @@ public boolean isFacetsDisabled() { // a guest user who came without the session cookie: Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { - if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForAnonymousUsers, false)) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsWithoutJsession, false)) { return this.solrFacetsDisabled = true; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 094268f80b7..8ed96690e84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -672,7 +672,7 @@ Whether Harvesting (OAI) service is enabled */ DisableSolrFacets, DisableSolrFacetsForGuestUsers, - DisableSolrFacetsForAnonymousUsers, + DisableSolrFacetsWithoutJsession, DisableUncheckedTypesFacet, /** * When ingesting tabular data files, store the generated tab-delimited From a1ffe0b08bf691d6d47d3d97d723f8f3c1718ffd Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 20 Jun 2024 13:25:32 -0400 Subject: [PATCH 51/85] missing : in a setting hyperlink #10570 --- 
doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 009e964f27e..c36a8026a6f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3876,7 +3876,7 @@ Similar to the above, but will disable the facets for Guest (unauthenticated) us Same idea as with the 2 settings above. For the purposes of this setting, a request is considered "anonymous", if it came in without the JSESSION cookie supplied. A UI user who is browsing the holdings without logging in will have a valid JSESSION cookie, tied to a guest session. The main purpose of this setting is to hide the facets from bots, scripted crawlers and such (most of which - though not all - do not use cookies). Not letting the bots anywhere near the facets can serve a dual purpose on a busy instance experiencing problems with such abuse - some CPU cycles and resources can be saved by not having to generate the facets. And, even more importantly, it can prevent bots from attempting to crawl the facet trees, which has a potential for multiplying the service load. -.. _:DisableUncheckedTypesFacet +.. _:DisableUncheckedTypesFacet: :DisableUncheckedTypesFacet +++++++++++++++++++++++++++ From 0f0c3b67975262bc00c4b31990d8cfe11bf8a95f Mon Sep 17 00:00:00 2001 From: landreev Date: Thu, 20 Jun 2024 17:36:50 -0400 Subject: [PATCH 52/85] Upgrade Solr to 9.4.1 (#10636) * As discussed in sec.92, getting rid of the conf/solr/9.3.0 directory, in favor of keeping the config files in conf/solr/ going forward. * Removed the hard-coded solr version number from the installer makefile * removed the hard-coded 9.3.0 from the metadata customization guide. * changed to reflect 9.4.1 as the currently required version * updated the "classic" dev. install instruction, to reflect the new solr version * a release note. * #92sec move solr conf files into versioned subdirectory * Revert "#92sec move solr conf files into versioned subdirectory" This reverts commit eaa9c7c997ffe1eda48721a08a7edadd7b4affee. * getting rid of 9.3.0 in this shellspec script #10636 * updated solr.version in the parent pom file (it is used in making docker images, apparently). #10636 * removed the remaining instances of the old solr version here and there #10636 * Removed the solr installation instructions from the "classic dev. install" guide, replaced them with "follow the instructions provided in the main Installation/prerequisites guide". No need to duplicate these in both places. #10636. * checking in @stevenwinship's workaround for the failing shellcheck test.
#10636 --------- Co-authored-by: Don Sizemore --- conf/solr/{9.3.0 => }/schema.xml | 0 conf/solr/{9.3.0 => }/solrconfig.xml | 0 conf/solr/{9.3.0 => }/update-fields.sh | 0 doc/release-notes/solr-9.4.1.md | 14 ++++++++ .../installation/files/etc/init.d/solr | 2 +- .../files/etc/systemd/solr.service | 6 ++-- .../source/admin/metadatacustomization.rst | 4 +-- .../source/developers/classic-dev-env.rst | 36 ++----------------- .../source/installation/prerequisites.rst | 16 ++++----- docker/compose/demo/compose.yml | 2 +- modules/container-configbaker/assembly.xml | 2 +- modules/dataverse-parent/pom.xml | 2 +- scripts/installer/Makefile | 8 ++--- tests/shell/spec/update_fields_spec.sh | 27 +++++++------- 14 files changed, 51 insertions(+), 68 deletions(-) rename conf/solr/{9.3.0 => }/schema.xml (100%) rename conf/solr/{9.3.0 => }/solrconfig.xml (100%) rename conf/solr/{9.3.0 => }/update-fields.sh (100%) create mode 100644 doc/release-notes/solr-9.4.1.md diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/schema.xml similarity index 100% rename from conf/solr/9.3.0/schema.xml rename to conf/solr/schema.xml diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/solrconfig.xml similarity index 100% rename from conf/solr/9.3.0/solrconfig.xml rename to conf/solr/solrconfig.xml diff --git a/conf/solr/9.3.0/update-fields.sh b/conf/solr/update-fields.sh similarity index 100% rename from conf/solr/9.3.0/update-fields.sh rename to conf/solr/update-fields.sh diff --git a/doc/release-notes/solr-9.4.1.md b/doc/release-notes/solr-9.4.1.md new file mode 100644 index 00000000000..13624a272ab --- /dev/null +++ b/doc/release-notes/solr-9.4.1.md @@ -0,0 +1,14 @@ +Solr 9.4.1 is now the version recommended in our installation guides and used with automated testing. There is a known security issue in the previously recommended version 9.3.0: https://nvd.nist.gov/vuln/detail/CVE-2023-36478. While the risk of an exploit should not be significant unless the Solr instance is accessible from outside networks (which we have always recommended against), existing Dataverse installations should consider upgrading. + +For the upgrade instructions section: + +[note that 6.3 will contain other solr-related changes, so the instructions may need to contain information merged from multiple release notes!] + +If you are upgrading Solr: + - Install solr-9.4.1 following the instructions from the Installation guide. + - Run a full reindex to populate the search catalog. + - Note that it may be possible to skip the reindexing step by simply moving the existing `.../server/solr/collection1/` under the new `solr-9.4.1` installation directory. This, however, has not been thoroughly tested and is not officially supported.
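To make the note above concrete, a sketch of the two paths, assuming the default `/usr/local/solr` layout used in the guides; the core move is the untested shortcut mentioned above, and the API call is the supported full reindex:

```shell
# Untested shortcut: carry the existing core over to the new installation.
sudo systemctl stop solr
sudo -u solr cp -r /usr/local/solr/solr-9.3.0/server/solr/collection1 \
                   /usr/local/solr/solr-9.4.1/server/solr/
sudo systemctl start solr

# Supported route: start the new Solr with an empty core, then reindex everything.
curl "http://localhost:8080/api/admin/index"
```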
+ + + + diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr index f7dba504e70..14df734cca7 100755 --- a/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr +++ b/doc/sphinx-guides/source/_static/installation/files/etc/init.d/solr @@ -5,7 +5,7 @@ # chkconfig: 35 92 08 # description: Starts and stops Apache Solr -SOLR_DIR="/usr/local/solr/solr-9.3.0" +SOLR_DIR="/usr/local/solr/solr-9.4.1" SOLR_COMMAND="bin/solr" SOLR_ARGS="-m 1g" SOLR_USER=solr diff --git a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service index 2ceeb0016d6..8ccf7652a49 100644 --- a/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service +++ b/doc/sphinx-guides/source/_static/installation/files/etc/systemd/solr.service @@ -5,9 +5,9 @@ After = syslog.target network.target remote-fs.target nss-lookup.target [Service] User = solr Type = forking -WorkingDirectory = /usr/local/solr/solr-9.3.0 -ExecStart = /usr/local/solr/solr-9.3.0/bin/solr start -m 1g -ExecStop = /usr/local/solr/solr-9.3.0/bin/solr stop +WorkingDirectory = /usr/local/solr/solr-9.4.1 +ExecStart = /usr/local/solr/solr-9.4.1/bin/solr start -m 1g +ExecStop = /usr/local/solr/solr-9.4.1/bin/solr stop LimitNOFILE=65000 LimitNPROC=65000 Restart=on-failure diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index e70cf0e0897..8752f11c1e5 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -513,7 +513,7 @@ the Solr schema configuration, including any enabled metadata schemas: ``curl "http://localhost:8080/api/admin/index/solr/schema"`` -You can use :download:`update-fields.sh <../../../../conf/solr/9.3.0/update-fields.sh>` to easily add these to the +You can use :download:`update-fields.sh <../../../../conf/solr/update-fields.sh>` to easily add these to the Solr schema you installed for your Dataverse installation. The script needs a target XML file containing your Solr schema. (See the :doc:`/installation/prerequisites/` section of @@ -537,7 +537,7 @@ from some place else than your Dataverse installation). Please note that reconfigurations of your Solr index might require a re-index. Usually release notes indicate a necessary re-index, but for your custom metadata you will need to keep track on your own. -Please note also that if you are going to make a pull request updating ``conf/solr/9.3.0/schema.xml`` with fields you have +Please note also that if you are going to make a pull request updating ``conf/solr/schema.xml`` with fields you have added, you should first load all the custom metadata blocks in ``scripts/api/data/metadatablocks`` (including ones you don't care about) to create a complete list of fields. (This might change in the future.) diff --git a/doc/sphinx-guides/source/developers/classic-dev-env.rst b/doc/sphinx-guides/source/developers/classic-dev-env.rst index 82e10b727ef..0ebb9431383 100755 --- a/doc/sphinx-guides/source/developers/classic-dev-env.rst +++ b/doc/sphinx-guides/source/developers/classic-dev-env.rst @@ -136,41 +136,9 @@ On Linux, you should just install PostgreSQL using your favorite package manager Install Solr ^^^^^^^^^^^^ -`Solr `_ 9.3.0 is required. +`Solr `_ 9.4.1 is required. 
-To install Solr, execute the following commands: - -``sudo mkdir /usr/local/solr`` - -``sudo chown $USER /usr/local/solr`` - -``cd /usr/local/solr`` - -``curl -O https://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz`` - -``tar xvfz solr-9.3.0.tgz`` - -``cd solr-9.3.0/server/solr`` - -``cp -r configsets/_default collection1`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/schema.xml`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/schema_dv_mdb_fields.xml`` - -``mv schema*.xml collection1/conf`` - -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/9.3.0/solrconfig.xml`` - -``mv solrconfig.xml collection1/conf/solrconfig.xml`` - -``cd /usr/local/solr/solr-9.3.0`` - -(Please note that the extra jetty argument below is a security measure to limit connections to Solr to only your computer. For extra security, run a firewall.) - -``bin/solr start -j "-Djetty.host=127.0.0.1"`` - -``bin/solr create_core -c collection1 -d server/solr/collection1/conf`` +Follow the instructions in the "Installing Solr" section of :doc:`/installation/prerequisites` in the main Installation guide. Install Service Dependencies Using Docker Compose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index 7f6e34789b8..571223e0aa6 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -163,7 +163,7 @@ The Dataverse software search index is powered by Solr. Supported Versions ================== -The Dataverse software has been tested with Solr version 9.3.0. Future releases in the 9.x series are likely to be compatible. Please get in touch (:ref:`support`) if you are having trouble with a newer version. +The Dataverse software has been tested with Solr version 9.4.1. Future releases in the 9.x series are likely to be compatible. Please get in touch (:ref:`support`) if you are having trouble with a newer version. Installing Solr =============== @@ -178,19 +178,19 @@ Become the ``solr`` user and then download and configure Solr:: su - solr cd /usr/local/solr - wget https://archive.apache.org/dist/solr/solr/9.3.0/solr-9.3.0.tgz - tar xvzf solr-9.3.0.tgz - cd solr-9.3.0 + wget https://archive.apache.org/dist/solr/solr/9.4.1/solr-9.4.1.tgz + tar xvzf solr-9.4.1.tgz + cd solr-9.4.1 cp -r server/solr/configsets/_default server/solr/collection1 You should already have a "dvinstall.zip" file that you downloaded from https://github.com/IQSS/dataverse/releases . Unzip it into ``/tmp``. Then copy the files into place:: - cp /tmp/dvinstall/schema*.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf - cp /tmp/dvinstall/solrconfig.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf + cp /tmp/dvinstall/schema*.xml /usr/local/solr/solr-9.4.1/server/solr/collection1/conf + cp /tmp/dvinstall/solrconfig.xml /usr/local/solr/solr-9.4.1/server/solr/collection1/conf Note: The Dataverse Project team has customized Solr to boost results that come from certain indexed elements inside the Dataverse installation, for example prioritizing results from Dataverse collections over Datasets. If you would like to remove this, edit your ``solrconfig.xml`` and remove the ```` element and its contents. 
If you have ideas about how this boosting could be improved, feel free to contact us through our Google Group https://groups.google.com/forum/#!forum/dataverse-dev . -A Dataverse installation requires a change to the ``jetty.xml`` file that ships with Solr. Edit ``/usr/local/solr/solr-9.3.0/server/etc/jetty.xml`` , increasing ``requestHeaderSize`` from ``8192`` to ``102400`` +A Dataverse installation requires a change to the ``jetty.xml`` file that ships with Solr. Edit ``/usr/local/solr/solr-9.4.1/server/etc/jetty.xml`` , increasing ``requestHeaderSize`` from ``8192`` to ``102400`` Solr will warn about needing to increase the number of file descriptors and max processes in a production environment but will still run with defaults. We have increased these values to the recommended levels by adding ulimit -n 65000 to the init script, and the following to ``/etc/security/limits.conf``:: @@ -209,7 +209,7 @@ Solr launches asynchronously and attempts to use the ``lsof`` binary to watch fo Finally, you need to tell Solr to create the core "collection1" on startup:: - echo "name=collection1" > /usr/local/solr/solr-9.3.0/server/solr/collection1/core.properties + echo "name=collection1" > /usr/local/solr/solr-9.4.1/server/solr/collection1/core.properties Dataverse collection ("dataverse") page uses Solr very heavily. On a busy instance this may cause the search engine to become the performance bottleneck, making these pages take increasingly longer to load, potentially affecting the overall performance of the application and/or causing Solr itself to crash. If this is observed on your instance, we recommend uncommenting the following lines in the ```` section of the ``solrconfig.xml`` file:: diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 6c2bdcf79a4..e6ffc9f392a 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -103,7 +103,7 @@ services: solr: container_name: "solr" hostname: "solr" - image: solr:9.3.0 + image: solr:9.4.1 depends_on: - solr_initializer restart: on-failure diff --git a/modules/container-configbaker/assembly.xml b/modules/container-configbaker/assembly.xml index 3285eef510a..3c55e6d64b6 100644 --- a/modules/container-configbaker/assembly.xml +++ b/modules/container-configbaker/assembly.xml @@ -8,7 +8,7 @@ - conf/solr/9.3.0 + conf/solr solr diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 612902b47a4..02f601ba0f6 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -150,7 +150,7 @@ 6.2023.8 42.7.2 - 9.3.0 + 9.4.1 1.12.290 26.30.0 diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile index 399bc65168a..8ea95534986 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -55,13 +55,13 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} @echo copying jhove schema file /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} +${SOLR_SCHEMA}: ../../conf/solr/schema.xml ../../conf/solr/update-fields.sh ${INSTALLER_ZIP_DIR} @echo copying Solr schema file - /bin/cp ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/schema.xml ../../conf/solr/update-fields.sh ${INSTALLER_ZIP_DIR} -${SOLR_CONFIG}: ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} +${SOLR_CONFIG}: ../../conf/solr/solrconfig.xml 
${INSTALLER_ZIP_DIR} @echo copying Solr config file - /bin/cp ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/solrconfig.xml ${INSTALLER_ZIP_DIR} ${PYTHON_FILES}: README_python.txt install.py installConfig.py installAppServer.py installUtils.py requirements.txt default.config interactive.config ${INSTALLER_ZIP_DIR} @echo copying Python installer files diff --git a/tests/shell/spec/update_fields_spec.sh b/tests/shell/spec/update_fields_spec.sh index 48054a121b7..fa61743bf6f 100644 --- a/tests/shell/spec/update_fields_spec.sh +++ b/tests/shell/spec/update_fields_spec.sh @@ -1,16 +1,17 @@ #shellcheck shell=sh +#shellcheck disable=SC2154 update_fields() { - ../../conf/solr/9.3.0/update-fields.sh "$@" + ../../conf/solr/update-fields.sh "$@" } Describe "Update fields command" Describe "can operate on upstream data" - copyUpstreamSchema() { cp ../../conf/solr/9.3.0/schema.xml data/solr/upstream-schema.xml; } + copyUpstreamSchema() { cp ../../conf/solr/schema.xml data/solr/upstream-schema.xml; } AfterAll 'copyUpstreamSchema' - Path schema-xml="../../conf/solr/9.3.0/schema.xml" + Path schema-xml="../../conf/solr/schema.xml" It "needs upstream schema.xml" The path schema-xml should be exist End @@ -115,16 +116,16 @@ Describe "Update fields command" End Describe "succeeds because" - setup() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } - cleanup() { rm data/solr/minimal-schema-work.xml; } - BeforeEach 'setup' - AfterEach 'cleanup' + setup1() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } + cleanup1() { rm data/solr/minimal-schema-work.xml; } + BeforeEach 'setup1' + AfterEach 'cleanup1' deleteUpstreamSchema() { rm data/solr/upstream-schema.xml; } AfterAll 'deleteUpstreamSchema' - match_content() { - grep -q "$@" "${match_content}" + match_content1() { + grep -q "$@" "${match_content1}" } It "prints nothing when editing minimal schema" @@ -133,8 +134,8 @@ Describe "Update fields command" The status should equal 0 The output should equal "" The path data/solr/minimal-schema-work.xml should be file - The path data/solr/minimal-schema-work.xml should satisfy match_content " Date: Fri, 21 Jun 2024 08:31:29 +0200 Subject: [PATCH 53/85] build,doc: update with Payara 6.2024.6 release --- doc/release-notes/10494-payara-upgrade.md | 4 ++-- doc/sphinx-guides/source/developers/classic-dev-env.rst | 6 +++--- doc/sphinx-guides/source/installation/prerequisites.rst | 6 +++--- doc/sphinx-guides/source/qa/test-automation.md | 2 +- modules/dataverse-parent/pom.xml | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/10494-payara-upgrade.md b/doc/release-notes/10494-payara-upgrade.md index 050faee1f3e..1e8f9aef7ba 100644 --- a/doc/release-notes/10494-payara-upgrade.md +++ b/doc/release-notes/10494-payara-upgrade.md @@ -1,6 +1,6 @@ -# Upgrade Payara to v6.2024.4 +# Upgrade Payara to v6.2024.6 -With this version of Dataverse, we encourage you to upgrade to version 6.2024.4. +With this version of Dataverse, we encourage you to upgrade to version 6.2024.6. This will address security issues accumulated since the release of 6.2023.8, which was required since Dataverse release 6.0. If you are using GDCC containers, this upgrade is included when pulling new release images. 
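For container-based installations, the preceding sentence is the whole upgrade story: the Payara bump ships inside the application image. A hedged illustration follows; the image name and tag are assumptions, so match them to your deployment:

```shell
# Re-pull the release image and restart; the compose file path matches the
# demo setup referenced elsewhere in this patch set.
docker pull gdcc/dataverse:6.3
docker compose -f docker/compose/demo/compose.yml up -d
```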
diff --git a/doc/sphinx-guides/source/developers/classic-dev-env.rst b/doc/sphinx-guides/source/developers/classic-dev-env.rst index a193c1f1b40..d305019004e 100755 --- a/doc/sphinx-guides/source/developers/classic-dev-env.rst +++ b/doc/sphinx-guides/source/developers/classic-dev-env.rst @@ -93,15 +93,15 @@ On Linux, install ``jq`` from your package manager or download a binary from htt Install Payara ~~~~~~~~~~~~~~ -Payara 6.2024.4 or higher is required. +Payara 6.2024.6 or higher is required. To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip`` +``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.6/payara-6.2024.6.zip`` -``sudo unzip payara-6.2024.4.zip`` +``sudo unzip payara-6.2024.6.zip`` ``sudo chown -R $USER /usr/local/payara6`` diff --git a/doc/sphinx-guides/source/installation/prerequisites.rst b/doc/sphinx-guides/source/installation/prerequisites.rst index eb3db50fdd6..151d44e6841 100644 --- a/doc/sphinx-guides/source/installation/prerequisites.rst +++ b/doc/sphinx-guides/source/installation/prerequisites.rst @@ -44,7 +44,7 @@ On RHEL/derivative you can make Java 17 the default with the ``alternatives`` co Payara ------ -Payara 6.2024.4 is recommended. Newer versions might work fine. Regular updates are recommended. +Payara 6.2024.6 is recommended. Newer versions might work fine. Regular updates are recommended. Installing Payara ================= @@ -55,8 +55,8 @@ Installing Payara - Download and install Payara (installed in ``/usr/local/payara6`` in the example commands below):: - # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip - # unzip payara-6.2024.4.zip + # wget https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.6/payara-6.2024.6.zip + # unzip payara-6.2024.6.zip # mv payara6 /usr/local If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index d2de33b48a5..fe0d51f9174 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -52,7 +52,7 @@ Go to the end of the log and then scroll up, looking for the failure. A failed A ``` TASK [dataverse : download payara zip] ***************************************** -fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.4/payara-6.2024.4.zip"} +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.6/payara-6.2024.6.zip"} ``` In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! 
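When that download step fails, a quick sanity check outside of CI is to probe the primary host and the Maven Central fallback mentioned above:

```shell
# HEAD requests only; -f makes curl exit non-zero on HTTP errors.
curl -fsIL "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2024.6/payara-6.2024.6.zip" | head -n 1
curl -fsIL "https://repo1.maven.org/maven2/fish/payara/distributions/payara/6.2024.6/payara-6.2024.6.zip" | head -n 1
```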
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index c7166e69813..e81c087056a 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -148,7 +148,7 @@ -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} - 6.2024.4 + 6.2024.6 42.7.2 9.4.1 1.12.290 From 7dd1f05e8c4b7d5509d7e6aa3f09c9caa1a57e02 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 21 Jun 2024 08:32:51 +0200 Subject: [PATCH 54/85] refactor: remove stale property for Jakarta EE API version As of Dataverse 6.0 we use Jakarta EE 10. The version of these dependencies comes from the Payara BOM and this property no longer has any use. Removing it will avoid future confusion. --- modules/dataverse-parent/pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index e81c087056a..f286a44841f 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -155,7 +155,6 @@ 26.30.0 - 8.0.0 1.7.35 2.15.1 1.2 From 10b63fe53f9a1ef214c5196ec7016e62a9f1ec83 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Fri, 21 Jun 2024 09:20:04 +0200 Subject: [PATCH 55/85] doc: add upgrade instructions for Payara 6.2024.6 #10494 As requested by @qqmyers --- doc/release-notes/10494-payara-upgrade.md | 106 ++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/doc/release-notes/10494-payara-upgrade.md b/doc/release-notes/10494-payara-upgrade.md index 1e8f9aef7ba..e8ecb6756e0 100644 --- a/doc/release-notes/10494-payara-upgrade.md +++ b/doc/release-notes/10494-payara-upgrade.md @@ -3,4 +3,110 @@ With this version of Dataverse, we encourage you to upgrade to version 6.2024.6. This will address security issues accumulated since the release of 6.2023.8, which was required since Dataverse release 6.0. +## Instructions for Upgrading + If you are using GDCC containers, this upgrade is included when pulling new release images. +No manual intervention is necessary. + +We recommend you ensure you have followed all update instructions from the past releases regarding Payara. +(Latest Payara update was for [v6.0](https://github.com/IQSS/dataverse/releases/tag/v6.0)) + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +The steps below are a simple matter of reusing your existing domain directory with the new distribution. +But we also recommend that you review the Payara upgrade instructions, as they can be helpful during any troubleshooting: +[Payara Release Notes](https://docs.payara.fish/community/docs/Release%20Notes/Release%20Notes%206.2024.6.html) +We assume you are already on a Dataverse 6.x installation, using a Payara 6.x release. + +```shell +export PAYARA=/usr/local/payara6 +``` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version + +```shell + $PAYARA/bin/asadmin list-applications + $PAYARA/bin/asadmin undeploy dataverse<-version> +``` + +2\. Stop Payara + +```shell + service payara stop + rm -rf $PAYARA/glassfish/domains/domain1/generated + rm -rf $PAYARA/glassfish/domains/domain1/osgi-cache + rm -rf $PAYARA/glassfish/domains/domain1/lib/databases +``` + +3\. Move the current Payara directory out of the way + +```shell + mv $PAYARA $PAYARA.MOVED +``` + +4\. Download the new Payara version (6.2024.6), and unzip it in its place + +5\.
Replace the brand new payara/glassfish/domains/domain1 with your old, preserved domain1 + +6\. Make sure that you have the following `--add-opens` options in your domain.xml. If not present, add them: + +```diff +--- payara-6.2023.8/glassfish/domains/domain1/config/domain.xml ++++ payara-6.2024.6/glassfish/domains/domain1/config/domain.xml +@@ -212,12 +212,16 @@ + --add-opens=java.naming/javax.naming.spi=ALL-UNNAMED + --add-opens=java.rmi/sun.rmi.transport=ALL-UNNAMED + --add-opens=java.logging/java.util.logging=ALL-UNNAMED ++ --add-opens=java.management/javax.management=ALL-UNNAMED ++ --add-opens=java.management/javax.management.openmbean=ALL-UNNAMED + [17|]--add-exports=java.base/sun.net.www=ALL-UNNAMED + [17|]--add-exports=java.base/sun.security.util=ALL-UNNAMED + [17|]--add-opens=java.base/java.lang.invoke=ALL-UNNAMED + [17|]--add-opens=java.desktop/java.beans=ALL-UNNAMED + [17|]--add-exports=jdk.naming.dns/com.sun.jndi.dns=ALL-UNNAMED + [17|]--add-exports=java.naming/com.sun.jndi.ldap=ALL-UNNAMED ++ [17|]--add-opens=java.base/java.io=ALL-UNNAMED ++ [21|]--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED + -Xmx512m + -XX:NewRatio=2 + -XX:+UnlockDiagnosticVMOptions +@@ -447,12 +451,16 @@ + --add-opens=java.naming/javax.naming.spi=ALL-UNNAMED + --add-opens=java.rmi/sun.rmi.transport=ALL-UNNAMED + --add-opens=java.logging/java.util.logging=ALL-UNNAMED ++ --add-opens=java.management/javax.management=ALL-UNNAMED ++ --add-opens=java.management/javax.management.openmbean=ALL-UNNAMED + [17|]--add-exports=java.base/sun.net.www=ALL-UNNAMED + [17|]--add-exports=java.base/sun.security.util=ALL-UNNAMED + [17|]--add-opens=java.base/java.lang.invoke=ALL-UNNAMED + [17|]--add-opens=java.desktop/java.beans=ALL-UNNAMED + [17|]--add-exports=jdk.naming.dns/com.sun.jndi.dns=ALL-UNNAMED + [17|]--add-exports=java.naming/com.sun.jndi.ldap=ALL-UNNAMED ++ [17|]--add-opens=java.base/java.io=ALL-UNNAMED ++ [21|]--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED + -Xmx512m + -XX:NewRatio=2 + -XX:+UnlockDiagnosticVMOptions +``` +(You can also save this as a patch file and try to apply it.) + + +7\. Start Payara + +```shell + service payara start +``` + +8\. Deploy this version. + +```shell + $PAYARA/bin/asadmin deploy dataverse-6.3.war +``` + +9\. 
Restart payara + +```shell + service payara stop + service payara start From b405081a62ac42dbcb94d25b2a9b7d2afb055939 Mon Sep 17 00:00:00 2001 From: luddaniel <83018819+luddaniel@users.noreply.github.com> Date: Fri, 21 Jun 2024 16:37:28 +0200 Subject: [PATCH 56/85] CVOC: Fix NPE (#10603) * Fixed NPE * CVOC - Improved logger message and avoid http call on invalid url --- .../iq/dataverse/DatasetFieldServiceBean.java | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 14d1f564ee6..34595728fa7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -6,6 +6,7 @@ import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import java.security.InvalidParameterException; import java.sql.Timestamp; import java.text.MessageFormat; import java.time.Instant; @@ -20,8 +21,6 @@ import jakarta.ejb.EJB; import jakarta.ejb.Stateless; -import jakarta.ejb.TransactionAttribute; -import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.json.Json; import jakarta.json.JsonArray; @@ -523,10 +522,19 @@ public void registerExternalTerm(JsonObject cvocEntry, String term, List Date: Fri, 21 Jun 2024 12:15:27 -0400 Subject: [PATCH 57/85] Fix DOI/ID message when deaccession a Dataset (#10610) * Fix DOI/ID message when deaccession a Dataset * Change to pull the DS from the Version * Test added and changed to use asString over toString * Change Global to PID * Refactor deaccessionDataset method to handle both datasetId and persistentId --- .../harvard/iq/dataverse/api/Datasets.java | 5 +++- .../harvard/iq/dataverse/api/DatasetsIT.java | 17 ++++++++++- .../edu/harvard/iq/dataverse/api/FilesIT.java | 1 + .../edu/harvard/iq/dataverse/api/UtilIT.java | 29 ++++++++++++++++--- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 9e9f89c8140..8daefd02f5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4839,7 +4839,10 @@ public Response deaccessionDataset(@Context ContainerRequestContext crc, @PathPa } } execCommand(new DeaccessionDatasetVersionCommand(req, datasetVersion, false)); - return ok("Dataset " + datasetId + " deaccessioned for version " + versionId); + + return ok("Dataset " + + (":persistentId".equals(datasetId) ? 
datasetVersion.getDataset().getGlobalId().asString() : datasetId) + + " deaccessioned for version " + versionId); } catch (JsonParsingException jpe) { return error(Response.Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d2d14b824bd..cb9481d3491 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3910,6 +3910,7 @@ public void getDatasetVersionCitation() { String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); @@ -3928,7 +3929,9 @@ public void getDatasetVersionCitation() { publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); - deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + deaccessionDatasetResponse.prettyPrint(); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()) + .assertThat().body("data.message", containsString(String.valueOf(datasetId))); // includeDeaccessioned false Response getDatasetVersionCitationNotDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiToken); @@ -3939,8 +3942,20 @@ public void getDatasetVersionCitation() { getDatasetVersionCitationDeaccessioned.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.message", containsString("DEACCESSIONED VERSION")); + + publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.prettyPrint(); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String persistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + + deaccessionDatasetResponse = UtilIT.deaccessionDataset(persistentId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.prettyPrint(); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()) + .assertThat().body("data.message", containsString(String.valueOf(persistentId))); } + @Test public void getVersionFiles() throws IOException, InterruptedException { Response createUser = UtilIT.createRandomUser(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 7150b32c2b0..e3c26284d55 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1526,6 +1526,7 @@ public void testGetFileInfo() { .body("data.label", equalTo(newFileNameSecondUpdate)); // The following tests cover cases where the dataset version is deaccessioned + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, "3.0", "Test reason", null, superUserApiToken); deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java 
b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index b9ae97649a9..257610dbc32 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3845,17 +3845,38 @@ static Response getHasBeenDeleted(String dataFileId, String apiToken) { .get("/api/files/" + dataFileId + "/hasBeenDeleted"); } - static Response deaccessionDataset(Integer datasetId, String version, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + static Response deaccessionDataset(int datasetId, String version, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + return deaccessionDataset(String.valueOf(datasetId), version, deaccessionReason, deaccessionForwardURL, apiToken); + } + + static Response deaccessionDataset(String datasetIdOrPersistentId, String versionId, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + + String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); jsonObjectBuilder.add("deaccessionReason", deaccessionReason); if (deaccessionForwardURL != null) { jsonObjectBuilder.add("deaccessionForwardURL", deaccessionForwardURL); } + String jsonString = jsonObjectBuilder.build().toString(); + StringBuilder query = new StringBuilder() + .append("/api/datasets/") + .append(idInPath) + .append("/versions/") + .append(versionId) + .append("/deaccession") + .append(optionalQueryParam); + return given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .body(jsonString) - .post("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession"); + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(jsonString) + .post(query.toString()); } static Response getDownloadSize(Integer datasetId, From ee986d59dd04643b8d007c8c39d1e0cc6be21b1e Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 24 Jun 2024 15:58:13 -0400 Subject: [PATCH 58/85] A quick fix for #10611. 
--- .../harvard/iq/dataverse/search/IndexServiceBean.java | 8 +++++++- .../harvard/iq/dataverse/search/SearchServiceBean.java | 5 ++--- .../edu/harvard/iq/dataverse/settings/FeatureFlags.java | 9 +++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 0102459ab9f..d3286d3be4b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -888,8 +888,14 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Mon, 24 Jun 2024 22:27:58 +0200 Subject: [PATCH 59/85] CVOC : Adding hidden metadata fields (Ontoportal integration) (#10503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add hidden metadata fields for cvoc JS * adding release note * Accept review suggestion on documentation * CVOC: fix typo to have "data-cvoc-managedfields" on dataset page visualization * Add cvoc index to identity bloc metadata --------- Co-authored-by: Jérôme ROUCOU --- .../10503-cvoc-hidden-html-fields.md | 11 ++++++++ src/main/webapp/metadataFragment.xhtml | 25 ++++++++++++++----- 2 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 doc/release-notes/10503-cvoc-hidden-html-fields.md diff --git a/doc/release-notes/10503-cvoc-hidden-html-fields.md b/doc/release-notes/10503-cvoc-hidden-html-fields.md new file mode 100644 index 00000000000..e3ea0463fb8 --- /dev/null +++ b/doc/release-notes/10503-cvoc-hidden-html-fields.md @@ -0,0 +1,11 @@ +## Release Highlights + +### Updates on Support for External Vocabulary Services + +#### Hidden HTML Fields + +External Controlled Vocabulary scripts, configured via [:CVocConf](https://guides.dataverse.org/en/6.3/installation/config.html#cvocconf), can now access the values of managed fields as well as the term-uri-field for use in constructing the metadata view for a dataset. + +Those values are hidden and can be found with the html attribute `data-cvoc-metadata-name`. + +For more information, see [#10503](https://github.com/IQSS/dataverse/pull/10503). diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 24d17b27a1f..43d54f64c43 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -90,7 +90,7 @@ - +
- + @@ -125,7 +125,7 @@ - + @@ -151,7 +151,8 @@ - + +
- + + + + + + + + - +
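As a reminder of how such scripts get wired up, a minimal sketch of setting `:CVocConf` through the standard settings API; the configuration file name here is an assumption, and the expected format is described under `:CVocConf` in the Installation Guide:

```shell
# Upload the external-vocabulary configuration (hypothetical file name).
curl -X PUT --upload-file cvoc-conf.json \
     "http://localhost:8080/api/admin/settings/:CVocConf"
```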
From e24405cd0f3a5d50e5c46916f84108aca318fa89 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 10:35:45 -0400 Subject: [PATCH 60/85] skip remove loops if list starts empty --- .../iq/dataverse/search/IndexServiceBean.java | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 42b740076ec..63ce35114e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -571,23 +571,26 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr try { solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); - // We keep the latest version's docs unless it is deaccessioned and there is no - // published/released version - // So skip the loop removing those docs from the delete list except in that case - if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { - List latestFileMetadatas = latestVersion.getFileMetadatas(); - String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); - for (FileMetadata fileMetadata : latestFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId() - + suffix; - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (!solrIdsOfDocsToDelete.isEmpty()) { + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + fileMetadata.getDataFile().getId() + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } - } - if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { - List releasedFileMetadatas = releasedVersion.getFileMetadatas(); - for (FileMetadata fileMetadata : releasedFileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } } } // Clear any unused dataset docs From db8b2d2ceef7cb69ee1f03d4b318025545a6754d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:47:00 -0400 Subject: [PATCH 61/85] Delete src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java --- .../dataverse/export/PdfCodeBookExporter.java | 81 ------------------- 1 file changed, 81 deletions(-) delete mode 100644 src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java diff --git a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java 
deleted file mode 100644 index 4772c09ffd5..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/export/PdfCodeBookExporter.java +++ /dev/null @@ -1,81 +0,0 @@ - - -package edu.harvard.iq.dataverse.export; - -import com.google.auto.service.AutoService; - -import edu.harvard.iq.dataverse.export.ddi.DdiExportUtil; -import io.gdcc.spi.export.ExportDataProvider; -import io.gdcc.spi.export.ExportException; -import io.gdcc.spi.export.Exporter; -import edu.harvard.iq.dataverse.util.BundleUtil; -import jakarta.ws.rs.core.MediaType; - -import javax.xml.stream.XMLStreamException; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Locale; -import java.util.Optional; - -@AutoService(Exporter.class) -public class PdfCodeBookExporter implements Exporter { - - @Override - public String getFormatName() { - return "pdf"; - } - - @Override - public String getDisplayName(Locale locale) { - String displayName = BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.pdf", locale); - return Optional.ofNullable(displayName).orElse("DDI pdf codebook"); - } - - @Override - public void exportDataset(ExportDataProvider dataProvider, OutputStream outputStream) throws ExportException { - Optional ddiInputStreamOptional = dataProvider.getPrerequisiteInputStream(); - if (ddiInputStreamOptional.isPresent()) { - try (InputStream ddiInputStream = ddiInputStreamOptional.get()) { - DdiExportUtil.datasetPdfDDI(ddiInputStream, outputStream); - } catch (IOException e) { - throw new ExportException("Cannot open export_ddi cached file"); - } catch (XMLStreamException xse) { - throw new ExportException("Caught XMLStreamException performing DDI export"); - } - } else { - throw new ExportException("No prerequisite input stream found"); - } - } - - @Override - public Boolean isHarvestable() { - // No, we don't want this format to be harvested! - // For datasets with tabular data the portions of the DDIs - // become huge and expensive to parse; even as they don't contain any - // metadata useful to remote harvesters. -- L.A. 4.5 - return false; - } - - @Override - public Boolean isAvailableToUsers() { - return true; - } - - @Override - public Optional getPrerequisiteFormatName() { - //This exporter relies on being able to get the output of the ddi exporter - return Optional.of("ddi"); - } - - @Override - public String getMediaType() { - return MediaType.WILDCARD; - }; -} - - - From 88e86c17f3578c2baf86a2e836bdb0b0ddd59007 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:48:26 -0400 Subject: [PATCH 62/85] Update pom.xml --- pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pom.xml b/pom.xml index efb10d8ed5e..f45e8fd9033 100644 --- a/pom.xml +++ b/pom.xml @@ -463,12 +463,6 @@ commons-compress
- - - org.apache.xmlgraphics - fop - 2.8 - org.duracloud common From a95bb715947d2d610bc6a12f5f62a63af67ae244 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:49:48 -0400 Subject: [PATCH 63/85] Update Datasets.java --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1bb0d559e84..1a5eba52a62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -206,7 +206,7 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id" // WORKS on published datasets, which are open to the world. -- L.A. 4.5 @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html", "*/*" }) + @Produces({"application/xml", "application/json", "application/html", "application/ld+json", "*/*" }) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { From 899f1509b62d5a707473e463ec6c58e6cbce9f9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:51:39 -0400 Subject: [PATCH 64/85] Update DdiExportUtil.java --- .../dataverse/export/ddi/DdiExportUtil.java | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 2d59cd0b514..9a689f7a4ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -27,7 +27,6 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.net.URL; import java.nio.file.Files; import java.nio.file.Paths; import java.time.LocalDate; @@ -63,22 +62,6 @@ import javax.xml.transform.stream.StreamResult; import java.io.InputStream; - -import java.io.OutputStream; -import javax.xml.transform.Result; -import javax.xml.transform.Source; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.sax.SAXResult; -import javax.xml.transform.stream.StreamSource; - -import org.apache.fop.apps.FOUserAgent; -import org.apache.fop.apps.Fop; -import org.apache.fop.apps.FopFactory; -import org.apache.fop.apps.MimeConstants; -import java.io.File; - - public class DdiExportUtil { private static final Logger logger = Logger.getLogger(DdiExportUtil.class.getCanonicalName()); @@ -2116,42 +2099,6 @@ private static boolean checkParentElement(XMLStreamWriter xmlw, String elementNa return true; } - public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { - try { - String sysId = DdiExportUtil.class.getClassLoader().getResource("edu/harvard/iq/dataverse/ddi-to-fo.xsl").toURI().toString(); - InputStream styleSheetInput = DdiExportUtil.class.getClassLoader().getResourceAsStream("edu/harvard/iq/dataverse/ddi-to-fo.xsl"); - - final FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI()); - FOUserAgent foUserAgent = fopFactory.newFOUserAgent(); - - try { - Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foUserAgent, outputStream); - // Setup XSLT - TransformerFactory factory = 
TransformerFactory.newInstance(); - Source mySrc = new StreamSource(styleSheetInput); - mySrc.setSystemId(sysId); - Transformer transformer = factory.newTransformer(mySrc); - - // Set the value of a in the stylesheet - transformer.setParameter("versionParam", "2.0"); - - // Setup input for XSLT transformation - Source src = new StreamSource(datafile); - - // Resulting SAX events (the generated FO) must be piped through to FOP - Result res = new SAXResult(fop.getDefaultHandler()); - - // Start XSLT transformation and FOP processing - transformer.transform(src, res); - - } catch (Exception e) { - logger.severe(e.getMessage()); - } - } catch (Exception e) { - logger.severe(e.getMessage()); - } - } - public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); From 91dcca7d99d85cb6ede7603b94cb0e810514b488 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:52:20 -0400 Subject: [PATCH 65/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl --- .../resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl deleted file mode 100644 index edf876f3b04..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/i18n.inc.xsl +++ /dev/null @@ -1,5 +0,0 @@ - - - - - From a5591a5d4a78996af6771adfbaee93bb00a28389 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:52:58 -0400 Subject: [PATCH 66/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml --- .../ddi-pdf/messages_en.properties.xml | 174 ------------------ 1 file changed, 174 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml deleted file mode 100644 index d8e98dfd3c6..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_en.properties.xml +++ /dev/null @@ -1,174 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 -Valid -Frequency table not shown -Derivation -discrete -Data Collection Mode -Other Processing -Other Acknowledgment(s) -Untitled -Identification -Click here to access/export data files from Nesstar format -Value -Percentage -Weighting -Primary Investigator(s) -This document was generated using the -Sampling -Cases -Access Conditions -Source -Modalities -Rights & Disclaimer -Definition -Estimates of Sampling Error -Data Files -Geographic Coverage -April -Mean -Metadata Production -Data Collection -Missing Data -Scripts and programs -Variable(s) -Interviewer instructions -Funding Agency/ies -November -Missing -Version -Universe -Contributor(s) -Access Authority -Data Processing & Appraisal -Scope -Administrative documents -StdDev -Contact(s) -Label -Technical documents -Decimal(s) -Type -Literal question -Concepts -Range -Abstract -June -Supervision -Other Forms of Data Appraisal -References -Accessibility -Data Collection Dates -Data Editing -Questionnaires -Valid case(s) -Reports and analytical documents -Copyright -Documentation -Deviations from Sample Design -Publisher(s) -February 
-Dataset contains -Acknowledgment(s) -Continuous -Standard deviation -Variables Description -Producer -Production Date - -The Explorer allows you to view data files and export them to common statistical formats -Discrete -Group -July -Filename -Cases -Name -Warning: these figures indicate the number of cases found in the data file. They cannot be interpreted as summary statistics of the population of interest. -Statistical tables -December -Subjects -Processing Checks -software -Interviewer's instructions -Table of Contents -Document Information -Subgroup(s) -Keywords -group(s) -W -Weight -Files Description -Notes -Data Collection Notes -file(s) -continuous -Disclaimer -Content -variable(s) -Other Producer(s) -Producers & Sponsors -Data Cleaning Notes -Distributor(s) -Overview -Citation Requirements -September -Category -Confidentiality -Statistics -May -Undetermined -Structure -file -Pre-question -Response Rate -Width -Recoding and Derivation -Series -October -Unit of Analysis -Data Processing Notes -Kind of Data -File -Time Period(s) -File Content -Invalid -Vars -cont. -Key(s) -Question -Source of information -Imputation -Security -To open this file, you will need the free -Other resources -Data Dictionnary -Information -January -Other documents -Minimum -Scope & Coverage -Metadata Producer(s) -Show more info -Data Collector(s) -Post-question -Topics -Sampling Procedure -File Structure -Variables List -Format -Sampling Notes -Variables Group(s) -Description -Categories -Maximum -Depositor(s) -August -NW -Cover Page -Weighted -March - total - showing a subset of -Countries -question details - From 81c2abd2afeee0be516ddba1d70fd35e770be1bf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:53:15 -0400 Subject: [PATCH 67/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml --- .../ddi-pdf/messages_es.properties.xml | 170 ------------------ 1 file changed, 170 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml deleted file mode 100644 index 9cfcdaf6e7e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_es.properties.xml +++ /dev/null @@ -1,170 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Válido -No se presentan las tablas de frecuencias -Derivación -discreta -Método de Recolección -Otros relacionados al procesamiento -Otros Reconocimientos -Sin título -Identificación -Presione aquí para acceder/exportar al archivo(s) de datos -Valor -Porcentaje -Ponderando -Investigadores Principales -Este documento fue producido utilizando el -Muestreo -Casos -Condiciones de uso -Fuente -Modalidades -Derechos y Notas Legales -Definición -Estimaciones del Error Muestral -Archivo de Datos -Cobertura Geográfica -Abril -Media -Producción de los Metadatos -Recolección de Datos -Datos perdidos -Programas informáticos -Variable(s) -Manual del encuestador -Agencias Auspiciadoras -Noviembre -Valores perdidos -Versión -Universo -Contribuidor(es) -Institución Propietaria -Tratamiento y Validación de Datos -Dominio Temático -Documentos Administrativos -Desviación -Contacto(s) -Etiqueta -Documentos Técnicos -Decimal(es) -Tipo -Pregunta textual -Conceptos -Rango -Resumen -Junio -Supervisión -Otras Formas de Validación de los Datos -Referencias -Accesibilidad -Fechas de Recolección de 
Datos -Procesamiento de Datos -Cuestionarios -Casos válidos -Reportes y documentos analíticos -Derechos de Autor -Documentación -Modificaciones al Diseño Muestral -Editor(es) -Febrero -Contenido de la Base de Datos -Reconocimiento(s) -Contínua -Desviación estándar -Descripción de la variable -Productor -Fecha de Producción -El Explorador NESSTAR permite visualizar los archivos de datos y exportarlos a diferentes formatos estadísticos -Discreta -Grupo -Julio -Nombre del Archivo -Casos -Nombre -Cuadros estadísticos -Diciembre -Temas -Controles de Tratamiento -software -Manual del encuestador -Indice -Información acerca de la Documentación -Subgrupo(s) -Palabra Clave -grupo(s) -P -Ponderador -Descripción de los Archivos -Notas -Notas sobre la Recolección de Datos -archivo(s) -continua -Nota Legal -Contenido -variable(s) -Otros Productores -Productores y Auspiciadores -Notas acerca de la Depuración de los Datos -Distribuidor(es) -Resumen General -Forma de citar -Septiembre -Categoría -Confidencialidad -Estadística - -Mayo -Indeterminado -Estructura -archivo -Pre-pregunta -Tasa de Respuesta -Ancho -Recodificación y Derivación -Series -Octubre -Unidad de Análisis -Notas sobre el Procesamiento de Datos -Tipo de Datos -Archivo -Periodo de Referencia -Contenido del Archivo -Inválido -Vars. -cont. -Clave(s) -Pregunta -Fuente de información -Imputación -Seguridad -Para abrir este archivo se necesita el software gratuito -Otros recursos -Diccionario de Datos -Información -Enero -Otros documentos -Mínimo -Cobertura y Dominio Temático -Productor de los Metadatos -Mostrar más información -Entrevistador(es) -Pos-pregunta -Temas -Procedimiento de Muestreo -Estructura del Archivo -Lista de variables -Formato -Notas sobre el Muestreo -Grupo(s) de Variables -Descripción -Categorías -Máximo -Depositante(s) -Agosto -NP -Carátula -Ponderado -Marzo - From 61fe51f7624d153a13a73c23d82e9c8a56fb32e6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:53:26 -0400 Subject: [PATCH 68/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml --- .../ddi-pdf/messages_fr.properties.xml | 173 ------------------ 1 file changed, 173 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml deleted file mode 100644 index 9fa4d2178b1..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_fr.properties.xml +++ /dev/null @@ -1,173 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Valide -Tableau de fréquences non-affiché -Mode de calcul -discrète -Méthode de collecte -Autre traitement -Autre(s) remerciement(s) -Sans titre -Identification -Cliquer ici pour accéder/exporter les fichiers de données du format Nesstar -Valeur -Pourcentage -Pondération -Enquêteur(s) principal/aux -Ce document a été généré à l'aide du -Echantillonage -Enregistrements -Conditions d'accès -Source -Modalités -Responsabilité et droits d'auteurs -Définition -Estimation des erreurs d'échantillonage -Fichiers de données -Couverture géographique -Avril -Moyenne -Production des méta-données -Collecte des données -Valeures manquantes -Programmes informatiques -Variable(s) -Instructions aux enquêteurs -Financement -Novembre -Manquant -Version -Univers -Contributeur(s) -Agence propriétaire -Traitement et évaluation des données -Domaines thématiques 
-Documents administratifs -Ecart type -Contact(s) -Libellé -Documents techniques -Décimale(s) -Type -Formulation de la question -Concepts -Gamme -Résumé -Juin -Supervision -Autres formes d'évaluation des données -Références -Accessibilité -Dates de collecte -Edition des données -Questionnaires -Cas valide(s) -Rapports et documents analytiques -Droits d'auteurs -Documentation -Déviations par rapport à l'échantillon initial -Editeur(s) -Février -Le jeu de données contient -Remerciement(s) -Continue -Ecart type -Description des variables -Producteur -Date de production - -L'Explorer vous permet d'accéder aux données et de les exporter vers les formats statistiques les plus courants -Discrète -Groupe -Juillet -Nom du fichier -Enreg. -Nom -Avertissement: Ces chiffres indiquent le nombre de cas identifiés dans le fichier de données. Ils ne peuvent pas être interpretés comme étant représentatifs de la population concernée. -Tableaux statistiques -Décembre -Sujets -Contrôles de traitement - -Instructions aux enquêteurs -Table des matières -Informations sur le document -Sous-groupe(s) -Mots-clé -groupe(s) -P -Pondération -Description des fichiers -Notes -Notes sur la collecte -fichier(s) -continue -Responsabilité(s) -Contenu -variable(s) -Autre(s) producteur(s) -Producteurs et sponsors -Notes sur l'apurement des données -Distributeur(s) -Aperçu -Citation -Septembre -Catégorie -Confidentialité -Statistiques -Mai -Indéterminé -Structure -fichier -Pré-question -Taux de réponse -Taille -Formulation de la question -Recodage et dérivation -Série -Octobre -Unité d'analyse -Notes sur le traitement des données -Type d'étude -Fichier -Période(s) de référence -Contenu du fichier -Non-valide -Vars -suite -Clé(s) -Question -Source d'information -Imputation -Sécurité -Pour ouvrir ce fichier, vous avez besoin du logiciel gratuit -Autres resources -Dictionnaire des variables -Information -Janvier -Autres documents -Minimum -Domaines thématiques et couverture -Producteur(s) des méta-données -Information complémentaire -Enquêteurs -Post-question -Thèmes -Procédure d'échantillonage -Structure du fichier -Liste des variables -Format -Notes sur l'échantillonage -Groupe(s) de variables -Description -Catégories -Maximum -Dépositaire(s) -Août -NP -Couverture -Pondéré -Mars -question details - From a87d1bcdee77f78fcfa287502838526cd85b54b5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:53:42 -0400 Subject: [PATCH 69/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml --- .../ddi-pdf/messages_ja.properties.xml | 161 ------------------ 1 file changed, 161 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml deleted file mode 100644 index bc5dbb06154..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ja.properties.xml +++ /dev/null @@ -1,161 +0,0 @@ - - -Generated by Properties2Xml on Tue Feb 13 13:55:43 EST 2007 -有効な -度数表(Frequency table)は表示されません -由来 -不連続な -データ収集モード -その他の確認事項 -識別番号 -データファイルにアクセスするにはここをクリックしてください -無題 - -割合 -ウェイティング -第一次調査官 -この文書はToolkitを使用して作られました -サンプリング -ケース -アクセス条件 -情報源 -様相 -権利及び声明文 -定義 -サンプルエラーの見積もり -データファイル -地理的な適用範囲 -4月 -平均 -メタデータ製作 -データ収集 -損失データ -スクリプトおよびプログラム -可変的 -面接者の指示 -出資機関 -11月 -バージョン -共通の -貢献者 -アクセス権限 -データ処理、評価 -範囲, 領域 -管理用文章 -連絡先 -ラベル -技術的な文書 -小数点 -タイプ -文字の質問 -概念 -範囲 -要約 -6月 -監督 
-その他ファーマットのデータ評価 -参照 -アクセス、入手法 -データ収集日 -データ編集 -質問 -レポートおよび分析的な文書 -有効な場合 -コピーライト -書類 -サンプルデザインによる偏差 -発行者 -2月 -データセットに含まれる -確認事項 -連続的な -標準偏差 -変数の記述 -製作者 -製作日 -” Explorer”によってデータファイルを参照することも一般的に使えわれている統計データフォーマットに変換。抽出することも可能です -不連続性 -グループ -7月 -ファイルの名前 -ケース -名前 -統計表 -12月 -主題, 内容 -工程監査 -ソフト -面接者への指示 -目録 -書類の情報 -サブグループ -キーワード - -グループ -ウェイト -ファイルの詳細 -メモ -データ収集メモ -ファイル -継続的な -声明文 -内容 -変数 -その他の製作者 -製作者とスポンサー -データクリーニングメモ -分配者 -概略 -引用する場合の必要条件 -9月 -カテゴリー -機密性、コンフィデンシャリティー -5月 -未定 -構造 -ファイル -調査前の質問 -回答比率 - -記録と誘導 -シリー -10月 -分析の単位 -データ処理メモ -データの種類 - -ファイル -期間 -ファイルの内容 -無効 -キー -情報源 -非難 -セキュリティー -このファイルを開けるには、無料で配布されているNesstar Explorer が必要です。 -その他の資料 -データ辞典 -情報 -1月 -その他の書類 -最小値 -規模及び適用範囲 -メタデータ製作者 -さらにインフォメーションを表示 -データ収集者 -調査後の質問 -サンプリングの手順 -ファイルの構造 -変数のリスト -フォーマット -サンプリングメモ -変数のグループ -詳細 -カテゴリー -最大値 -デポジター、提供者、供託者 -8月 -表紙 -ウェイトされた -3月 - From 73edb3da1228c71d9dddc5be01140290610cd74e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:53:53 -0400 Subject: [PATCH 70/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml --- .../ddi-pdf/messages_nn.properties.xml | 174 ------------------ 1 file changed, 174 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml deleted file mode 100644 index fdf14f5dfcd..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_nn.properties.xml +++ /dev/null @@ -1,174 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 -Gyldige -Frekvenstabell ikke vist -Avledning -diskret -Type datainnsamling -Annen prosessering -Andre identifikatorer og krediteringer -Uten tittel -Identifisering -Click here to access/export data files from Nesstar format -Verdi -Prosent -Vekting -Primary Investigator(s) -Dette dokumentet ble generert av -Utvalg -Enheter -Tilgangsbetingelser -Kilde -Modaliteter -Rights & Disclaimer -Definisjon -Estimert utvalgsfeil -Datafiler -Geografisk omfang -April -Mean -Metadata-produksjon -Datainnsamling -Manglende data -Script og programmer -Variable(r) -Instruksjoner til intervjueren -Sponsor/finansierende institusjon(er) -November -Missing -Versjon -Univers -Bidragsyter(e) -Tilgangskontrollør -Dataprosessering og -evaluering -Omfang -Administrative dokumenter -Standardavvik -Kontaktperson(er) -Merkelapp -Tekniske dokumenter -Desimal(er) -Type -Spørsmålstekst -Begrep(er) -Rekkevidde -Sammendrag -Juni -Supervision -Andre former for dataevaluering -Referanser -Tilgjengelighet -Datainnsamlingsdatoer -Dataredigering -Spørreskjema -Gyldige enheter -Rapporter og analysedokumenter -Copyright -Dokumentasjon -Avvik fra utvalgsdesign -Utgiver(e) -Februar -Datasettet inneholder -Krediteringer -Kontinuerlig -Standardavvik -Variabelbeskrivelse -Produsent -Produksjonsdato - -The Explorer allows you to view data files and export them to common statistical formats -Diskret -Gruppe -Juli -Filnavn -Enheter -Navn -Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. De kan ikke tolkes som oppsummert statistikk for populasjonen. 
-Statistiske tabeller -Desember -Emner -Prosesseringssjekk -programvare -Instruksjoner til intervjueren -Innholdsfortegnelse -Dokumentinformasjon -Undergruppe(r) -Nøkkelord -gruppe(r) -W -Vekt -Filbeskrivelse -Kommentarer -Datainnsamlingskommentarer -file(r) -kontinuerlig -Fraskrivelse -Innhold -variable(r) -Andre produsenter -Produsenter og sponsorer -Kommentarer om datarensing -Distributør(er) -Oversikt -Sitatkrav -September -Kategori -Konfidensialitet -Statistikk -Mai -Uavklart -Struktur -fil -Tekst før spørsmål -Responsrate -Bredde -Omkodinger og utledninger -Serie -Oktober -Analyseenhet -Dataprosesseringskommentarer -Datatype -Fil -Tidsperiode(r) -Filinnhold -Ugyldig -Variabler -kont. -Nøkler -Spørsmål -Kilde for informasjon -Imputasjon -Sikkerhet -For å åpne denne filen trenger du følgende gratisverktøy -Andre ressurser -Dataordbok -Informasjon -Januar -Andre dokumenter -Minimum -Omfang -Metadataprodusenter -Vis mer informasjon -Datainnsamler(e) -Tekst etter spørsmål -Emner -Utvalgsprosedyre -Filstruktur -Variabelliste -Format -Utvalgskommentarer -Variabelgrupper -Beskrivelse -Kategorier -Maksimum -Utgiver(e) -August -NW -Forside -Vektet -Mars - total - viser et utvalg av -Land -spørsmålsdetaljer - From f3dd0461184d70f75daff6727f041e76e22df637 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:54:04 -0400 Subject: [PATCH 71/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml --- .../ddi-pdf/messages_no.properties.xml | 174 ------------------ 1 file changed, 174 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml deleted file mode 100644 index fdf14f5dfcd..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_no.properties.xml +++ /dev/null @@ -1,174 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:39 EDT 2008 -Gyldige -Frekvenstabell ikke vist -Avledning -diskret -Type datainnsamling -Annen prosessering -Andre identifikatorer og krediteringer -Uten tittel -Identifisering -Click here to access/export data files from Nesstar format -Verdi -Prosent -Vekting -Primary Investigator(s) -Dette dokumentet ble generert av -Utvalg -Enheter -Tilgangsbetingelser -Kilde -Modaliteter -Rights & Disclaimer -Definisjon -Estimert utvalgsfeil -Datafiler -Geografisk omfang -April -Mean -Metadata-produksjon -Datainnsamling -Manglende data -Script og programmer -Variable(r) -Instruksjoner til intervjueren -Sponsor/finansierende institusjon(er) -November -Missing -Versjon -Univers -Bidragsyter(e) -Tilgangskontrollør -Dataprosessering og -evaluering -Omfang -Administrative dokumenter -Standardavvik -Kontaktperson(er) -Merkelapp -Tekniske dokumenter -Desimal(er) -Type -Spørsmålstekst -Begrep(er) -Rekkevidde -Sammendrag -Juni -Supervision -Andre former for dataevaluering -Referanser -Tilgjengelighet -Datainnsamlingsdatoer -Dataredigering -Spørreskjema -Gyldige enheter -Rapporter og analysedokumenter -Copyright -Dokumentasjon -Avvik fra utvalgsdesign -Utgiver(e) -Februar -Datasettet inneholder -Krediteringer -Kontinuerlig -Standardavvik -Variabelbeskrivelse -Produsent -Produksjonsdato - -The Explorer allows you to view data files and export them to common statistical formats -Diskret -Gruppe -Juli -Filnavn -Enheter -Navn -Advarsel: disse tallene indikerer antall enheter (cases) i datafilen. 
De kan ikke tolkes som oppsummert statistikk for populasjonen. -Statistiske tabeller -Desember -Emner -Prosesseringssjekk -programvare -Instruksjoner til intervjueren -Innholdsfortegnelse -Dokumentinformasjon -Undergruppe(r) -Nøkkelord -gruppe(r) -W -Vekt -Filbeskrivelse -Kommentarer -Datainnsamlingskommentarer -file(r) -kontinuerlig -Fraskrivelse -Innhold -variable(r) -Andre produsenter -Produsenter og sponsorer -Kommentarer om datarensing -Distributør(er) -Oversikt -Sitatkrav -September -Kategori -Konfidensialitet -Statistikk -Mai -Uavklart -Struktur -fil -Tekst før spørsmål -Responsrate -Bredde -Omkodinger og utledninger -Serie -Oktober -Analyseenhet -Dataprosesseringskommentarer -Datatype -Fil -Tidsperiode(r) -Filinnhold -Ugyldig -Variabler -kont. -Nøkler -Spørsmål -Kilde for informasjon -Imputasjon -Sikkerhet -For å åpne denne filen trenger du følgende gratisverktøy -Andre ressurser -Dataordbok -Informasjon -Januar -Andre dokumenter -Minimum -Omfang -Metadataprodusenter -Vis mer informasjon -Datainnsamler(e) -Tekst etter spørsmål -Emner -Utvalgsprosedyre -Filstruktur -Variabelliste -Format -Utvalgskommentarer -Variabelgrupper -Beskrivelse -Kategorier -Maksimum -Utgiver(e) -August -NW -Forside -Vektet -Mars - total - viser et utvalg av -Land -spørsmålsdetaljer - From 7a350fa948ad695cc68986f7ab9965855019f63b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:54:14 -0400 Subject: [PATCH 72/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml --- .../ddi-pdf/messages_ru.properties.xml | 169 ------------------ 1 file changed, 169 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml b/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml deleted file mode 100644 index 06fde85af5e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-pdf/messages_ru.properties.xml +++ /dev/null @@ -1,169 +0,0 @@ - - -Generated by Properties2Xml on Fri Apr 11 09:45:40 EDT 2008 -Валидный -Частотная таблица не выводится -Расчет -дискретная -Способ сбора данных -Прочая обработка -Другие участники -Безымянный -Индентификация -Щелкните здесь, чтобы получить доступ к файлам или экспортировать их -Значение -Процент -Взвешивание -Первичный(е) исследователь(и) -Документ был сгенерирован с использованием -Выборка -Наблюдения -Условия доступа -Источник -Модальности -Авторские права и ограничения ответственности -Определение -Оценки ошибок выборки -Файлы данных -Географический охват -Апрель -Среднее -Разработка метаданных -Сбор данных -Пропущенные данные -Скрипты и программы -Переменная(ые) -Инструкции интервьюеру -Кто финансировал -Ноябрь -Пропущенные -Версия -Генеральная совокупность -Участник(и) -Права доступа -Обработка и инспекция данных -Охват -Административные документы -СтдОткл -Контак(ы) -Метка -Технические документы -Десятичные -Тип -Текст вопроса -Концепции -Диапазон -Резюме -Июнь -Контроль -Другие формы инспекции данных -Установки -Доступность -Даты сбора данных -Редактирование данных -Вопросники -Валидное(ые) наблюдение(я) -Отчеты и аналитические документы -Авторские права -Документация -Отклонения от дизайна выборки -Издатель(и) -Февраль -Набор данных содержит -Участник(и) -Непрерывная -Стандартное отклонение -Описание переменных -Разработчик -Дата разработки -Проводник позволяет просматривать файлы данных и экспортировать их в распространенные статистические форматы 
-Дикретная -Группа -Июль -Имя файла -Наблюдения -Имя -Статистичсекие таблицы -Декабрь -Темы -Контроль обработки -программное обеспечение -Инструкции интервьюеру -Оглавление -Информация о документе -Подгруппа(ы) -Ключевые слова -группа(ы) -B -Вес -Описание файла -Примечания -Примечания по сбору данных -файл(ы) -непрерывная -Ограничения ответственности -Содержание -переменная(ые) -Другие разработчики -Разработчики и спонсоры -Примечания по чистке данных -Дистрибьютор(ы) -Обзор -Требования по цитированию -Сентябрь -Категория -Конфиденциальность -Статистики -Май -Неопределенный -Структура -файл -Текст, предваряющий вопрос -Доля ответов -Ширина -Перекодировка и расчеты -Серия -Октябрь -Единица анализа -Примечания по обработке данных -Тип данных -Файл -Период(ы) времени -Содержание файла -Некорректный -Переменные -непр. -Ключ(и) -Вопрос -Источник информации -Импутация -Безопасность -Чтобы открыть этот файл, необходимо иметь свободным -Прочие источники -Словарь данных -Информация -Январь -Прочие документы -Минимум -Охват и покрытие -Разработчик(и) метаданных -Показать дополнительную информацию -Кто собирал данные -Текст после вопроса -Разделы -Процедура выборки -Структура файла -Список переменных -Формат -Примечания по выборке -Группа(ы) переменных -Описание -Категории -Максимум -Депозитор(ы) -Август -HB -Титульный лист -взвешенные -Март - From ab6dd1ce828686ddadc22307dfa683e078c85992 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:54:34 -0400 Subject: [PATCH 73/85] Delete src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl --- .../edu/harvard/iq/dataverse/ddi-to-fo.xsl | 4435 ----------------- 1 file changed, 4435 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl diff --git a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl b/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl deleted file mode 100644 index 26fd7c23479..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/ddi-to-fo.xsl +++ /dev/null @@ -1,4435 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - default-page - - - - - - - - - - - - - - - - - - - - - - Times - - - - - - - - - - - - - - - - - - - - - - - - - 50 - - - - - - - 0 - 0 - 0 - 1 - - - - - - - 1 - 0 - - - - - - - 0 - 1 - - - - - - ddp - toolkit - toolkit - toolkit - - - - - - - - () - - - - - - , - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 1 - 1 - 1 - 1 - 0 - - - - - 1 - 1 - 0 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - , - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
[ddi-to-fo.xsl stylesheet body omitted: the 4,435 deleted lines of XSL-FO markup did not survive extraction; only stripped-tag residue remained]
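For context on what patches 64 and 73 above removed: the deleted datasetPdfDDI() method drove Apache FOP with the ddi-to-fo.xsl stylesheet to render a DDI codebook as PDF. A condensed, standalone version of that pipeline is sketched below; the file paths are illustrative (the real code loaded the stylesheet from the classpath and streamed cached DDI XML), but the FOP and JAXP calls mirror the deleted code.

```java
import org.apache.fop.apps.FOUserAgent;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;

import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamSource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;

public class DdiToPdf {
    public static void main(String[] args) throws Exception {
        FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
        FOUserAgent userAgent = fopFactory.newFOUserAgent();
        try (OutputStream out = new FileOutputStream("codebook.pdf")) {
            // FOP consumes XSL-FO events and writes PDF to the output stream.
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, userAgent, out);
            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer(new StreamSource(new File("ddi-to-fo.xsl")));
            transformer.setParameter("versionParam", "2.0"); // as the deleted method did
            Source ddi = new StreamSource(new File("codebook-ddi.xml"));
            // Pipe the generated FO straight into FOP's SAX handler.
            Result res = new SAXResult(fop.getDefaultHandler());
            transformer.transform(ddi, res);
        }
    }
}
```

Dropping this exporter is also what allowed patch 62 to remove the heavyweight org.apache.fop:fop dependency from pom.xml, along with the ddi-pdf message bundles deleted in patches 65 through 72.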
From f73c5f89ddee573d2fb52e81f4f61d6186081538 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 11:55:27 -0400 Subject: [PATCH 74/85] Update Bundle.properties --- src/main/java/propertyFiles/Bundle.properties | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 0e32ba5b30d..157f2ecaf54 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1433,7 +1433,6 @@ dataset.exportBtn.itemLabel.json=JSON dataset.exportBtn.itemLabel.oai_ore=OAI_ORE dataset.exportBtn.itemLabel.dataciteOpenAIRE=OpenAIRE dataset.exportBtn.itemLabel.html=DDI HTML Codebook -dataset.exportBtn.itemLabel.pdf=DDI PDF Codebook license.custom=Custom Dataset Terms license.custom.description=Custom terms specific to this dataset metrics.title=Metrics From b464b24296a15ffcb137d283f07965a2a2449e4b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 15:06:48 -0400 Subject: [PATCH 75/85] drop COMMIT_WITHIN which breaks autoSoftCommit by maxTime in solrconfig --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 11 +++++------ .../iq/dataverse/search/SolrIndexServiceBean.java | 6 +++--- 2 files
changed, 8 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 0102459ab9f..10e6c2e6516 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -312,7 +312,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) String status; try { if (dataverse.getId() != null) { - solrClientService.getSolrClient().add(docs, COMMIT_WITHIN); + solrClientService.getSolrClient().add(docs); } else { logger.info("WARNING: indexing of a dataverse with no id attempted"); } @@ -345,7 +345,6 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); // semaphore for async indexing private static final Semaphore ASYNC_INDEX_SEMAPHORE = new Semaphore(JvmSettings.MAX_ASYNC_INDEXES.lookupOptional(Integer.class).orElse(4), true); - static final int COMMIT_WITHIN = 30000; //Same as current autoHardIndex time @Inject @Metric(name = "index_permit_wait_time", absolute = true, unit = MetricUnits.NANOSECONDS, @@ -1562,7 +1561,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion); try { - solrClientService.getSolrClient().add(docs.getDocuments(), COMMIT_WITHIN); + solrClientService.getSolrClient().add(docs.getDocuments()); } catch (SolrServerException | IOException ex) { if (ex.getCause() instanceof SolrServerException) { throw new SolrServerException(ex); @@ -1814,7 +1813,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); - UpdateResponse addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN); + UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); if (object.isInstanceofDataset()) { for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); @@ -1869,7 +1868,7 @@ public String delete(Dataverse doomed) { logger.fine("deleting Solr document for dataverse " + doomed.getId()); UpdateResponse updateResponse; try { - updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId(), COMMIT_WITHIN); + updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId()); } catch (SolrServerException | IOException ex) { return ex.toString(); } @@ -1889,7 +1888,7 @@ public String removeSolrDocFromIndex(String doomed) { logger.fine("deleting Solr document: " + doomed); UpdateResponse updateResponse; try { - updateResponse = solrClientService.getSolrClient().deleteById(doomed, COMMIT_WITHIN); + updateResponse = solrClientService.getSolrClient().deleteById(doomed); } catch (SolrServerException | IOException ex) { return ex.toString(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 19235bb5a14..cfe29ea08c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -356,7 +356,7 @@ private void persistToSolr(Collection docs) throws SolrServer /** * @todo Do 
something with these responses from Solr. */ - UpdateResponse addResponse = solrClientService.getSolrClient().add(docs, IndexServiceBean.COMMIT_WITHIN); + UpdateResponse addResponse = solrClientService.getSolrClient().add(docs); } public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) { @@ -496,7 +496,7 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { return new IndexResponse("nothing to delete"); } try { - solrClientService.getSolrClient().deleteById(solrIdsToDelete, IndexServiceBean.COMMIT_WITHIN); + solrClientService.getSolrClient().deleteById(solrIdsToDelete); } catch (SolrServerException | IOException ex) { /** * @todo mark these for re-deletion @@ -509,7 +509,7 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServerException, IOException { JsonObjectBuilder response = Json.createObjectBuilder(); logger.info("attempting to delete all Solr documents before a complete re-index"); - solrClientService.getSolrClient().deleteByQuery("*:*", IndexServiceBean.COMMIT_WITHIN); + solrClientService.getSolrClient().deleteByQuery("*:*"); int numRowsAffected = dvObjectService.clearAllIndexTimes(); response.add(numRowsClearedByClearAllIndexTimes, numRowsAffected); response.add(messageString, "Solr index and database index timestamps cleared."); From 9fc757f9fa37ef43383a0ad628fd137231249de6 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 25 Jun 2024 15:21:58 -0400 Subject: [PATCH 76/85] 10581 request access email fix (#10653) * #10581 fix subject line of request access email * #10581 add sorter * #10581 addl user info for request email * #10581 add break between custom Q responses * #10581 remove orderby --- .../iq/dataverse/GuestbookResponse.java | 40 +++++++++++++++---- .../harvard/iq/dataverse/MailServiceBean.java | 2 +- .../harvard/iq/dataverse/util/MailUtil.java | 5 ++- src/main/java/propertyFiles/Bundle.properties | 6 ++- .../iq/dataverse/util/MailUtilTest.java | 8 +++- 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 9041ccf887c..1ea7d02791d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -17,6 +17,8 @@ import java.util.List; import jakarta.persistence.*; import jakarta.validation.constraints.Size; +import java.util.Collections; +import java.util.Comparator; /** * @@ -178,7 +180,7 @@ public GuestbookResponse(GuestbookResponse source){ this.setSessionId(source.getSessionId()); List customQuestionResponses = new ArrayList<>(); if (!source.getCustomQuestionResponses().isEmpty()){ - for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponses() ){ + for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponsesSorted() ){ CustomQuestionResponse customQuestionResponseAdd = new CustomQuestionResponse(); customQuestionResponseAdd.setResponse(customQuestionResponse.getResponse()); customQuestionResponseAdd.setCustomQuestion(customQuestionResponse.getCustomQuestion()); @@ -254,6 +256,18 @@ public String getResponseDate() { public List getCustomQuestionResponses() { return customQuestionResponses; } + + public List getCustomQuestionResponsesSorted(){ + + Collections.sort(customQuestionResponses, (CustomQuestionResponse cqr1, CustomQuestionResponse cqr2) -> { + int 
a = cqr1.getCustomQuestion().getDisplayOrder(); + int b = cqr2.getCustomQuestion().getDisplayOrder(); + return Integer.valueOf(a).compareTo(b); + }); + + + return customQuestionResponses; + } public void setCustomQuestionResponses(List customQuestionResponses) { this.customQuestionResponses = customQuestionResponses; @@ -317,7 +331,11 @@ public void setSessionId(String sessionId) { this.sessionId= sessionId; } - public String toHtmlFormattedResponse() { + public String toHtmlFormattedResponse(){ + return toHtmlFormattedResponse(null); + } + + public String toHtmlFormattedResponse(AuthenticatedUser requestor) { StringBuilder sb = new StringBuilder(); @@ -326,17 +344,25 @@ public String toHtmlFormattedResponse() { sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.respondent") + "
<br>\n<ul>\n<li>"
                + BundleUtil.getStringFromBundle("name") + ": " + getName() + "</li>\n<li>");
         sb.append(" " + BundleUtil.getStringFromBundle("email") + ": " + getEmail() + "</li>\n<li>");
-        sb.append(
-                " " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "</li>\n<li>");
-        sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "</li>\n</ul>\n");
+        sb.append(" " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "</li>\n<li>");
+        sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "</li>");
+
+        //Add requestor information to response to help dataset admin with request processing
+        if (requestor != null){
+            sb.append("\n<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.id") + ": " + requestor.getId()+ "</li>");
+            sb.append("\n<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.identifier") + ": " + requestor.getIdentifier()+ "</li>\n</ul>");
+        } else {
+            sb.append("\n</ul>");
+        }
+
         sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.guestbook.additionalQuestions") + ":<br>\n<ul>\n");
-        for (CustomQuestionResponse cqr : getCustomQuestionResponses()) {
+        for (CustomQuestionResponse cqr : getCustomQuestionResponsesSorted()) {
             sb.append("<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.question") + ": "
                     + cqr.getCustomQuestion().getQuestionString() + "<br>"
                     + BundleUtil.getStringFromBundle("dataset.guestbookResponse.answer") + ": "
-                    + wrapNullAnswer(cqr.getResponse()) + "</li>\n");
+                    + wrapNullAnswer(cqr.getResponse()) + "</li>\n<br>");
         }
         sb.append("</ul>");
         return sb.toString();
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java
index 1eee9c65501..7359ef8eb33 100644
--- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java
@@ -456,7 +456,7 @@ public String getMessageTextBasedOnNotification(UserNotificatio
                 GuestbookResponse gbr = far.getGuestbookResponse();
                 if (gbr != null) {
                     messageText += MessageFormat.format(
-                            BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse());
+                            BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse(requestor));
                 }
                 return messageText;
             case GRANTFILEACCESS:
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java
index ccec3e5f09b..36c249de834 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java
@@ -35,7 +35,10 @@ public static String getSubjectTextBasedOnNotification(UserNoti
         case CREATEDV:
             return BundleUtil.getStringFromBundle("notification.email.create.dataverse.subject", rootDvNameAsList);
         case REQUESTFILEACCESS:
-            return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName));
+            String userNameFirst = userNotification.getRequestor().getFirstName();
+            String userNameLast = userNotification.getRequestor().getLastName();
+            String userIdentifier = userNotification.getRequestor().getIdentifier();
+            return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), userNameFirst, userNameLast, userIdentifier, datasetDisplayName));
         case REQUESTEDFILEACCESS:
             return BundleUtil.getStringFromBundle("notification.email.requested.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName));
         case GRANTFILEACCESS:
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 2996ccb509b..3e19a19d8dc 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -757,8 +757,8 @@
dataset.guestbookResponse.respondent=Respondent dataset.guestbookResponse.question=Q dataset.guestbookResponse.answer=A dataset.guestbookResponse.noResponse=(No Response) +dataset.guestbookResponse.requestor.id=authenticatedUserId +dataset.guestbookResponse.requestor.identifier=authenticatedUserIdentifier # dataset.xhtml diff --git a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java index 0756c4650fb..f9236ab8338 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.UserNotification; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -82,7 +83,12 @@ public void testSubjectRevokeRole() { @Test public void testSubjectRequestFileAccess() { userNotification.setType(UserNotification.Type.REQUESTFILEACCESS); - assertEquals("LibraScholar: Access has been requested for a restricted file in dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); + AuthenticatedUser requestor = new AuthenticatedUser(); + requestor.setFirstName("Tom"); + requestor.setLastName("Jones"); + requestor.setUserIdentifier("TJ-1234"); + userNotification.setRequestor(requestor); + assertEquals("LibraScholar: Tom Jones (@TJ-1234) requested access to dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); } @Test From fc020be4e95c163509a055f452111aadd06eddcb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 25 Jun 2024 15:37:03 -0400 Subject: [PATCH 77/85] missed use --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 10e6c2e6516..b7b2760e79b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1826,7 +1826,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc } sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); - addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN); + addResponse = solrClientService.getSolrClient().add(sid); } } } From 9f504e214438af9fa6fa450a88fdabc3ac994ccf Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 25 Jun 2024 19:13:32 -0400 Subject: [PATCH 78/85] optional indexing of the "metadata source" for harvested files as well - it appears that it may have been missed in the original #10464 (? - will confirm). 
#10611 --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d3286d3be4b..1eaf012876d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1402,7 +1402,14 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set<Long> [Extraction damage: the body of this hunk — which, per the commit message, extends the optional "metadata source" indexing to harvested files — and the mail header (hash and author line) of PATCH 79 were swallowed here by markup stripping.] Date: Tue, 25 Jun 2024 19:32:53 -0400 Subject: [PATCH 79/85] a release note (#10611) --- doc/release-notes/10611-harvested-origin-facet.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 doc/release-notes/10611-harvested-origin-facet.md diff --git a/doc/release-notes/10611-harvested-origin-facet.md b/doc/release-notes/10611-harvested-origin-facet.md new file mode 100644 index 00000000000..89ab6eb7639 --- /dev/null +++ b/doc/release-notes/10611-harvested-origin-facet.md @@ -0,0 +1,10 @@ +NOTE that this release note supersedes the 10464-add-name-harvesting-client-facet.md note from the PR 10464. + +An option has been added to index the name of the Harvesting Client as the "Metadata Source" of harvested datasets and files; if enabled, the Metadata Source facet will show separate entries for the content harvested from different sources, instead of the current, default behavior where there is one "Harvested" facet for all such content. + + +TODO: for the v6.3 release note: +If you choose to enable the extended "Metadata Source" facet for harvested content, set the optional feature flag (JVM option) `dataverse.feature.index-harvested-metadata-source=true` before reindexing. + +[Please note that the upgrade instruction in 6.3 will contain a suggestion to run full reindex, as part of the Solr upgrade, so the sentence above will need to be added to that section] + From bb7944928942a683718d93900775f7c1535f3609 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 25 Jun 2024 19:39:51 -0400 Subject: [PATCH 80/85] an entry in the config guide for the new feature flag. --- doc/sphinx-guides/source/installation/config.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 213ac827819..4b2542f45fd 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3274,6 +3274,9 @@ please find all known feature flags below. Any of these flags can be activated u * - add-publicobject-solr-field - Adds an extra boolean field `PublicObject_b:true` for public content (published Collections, Datasets and Files). Once reindexed with these fields, we can rely on it to remove a very expensive Solr join on all such documents in Solr queries, significantly improving overall performance (by enabling the feature flag above, `avoid-expensive-solr-join`). These two flags are separate so that an instance can reindex their holdings before enabling the optimization in searches, thus avoiding having their public objects temporarily disappear from search results while the reindexing is in progress.
- ``Off`` + * - index-harvested-metadata-source + - If enabled, this will index the name of the Harvesting Client as the "Metadata Source" of harvested datasets and files; so that the Metadata Source facet on the collection page will be showing separate entries for the content harvested from different sources/via different clients, instead of the current, default behavior where there is one "Harvested" facet for all such content. Requires a reindex. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. From bcc50aab30e9a6685a51f5695bf5d03306038dc1 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 26 Jun 2024 09:43:26 -0400 Subject: [PATCH 81/85] fixes file access request email perm check --- .../java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index ab9e4f9be66..5370e9ac564 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -573,7 +573,7 @@ public boolean requestAccess(DataFile dataFile, GuestbookResponse gbr){ public void sendRequestFileAccessNotification(Dataset dataset, Long fileId, AuthenticatedUser requestor) { Timestamp ts = new Timestamp(new Date().getTime()); - permissionService.getUsersWithPermissionOn(Permission.ManageDatasetPermissions, dataset).stream().forEach((au) -> { + permissionService.getUsersWithPermissionOn(Permission.ManageFilePermissions, dataset).stream().forEach((au) -> { userNotificationService.sendNotification(au, ts, UserNotification.Type.REQUESTFILEACCESS, fileId, null, requestor, true); }); //send the user that requested access a notification that they requested the access From 65b2dcbdbfc6321be89035294a7caca9d3f29f87 Mon Sep 17 00:00:00 2001 From: landreev Date: Thu, 27 Jun 2024 10:56:04 -0400 Subject: [PATCH 82/85] Update 10494-payara-upgrade.md --- doc/release-notes/10494-payara-upgrade.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/10494-payara-upgrade.md b/doc/release-notes/10494-payara-upgrade.md index e8ecb6756e0..b1a6c6fda8e 100644 --- a/doc/release-notes/10494-payara-upgrade.md +++ b/doc/release-notes/10494-payara-upgrade.md @@ -19,7 +19,7 @@ But we also recommend that you review the Payara upgrade instructions as it coul We assume you are already on a Dataverse 6.x installation, using a Payara 6.x release. ```shell -export PAYARA=/usr/local/payara5 +export PAYARA=/usr/local/payara6 ``` (or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) @@ -91,7 +91,13 @@ export PAYARA=/usr/local/payara5 -XX:+UnlockDiagnosticVMOptions ``` (You can also save this as a patch file and try to apply it.) - +TODO: For the combined 6.3 release note, I would consider replacing the patch format above with just the 4 specific options, for clarity etc+. (L.A.) 
As in: +``` + --add-opens=java.management/javax.management=ALL-UNNAMED + --add-opens=java.management/javax.management.openmbean=ALL-UNNAMED + [17|]--add-opens=java.base/java.io=ALL-UNNAMED + [21|]--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED +``` 7\. Start Payara From 7b7f2e398a1f663de199eb11cb4af3c5929d7523 Mon Sep 17 00:00:00 2001 From: landreev Date: Thu, 27 Jun 2024 10:57:10 -0400 Subject: [PATCH 83/85] Update 10494-payara-upgrade.md --- doc/release-notes/10494-payara-upgrade.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/10494-payara-upgrade.md b/doc/release-notes/10494-payara-upgrade.md index b1a6c6fda8e..23ee0e698f7 100644 --- a/doc/release-notes/10494-payara-upgrade.md +++ b/doc/release-notes/10494-payara-upgrade.md @@ -91,7 +91,8 @@ export PAYARA=/usr/local/payara6 -XX:+UnlockDiagnosticVMOptions ``` (You can also save this as a patch file and try to apply it.) -TODO: For the combined 6.3 release note, I would consider replacing the patch format above with just the 4 specific options, for clarity etc+. (L.A.) As in: + +TODO: For the combined 6.3 release note, I would consider replacing the patch format above with just the 4 specific options, for clarity etc. (L.A.) As in: ``` --add-opens=java.management/javax.management=ALL-UNNAMED --add-opens=java.management/javax.management.openmbean=ALL-UNNAMED [17|]--add-opens=java.base/java.io=ALL-UNNAMED [21|]--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED ``` From 9985378dbebe0d4d76ea6b85f3501ccdef27fa12 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 28 Jun 2024 11:20:15 -0400 Subject: [PATCH 84/85] IQSS/10137-2 Add flag to remove Return To Author Reason (#10655) * drop COMMIT_WITHIN which breaks autoSoftCommit by maxTime in solrconfig * add disable-return-to-author-reason feature flag * release note, doc updates * missed use --- .../10137-2-add-disable-reason-flag.md | 6 ++++ doc/sphinx-guides/source/api/native-api.rst | 4 +-- .../source/installation/config.rst | 8 ++++-- .../harvard/iq/dataverse/api/Datasets.java | 4 ++- .../impl/ReturnDatasetToAuthorCommand.java | 3 +- .../iq/dataverse/settings/FeatureFlags.java | 10 +++++++ src/main/java/propertyFiles/Bundle.properties | 3 +- src/main/webapp/dataset.xhtml | 26 +++++++++-------- .../ReturnDatasetToAuthorCommandTest.java | 28 ++++++++++++++++++- 9 files changed, 73 insertions(+), 19 deletions(-) create mode 100644 doc/release-notes/10137-2-add-disable-reason-flag.md diff --git a/doc/release-notes/10137-2-add-disable-reason-flag.md b/doc/release-notes/10137-2-add-disable-reason-flag.md new file mode 100644 index 00000000000..ee5257466ee --- /dev/null +++ b/doc/release-notes/10137-2-add-disable-reason-flag.md @@ -0,0 +1,6 @@ +## Release Highlights + +### Feature flag to remove the required "reason" field in the "Return To Author" dialog + +A "reason" field, required to be non-empty, was added to the "Return To Author" dialog in v6.2. Installations that handle author communications through email or another system may prefer not to be required to use this field. This release includes a new +disable-return-to-author-reason feature flag that can be enabled to drop the reason field from the dialog and make sending a reason optional in the api/datasets/{id}/returnToAuthor call.
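[Editorial aside: a minimal sketch of how a `dataverse.feature.*` flag like the one in this release note can be resolved through MicroProfile Config, following the `dataverse.feature.<name>` convention documented in the config guide changes elsewhere in this series. Illustrative only — the project's actual FeatureFlags enum (see PATCH 84 below) wires this up through its own settings machinery:]
```java
import org.eclipse.microprofile.config.ConfigProvider;

public class FeatureFlagDemo {
    public static void main(String[] args) {
        // Feature flags default to Off per the config guide.
        boolean reasonDisabled = ConfigProvider.getConfig()
                .getOptionalValue("dataverse.feature.disable-return-to-author-reason", Boolean.class)
                .orElse(false);
        System.out.println("Return-to-author reason required: " + !reasonDisabled);
    }
}
```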
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 894f84d2aac..2d0dc714132 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2231,7 +2231,7 @@ The people who need to review the dataset (often curators or journal editors) ca Return a Dataset to Author ~~~~~~~~~~~~~~~~~~~~~~~~~~ -After the curators or journal editors have reviewed a dataset that has been submitted for review (see "Submit for Review", above) they can either choose to publish the dataset (see the ``:publish`` "action" above) or return the dataset to its authors. In the web interface there is a "Return to Author" button (see :doc:`/user/dataset-management`), but the interface does not provide a way to explain **why** the dataset is being returned. There is a way to do this outside of this interface, however. Instead of clicking the "Return to Author" button in the UI, a curator can write a "reason for return" into the database via API. +After the curators or journal editors have reviewed a dataset that has been submitted for review (see "Submit for Review", above) they can either choose to publish the dataset (see the ``:publish`` "action" above) or return the dataset to its authors. In the web interface there is a "Return to Author" button (see :doc:`/user/dataset-management`). The same operation can be done via this API call. Here's how curators can send a "reason for return" to the dataset authors. First, the curator creates a JSON file that contains the reason for return: @@ -2254,7 +2254,7 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/returnToAuthor?persistentId=doi:10.5072/FK2/J8SJZB" -H "Content-type: application/json" -d @reason-for-return.json The review process can sometimes resemble a tennis match, with the authors submitting and resubmitting the dataset over and over until the curators are satisfied. Each time the curators send a "reason for return" via API, that reason is sent by email and is persisted into the database, stored at the dataset version level. -The reason is required, please note that you can still type a creative and meaningful comment such as "The author would like to modify his dataset", "Files are missing", "Nothing to report" or "A curation report with comments and suggestions/instructions will follow in another email" that suits your situation. +Note the reason is required, unless the `disable-return-to-author-reason` feature flag has been set (see :ref:`feature-flags`). Reason is a free text field and could be as simple as "The author would like to modify his dataset", "Files are missing", "Nothing to report" or "A curation report with comments and suggestions/instructions will follow in another email" that suits your situation. The :ref:`send-feedback` API call may be useful as a way to move the conversation to email. However, note that these emails go to contacts (versus authors) and there is no database record of the email contents. (:ref:`dataverse.mail.cc-support-on-contact-email` will send a copy of these emails to the support email address which would provide a record.) 
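[Editorial aside: the guide above demonstrates the returnToAuthor call with curl; the same request from Java's built-in HTTP client looks roughly like the sketch below. The server URL, API token, and PID are the guide's own placeholders:]
```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ReturnToAuthorDemo {
    public static void main(String[] args) throws Exception {
        String serverUrl = "https://demo.dataverse.org";
        String pid = "doi:10.5072/FK2/J8SJZB";
        // With the disable-return-to-author-reason flag off, reasonForReturn must be non-empty.
        String json = "{\"reasonForReturn\": \"Files are missing\"}";
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(serverUrl + "/api/datasets/:persistentId/returnToAuthor?persistentId=" + pid))
                .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                .header("Content-type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(json))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + ": " + response.body());
    }
}
```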
diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f6b6b5bb968..d06a396c190 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3277,9 +3277,13 @@ please find all known feature flags below. Any of these flags can be activated u * - reduce-solr-deletes - Avoids deleting and recreating solr documents for dataset files when reindexing. - ``Off`` - * - index-harvested-metadata-source - - If enabled, this will index the name of the Harvesting Client as the "Metadata Source" of harvested datasets and files; so that the Metadata Source facet on the collection page will be showing separate entries for the content harvested from different sources/via different clients, instead of the current, default behavior where there is one "Harvested" facet for all such content. Requires a reindex. + * - reduce-solr-deletes + - Avoids deleting and recreating solr documents for dataset files when reindexing. - ``Off`` + * - disable-return-to-author-reason + - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call. + - ``Off`` + **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_FEATURE_XXX`` (e.g. ``DATAVERSE_FEATURE_API_SESSION_AUTH=1``). These environment variables can be set in your shell before starting Payara. If you are using :doc:`Docker for development `, you can set them in the `docker compose `_ file. diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 78e551780dc..0e0ebaaf54f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -45,6 +45,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; @@ -2478,7 +2479,8 @@ public Response returnToAuthor(@Context ContainerRequestContext crc, @PathParam( Dataset dataset = findDatasetOrDie(idSupplied); String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); - if (reasonForReturn == null || reasonForReturn.isEmpty()) { + if ((reasonForReturn == null || reasonForReturn.isEmpty()) + && !FeatureFlags.DISABLE_RETURN_TO_AUTHOR_REASON.enabled()) { return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("dataset.reject.datasetNotInReview")); } AuthenticatedUser authenticatedUser = getRequestAuthenticatedUserOrDie(crc); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index f3b33f82524..8d8fddeda6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import 
edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.IOException; @@ -26,7 +27,7 @@ public class ReturnDatasetToAuthorCommand extends AbstractDatasetCommand<Dataset>
[Extraction damage: the body of this hunk, together with the FeatureFlags.java (10 +), Bundle.properties (3 +-), and dataset.xhtml (26 changed) hunks listed in the PATCH 84 diffstat, was mangled when angle-bracketed markup was stripped. Per the commit message, these hunks make the empty-reason check in ReturnDatasetToAuthorCommand conditional on the new flag, add the DISABLE_RETURN_TO_AUTHOR_REASON constant, and render the reason field in the Return to Author dialog only when the flag is off. The one fragment that survives from the dataset.xhtml hunk is the updated message expression: - #{bundle['dataset.rejectMessage']} + #{bundle['dataset.rejectMessage']} #{disableReasonField ? '':bundle['dataset.rejectMessageReason']}]
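[Editorial aside: since the FeatureFlags.java hunk itself was lost above, here is a minimal sketch of the enum pattern the new constant follows. The constant and flag names are confirmed by the Datasets.java hunk above and the tests below; the body shown is an assumption, simplified from how such flags are typically resolved:]
```java
// Simplified stand-in for edu.harvard.iq.dataverse.settings.FeatureFlags.
public enum FeatureFlags {

    DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason");

    final String flag;

    FeatureFlags(String flag) {
        this.flag = flag;
    }

    public boolean enabled() {
        // The real implementation looks up dataverse.feature.<flag> via the
        // settings machinery; hardcoded here to the documented default (Off).
        return false;
    }
}
```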
[Extraction damage: the diff header and first hunk context for ReturnDatasetToAuthorCommandTest.java were mangled here; the surviving context line sits inside the test class's commented-out getUsersWithPermissionOn(Permission permission, ...) stub.] } */ @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "disable-return-to-author-reason") void testDatasetNull() { assertThrows(IllegalArgumentException.class, () -> new ReturnDatasetToAuthorCommand(dataverseRequest, null, "")); @@ -179,6 +183,7 @@ public void testNotInReviewDataset() { } @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "disable-return-to-author-reason") public void testEmptyOrNullComment(){ dataset.getLatestVersion().setVersionState(DatasetVersion.VersionState.DRAFT); Dataset updatedDataset = null; @@ -198,6 +203,27 @@ public void testEmptyOrNullComment(){ } assertEquals(expected, actual); } + + /** Test the disable reason flag + * @throws Exception when the test is in error. + */ + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "disable-return-to-author-reason") + public void testEmptyOrNullCommentWhenDisabled() throws Exception { + dataset.getLatestVersion().setVersionState(DatasetVersion.VersionState.DRAFT); + Dataset updatedDataset = null; + + testEngine.submit(new AddLockCommand(dataverseRequest, dataset, + new DatasetLock(DatasetLock.Reason.InReview, dataverseRequest.getAuthenticatedUser()))); + + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, null)); + assertNotNull(updatedDataset); + testEngine.submit(new AddLockCommand(dataverseRequest, dataset, + new DatasetLock(DatasetLock.Reason.InReview, dataverseRequest.getAuthenticatedUser()))); + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "")); + assertNotNull(updatedDataset); + } + @Test public void testAllGood() { From 1026a3ab343cdf8f2d23500d89a227d0b61e70b2 Mon Sep 17 00:00:00 2001 From: Steven Ferey Date: Mon, 1 Jul 2024 17:07:52 +0200 Subject: [PATCH 85/85] remove duplicate properties keys (#9718) * remove duplicate properties keys * Change of label for the advanced.search.datasets.persistentId.tip key * initial value for advanced.search.datasets.persistentId.tip key * Update Bundle.properties Assuming the change is OK with the submitter so we can move this forward --------- Co-authored-by: qqmyers --- src/main/java/propertyFiles/Bundle.properties | 20 ++----------------- .../java/propertyFiles/biomedical.properties | 1 - .../java/propertyFiles/customGSD.properties | 2 -- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index c0705e70b1a..0325a47f626 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -960,18 +960,16 @@ advanced.search.header.datasets=Datasets advanced.search.header.files=Files advanced.search.files.name.tip=The name given to identify the file. advanced.search.files.description.tip=A summary describing the file and its variables. -advanced.search.files.persistentId.tip=The persistent identifier for the file. advanced.search.files.persistentId=Data File Persistent ID -advanced.search.files.persistentId.tip=The unique persistent identifier for a data file, which can be a Handle or DOI in Dataverse. +advanced.search.files.persistentId.tip=The unique persistent identifier for the file. advanced.search.files.fileType=File Type advanced.search.files.fileType.tip=The file type, e.g. Comma Separated Values, Plain Text, R, etc.
advanced.search.files.variableName=Variable Name advanced.search.files.variableName.tip=The name of the variable's column in the data frame. advanced.search.files.variableLabel=Variable Label advanced.search.files.variableLabel.tip=A short description of the variable. -advanced.search.datasets.persistentId.tip=The persistent identifier for the Dataset. advanced.search.datasets.persistentId=Persistent Identifier -advanced.search.datasets.persistentId.tip=The Dataset's unique persistent identifier, either a DOI or Handle +advanced.search.datasets.persistentId.tip=The persistent identifier for the Dataset. advanced.search.files.fileTags=File Tags advanced.search.files.fileTags.tip=Terms such "Documentation", "Data", or "Code" that have been applied to files. @@ -2341,20 +2339,6 @@ citationFrame.banner.closeIcon=Close this message, go to dataset citationFrame.banner.countdownMessage= This message will close in citationFrame.banner.countdownMessage.seconds=seconds -# Friendly AuthenticationProvider names -authenticationProvider.name.builtin=Dataverse -authenticationProvider.name.null=(provider is unknown) -authenticationProvider.name.github=GitHub -authenticationProvider.name.google=Google -authenticationProvider.name.orcid=ORCiD -authenticationProvider.name.orcid-sandbox=ORCiD Sandbox -authenticationProvider.name.shib=Shibboleth -ingest.csv.invalidHeader=Invalid header row. One of the cells is empty. -ingest.csv.lineMismatch=Mismatch between line counts in first and final passes!, {0} found on first pass, but {1} found on second. -ingest.csv.recordMismatch=Reading mismatch, line {0} of the Data file: {1} delimited values expected, {2} found. -ingest.csv.nullStream=Stream can't be null. -citationFrame.banner.countdownMessage.seconds=seconds - #file-edit-popup-fragment.xhtml #editFilesFragment.xhtml dataset.access.accessHeader=Restrict Access dataset.access.accessHeader.invalid.state=Define Data Access diff --git a/src/main/java/propertyFiles/biomedical.properties b/src/main/java/propertyFiles/biomedical.properties index 1bffed2ee03..7392ba823c4 100644 --- a/src/main/java/propertyFiles/biomedical.properties +++ b/src/main/java/propertyFiles/biomedical.properties @@ -96,7 +96,6 @@ controlledvocabulary.studyAssayMeasurementType.targeted_sequencing=targeted sequ controlledvocabulary.studyAssayMeasurementType.transcription_factor_binding_(chip-seq)=transcription factor binding (ChIP-Seq) controlledvocabulary.studyAssayMeasurementType.transcription_factor_binding_site_identification=transcription factor binding site identification controlledvocabulary.studyAssayMeasurementType.transcription_profiling=transcription profiling -controlledvocabulary.studyAssayMeasurementType.transcription_profiling=transcription profiling controlledvocabulary.studyAssayMeasurementType.transcription_profiling_(microarray)=transcription profiling (Microarray) controlledvocabulary.studyAssayMeasurementType.transcription_profiling_(rna-seq)=transcription profiling (RNA-Seq) controlledvocabulary.studyAssayMeasurementType.trap_translational_profiling=TRAP translational profiling diff --git a/src/main/java/propertyFiles/customGSD.properties b/src/main/java/propertyFiles/customGSD.properties index 40dc0328053..2375596fe2f 100644 --- a/src/main/java/propertyFiles/customGSD.properties +++ b/src/main/java/propertyFiles/customGSD.properties @@ -161,7 +161,6 @@ controlledvocabulary.gsdFacultyName.mcloskey,_karen=MCloskey, Karen controlledvocabulary.gsdFacultyName.mehrotra,_rahul=Mehrotra, Rahul 
controlledvocabulary.gsdFacultyName.menchaca,_alejandra=Menchaca, Alejandra controlledvocabulary.gsdFacultyName.menges,_achim=Menges, Achim -controlledvocabulary.gsdFacultyName.menges,_achim=Menges, Achim controlledvocabulary.gsdFacultyName.michalatos,_panagiotis=Michalatos, Panagiotis controlledvocabulary.gsdFacultyName.moe,_kiel=Moe, Kiel controlledvocabulary.gsdFacultyName.molinsky,_jennifer=Molinsky, Jennifer @@ -507,7 +506,6 @@ controlledvocabulary.gsdCourseName.06323:_brownfields_practicum=06323: Brownfiel controlledvocabulary.gsdCourseName.06333:_aquatic_ecology=06333: Aquatic Ecology controlledvocabulary.gsdCourseName.06335:_phytotechnologies=06335: Phytotechnologies controlledvocabulary.gsdCourseName.06337:_changing_natural_and_built_coastal_environments=06337: Changing Natural and Built Coastal Environments -controlledvocabulary.gsdCourseName.06337:_changing_natural_and_built_coastal_environments=06337: Changing Natural and Built Coastal Environments controlledvocabulary.gsdCourseName.06338:_introduction_to_computational_design=06338: Introduction to Computational Design controlledvocabulary.gsdCourseName.06436:_expanded_mechanisms_/_empirical_materialisms=06436: Expanded Mechanisms / Empirical Materialisms controlledvocabulary.gsdCourseName.06450:_high_performance_buildings_and_systems_integration=06450: High Performance Buildings and Systems Integration
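[Editorial aside: PATCH 85 (truncated above) strips keys that were defined twice in the bundle files — java.util.Properties silently keeps only the last occurrence of a duplicate key, so the earlier definition is dead weight. A quick, hypothetical helper for spotting such duplicates, not part of the patch:]
```java
import java.io.FileReader;
import java.util.Properties;

public class DuplicateKeyFinder extends Properties {

    @Override
    public synchronized Object put(Object key, Object value) {
        Object previous = super.put(key, value);
        if (previous != null) {
            // Properties.load() funnels every parsed entry through put(),
            // so a non-null previous value means the key appeared before.
            System.out.println("duplicate key: " + key);
        }
        return previous;
    }

    public static void main(String[] args) throws Exception {
        DuplicateKeyFinder props = new DuplicateKeyFinder();
        try (FileReader reader = new FileReader("src/main/java/propertyFiles/Bundle.properties")) {
            props.load(reader); // prints each key defined more than once
        }
    }
}
```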