From 5df4720277928789f14f94faa2ca4b108a23082d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 3 Jul 2024 14:36:15 -0400 Subject: [PATCH 01/84] avoid making non-managed fields hidden --- .../iq/dataverse/DatasetFieldServiceBean.java | 13 +++++++++++++ .../edu/harvard/iq/dataverse/SettingsWrapper.java | 12 ++++++++++++ src/main/webapp/metadataFragment.xhtml | 7 +++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 34595728fa7..317b6347c00 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -84,6 +84,9 @@ public class DatasetFieldServiceBean implements java.io.Serializable { //Note that for primitive fields, the prent and term-uri-field are the same and these maps have the same entry Map cvocMapByTermUri = null; + //Flat list of cvoc managed fields + Set managedFieldSet = null; + //The hash of the existing CVocConf setting. Used to determine when the setting has changed and it needs to be re-parsed to recreate the cvocMaps String oldHash = null; @@ -282,6 +285,10 @@ public Map getCVocConf(boolean byTermUriField){ String cvocSetting = settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf); if (cvocSetting == null || cvocSetting.isEmpty()) { oldHash=null; + //Release old maps + cvocMap=null; + cvocMapByTermUri=null; + managedFieldSet = null; return new HashMap<>(); } String newHash = DigestUtils.md5Hex(cvocSetting); @@ -291,6 +298,7 @@ public Map getCVocConf(boolean byTermUriField){ oldHash=newHash; cvocMap=new HashMap<>(); cvocMapByTermUri=new HashMap<>(); + managedFieldSet = new HashSet<>(); try (JsonReader jsonReader = Json.createReader(new StringReader(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)))) { JsonArray cvocConfJsonArray = jsonReader.readArray(); @@ -331,6 +339,7 @@ public Map getCVocConf(boolean byTermUriField){ + managedFields.getString(s)); } else { logger.fine("Found: " + dft.getName()); + managedFieldSet.add(dft.getName()); } } } @@ -342,6 +351,10 @@ public Map getCVocConf(boolean byTermUriField){ return byTermUriField ? cvocMapByTermUri : cvocMap; } + public Set getManagedFieldSet() { + return managedFieldSet; + } + /** * Adds information about the external vocabulary term being used in this DatasetField to the ExternalVocabularyValue table if it doesn't already exist. 
* @param df - the primitive/parent compound field containing a newly saved value diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 48196591b19..6e187ae6cef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -98,6 +98,7 @@ public class SettingsWrapper implements java.io.Serializable { //External Vocabulary support private Map cachedCvocMap = null; private Map cachedCvocByTermFieldMap = null; + private Set managedFieldSet; private Long zipDownloadLimit = null; @@ -806,6 +807,17 @@ public Map getCVocConf(boolean byTermField) { } } + public boolean isManagedField(String fieldName) { + + if(managedFieldSet == null) { + managedFieldSet = fieldService.getManagedFieldSet(); + } + if(managedFieldSet == null) { + return false; + } + return managedFieldSet.contains(fieldName); + } + public String getMetricsUrl() { if (metricsUrl == null) { metricsUrl = getValueForKey(SettingsServiceBean.Key.MetricsUrl); diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 43d54f64c43..a2e67fb80c1 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -171,8 +171,11 @@ rendered="#{(cvocOnDsf or cvocOnCvPart) and not ( cvPart.key.datasetFieldType.name.equals(cvocConf.get(cvPart.key.datasetFieldType.id).getString('term-uri-field')) or cvPart.key.datasetFieldType.name.equals(cvocConf.get(dsf.datasetFieldType.id).getString('term-uri-field')))}" - styleClass="hidden"> - + > + + + + From c4f75cfbd537eee2e9c84d436014f0986b8e5b7b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 3 Jul 2024 15:48:14 -0400 Subject: [PATCH 02/84] fix/refactor managedField display --- .../iq/dataverse/DatasetFieldServiceBean.java | 16 +++++++++------- .../harvard/iq/dataverse/SettingsWrapper.java | 12 ++++++------ src/main/webapp/metadataFragment.xhtml | 11 ++++------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 317b6347c00..0f9f9b39f1c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -84,8 +84,8 @@ public class DatasetFieldServiceBean implements java.io.Serializable { //Note that for primitive fields, the prent and term-uri-field are the same and these maps have the same entry Map cvocMapByTermUri = null; - //Flat list of cvoc managed fields - Set managedFieldSet = null; + //Flat list of cvoc term-uri and managed fields by Id + Set cvocFieldSet = null; //The hash of the existing CVocConf setting. 
Used to determine when the setting has changed and it needs to be re-parsed to recreate the cvocMaps String oldHash = null; @@ -288,7 +288,7 @@ public Map getCVocConf(boolean byTermUriField){ //Release old maps cvocMap=null; cvocMapByTermUri=null; - managedFieldSet = null; + cvocFieldSet = null; return new HashMap<>(); } String newHash = DigestUtils.md5Hex(cvocSetting); @@ -298,7 +298,7 @@ public Map getCVocConf(boolean byTermUriField){ oldHash=newHash; cvocMap=new HashMap<>(); cvocMapByTermUri=new HashMap<>(); - managedFieldSet = new HashSet<>(); + cvocFieldSet = new HashSet<>(); try (JsonReader jsonReader = Json.createReader(new StringReader(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)))) { JsonArray cvocConfJsonArray = jsonReader.readArray(); @@ -315,11 +315,13 @@ public Map getCVocConf(boolean byTermUriField){ if (termUriField.equals(dft.getName())) { logger.fine("Found primitive field for term uri : " + dft.getName() + ": " + dft.getId()); cvocMapByTermUri.put(dft.getId(), jo); + cvocFieldSet.add(dft.getId()); } } else { DatasetFieldType childdft = findByNameOpt(jo.getString("term-uri-field")); logger.fine("Found term child field: " + childdft.getName()+ ": " + childdft.getId()); cvocMapByTermUri.put(childdft.getId(), jo); + cvocFieldSet.add(childdft.getId()); if (childdft.getParentDatasetFieldType() != dft) { logger.warning("Term URI field (" + childdft.getDisplayName() + ") not a child of parent: " + dft.getDisplayName()); @@ -339,7 +341,7 @@ public Map getCVocConf(boolean byTermUriField){ + managedFields.getString(s)); } else { logger.fine("Found: " + dft.getName()); - managedFieldSet.add(dft.getName()); + cvocFieldSet.add(dft.getId()); } } } @@ -351,8 +353,8 @@ public Map getCVocConf(boolean byTermUriField){ return byTermUriField ? 
cvocMapByTermUri : cvocMap; } - public Set getManagedFieldSet() { - return managedFieldSet; + public Set getCvocFieldSet() { + return cvocFieldSet; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 6e187ae6cef..222d2881cd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -98,7 +98,7 @@ public class SettingsWrapper implements java.io.Serializable { //External Vocabulary support private Map cachedCvocMap = null; private Map cachedCvocByTermFieldMap = null; - private Set managedFieldSet; + private Set cvocFieldSet; private Long zipDownloadLimit = null; @@ -807,15 +807,15 @@ public Map getCVocConf(boolean byTermField) { } } - public boolean isManagedField(String fieldName) { + public boolean isCvocField(Long fieldId) { - if(managedFieldSet == null) { - managedFieldSet = fieldService.getManagedFieldSet(); + if(cvocFieldSet == null) { + cvocFieldSet = fieldService.getCvocFieldSet(); } - if(managedFieldSet == null) { + if(cvocFieldSet == null) { return false; } - return managedFieldSet.contains(fieldName); + return cvocFieldSet.contains(fieldId); } public String getMetricsUrl() { diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index a2e67fb80c1..494940370e1 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -129,8 +129,8 @@ - - + + - - - - + styleClass="hidden"> + From 512da5cdbe762507d5ab486e89429e1c6c2020ca Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 8 Jul 2024 09:30:59 -0400 Subject: [PATCH 03/84] lower logging for plain text ext cvv case --- .../java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index 0f9f9b39f1c..5fae2310fb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -487,7 +487,8 @@ public JsonObject getExternalVocabularyValue(String termUri) { logger.warning("Problem parsing external vocab value for uri: " + termUri + " : " + e.getMessage()); } } catch (NoResultException nre) { - logger.warning("No external vocab value for uri: " + termUri); + //Could just be a plain text value + logger.fine("No external vocab value for uri: " + termUri); } return null; } From 22b8ca7a43f8f681c7dc0a6a407e319d1e832710 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jul 2024 15:18:21 -0400 Subject: [PATCH 04/84] add cvoc to managed internal cvv field --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 494940370e1..a6d409268df 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -381,7 +381,7 @@ -
+
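A note on the caching introduced in patches 01-02 above: getCVocConf() MD5-hashes the raw :CVocConf setting and rebuilds cvocMap, cvocMapByTermUri and cvocFieldSet only when that hash differs from the one recorded on the previous call; SettingsWrapper then keeps a per-request copy of the field-id set so isCvocField() is a cheap lookup from the page. A minimal standalone sketch of the hash-guarded rebuild (hypothetical names; DigestUtils is the same commons-codec class the bean already uses):

import java.util.HashMap;
import java.util.Map;
import org.apache.commons.codec.digest.DigestUtils;

class HashGuardedCache {
    private String oldHash = null;
    private Map<Long, String> cache = null;

    Map<Long, String> get(String rawSetting) {
        String newHash = DigestUtils.md5Hex(rawSetting);
        if (!newHash.equals(oldHash)) {
            // the setting changed since the last call (or this is the first call):
            // remember the new hash and re-parse
            oldHash = newHash;
            cache = parse(rawSetting);
        }
        return cache;
    }

    private Map<Long, String> parse(String rawSetting) {
        return new HashMap<>(); // stand-in for the JSON parsing done in getCVocConf()
    }
}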
From e20463211bbca844611ed5d2745a3389ea9d7303 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jul 2024 15:18:55 -0400 Subject: [PATCH 05/84] increase priority/specificity to avoid conflict --- src/main/webapp/resources/css/structure.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 64d07038f2f..17a0e69d85d 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -1069,7 +1069,7 @@ progress::-webkit-progress-value { .select2-selection__clear, .select2-selection__rendered { vertical-align: -webkit-baseline-middle; } -.select2-results li { +.select2-results ul > li { font-weight:400; font-size:14px; } From 920898472d799d17d1431415a6bc4bbc3b586f51 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 11 Jul 2024 16:15:22 -0400 Subject: [PATCH 06/84] add/prioritize getting child field in cvocConf --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index a6d409268df..09704b4e2c7 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -377,7 +377,7 @@ - + From bb56118a51073ba7c11112c06f2293a40d088c2b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Jul 2024 15:59:52 -0400 Subject: [PATCH 07/84] only show hidden cvoc fields if term uri field exists --- .../harvard/iq/dataverse/DatasetFieldCompoundValue.java | 9 +++++++++ src/main/webapp/metadataFragment.xhtml | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java index c679cd7edad..c03baec73af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldCompoundValue.java @@ -225,6 +225,15 @@ public Pair getLinkComponents() { return linkComponents.get(parentDatasetField.getDatasetFieldType().getName()); } + public boolean hasChildOfType(String name) { + for (DatasetField child : childDatasetFields) { + if (child.getDatasetFieldType().getName().equals(name)) { + return true; + } + } + return false; + } + private Map removeLastComma(Map mapIn) { Iterator> itr = mapIn.entrySet().iterator(); diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 09704b4e2c7..f6aad71f408 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -126,11 +126,11 @@ - + - + From 95250a2945e220655d9f0fe126699d57e9c19e54 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Jul 2024 16:04:36 -0400 Subject: [PATCH 08/84] typo --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index f6aad71f408..1d247c4ffd1 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -129,7 +129,7 @@ - + From 9063739fb2baaa874132f68f93a6bb49e898f00a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Jul 2024 17:55:48 -0400 Subject: [PATCH 09/84] use getString --- src/main/webapp/metadataFragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/metadataFragment.xhtml 
b/src/main/webapp/metadataFragment.xhtml index 1d247c4ffd1..0da2953a0d0 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -126,7 +126,7 @@ - + From 4b904cfd503b852b9fa6c2933b0112aa37392c08 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 18 Jul 2024 10:09:25 -0400 Subject: [PATCH 10/84] tweak ror styling --- src/main/webapp/resources/css/structure.css | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 17a0e69d85d..db1e85f7631 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -670,6 +670,16 @@ div.edit-field div.ui-message {margin:6px 0;} .metadata-container pre { white-space: pre-wrap !important; } + +#metadata_author img { + vertical-align: text-top; + margin-right:1px; + margin-left: 3px; +} + +#metadata_author img.ror { + vertical-align: middle; +} /* TRUNCATION */ div.more-block {text-align:center; padding-top:250px; width:100%; position:absolute; bottom:0; background:linear-gradient(180deg,hsla(0,0%,100%,0),#fff 80%);} button.desc-more-link {margin:0; padding:0;} From 1ccf834ebb52447e597b9b830265cb9cbc03ab56 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sun, 21 Jul 2024 12:28:13 -0400 Subject: [PATCH 11/84] release note --- doc/release-notes/10711-CVoc Updates.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10711-CVoc Updates.md diff --git a/doc/release-notes/10711-CVoc Updates.md b/doc/release-notes/10711-CVoc Updates.md new file mode 100644 index 00000000000..f747bedb049 --- /dev/null +++ b/doc/release-notes/10711-CVoc Updates.md @@ -0,0 +1 @@ +Changes in Dataverse and updates to the ORCID and ROR external vocabulary scripts support deploying these for the citation block author field (and others). From 65ec69f97c74078e4db5d7194e1a5fd6a99f9b50 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Aug 2024 17:17:31 -0400 Subject: [PATCH 12/84] a quick experimental AddReplaceFileHelper implementation of adding Globus-uploaded files #10623 --- .../iq/dataverse/EditDatafilesPage.java | 8 ++- .../dataverse/globus/GlobusServiceBean.java | 51 ++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 993cb02b66b..99a44058809 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2121,8 +2121,12 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { } /** - * Using information from the DropBox choose, ingest the chosen files - * https://www.dropbox.com/developers/dropins/chooser/js + * External, aka "Direct" Upload. + * The file(s) have been uploaded to physical storage (such as S3) directly, + * this call is to create and add the DataFiles to the Dataset on the Dataverse + * side. The method does NOT finalize saving the datafiles in the database - + * that will happen when the user clicks 'Save', similar to how the "normal" + * uploads are handled. 
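+ * (For example: with S3 direct upload the bytes are already in the bucket,
+ * identified by a storageIdentifier of the form
+ * "s3://gcs5-bucket1:1781cfeb8a7-748c270a227c" - the same form the Globus
+ * upload code below parses - so this method only registers DataFile objects
+ * pointing at those existing stored objects.)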
* * @param event */ diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index fb50214c259..02bc4667ea6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -61,6 +61,9 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; @@ -70,6 +73,7 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.ws.rs.core.Response; @Stateless @Named("GlobusServiceBean") @@ -81,6 +85,8 @@ public class GlobusServiceBean implements java.io.Serializable { protected SettingsServiceBean settingsSvc; @Inject DataverseSession session; + @Inject + DataverseRequestServiceBean dataverseRequestSvc; @EJB protected AuthenticationServiceBean authSvc; @EJB @@ -92,7 +98,13 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB FileDownloadServiceBean fileDownloadService; @EJB - DataFileServiceBean dataFileService; + DataFileServiceBean dataFileSvc; + @EJB + PermissionServiceBean permissionSvc; + @EJB + IngestServiceBean ingestSvc; + @EJB + SystemConfig systemConfig; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -764,7 +776,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); logger.fine("Size: " + newfilesJsonArray.size()); logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); - JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); + JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -790,7 +802,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S path = Json.createPatchBuilder() .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(fileJsonObject); + addFilesJsonData.add(fileJsonObject); countSuccess++; // } else { // globusLogger.info(fileName @@ -805,20 +817,32 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } } - String newjsonData = jsonDataSecondAPI.build().toString(); + String newjsonData = addFilesJsonData.build().toString(); - globusLogger.info("Successfully generated new JsonData for Second API call"); + globusLogger.info("Successfully generated new JsonData for addFiles call"); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F 
jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command); + System.out.println("*******====command ==== " + command);*/ // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of // calling API - - String output = addFilesAsync(command, globusLogger); - if (output.equalsIgnoreCase("ok")) { + + // a quick experimental AddReplaceFileHelper implementation: + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dataverseRequestSvc.getDataverseRequest(), + this.ingestSvc, + this.datasetSvc, + this.dataFileSvc, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); + + if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -830,10 +854,10 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), countSuccess + " files added out of " + countAll, true); } - globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + globusLogger.info("Successfully completed addFiles call "); } else { globusLogger.log(Level.SEVERE, - "******* Error while executing api/datasets/:persistentId/add call ", command); + "******* Error while executing addFiles ", newjsonData); } } @@ -848,6 +872,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S e.printStackTrace(); globusLogger.info("Exception from globusUpload call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + // } } if (ruleId != null) { @@ -1261,7 +1286,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, Long fileId = Long.parseLong(idAsString); // If we need to create a GuestBookResponse record, we have to // look up the DataFile object for this file: - df = dataFileService.findCheapAndEasy(fileId); + df = dataFileSvc.findCheapAndEasy(fileId); selectedFiles.add(df); if (!doNotSaveGuestbookResponse) { guestbookResponse.setDataFile(df); From 04951609c20f954573eb1d50eebe8ef08d464ed8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 6 Aug 2024 16:40:59 -0400 Subject: [PATCH 13/84] no need to try to calculate checksums if this globus storage isn't dataverse-accessible. 
#10623 --- .../dataverse/globus/GlobusServiceBean.java | 41 +++++++++++-------- .../timer/DataverseTimerServiceBean.java | 4 +- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 02bc4667ea6..d6b56b51fa5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1160,25 +1160,30 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed - do { - try { - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - in = dataFileStorageIO.getInputStream(); - checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - count = 3; - } catch (IOException ioex) { - count = 3; - logger.fine(ioex.getMessage()); - globusLogger.info( - "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); - } catch (Exception ex) { - count = count + 1; - ex.printStackTrace(); - logger.info(ex.getMessage()); - Thread.sleep(5000); - } + String storageDriverId = DataAccess.getDriverIdAndStorageLocation(fullPath)[0]; - } while (count < 3); + if (StorageIO.isDataverseAccessible(storageDriverId)) { + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + } catch (IOException ioex) { + count = 3; + logger.fine(ioex.getMessage()); + globusLogger.info( + "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + + + } while (count < 3); + } if (checksumVal.length() == 0) { checksumVal = "Not available in Dataverse"; diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index 6eb3a8df0bc..a783b211b36 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -120,13 +120,13 @@ public void handleTimeout(jakarta.ejb.Timer timer) { } try { - logger.log(Level.INFO,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); + logger.log(Level.FINE,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); } catch (UnknownHostException ex) { Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); } if (timer.getInfo() instanceof MotherTimerInfo) { - logger.info("Behold! I am the Master Timer, king of all timers! I'm here to create all the lesser timers!"); + logger.fine("Behold! I am the Master Timer, king of all timers! I'm here to create all the lesser timers!"); removeHarvestTimers(); for (HarvestingClient client : harvestingClientService.getAllHarvestingClients()) { createHarvestTimer(client); From ba661387bba24e770ae5c0f9cade3b64db7614f3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 10:43:45 -0400 Subject: [PATCH 14/84] more globus mods (work in progress). 
#10623 --- .../harvard/iq/dataverse/api/Datasets.java | 7 +- .../dataverse/globus/GlobusServiceBean.java | 485 +++++++++++------- .../{GlobusTask.java => GlobusTaskState.java} | 6 +- .../iq/dataverse/globus/GlobusUtil.java | 33 ++ .../dataverse/ingest/IngestServiceBean.java | 5 +- .../iq/dataverse/settings/FeatureFlags.java | 5 + .../iq/dataverse/util/SystemConfig.java | 9 + 7 files changed, 363 insertions(+), 187 deletions(-) rename src/main/java/edu/harvard/iq/dataverse/globus/{GlobusTask.java => GlobusTaskState.java} (93%) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4b919c5ed82..b93257bc0c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4009,6 +4009,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); if (!systemConfig.isHTTPUpload()) { + // @todo why isHTTPUpload()? - shouldn't it be checking isGlobusUpload() here? return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); } @@ -4075,7 +4076,11 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + try { + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + } catch (IllegalArgumentException ex) { + return badRequest("Invalid parameters: "+ex.getMessage()); + } return ok("Async call to Globus Upload started "); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d6b56b51fa5..eb1eb47611a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -22,7 +22,6 @@ import jakarta.json.JsonString; import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -33,7 +32,6 @@ import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; -import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.Duration; @@ -62,10 +60,10 @@ import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -73,6 +71,8 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; import 
jakarta.ws.rs.core.Response; @Stateless @@ -105,6 +105,8 @@ public class GlobusServiceBean implements java.io.Serializable { IngestServiceBean ingestSvc; @EJB SystemConfig systemConfig; + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -391,19 +393,33 @@ private void monitorTemporaryPermissions(String ruleId, long datasetId) { * @return * @throws MalformedURLException */ - public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { + public GlobusTaskState getTask(String accessToken, String taskId, Logger globusLogger) { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + Logger myLogger = globusLogger != null ? globusLogger : logger; + + URL url; + try { + url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + } catch (MalformedURLException mue) { + myLogger.warning("Malformed URL exception when trying to contact Globus. Globus API url: " + + "https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + + taskId); + return null; + } MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null); - GlobusTask task = null; + GlobusTaskState task = null; if (result.status == 200) { - task = parseJson(result.jsonResponse, GlobusTask.class, false); + task = parseJson(result.jsonResponse, GlobusTaskState.class, false); } if (result.status != 200) { - globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + // @todo It should probably retry it 2-3 times before giving up; + // similarly, it should probably differentiate between a "no such task" + // response and something intermittent like a server/network error or + // an expired token... i.e. something that's recoverable (?) + myLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); } @@ -646,11 +662,17 @@ private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List< @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, - AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + AuthenticatedUser authUser) throws IllegalArgumentException, ExecutionException, InterruptedException, MalformedURLException { - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; + // Before we do anything else, let's do some basic validation of what + // we've been passed: + + JsonArray filesJsonArray = jsonData.getJsonArray("files"); + + if (filesJsonArray == null || filesJsonArray.size() < 1) { + throw new IllegalArgumentException("No valid json entries supplied for the files being uploaded"); + } + String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." 
+ "GlobusUpload" + logTimestamp);
@@ -674,11 +696,13 @@ }
 logger.fine("json: " + JsonUtil.prettyPrint(jsonData));
+
+ globusLogger.info("Globus upload initiated");
 String taskIdentifier = jsonData.getString("taskIdentifier");
 GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
- GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger);
+ GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger);
 String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw");
 logger.fine("Found rule: " + ruleId);
 if (ruleId != null) { @@ -688,15 +712,43 @@ rulesCache.invalidate(ruleId); } }
-
+ // Wait before first check
 Thread.sleep(5000);
+
+ if (FeatureFlags.GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK.enabled()) {
+
+ // Save the task information in the database so that the Globus monitoring
+ // service can continue checking on its progress.
+
+ GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token, new Timestamp(new Date().getTime()));
+ em.persist(taskInProgress);
+
+ // Save the metadata entries that define the files that are being uploaded
+ // in the database. These entries will be used once/if the uploads
+ // complete successfully to add the files to the dataset.
+
+ for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) {
+ ExternalFileUploadInProgress fileUploadRecord = new ExternalFileUploadInProgress(taskIdentifier, fileJsonObject.toString());
+
+ em.persist(fileUploadRecord);
+ }
+
+ return;
+ }
+
+
+ // the old implementation that relies on looping continuously,
+ // sleeping-then-checking the task status repeatedly:
+ // globus task status check
+ // (the method below performs continuous looped checks of the remote
+ // Globus API, monitoring it for as long as it takes for the task to
+ // finish one way or another!)
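+ // (For reference, a sketch of how a monitoring service could consume the
+ // records persisted above, using the methods added at the bottom of this
+ // class - findAllOngoingTasks(), addFilesOnSuccess(), deleteTask();
+ // getClientToken() is an assumed accessor, since GlobusTaskInProgress
+ // itself is not shown in this patch, though getTaskId() is used in
+ // addFilesOnSuccess() below:
+ //
+ // for (GlobusTaskInProgress inProgress : findAllOngoingTasks()) {
+ //     GlobusTaskState state = getTask(inProgress.getClientToken(), inProgress.getTaskId(), null);
+ //     if (GlobusUtil.isTaskCompleted(state)) {
+ //         if (GlobusUtil.isTaskSucceeded(state)) {
+ //             addFilesOnSuccess(inProgress);
+ //         }
+ //         deleteTask(inProgress);
+ //     }
+ // }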
task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + // @todo null check, or make sure it's never null String taskStatus = getTaskStatus(task); - globusLogger.info("Starting a globusUpload "); - if (ruleId != null) { // Transfer is complete, so delete rule deletePermission(ruleId, dataset, globusLogger); @@ -739,138 +791,11 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } else { try { - // - - List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonData.getJsonArray("files"); - - if (filesJsonArray != null) { - String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" - + dataset.getIdentifierForFileStorage(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from - // externalTool - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - String storeId = parts[0]; - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - String bucketName = ""; - if (bits.length > 1) { - bucketName = bits[0]; - } - String fileId = bits[bits.length - 1]; - - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; - String fileName = fileJsonObject.getString("fileName"); - - inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); - } - - // calculateMissingMetadataFields: checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); - JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.fine("Size: " + newfilesJsonArray.size()); - logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); - JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - countAll++; - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - if (bits.length > 1) { - } - String fileId = bits[bits.length - 1]; - - List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) - .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) - .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - logger.fine("List Size: " + newfileJsonObject.size()); - // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder() - .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder() - .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - addFilesJsonData.add(fileJsonObject); - countSuccess++; - // } else { - // globusLogger.info(fileName - // + " will be skipped from adding to dataset by second API due to missing - // values "); - // countError++; - // } - } else { - globusLogger.info(fileName - + " will be skipped 
from adding to dataset by second API due to missing values ");
- countError++;
- }
- }
-
- String newjsonData = addFilesJsonData.build().toString();
-
- globusLogger.info("Successfully generated new JsonData for addFiles call");
-
- /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "
- + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:"
- + datasetIdentifier + " -F jsonData='" + newjsonData + "'";
- System.out.println("*******====command ==== " + command);*/
-
- // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of
- // calling API
-
- // a quick experimental AddReplaceFileHelper implementation:
- AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(
- dataverseRequestSvc.getDataverseRequest(),
- this.ingestSvc,
- this.datasetSvc,
- this.dataFileSvc,
- this.permissionSvc,
- this.commandEngine,
- this.systemConfig
- );
-
- Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser);
-
- if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) {
- // if(!taskSkippedFiles)
- if (countError == 0) {
- userNotificationService.sendNotification((AuthenticatedUser) authUser,
- new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED,
- dataset.getId(), countSuccess + " files added out of " + countAll, true);
- } else {
- userNotificationService.sendNotification((AuthenticatedUser) authUser,
- new Timestamp(new Date().getTime()),
- UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),
- countSuccess + " files added out of " + countAll, true);
- }
- globusLogger.info("Successfully completed addFiles call ");
- } else {
- globusLogger.log(Level.SEVERE,
- "******* Error while executing addFiles ", newjsonData);
- }
-
- }
-
- globusLogger.info("Files processed: " + countAll.toString());
- globusLogger.info("Files added successfully: " + countSuccess.toString());
- globusLogger.info("Files failures: " + countError.toString());
- globusLogger.info("Finished upload via Globus job.");
-
+ processUploadedFiles(filesJsonArray, dataset, authUser, globusLogger);
 } catch (Exception e) {
- logger.info("Exception from globusUpload call ");
+ logger.info("Exception from processUploadedFiles call ");
 e.printStackTrace();
- globusLogger.info("Exception from globusUpload call " + e.getMessage());
+ globusLogger.info("Exception from processUploadedFiles call " + e.getMessage());
 datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress);
 // } }
 if (ruleId != null) { @@ -883,7 +808,164 @@ fileHandler.close(); } }
+ /**
+ * The code in this method is copy-and-pasted from the previous Borealis
+ * implementation
+ * @todo see if it can be refactored and simplified a bit, the json manipulation
+ * specifically (?)
+ * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles
+ * @param dataset the dataset
+ * @param authUser the user that should be performing the addFiles call
+ * finalizing adding the files to the Dataset. Note that this
+ * user will need to be obtained from the saved api token, when this
+ * method is called via the TaskMonitoringService
+ * @param myLogger the Logger; if null, the main logger of the service bean will be used
+ * @throws IOException, InterruptedException, ExecutionException @todo may need to throw more exceptions (?)
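+ * (Shape of the entries, per the code below: each element arrives roughly as
+ * {"storageIdentifier":"s3://gcs5-bucket1:1781cfeb8a7-748c270a227c","fileName":"..."},
+ * and the method patches in "md5Hash" and "mimeType" before handing the
+ * array to AddReplaceFileHelper.addFiles().)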
+ */ + private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, AuthenticatedUser authUser, Logger myLogger) throws IOException, InterruptedException, ExecutionException { + myLogger = myLogger != null ? myLogger : logger; + + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + + List inputList = new ArrayList(); + + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from + // externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if (bits.length > 1) { + bucketName = bits[0]; + } + String fileId = bits[bits.length - 1]; + + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, myLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + if (bits.length > 1) { + } + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + if (newfileJsonObject != null) { + logger.fine("List Size: " + newfileJsonObject.size()); + // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + addFilesJsonData.add(fileJsonObject); + countSuccess++; + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing + // values "); + // countError++; + // } + } else { + myLogger.info(fileName + + " will be skipped from adding to dataset in the final AddReplaceFileHelper.addFiles() call. 
"); + countError++; + } + } + + String newjsonData = addFilesJsonData.build().toString(); + + myLogger.info("Successfully generated new JsonData for addFiles call"); + + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command);*/ + // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of + // calling API + // a quick experimental AddReplaceFileHelper implementation: + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dataverseRequestSvc.getDataverseRequest(), + this.ingestSvc, + this.datasetSvc, + this.dataFileSvc, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); + + if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { + // if(!taskSkippedFiles) + if (countError == 0) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll, true); + } + myLogger.info("Successfully completed addFiles call "); + } else { + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + } + myLogger.info("Files processed: " + countAll); + myLogger.info("Files added successfully: " + countSuccess); + myLogger.info("Files failures: " + countError); + myLogger.info("Finished upload via Globus job."); + + } + + /** + * I don't think this method is needed at all. (I suspect that it's a remnant + * from the times when *multiple* individual /add calls needed to be performed + * for each file being added. So this was part of a framework that attempted + * to run this calls in parallel, potentially speeding things up (similarly to + * how the checksums are being calculated in parallel for multiple files). + * As of now, this method doesn't do anything "asynchronous" - there is one + * /addFiles call, and the method below will wait for it to complete, via the + * CompletableFuture.get(). (L.A.) + * @param curlCommand + * @param globusLogger + * @return + * @throws ExecutionException + * @throws InterruptedException + */ public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { @@ -983,7 +1065,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro // If the rules_cache times out, the permission will be deleted. 
Presumably that // doesn't affect a // globus task status check - GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "r"); if (ruleId != null) { logger.fine("Found rule: " + ruleId); @@ -999,6 +1081,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro logger.warning("ruleId not found for taskId: " + taskIdentifier); } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + // @todo null check String taskStatus = getTaskStatus(task); // Transfer is done (success or failure) so delete the rule @@ -1033,61 +1116,38 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) + private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) throws MalformedURLException { - boolean taskCompletion = false; - String status = ""; - GlobusTask task = null; + boolean taskCompleted = false; + GlobusTaskState task = null; int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); + // Call the (centralized) Globus API to check on the task state/status: task = getTask(endpoint.getClientToken(), taskId, globusLogger); - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress. - if (status.equalsIgnoreCase("ACTIVE")) { - if (task.getNice_status().equalsIgnoreCase("ok") - || task.getNice_status().equalsIgnoreCase("queued")) { - taskCompletion = false; - } else { - taskCompletion = true; - // status = "FAILED" + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // The task is either succeeded, failed or inactive. - taskCompletion = true; - // status = status + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // status = "FAILED"; - taskCompletion = true; - } - } else { - // status = "FAILED"; - taskCompletion = true; - } + taskCompleted = GlobusUtil.isTaskCompleted(task); } catch (Exception ex) { ex.printStackTrace(); } - } while (!taskCompletion); + } while (!taskCompleted); globusLogger.info("globus transfer task completed successfully"); return task; } - - private String getTaskStatus(GlobusTask task) { + + private String getTaskStatus(GlobusTaskState task) { String status = null; if (task != null) { status = task.getStatus(); if (status != null) { // The task is in progress but is not ok or queued + // (L.A.) I think the assumption here is that this method is called + // exclusively on tasks that have already completed. So that's why + // it is safe to assume that "ACTIVE" means "FAILED". if (status.equalsIgnoreCase("ACTIVE")) { status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); } else { @@ -1158,7 +1218,16 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fileName = id.split("IDsplit")[2]; // ToDo: what if the file does not exist in s3 + // (L.A.) - good question. maybe it should call .open and .exists() here? 
+ // otherwise, there doesn't seem to be any diagnostics as to which + // files uploaded successfully and which failed (?) + // ... however, any partially successful upload cases should be + // properly handled later, during the .addFiles() call - only + // the files that actually exists in storage remotely will be + // added to the dataset permanently then. // ToDo: what if checksum calculation failed + // (L.A.) - this appears to have been addressed - by using "Not available in Dataverse" + // in place of a checksum. String storageDriverId = DataAccess.getDriverIdAndStorageLocation(fullPath)[0]; @@ -1180,8 +1249,6 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) logger.info(ex.getMessage()); Thread.sleep(5000); } - - } while (count < 3); } @@ -1311,5 +1378,57 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, } } } + + public List findAllOngoingTasks() { + return em.createQuery("select object(o) from GlobusTaskInProgress as o order by o.startTime", GlobusTaskInProgress.class).getResultList(); + } + + public void deleteTask(GlobusTaskInProgress task) { + GlobusTaskInProgress mergedTask = em.merge(task); + em.remove(mergedTask); + } + + public List findExternalUploadsByTaskId(String taskId) { + return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList(); + } + + // @todo this may or may not need to be async (?) + public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { + List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); + + if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { + // @todo log error message; do nothing + return; + } + Dataset dataset = globusTask.getDataset(); + AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); + if (authUser == null) { + // @todo log error message; do nothing + return; + } + + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); + + for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { + String jsonInfoString = pendingFile.getFileInfo(); + JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); + filesJsonArrayBuilder.add(fileObject); + } + + JsonArray filesJsonArray = filesJsonArrayBuilder.build(); + + if (filesJsonArray == null || filesJsonArray.size() < 1) { + // @todo log error message; do nothing + return; + } + + try { + processUploadedFiles(filesJsonArray, dataset, authUser, null); + } catch (Exception ex) { + // @todo log error message; make sure the error notification to the + // has been sent (may or may not have already been sent inside the + // method above). + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java similarity index 93% rename from src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java rename to src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java index c2b01779f4a..b5db20d46c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java @@ -1,6 +1,10 @@ package edu.harvard.iq.dataverse.globus; -public class GlobusTask { +/** + * This class is used to store the state of an ongoing Globus task (transfer) + * as reported by the Globus task API. 
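+ * (Judging by the checks elsewhere in this patch, the fields of interest
+ * are "status" - ACTIVE, SUCCEEDED, FAILED or INACTIVE - plus "nice_status"
+ * ("ok" or "queued" while a transfer is still healthy) and
+ * "nice_status_short_description", which qualify an ACTIVE status.)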
+ */ +public class GlobusTaskState { private String DATA_TYPE; private String type; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java index 92cf8ac7704..67594ad1a5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -30,4 +30,37 @@ public static JsonObject getFilesMap(List dataFiles, Dataset d) { } return filesBuilder.build(); } + + public static boolean isTaskCompleted(GlobusTaskState task) { + if (task != null) { + String status = task.getStatus(); + if (status != null) { + if (status.equalsIgnoreCase("ACTIVE")) { + if (task.getNice_status().equalsIgnoreCase("ok") + || task.getNice_status().equalsIgnoreCase("queued")) { + return false; + } + } + } + } + return true; + } + + public static boolean isTaskSucceeded(GlobusTaskState task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + status = status.toUpperCase(); + if (status.equals("ACTIVE") || status.startsWith("FAILED") || status.startsWith("INACTIVE")) { + // There are cases where a failed task may still be showing + // as "ACTIVE". But it is definitely safe to assume that it + // has not completed *successfully*. + return false; + } + return true; + } + } + return false; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9bacafd173f..3f76a319902 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -345,12 +345,13 @@ public List saveAndAddFilesToDataset(DatasetVersion version, StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); - + // (this will make a remote call to check if the file exists + // and obtain its size) confirmedFileSize = dataAccess.getSize(); // For directly-uploaded files, we will perform the file size // limit and quota checks here. Perform them *again*, in - // some cases: a directly uploaded files have already been + // some cases: files directly uploaded via the UI have already been // checked (for the sake of being able to reject the upload // before the user clicks "save"). But in case of direct // uploads via API, these checks haven't been performed yet, diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 021977ff8c6..746e6e3b75d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -91,6 +91,11 @@ public enum FeatureFlags { * @since Dataverse 6.3 */ DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason"), + /** + * TEMPORARY feature flag for the new Globus upload framework (will only be + * used for testing). 
+ */ + GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; final String flag; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index f9801419e47..7417a5db4d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -82,6 +82,7 @@ public class SystemConfig { private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; + private static final String JVM_GLOBUS_TASK_MONITORING_OPTION = "dataverse.globus.taskMonitoringServer"; private static final long DEFAULT_GUESTBOOK_RESPONSES_DISPLAY_LIMIT = 5000L; private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_IMAGE = 3000000L; // 3 MB @@ -545,6 +546,14 @@ public boolean isTimerServer() { } return false; } + + public boolean isGlobusTaskMonitoringServer() { + String optionValue = System.getProperty(JVM_GLOBUS_TASK_MONITORING_OPTION); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } public String getFooterCopyrightAndYear() { return BundleUtil.getStringFromBundle("footer.copyright", Arrays.asList(Year.now().getValue() + "")); From dac53023309a17b6761388203a6040ee7199f382 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 13:14:00 -0400 Subject: [PATCH 15/84] new class files that weren't included in the last commit #10623 --- .../ExternalFileUploadInProgress.java | 131 ++++++++++++ .../globus/GlobusTaskInProgress.java | 188 ++++++++++++++++++ .../globus/TaskMonitoringServiceBean.java | 78 ++++++++ 3 files changed, 397 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java new file mode 100644 index 00000000000..dadde64608c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -0,0 +1,131 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse; + +import jakarta.persistence.Column; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import java.io.Serializable; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; + +/** + * + * @author landreev + * + * The name of the class is provisional. I'm open to better-sounding alternatives, + * if anyone can think of any. + * But I wanted to avoid having the word "Globus" in the entity name. I'm adding + * it specifically for the Globus use case. But I'm guessing there's a chance + * this setup may come in handy for other types of datafile uploads that happen + * externally. (?) 
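+ * (One record of this type is persisted per file when an externally-handled
+ * upload task is registered; the records are later looked up, and eventually
+ * purged, in bulk by the associated task id via the named queries below.)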
+ */ +@NamedQueries({ + @NamedQuery( name="ExternalFileUploadInProgress.deleteByTaskId", + query="DELETE FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId"), + @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", + query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) +@Entity +public class ExternalFileUploadInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * Rather than saving various individual fields defining the datafile, + * which would essentially replicate the DataFile table, we are simply + * storing the full json record as passed to the API here. + */ + @Column(columnDefinition = "TEXT", nullable = false) + private String fileInfo; + + /** + * This is Globus-specific task id associated with the upload in progress + */ + @Column(nullable = false) + private String taskId; + + /** + * The Dataset to which the files are being added. + * (@todo may not be necessary? - since the corresponding task is tied to a specific + * dataset already?) + */ + /*@ManyToOne + private Dataset dataset;*/ + + /*public ExternalFileUploadInProgress(String taskId, Dataset dataset, String fileInfo) { + this.taskId = taskId; + this.fileInfo = fileInfo; + this.dataset = dataset; + }*/ + + public ExternalFileUploadInProgress(String taskId, String fileInfo) { + this.taskId = taskId; + this.fileInfo = fileInfo; + } + + public String getFileInfo() { + return fileInfo; + } + + public void setFileInfo(String fileInfo) { + this.fileInfo = fileInfo; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + /*public Dataset getDataset() { + return dataset; + } + + public void setDataset(Dataset dataset) { + this.dataset = dataset; + }*/ + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof ExternalFileUploadInProgress)) { + return false; + } + ExternalFileUploadInProgress other = (ExternalFileUploadInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.ExternalFileUploadInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java new file mode 100644 index 00000000000..7b12ec0a3ad --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -0,0 +1,188 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import jakarta.persistence.Column; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.ManyToOne; +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.Arrays; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; + +/** + * + * @author landreev + */ +@Entity +public class GlobusTaskInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + /** + * Globus-side identifier of the task in progress, upload or download + */ + @Column(nullable = false) + private String taskId; + + GlobusTaskInProgress(String taskIdentifier, TaskType taskType, Dataset dataset, String clientToken, ApiToken token, Timestamp timestamp) { + throw new UnsupportedOperationException("Not supported yet."); // Generated from nbfs://nbhost/SystemFileSystem/Templates/Classes/Code/GeneratedMethodBody + } + + /** + * I was considering giving this enum type a more specific name "TransferType" + * - but maybe there will be another use case where we need to keep track of + * Globus tasks that are not data transfers (?) 
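+     * (Note that fromString() below is strict: TaskType.fromString("UPLOAD")
+     * yields UPLOAD, while any unrecognized string results in an
+     * IllegalArgumentException.)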
+ */ + public enum TaskType { + + UPLOAD("UPLOAD"), + DOWNLOAD("DOWNLOAD"); + + private final String text; + + private TaskType(final String text) { + this.text = text; + } + + public static TaskType fromString(String text) { + if (text != null) { + for (TaskType taskType : TaskType.values()) { + if (text.equals(taskType.text)) { + return taskType; + } + } + } + throw new IllegalArgumentException("TaskType must be one of these values: " + Arrays.asList(TaskType.values()) + "."); + } + + @Override + public String toString() { + return text; + } + } + + @Column(nullable = false) + @Enumerated(EnumType.STRING) + private TaskType taskType; + + /** + * Globus API token that should be used to monitor the status of the task + */ + @Column(nullable = false) + private String globusToken; + + /** + * This is the Dataverse API token of the user who initiated the Globus task + */ + private String apiToken; + + @ManyToOne + private Dataset dataset; + + @Column( nullable = false ) + private Timestamp startTime; + + + public GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String clientToken, String apiToken, Timestamp startTime) { + this.taskId = taskId; + this.taskType = taskType; + this.globusToken = clientToken; + this.apiToken = apiToken; + this.dataset = dataset; + this.startTime = startTime; + } + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + public TaskType getTaskType() { + return taskType; + } + + public void setTaskType(TaskType taskType) { + this.taskType = taskType; + } + + public String getGlobusToken() { + return globusToken; + } + + public void setGlobusToken(String clientToken) { + this.globusToken = clientToken; + } + + public String getApiToken() { + return apiToken; + } + + public void setApiToken(String apiToken) { + this.apiToken = apiToken; + } + + public Dataset getDataset() { + return dataset; + } + + public void setDataset(Dataset dataset) { + this.dataset = dataset; + } + + public Timestamp getStartTime() { + return startTime; + } + + public void setStartTime(Timestamp startTime) { + this.startTime = startTime; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof GlobusTaskInProgress)) { + return false; + } + GlobusTaskInProgress other = (GlobusTaskInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.globus.GlobusTaskInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java new file mode 100644 index 00000000000..da31ded90db --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -0,0 +1,78 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.Resource; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * + * This Singleton monitors ongoing Globus tasks by checking with the centralized + * Globus API on the status of all the registered ongoing tasks. + * When a successful completion of a task is detected, the service triggers + * the execution of the associated tasks (for example, finalizing adding datafiles + * to the dataset on completion of a remote Globus upload). When a task fails or + * terminates abnormally, a message is logged and the task record is deleted + * from the database. + * + * @author landreev + */ +@Singleton +@Startup +public class TaskMonitoringServiceBean { + @Resource + ManagedScheduledExecutorService scheduler; + + @EJB + SystemConfig systemConfig; + @EJB + SettingsServiceBean settingsSvc; + @EJB + GlobusServiceBean globusService; + + @PostConstruct + public void init() { + if (systemConfig.isGlobusTaskMonitoringServer()) { + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); + this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, + 0, pollingInterval, + TimeUnit.SECONDS); + } + } + + /** + * This method will be executed on a timer-like schedule, continuously + * monitoring all the ongoing external Globus tasks (transfers). + * @todo make sure the executions do not overlap/stack up + */ + public void checkOngoingTasks() { + List tasks = globusService.findAllOngoingTasks(); + + tasks.forEach(t -> { + GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); + if (GlobusUtil.isTaskCompleted(retrieved)) { + if (GlobusUtil.isTaskSucceeded(retrieved)) { + // Do our thing, finalize adding the files to the dataset + globusService.addFilesOnSuccess(t); + } + // Whether it finished successfully, or failed in the process, + // there's no need to keep monitoring this task, so we can + // delete it. 
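+                // (Deleting the GlobusTaskInProgress record is what stops the
+                // scheduler from polling the Globus API for this task id on
+                // subsequent runs, since checkOngoingTasks() only iterates
+                // over the records returned by findAllOngoingTasks().)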
+ globusService.deleteTask(t); + // @todo double-check that the locks have been properly handled + } + }); + } + +} From 408034125998e5cd40dda2ff27374cab4bdacd21 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 15:21:45 -0400 Subject: [PATCH 16/84] fixing some bad changes that got committed earlier #10623 --- .../ExternalFileUploadInProgress.java | 41 ++++++------------ .../dataverse/globus/GlobusServiceBean.java | 2 +- .../globus/GlobusTaskInProgress.java | 42 ++++++++++--------- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java index dadde64608c..ab6a1798307 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -5,14 +5,15 @@ package edu.harvard.iq.dataverse; import jakarta.persistence.Column; -import jakarta.persistence.ManyToOne; +import jakarta.persistence.Index; import jakarta.persistence.NamedQueries; import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; import java.io.Serializable; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; /** * @@ -31,6 +32,7 @@ @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) @Entity +@Table(indexes = {@Index(columnList="taskid")}) public class ExternalFileUploadInProgress implements Serializable { private static final long serialVersionUID = 1L; @@ -51,29 +53,18 @@ public void setId(Long id) { * which would essentially replicate the DataFile table, we are simply * storing the full json record as passed to the API here. */ - @Column(columnDefinition = "TEXT", nullable = false) + @Column(columnDefinition = "TEXT", nullable=false) private String fileInfo; /** * This is Globus-specific task id associated with the upload in progress */ - @Column(nullable = false) + @Column(nullable=false) private String taskId; - /** - * The Dataset to which the files are being added. - * (@todo may not be necessary? - since the corresponding task is tied to a specific - * dataset already?) 
- */ - /*@ManyToOne - private Dataset dataset;*/ - - /*public ExternalFileUploadInProgress(String taskId, Dataset dataset, String fileInfo) { - this.taskId = taskId; - this.fileInfo = fileInfo; - this.dataset = dataset; - }*/ - + public ExternalFileUploadInProgress() { + } + public ExternalFileUploadInProgress(String taskId, String fileInfo) { this.taskId = taskId; this.fileInfo = fileInfo; @@ -95,14 +86,6 @@ public void setTaskId(String taskId) { this.taskId = taskId; } - /*public Dataset getDataset() { - return dataset; - } - - public void setDataset(Dataset dataset) { - this.dataset = dataset; - }*/ - @Override public int hashCode() { int hash = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index eb1eb47611a..6b78925beb0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -721,7 +721,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. - GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token, new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), new Timestamp(new Date().getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 7b12ec0a3ad..210f08710dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -5,24 +5,26 @@ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.authorization.users.ApiToken; import jakarta.persistence.Column; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; import java.io.Serializable; import java.sql.Timestamp; import java.util.Arrays; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; /** * * @author landreev */ @Entity +@Table(uniqueConstraints = {@UniqueConstraint(columnNames = "taskid")}) public class GlobusTaskInProgress implements Serializable { private static final long serialVersionUID = 1L; @@ -33,13 +35,9 @@ public class GlobusTaskInProgress implements Serializable { /** * Globus-side identifier of the task in progress, upload or download */ - @Column(nullable = false) + @Column(nullable=false, unique = true) private String taskId; - GlobusTaskInProgress(String taskIdentifier, TaskType taskType, Dataset dataset, String clientToken, ApiToken token, Timestamp timestamp) { - throw new UnsupportedOperationException("Not supported yet."); // 
Generated from nbfs://nbhost/SystemFileSystem/Templates/Classes/Code/GeneratedMethodBody - } - /** * I was considering giving this enum type a more specific name "TransferType" * - but maybe there will be another use case where we need to keep track of @@ -73,14 +71,14 @@ public String toString() { } } - @Column(nullable = false) + @Column @Enumerated(EnumType.STRING) private TaskType taskType; /** * Globus API token that should be used to monitor the status of the task */ - @Column(nullable = false) + @Column private String globusToken; /** @@ -91,19 +89,23 @@ public String toString() { @ManyToOne private Dataset dataset; - @Column( nullable = false ) + @Column private Timestamp startTime; - - public GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String clientToken, String apiToken, Timestamp startTime) { - this.taskId = taskId; - this.taskType = taskType; - this.globusToken = clientToken; - this.apiToken = apiToken; + public GlobusTaskInProgress() { + } + + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, Timestamp startTime) { + this.taskId = taskId; + this.taskType = taskType; this.dataset = dataset; - this.startTime = startTime; + this.globusToken = globusToken; + this.apiToken = apiToken; + this.startTime = startTime; } + + public Long getId() { return id; } From e086a60aad4a758f3c6c1b1d2797985b7ade380a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 13 Aug 2024 09:25:35 -0400 Subject: [PATCH 17/84] cleanup #10623 --- .../harvard/iq/dataverse/globus/GlobusServiceBean.java | 9 ++++++++- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 6b78925beb0..9ab3a2df567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -966,6 +966,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut * @throws ExecutionException * @throws InterruptedException */ + /* public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { @@ -1018,7 +1019,7 @@ private String addFiles(String curlCommand, Logger globusLogger) { } return status; - } + } */ @Asynchronous public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { @@ -1431,4 +1432,10 @@ public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { // method above). 
} } + + public void deleteExternalUploadRecords(String taskId) { + em.createNamedQuery("ExternalFileUploadInProgress.deleteByTaskId") + .setParameter("taskId", taskId) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index da31ded90db..bd274d44e38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -14,6 +14,7 @@ import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; /** * @@ -30,6 +31,8 @@ @Singleton @Startup public class TaskMonitoringServiceBean { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.globus.TaskMonitoringServiceBean"); + @Resource ManagedScheduledExecutorService scheduler; @@ -43,11 +46,14 @@ public class TaskMonitoringServiceBean { @PostConstruct public void init() { if (systemConfig.isGlobusTaskMonitoringServer()) { + logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); + } else { + logger.info("Skipping Globus task monitor initialization"); } } @@ -57,6 +63,7 @@ public void init() { * @todo make sure the executions do not overlap/stack up */ public void checkOngoingTasks() { + logger.info("Performing a scheduled external Globus task check"); List tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { @@ -69,6 +76,7 @@ public void checkOngoingTasks() { // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can // delete it. 
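+                // (The ExternalFileUploadInProgress records are keyed by the
+                // same Globus task id, so they can be purged in the same pass,
+                // via the new deleteExternalUploadRecords() helper added above.)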
+ globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); // @todo double-check that the locks have been properly handled } From 35ce7ef1892baa92b36e2350193eec4a63db6237 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 14 Aug 2024 14:37:38 -0400 Subject: [PATCH 18/84] more testing/debugging #10623 --- .../dataverse/globus/GlobusServiceBean.java | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9ab3a2df567..3f6d0bf7c68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; @@ -71,9 +72,12 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonReader; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.core.Response; +import org.apache.http.util.EntityUtils; @Stateless @Named("GlobusServiceBean") @@ -810,7 +814,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } /** * The code in this method is copy-and-pasted from the previous Borealis - * implemenation + * implemenation. * @todo see if it can be refactored and simplified a bit, the json manipulation * specifically (?) * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles @@ -907,6 +911,11 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut myLogger.info("Successfully generated new JsonData for addFiles call"); + myLogger.info("Files processed: " + countAll); + myLogger.info("Files added successfully: " + countSuccess); + myLogger.info("Files failures: " + countError); + myLogger.info("Finished upload via Globus job."); + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; @@ -914,8 +923,15 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of // calling API // a quick experimental AddReplaceFileHelper implementation: + + // Passing null for the HttpServletRequest to make a new DataverseRequest. + // The parent method is executed asynchronously, so the real request + // that was associated with the original API call that triggered this upload + // cannot be obtained. 
+ DataverseRequest dataverseRequest = new DataverseRequest(authUser, (HttpServletRequest)null); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( - dataverseRequestSvc.getDataverseRequest(), + dataverseRequest, this.ingestSvc, this.datasetSvc, this.dataFileSvc, @@ -923,10 +939,18 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut this.commandEngine, this.systemConfig ); + + // The old code had 2 sec. of sleep, so ... + Thread.sleep(2000); Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); - if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { + JsonReader jsonReader = Json.createReader(new StringReader((String) addFilesResponse.getEntity().toString())); + JsonObject jsonObject = jsonReader.readObject(); + String addFilesStatus = jsonObject.getString("status"); + String addFilesMessage = jsonObject.getJsonObject("data").getString("message"); + + if ("OK".equalsIgnoreCase(addFilesStatus)) { // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -942,13 +966,14 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut } else { myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); + // @todo send Failure notification + if (addFilesResponse != null) { + myLogger.info("addFilesResponse status: " + addFilesStatus); + myLogger.info("addFilesResponse message" + addFilesMessage); + } } - myLogger.info("Files processed: " + countAll); - myLogger.info("Files added successfully: " + countSuccess); - myLogger.info("Files failures: " + countError); - myLogger.info("Finished upload via Globus job."); - + } /** From 8f00cdb852d8ece974ec8feacbea32d1f938faf1 Mon Sep 17 00:00:00 2001 From: Eva Roddeck Date: Thu, 15 Aug 2024 14:18:25 +0200 Subject: [PATCH 19/84] null check for name array #10343 --- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java | 2 +- .../java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index f68957ad060..80e32184731 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ -123,7 +123,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati if (!name.replaceFirst(",", "").contains(",")) { // contributorName=, String[] fullName = name.split(", "); - givenName = fullName[1]; + givenName = fullName.length > 1 ? 
fullName[1] : null; familyName = fullName[0]; } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index c3d9fd8fcd3..1f1a2d7af00 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -89,6 +89,7 @@ public void testName() { verifyIsPerson("kcjim11, kcjim11", "kcjim11", "kcjim11"); verifyIsPerson("Bartholomew 3, James", "James", "Bartholomew 3"); + verifyIsPerson("Smith, ", null, "Smith"); } private void verifyIsOrganization(String fullName) { From b335c36fdabf01dd78897e7c5c60cde4151740cf Mon Sep 17 00:00:00 2001 From: Eva Roddeck Date: Thu, 15 Aug 2024 14:29:04 +0200 Subject: [PATCH 20/84] release notes #10343 --- doc/release-notes/10343-trailing-comma.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/10343-trailing-comma.md diff --git a/doc/release-notes/10343-trailing-comma.md b/doc/release-notes/10343-trailing-comma.md new file mode 100644 index 00000000000..03bd18715d7 --- /dev/null +++ b/doc/release-notes/10343-trailing-comma.md @@ -0,0 +1,5 @@ +### Trailing commas in author name now permitted + +When an author name ends on a comma (e.g. "Smith,") a dataset cannot be properly loaded when using json-ld. A null check fixes this. + +For more information, see #10343. \ No newline at end of file From 91377ba38820f722c89aeaae820dc63e6ae76b63 Mon Sep 17 00:00:00 2001 From: Eva Roddeck Date: Fri, 16 Aug 2024 09:47:03 +0200 Subject: [PATCH 21/84] added another test case + normalize String for assertion #10343 --- .../edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index 1f1a2d7af00..d772ba2b9da 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -90,6 +90,7 @@ public void testName() { verifyIsPerson("Bartholomew 3, James", "James", "Bartholomew 3"); verifyIsPerson("Smith, ", null, "Smith"); + verifyIsPerson("Smith,", null, "Smith"); } private void verifyIsOrganization(String fullName) { @@ -107,7 +108,7 @@ private void verifyIsPerson(String fullName, String givenName, String familyName private void verifyIsPerson(String fullName, String givenName, String familyName, boolean isPerson) { JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, isPerson); System.out.println(JsonUtil.prettyPrint(obj)); - assertEquals(obj.getString("fullName"),fullName); + assertEquals(obj.getString("fullName"), StringUtil.normalize(fullName)); assertTrue(obj.getBoolean("isPerson")); assertEquals(obj.containsKey("givenName"), givenName != null); if(obj.containsKey("givenName") && givenName != null) { From d4b9bac8366a2b909c5c32cf5e3f4361d6abc7d5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 16 Aug 2024 20:21:44 -0400 Subject: [PATCH 22/84] this is a working, but still work-in-progress state of things - needs some cleanup and refinements. 
#10623 --- .../iq/dataverse/DatasetServiceBean.java | 11 +- .../harvard/iq/dataverse/api/Datasets.java | 2 + .../datasetutility/AddReplaceFileHelper.java | 4 +- .../impl/UpdateDatasetVersionCommand.java | 26 +- .../dataverse/globus/GlobusServiceBean.java | 232 ++++++++++-------- .../globus/GlobusTaskInProgress.java | 13 +- .../globus/TaskMonitoringServiceBean.java | 14 +- 7 files changed, 182 insertions(+), 120 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..832d7192965 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -412,12 +412,20 @@ public boolean checkDatasetLock(Long datasetId) { List lock = lockCounter.getResultList(); return lock.size()>0; } - + + public List getLocksByDatasetId(Long datasetId) { + TypedQuery locksQuery = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); + locksQuery.setParameter("datasetId", datasetId); + return locksQuery.getResultList(); + } + public List getDatasetLocksByUser( AuthenticatedUser user) { return listLocks(null, user); } + // @todo: we'll be better off getting rid of this method and using the other + // version of addDatasetLock() (that uses datasetId instead of Dataset). @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { lock.setDataset(dataset); @@ -467,6 +475,7 @@ public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Lon * is {@code aReason}. * @param dataset the dataset whose locks (for {@code aReason}) will be removed. * @param aReason The reason of the locks that will be removed. + * @todo this should probably take dataset_id, not a dataset */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b93257bc0c3..4c547f5295f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4035,6 +4035,8 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return wr.getResponse(); } + // @todo check if the dataset is already locked! 
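+        // (A rough sketch of what that check could look like - hypothetical,
+        // not part of this patch - assuming the target Dataset has already
+        // been resolved into a local variable `dataset`, an injected
+        // DatasetServiceBean is available as `datasetService`, and reusing
+        // the checkDatasetLock() helper seen in DatasetServiceBean:
+        //
+        //     if (datasetService.checkDatasetLock(dataset.getId())) {
+        //         return error(Response.Status.CONFLICT,
+        //                 "Dataset is locked; Globus files cannot be added right now.");
+        //     }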
+ JsonObject jsonObject = null; try { jsonObject = JsonUtil.getJsonObject(jsonData); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 0143fced87c..2b35a4cc783 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2139,9 +2139,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "); + logger.log(Level.INFO, "Removed EditInProgress lock "+eipLock.getId()); } - + try { Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 994f4c7dfb6..faf8884b08d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -102,7 +103,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); + //ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); + // this is an experiment (probably temporary) + checkUpdateDatasetVersionLock(ctxt); + Dataset savedDataset = null; DatasetVersion persistedVersion = clone; @@ -297,5 +301,23 @@ public boolean onSuccess(CommandContext ctxt, Object r) { ctxt.index().asyncIndexDataset((Dataset) r, true); return true; } - + + private void checkUpdateDatasetVersionLock(CommandContext ctxt) throws IllegalCommandException { + List locks = ctxt.datasets().getLocksByDatasetId(getDataset().getId()); + //locks.forEach(lock -> { + for (DatasetLock lock : locks) { + // Ingest lock is ok: + if (DatasetLock.Reason.Ingest != lock.getReason()) { + // with Workflow lock *some* users can edit; + // any other kind of lock - nope + if (DatasetLock.Reason.Workflow != lock.getReason() + || !ctxt.permissions().isMatchingWorkflowLock(getDataset(), + getUser().getIdentifier(), + getRequest().getWFInvocationId())) { + throw new IllegalCommandException( + BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), this); + } + } + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 3f6d0bf7c68..03134d811a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -51,6 +51,7 @@ import 
org.primefaces.PrimeFaces; import com.google.gson.Gson; +import edu.harvard.iq.dataverse.api.ApiConstants; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -68,6 +69,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; @@ -148,7 +150,7 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi * @param ruleId - Globus rule id - assumed to be associated with the * dataset's file path (should not be called with a user * specified rule id w/o further checking) - * @param datasetId - the id of the dataset associated with the rule + * @param dataset - the dataset associated with the rule * @param globusLogger - a separate logger instance, may be null */ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { @@ -690,7 +692,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; + return; // @todo ? } if (fileHandlerSuceeded) { @@ -706,8 +708,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S String taskIdentifier = jsonData.getString("taskIdentifier"); GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); - String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + GlobusTaskState taskState = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, taskState.getOwner_id(), "rw"); logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); @@ -725,7 +727,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. - GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(new Date().getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -746,17 +748,16 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // sleeping-then-checking the task status repeatedly: // globus task status check - // (the method below performs continuous looped checks of the remote + // (the following method performs continuous looped checks of the remote // Globus API, monitoring it for as long as it takes for the task to // finish one way or another!) 
- task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + taskState = globusStatusCheck(endpoint, taskIdentifier, globusLogger); // @todo null check, or make sure it's never null - String taskStatus = getTaskStatus(task); + String taskStatus = getTaskStatus(taskState); if (ruleId != null) { // Transfer is complete, so delete rule deletePermission(ruleId, dataset, globusLogger); - } // If success, switch to an EditInProgress lock - do this before removing the @@ -764,8 +765,17 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Keeping a lock through the add datafiles API call avoids a conflicting edit // and keeps any open dataset page refreshing until the datafile appears if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { - datasetSvc.addDatasetLock(dataset, - new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + globusLogger.info("Finished upload via Globus job."); + + DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), + DatasetLock.Reason.EditInProgress, + (authUser).getId(), + "Completing Globus Upload"); + if (editLock != null) { + dataset.addLock(editLock); + } else { + globusLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } } DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); @@ -785,7 +795,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S */ datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); } - + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { String comment = "Reason : " + taskStatus.split("#")[1] + "
Short Description : " + taskStatus.split("#")[2]; @@ -911,10 +921,9 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut myLogger.info("Successfully generated new JsonData for addFiles call"); - myLogger.info("Files processed: " + countAll); - myLogger.info("Files added successfully: " + countSuccess); - myLogger.info("Files failures: " + countError); - myLogger.info("Finished upload via Globus job."); + myLogger.info("Files passed to /addGlobusFiles: " + countAll); + myLogger.info("Files processed successfully: " + countSuccess); + myLogger.info("Files failures to process: " + countError); /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" @@ -925,7 +934,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut // a quick experimental AddReplaceFileHelper implementation: // Passing null for the HttpServletRequest to make a new DataverseRequest. - // The parent method is executed asynchronously, so the real request + // The parent method is always executed asynchronously, so the real request // that was associated with the original API call that triggered this upload // cannot be obtained. DataverseRequest dataverseRequest = new DataverseRequest(authUser, (HttpServletRequest)null); @@ -945,12 +954,38 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); - JsonReader jsonReader = Json.createReader(new StringReader((String) addFilesResponse.getEntity().toString())); - JsonObject jsonObject = jsonReader.readObject(); - String addFilesStatus = jsonObject.getString("status"); - String addFilesMessage = jsonObject.getJsonObject("data").getString("message"); + if (addFilesResponse == null) { + logger.info("null response from addFiles call"); + //@todo add this case to the user notification in case of error + return; + } + + JsonObject addFilesJsonObject = JsonUtil.getJsonObject(addFilesResponse.getEntity().toString()); - if ("OK".equalsIgnoreCase(addFilesStatus)) { + // @todo null checks etc. + String addFilesStatus = addFilesJsonObject.getString("status", null); + myLogger.info("addFilesResponse status: " + addFilesStatus); + + + if (ApiConstants.STATUS_OK.equalsIgnoreCase(addFilesStatus)) { + if (addFilesJsonObject.containsKey("data")) { + JsonObject responseFilesData = addFilesJsonObject.getJsonObject("data"); + if (responseFilesData.containsKey("Result")) { + JsonObject addFilesResult = responseFilesData.getJsonObject("Result"); + + Integer addFilesTotal = addFilesResult.getInt("Total number of files", -1); + Integer addFilesSuccess = addFilesResult.getInt("Number of files successfully added", -1); + // @todo handle -1 (missing values) above + // @todo log all this stuff in a task-specific log (??) 
+ myLogger.info("Files processed by addFiles: " + addFilesTotal + ", successfully added: " + addFilesSuccess); + // @todo incorporate this into the user notification + } else { + logger.warning("Malformed addFiles data section: "+ responseFilesData.toString()); + } + } else { + logger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString()); + } + // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -963,89 +998,21 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut countSuccess + " files added out of " + countAll, true); } myLogger.info("Successfully completed addFiles call "); + } else if (ApiConstants.STATUS_ERROR.equalsIgnoreCase(addFilesStatus)) { + String addFilesMessage = addFilesJsonObject.getString("message", null); + + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + myLogger.log(Level.SEVERE, "****** Output from addFiles: ", addFilesMessage); + // @todo send Failure notification + } else { myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); // @todo send Failure notification - if (addFilesResponse != null) { - myLogger.info("addFilesResponse status: " + addFilesStatus); - myLogger.info("addFilesResponse message" + addFilesMessage); - } } - - } - /** - * I don't think this method is needed at all. (I suspect that it's a remnant - * from the times when *multiple* individual /add calls needed to be performed - * for each file being added. So this was part of a framework that attempted - * to run this calls in parallel, potentially speeding things up (similarly to - * how the checksums are being calculated in parallel for multiple files). - * As of now, this method doesn't do anything "asynchronous" - there is one - * /addFiles call, and the method below will wait for it to complete, via the - * CompletableFuture.get(). (L.A.) 
- * @param curlCommand - * @param globusLogger - * @return - * @throws ExecutionException - * @throws InterruptedException - */ - /* - public String addFilesAsync(String curlCommand, Logger globusLogger) - throws ExecutionException, InterruptedException { - CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - return (addFiles(curlCommand, globusLogger)); - }, executor).exceptionally(ex -> { - globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); - ex.printStackTrace(); - return null; - }); - - String result = addFilesFuture.get(); - - return result; - } - - private String addFiles(String curlCommand, Logger globusLogger) { - ProcessBuilder processBuilder = new ProcessBuilder(); - Process process = null; - String line; - String status = ""; - - try { - globusLogger.info("Call to : " + curlCommand); - processBuilder.command("bash", "-c", curlCommand); - process = processBuilder.start(); - process.waitFor(); - - BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); - - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null) - sb.append(line); - globusLogger.info(" API Output : " + sb.toString()); - JsonObject jsonObject = null; - jsonObject = JsonUtil.getJsonObject(sb.toString()); - - status = jsonObject.getString("status"); - } catch (Exception ex) { - if (ex instanceof JsonParsingException) { - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); - } else { - globusLogger.log(Level.SEVERE, - "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); - } - } - - return status; - } */ - @Asynchronous public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { @@ -1418,22 +1385,75 @@ public List findExternalUploadsByTaskId(String tas return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList(); } - // @todo this may or may not need to be async (?) - public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { - List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); - - if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { - // @todo log error message; do nothing - return; - } + // @todo duplicated code, merge with the code handling the "classic" upload workflow + public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess) { + String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); + if (ruleId != null) { + // Transfer is complete, so delete rule + deletePermission(ruleId, dataset, logger); + } + AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); if (authUser == null) { // @todo log error message; do nothing return; } + + // Switch the locks on the dataset: + // @todo is it necessary? what is wrong exactly with keeping the Globus + // lock on for the duration of the process? 
+ if (taskSuccess) { + DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), + DatasetLock.Reason.EditInProgress, + (authUser).getId(), + "Completing Globus Upload"); + if (editLock != null) { + dataset.addLock(editLock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + } + + // Remove the Globus lock, regardless of whether this is a success or failure + DatasetLock globusUploadLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (globusUploadLock == null) { + logger.log(Level.WARNING, "No GlobusUpload lock found for dataset"); + } else { + logger.log(Level.FINE, "Removing GlobusUpload lock " + globusUploadLock.getId()); + /* + * Note: This call to remove a lock only works immediately because it is in + * another service bean. Despite the removeDatasetLocks method having the + * REQUIRES_NEW transaction annotation, when the globusUpload method and that + * method were in the same bean (globusUpload was in the DatasetServiceBean to + * start), the globus lock was still seen in the API call initiated in the + * addFilesAsync method called within the globusUpload method. I.e. it appeared + * that the lock removal was not committed/visible outside this method until + * globusUpload itself ended. + */ + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + } + + if (taskSuccess && GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { + List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); + + if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { + // @todo log error message; do nothing + return; + } + addFilesOnSuccess(dataset, authUser, fileUploadsInProgress); + } + // Handle locks/rules/etc. (?) 
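+        // (Note: deletePermission() has already been invoked for this ruleId
+        // at the top of this method, so the block just below repeats it;
+        // presumably one of the two calls goes away in the final cleanup.)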
+ if (ruleId != null) { + deletePermission(ruleId, dataset, logger); + logger.info("Removed upload permission: " + ruleId); + } + } + + public void addFilesOnSuccess(Dataset dataset, AuthenticatedUser authUser, List fileUploadsInProgress) { + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 210f08710dc..d3d06d38151 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -86,6 +86,9 @@ public String toString() { */ private String apiToken; + @Column + private String ruleId; + @ManyToOne private Dataset dataset; @@ -95,12 +98,13 @@ public String toString() { public GlobusTaskInProgress() { } - GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, Timestamp startTime) { + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, String ruleId, Timestamp startTime) { this.taskId = taskId; this.taskType = taskType; this.dataset = dataset; this.globusToken = globusToken; this.apiToken = apiToken; + this.ruleId = ruleId; this.startTime = startTime; } @@ -146,6 +150,13 @@ public void setApiToken(String apiToken) { this.apiToken = apiToken; } + public String getRuleId() { + return ruleId; + } + + public void setRuleId(String ruleId) { + this.ruleId = ruleId; + } public Dataset getDataset() { return dataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index bd274d44e38..4db25072b6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -49,6 +49,7 @@ public void init() { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); + // @todo scheduleAtFixedDelay() this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); @@ -65,20 +66,17 @@ public void init() { public void checkOngoingTasks() { logger.info("Performing a scheduled external Globus task check"); List tasks = globusService.findAllOngoingTasks(); - + tasks.forEach(t -> { GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); if (GlobusUtil.isTaskCompleted(retrieved)) { - if (GlobusUtil.isTaskSucceeded(retrieved)) { - // Do our thing, finalize adding the files to the dataset - globusService.addFilesOnSuccess(t); - } + // Do our thing, finalize adding the files to the dataset + globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved)); // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can - // delete it. - globusService.deleteExternalUploadRecords(t.getTaskId()); + // delete it. 
+ //globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); - // @todo double-check that the locks have been properly handled } }); } From 9c62b81c484111df3ed9162c8353aa50bd4b295b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 19 Aug 2024 10:47:29 -0400 Subject: [PATCH 23/84] refined logging #10623 --- .../dataverse/globus/GlobusServiceBean.java | 231 ++++++++---------- .../iq/dataverse/globus/GlobusUtil.java | 27 ++ .../globus/TaskMonitoringServiceBean.java | 61 ++++- 3 files changed, 191 insertions(+), 128 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 03134d811a7..5e68128c954 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -679,20 +679,24 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S throw new IllegalArgumentException("No valid json entries supplied for the files being uploaded"); } - String logTimestamp = logFormatter.format(new Date()); + Date startDate = new Date(); + + String logTimestamp = logFormatter.format(startDate); Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); - String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded; + boolean fileHandlerSuceeded = false; try { fileHandler = new FileHandler(logFileName); globusLogger.setUseParentHandlers(false); fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; // @todo ? + //return; // @todo I don't think we need to return here? + fileHandler = null; + } if (fileHandlerSuceeded) { @@ -727,7 +731,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. - GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(startDate.getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -740,6 +744,11 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S em.persist(fileUploadRecord); } + if (fileHandler != null) { + fileHandler.close(); + } + + // return and forget return; } @@ -753,19 +762,62 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // finish one way or another!) 
        taskState = globusStatusCheck(endpoint, taskIdentifier, globusLogger);
        // @todo null check, or make sure it's never null
-        String taskStatus = getTaskStatus(taskState);
+        String taskStatus = GlobusUtil.getTaskStatus(taskState);
+        boolean taskSuccess = GlobusUtil.isTaskSucceeded(taskState);
+
+        processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus);
+
+        if (fileHandler != null) {
+            fileHandler.close();
+        }
+    }

+    /**
+     * As the name suggests, the method completes and finalizes an upload task,
+     * whether it completed successfully or failed. (In the latter case, it
+     * simply sends a failure notification and does some cleanup.)
+     * The method is called in both task monitoring scenarios: the old method,
+     * that relies on continuous looping, and the new one, implemented on the basis
+     * of timer-like monitoring from a dedicated monitoring Singleton service.
+     * @param dataset the dataset
+     * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles
+     * @param authUser the user that should be performing the addFiles call
+     *        finalizing adding the files to the Dataset. Note that this
+     *        user will need to be obtained from the saved api token, when this
+     *        method is called via the TaskMonitoringService
+     * @param ruleId Globus rule/permission id associated with the task
+     * @param globusLogger the Logger; if null, the main logger of the service bean will be used
+     * @param fileHandler FileHandler associated with the Logger, when not null
+     * @param taskSuccess boolean; whether the completed task succeeded
+     * @param taskStatus human-readable task status label as reported by the Globus API
+     * The method should not throw any exceptions; all the exceptions thrown
+     * by the methods within are expected to be intercepted.
+     */
+    private void processCompletedUploadTask(Dataset dataset,
+            JsonArray filesJsonArray,
+            AuthenticatedUser authUser,
+            String ruleId,
+            Logger globusLogger,
+            FileHandler fileHandler,
+            boolean taskSuccess,
+            String taskStatus) {
+
+        Logger myLogger = globusLogger == null ? logger : globusLogger;
+
         if (ruleId != null) {
             // Transfer is complete, so delete rule
-            deletePermission(ruleId, dataset, globusLogger);
+            deletePermission(ruleId, dataset, myLogger);
         }
-
         // If success, switch to an EditInProgress lock - do this before removing the
         // GlobusUpload lock
         // Keeping a lock through the add datafiles API call avoids a conflicting edit
-        // and keeps any open dataset page refreshing until the datafile appears
-
+        // and keeps any open dataset page refreshing until the datafile appears.
+
+        // @todo is it necessary? what is wrong exactly with keeping the Globus
+        // lock on for the duration of the process?
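+        // (in outline: on success, swap the GlobusUpload lock for an EditInProgress
+        // lock, add the files via processUploadedFiles()/addFiles, then notify the
+        // user; on failure, only a failure notification is sent)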
+        if (taskSuccess) {
+            myLogger.info("Finished upload via Globus job.");

             DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(),
                     DatasetLock.Reason.EditInProgress,
@@ -774,7 +826,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S
             if (editLock != null) {
                 dataset.addLock(editLock);
             } else {
-                globusLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId());
+                myLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId());
             }
         }

@@ -792,36 +844,50 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S
          * addFilesAsync method called within the globusUpload method. I.e. it appeared
          * that the lock removal was not committed/visible outside this method until
          * globusUpload itself ended.
+         * (from @landreev:) If I understand the comment above correctly - annotations
+         * like "@TransactionAttribute(REQUIRES_NEW)" do NOT work when you call a method
+         * directly within the same service bean. Strictly speaking, it's not the
+         * "within the same bean" part that is the key, rather, these annotations
+         * only apply when calling a method via an @EJB-defined service. So it
+         * is generally possible to call another method within FooServiceBean
+         * with the REQUIRES_NEW transaction taking effect - but then it would need
+         * to define *itself* as an @EJB -
+         * @EJB FooServiceBean fooSvc;
+         * ...
+         * fooSvc.doSomethingInNewTransaction(...);
+         * etc.
          */
         datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload);
         }

-        if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) {
+        if (!taskSuccess) {
             String comment = "Reason : " + taskStatus.split("#")[1] + "<br>
Short Description : " + taskStatus.split("#")[2]; userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); - globusLogger.info("Globus task failed "); + myLogger.info("Globus task failed "); } else { try { - processUploadedFiles(filesJsonArray, dataset, authUser, globusLogger); + processUploadedFiles(filesJsonArray, dataset, authUser, myLogger); } catch (Exception e) { logger.info("Exception from processUploadedFiles call "); e.printStackTrace(); - globusLogger.info("Exception from processUploadedFiles call " + e.getMessage()); + myLogger.info("Exception from processUploadedFiles call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - // } } if (ruleId != null) { - deletePermission(ruleId, dataset, globusLogger); - globusLogger.info("Removed upload permission: " + ruleId); - } - if (fileHandlerSuceeded) { - fileHandler.close(); + deletePermission(ruleId, dataset, myLogger); + myLogger.info("Removed upload permission: " + ruleId); } + //if (fileHandler != null) { + // fileHandler.close(); + //} + } + + /** * The code in this method is copy-and-pasted from the previous Borealis * implemenation. @@ -1075,7 +1141,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); // @todo null check - String taskStatus = getTaskStatus(task); + String taskStatus = GlobusUtil.getTaskStatus(task); // Transfer is done (success or failure) so delete the rule if (ruleId != null) { @@ -1132,30 +1198,6 @@ private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId return task; } - private String getTaskStatus(GlobusTaskState task) { - String status = null; - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress but is not ok or queued - // (L.A.) I think the assumption here is that this method is called - // exclusively on tasks that have already completed. So that's why - // it is safe to assume that "ACTIVE" means "FAILED". - if (status.equalsIgnoreCase("ACTIVE")) { - status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } else { - // The task is either succeeded, failed or inactive. - status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } - } else { - status = "FAILED"; - } - } else { - status = "FAILED"; - } - return status; - } - public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { @@ -1386,98 +1428,41 @@ public List findExternalUploadsByTaskId(String tas } // @todo duplicated code, merge with the code handling the "classic" upload workflow - public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess) { + public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) { String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); - - if (ruleId != null) { - // Transfer is complete, so delete rule - deletePermission(ruleId, dataset, logger); - } - AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); if (authUser == null) { // @todo log error message; do nothing return; } - // Switch the locks on the dataset: - // @todo is it necessary? 
what is wrong exactly with keeping the Globus - // lock on for the duration of the process? - if (taskSuccess) { - DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), - DatasetLock.Reason.EditInProgress, - (authUser).getId(), - "Completing Globus Upload"); - if (editLock != null) { - dataset.addLock(editLock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - } - - // Remove the Globus lock, regardless of whether this is a success or failure - DatasetLock globusUploadLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); - if (globusUploadLock == null) { - logger.log(Level.WARNING, "No GlobusUpload lock found for dataset"); - } else { - logger.log(Level.FINE, "Removing GlobusUpload lock " + globusUploadLock.getId()); - /* - * Note: This call to remove a lock only works immediately because it is in - * another service bean. Despite the removeDatasetLocks method having the - * REQUIRES_NEW transaction annotation, when the globusUpload method and that - * method were in the same bean (globusUpload was in the DatasetServiceBean to - * start), the globus lock was still seen in the API call initiated in the - * addFilesAsync method called within the globusUpload method. I.e. it appeared - * that the lock removal was not committed/visible outside this method until - * globusUpload itself ended. - */ - datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); - } - - if (taskSuccess && GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { + if (GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { // @todo log error message; do nothing return; } - addFilesOnSuccess(dataset, authUser, fileUploadsInProgress); - } - - // Handle locks/rules/etc. (?) - if (ruleId != null) { - deletePermission(ruleId, dataset, logger); - logger.info("Removed upload permission: " + ruleId); - } - } - - public void addFilesOnSuccess(Dataset dataset, AuthenticatedUser authUser, List fileUploadsInProgress) { - - JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); - - for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { - String jsonInfoString = pendingFile.getFileInfo(); - JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); - filesJsonArrayBuilder.add(fileObject); - } - - JsonArray filesJsonArray = filesJsonArrayBuilder.build(); - - if (filesJsonArray == null || filesJsonArray.size() < 1) { - // @todo log error message; do nothing - return; - } - - try { - processUploadedFiles(filesJsonArray, dataset, authUser, null); - } catch (Exception ex) { - // @todo log error message; make sure the error notification to the - // has been sent (may or may not have already been sent inside the - // method above). 
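+            // (the completion logic shared with the legacy code path lives in
+            // processCompletedUploadTask(), invoked below once the file entries
+            // have been re-assembled from the saved database records)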
+
+            JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder();
+
+            for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) {
+                String jsonInfoString = pendingFile.getFileInfo();
+                JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString);
+                filesJsonArrayBuilder.add(fileObject);
+            }
+
+            JsonArray filesJsonArray = filesJsonArrayBuilder.build();
+
+            //processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus);
+            processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, null, taskSuccess, taskStatus);
+        } else {
+            // @todo extend this async. framework to handle Globus downloads as well
         }
+    }
-
     public void deleteExternalUploadRecords(String taskId) {
         em.createNamedQuery("ExternalFileUploadInProgress.deleteByTaskId")
                 .setParameter("taskId", taskId)

diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java
index 67594ad1a5e..652898591ac 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java
@@ -63,4 +63,31 @@ public static boolean isTaskSucceeded(GlobusTaskState task) {
         }
         return false;
     }
+
+    /**
+     * Produces a human-readable status label of a completed task
+     * @param task a looked-up state of the task, as reported by the Globus API
+     */
+    public static String getTaskStatus(GlobusTaskState task) {
+        String status = null;
+        if (task != null) {
+            status = task.getStatus();
+            if (status != null) {
+                // The task is in progress but is not ok or queued
+                // (L.A.) I think the assumption here is that this method is called
+                // exclusively on tasks that have already completed. So that's why
+                // it is safe to assume that "ACTIVE" means "FAILED".
+                if (status.equalsIgnoreCase("ACTIVE")) {
+                    status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description();
+                } else {
+                    // The task is either succeeded, failed or inactive.
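+                    // (the resulting label has the form STATUS#nice_status#short_description,
+                    // e.g., hypothetically, "FAILED#PERMISSION_DENIED#...": the callers
+                    // split it on "#" to extract the parts for the failure notification)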
+ status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + status = "FAILED"; + } + } else { + status = "FAILED"; + } + return status; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index 4db25072b6b..a74d0c3f747 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -12,8 +12,13 @@ import jakarta.ejb.Singleton; import jakarta.ejb.Startup; import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; +import java.io.File; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.logging.FileHandler; import java.util.logging.Logger; /** @@ -42,15 +47,16 @@ public class TaskMonitoringServiceBean { SettingsServiceBean settingsSvc; @EJB GlobusServiceBean globusService; - + + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + @PostConstruct public void init() { if (systemConfig.isGlobusTaskMonitoringServer()) { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); - // @todo scheduleAtFixedDelay() - this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, + this.scheduler.scheduleWithFixedDelay(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); } else { @@ -68,17 +74,62 @@ public void checkOngoingTasks() { List tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { - GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); + FileHandler taskLogHandler = getTaskLogHandler(t); + Logger taskLogger = getTaskLogger(t, taskLogHandler); + + GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), taskLogger); if (GlobusUtil.isTaskCompleted(retrieved)) { // Do our thing, finalize adding the files to the dataset - globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved)); + globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved), GlobusUtil.getTaskStatus(retrieved), taskLogger); // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can // delete it. //globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); } + + if (taskLogHandler != null) { + // @todo it should be prudent to cache these loggers and handlers + // between monitoring runs + taskLogHandler.close(); + } }); } + private FileHandler getTaskLogHandler(GlobusTaskInProgress task) { + if (task == null) { + return null; + } + + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + task.getDataset().getId() + "_" + logTimeStamp + + ".log"; + FileHandler fileHandler; + try { + fileHandler = new FileHandler(logFileName); + } catch (IOException | SecurityException ex) { + // @todo log this error somehow? 
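+            // (returning a null handler below simply disables the per-task log file;
+            // processCompletedUploadTask() will fall back to the main service logger)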
+ fileHandler = null; + } + return fileHandler; + } + + private Logger getTaskLogger(GlobusTaskInProgress task, FileHandler logFileHandler) { + if (logFileHandler == null) { + return null; + } + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + Logger taskLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimeStamp); + taskLogger.setUseParentHandlers(false); + + taskLogger.addHandler(logFileHandler); + + return taskLogger; + } + } From 8cdff8d66eb655e8def3e35b21f1a3d438f5608a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 19 Aug 2024 19:51:43 -0400 Subject: [PATCH 24/84] Added notifications for various failure cases. #10623 --- .../harvard/iq/dataverse/MailServiceBean.java | 27 +++- .../iq/dataverse/UserNotification.java | 3 +- .../iq/dataverse/api/ApiConstants.java | 4 + .../providers/builtin/DataverseUserPage.java | 2 + .../datasetutility/AddReplaceFileHelper.java | 4 +- .../dataverse/globus/GlobusServiceBean.java | 120 ++++++++++-------- .../globus/TaskMonitoringServiceBean.java | 3 +- .../harvard/iq/dataverse/util/MailUtil.java | 17 +++ src/main/java/propertyFiles/Bundle.properties | 9 +- src/main/webapp/dataverseuser.xhtml | 14 ++ 10 files changed, 144 insertions(+), 59 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 7359ef8eb33..d29649ad3a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -623,6 +623,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return downloadCompletedMessage; + case GLOBUSUPLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); @@ -633,8 +634,30 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return uploadCompletedWithErrorsMessage; + + case GLOBUSUPLOADREMOTEFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedRemotelyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedRemotely", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedRemotelyMessage; - case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADLOCALFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedLocallyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedLocally", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedLocallyMessage; + + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( @@ -763,6 +786,8 @@ public Object getObjectOfNotification (UserNotification userNotification){ return versionService.find(userNotification.getObjectId()); case 
GLOBUSUPLOADCOMPLETED: case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: return datasetService.find(userNotification.getObjectId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index 280c2075494..2d37540fab3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -39,7 +39,8 @@ public enum Type { CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED, GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, - GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS; + GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS, + GLOBUSUPLOADREMOTEFAILURE, GLOBUSUPLOADLOCALFAILURE; public String getDescription() { return BundleUtil.getStringFromBundle("notification.typeDescription." + this.name()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index 347a8946a46..a2faaf3637c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -17,4 +17,8 @@ private ApiConstants() { public static final String DS_VERSION_LATEST = ":latest"; public static final String DS_VERSION_DRAFT = ":draft"; public static final String DS_VERSION_LATEST_PUBLISHED = ":latest-published"; + + // addFiles call + public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files"; + public static final String API_ADD_FILES_COUNT_SUCCESSFULL = "Number of files successfully added"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index a0e3f899443..48afb2b830a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -528,6 +528,8 @@ public void displayNotification() { case GLOBUSUPLOADCOMPLETEDWITHERRORS: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 2b35a4cc783..336fa9b5b7a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2167,8 +2167,8 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } JsonObjectBuilder result = Json.createObjectBuilder() - .add("Total number of files", totalNumberofFiles) - .add("Number of files successfully added", successNumberofFiles); + .add(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, totalNumberofFiles) + .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, successNumberofFiles); return 
Response.ok().entity(Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5e68128c954..4bb478d26ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -766,7 +766,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S boolean taskSuccess = GlobusUtil.isTaskCompleted(taskState); - processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus); + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, taskSuccess, taskStatus); if (fileHandler != null) { fileHandler.close(); @@ -798,7 +798,6 @@ private void processCompletedUploadTask(Dataset dataset, AuthenticatedUser authUser, String ruleId, Logger globusLogger, - FileHandler fileHandler, boolean taskSuccess, String taskStatus) { @@ -861,11 +860,17 @@ private void processCompletedUploadTask(Dataset dataset, } if (!taskSuccess) { - String comment = "Reason : " + taskStatus.split("#")[1] + "
<br> Short Description : "
-                    + taskStatus.split("#")[2];
+            String comment;
+            if (taskStatus != null) {
+                comment = "Reason : " + taskStatus.split("#")[1] + "<br>
Short Description : " + + taskStatus.split("#")[2]; + } else { + comment = "No further information available"; + } + + myLogger.info("Globus Upload task failed "); userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); - myLogger.info("Globus task failed "); + UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE, dataset.getId(), comment, true); } else { try { @@ -908,6 +913,8 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut Integer countAll = 0; Integer countSuccess = 0; Integer countError = 0; + Integer countAddFilesSuccess = 0; + String notificationErrorMessage = ""; List inputList = new ArrayList(); @@ -991,13 +998,22 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut myLogger.info("Files processed successfully: " + countSuccess); myLogger.info("Files failures to process: " + countError); - /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " - + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" - + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command);*/ - // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of - // calling API - // a quick experimental AddReplaceFileHelper implementation: + if (countSuccess < 1) { + // We don't have any valid entries to call addFiles() for; so, no + // need to proceed. + notificationErrorMessage = "Failed to successfully process any of the file entries, " + + "out of the " + countAll + " total as submitted to Dataverse"; + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE, + dataset.getId(), notificationErrorMessage, true); + return; + } else if (countSuccess < countAll) { + notificationErrorMessage = "Out of the " + countAll + " file entries submitted to /addGlobusFiles " + + "only " + countSuccess + " could be successfully parsed and processed. "; + } + + // A new AddReplaceFileHelper implementation, replacing the old one that + // was relying on calling /addFiles api via curl: // Passing null for the HttpServletRequest to make a new DataverseRequest. // The parent method is always executed asynchronously, so the real request @@ -1028,55 +1044,57 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut JsonObject addFilesJsonObject = JsonUtil.getJsonObject(addFilesResponse.getEntity().toString()); - // @todo null checks etc. + // @todo null check? String addFilesStatus = addFilesJsonObject.getString("status", null); myLogger.info("addFilesResponse status: " + addFilesStatus); - if (ApiConstants.STATUS_OK.equalsIgnoreCase(addFilesStatus)) { - if (addFilesJsonObject.containsKey("data")) { - JsonObject responseFilesData = addFilesJsonObject.getJsonObject("data"); - if (responseFilesData.containsKey("Result")) { - JsonObject addFilesResult = responseFilesData.getJsonObject("Result"); - - Integer addFilesTotal = addFilesResult.getInt("Total number of files", -1); - Integer addFilesSuccess = addFilesResult.getInt("Number of files successfully added", -1); - // @todo handle -1 (missing values) above - // @todo log all this stuff in a task-specific log (??) 
- myLogger.info("Files processed by addFiles: " + addFilesTotal + ", successfully added: " + addFilesSuccess); - // @todo incorporate this into the user notification - } else { - logger.warning("Malformed addFiles data section: "+ responseFilesData.toString()); - } - } else { - logger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString()); - } - - // if(!taskSkippedFiles) - if (countError == 0) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, - dataset.getId(), countSuccess + " files added out of " + countAll, true); + if (addFilesJsonObject.containsKey("data") && addFilesJsonObject.getJsonObject("data").containsKey("Result")) { + + //Integer countAddFilesTotal = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, -1); + countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, -1); + myLogger.info("Files successfully added by addFiles(): " + countAddFilesSuccess); + } else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), - countSuccess + " files added out of " + countAll, true); + myLogger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString()); + notificationErrorMessage = "Malformed response received when attempting to add the files to the dataset. "; } - myLogger.info("Successfully completed addFiles call "); + + myLogger.info("Completed addFiles call "); } else if (ApiConstants.STATUS_ERROR.equalsIgnoreCase(addFilesStatus)) { String addFilesMessage = addFilesJsonObject.getString("message", null); - + myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); myLogger.log(Level.SEVERE, "****** Output from addFiles: ", addFilesMessage); - // @todo send Failure notification + notificationErrorMessage += "Error response received when attempting to add the files to the dataset: " + addFilesMessage + " "; } else { myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); - // @todo send Failure notification + notificationErrorMessage += "Unexpected error encountered when attemptingh to add the files to the dataset."; + } + + // if(!taskSkippedFiles) + if (countAddFilesSuccess == countAll) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else if (countAddFilesSuccess > 0) { + // success, but partial: + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll + notificationErrorMessage, true); + } else { + notificationErrorMessage = "".equals(notificationErrorMessage) + ? " No additional information is available." 
: notificationErrorMessage;
+            userNotificationService.sendNotification((AuthenticatedUser) authUser,
+                    new Timestamp(new Date().getTime()),
+                    UserNotification.Type.GLOBUSUPLOADLOCALFAILURE, dataset.getId(),
+                    notificationErrorMessage, true);
         }
+    }

     @Asynchronous
@@ -1140,7 +1158,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro
             logger.warning("ruleId not found for taskId: " + taskIdentifier);
         }
         task = globusStatusCheck(endpoint, taskIdentifier, globusLogger);
-        // @todo null check
+        // @todo null check?
         String taskStatus = GlobusUtil.getTaskStatus(task);

         // Transfer is done (success or failure) so delete the rule
@@ -1427,13 +1445,13 @@ public List findExternalUploadsByTaskId(String tas
         return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList();
     }

-    // @todo duplicated code, merge with the code handling the "classic" upload workflow
     public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) {
         String ruleId = globusTask.getRuleId();
         Dataset dataset = globusTask.getDataset();
         AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken());
         if (authUser == null) {
             // @todo log error message; do nothing
+            // (the fields in GlobusTaskInProgress are not nullable though - ?)
             return;
         }

@@ -1442,6 +1460,7 @@
             if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) {
                 // @todo log error message; do nothing
+                // (will this ever happen though?)
                 return;
             }

@@ -1455,10 +1474,9 @@
             JsonArray filesJsonArray = filesJsonArrayBuilder.build();

-            //processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus);
-            processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, null, taskSuccess, taskStatus);
+            processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, taskSuccess, taskStatus);
         } else {
-            // @todo extend this async. framework to handle Globus downloads as well
+            // @todo eventually, extend this async. framework to handle Globus downloads as well
         }
-
     }

diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
index a74d0c3f747..c956831317c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
@@ -67,7 +67,6 @@ public void init() {
     /**
      * This method will be executed on a timer-like schedule, continuously
      * monitoring all the ongoing external Globus tasks (transfers).
- * @todo make sure the executions do not overlap/stack up */ public void checkOngoingTasks() { logger.info("Performing a scheduled external Globus task check"); @@ -90,7 +89,7 @@ public void checkOngoingTasks() { if (taskLogHandler != null) { // @todo it should be prudent to cache these loggers and handlers - // between monitoring runs + // between monitoring runs (should be fairly easy to do) taskLogHandler.close(); } }); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 36c249de834..f81ce093815 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -99,6 +99,23 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", rootDvNameAsList); } + case GLOBUSUPLOADREMOTEFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", dsNameAsList); + + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", rootDvNameAsList); + } + case GLOBUSUPLOADLOCALFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", rootDvNameAsList); + } case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: try { DatasetVersion version = (DatasetVersion)objectOfNotification; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4b366522966..e0488f95e2c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -253,11 +253,16 @@ notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId= notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

<br/><br/>{3}<br/>
 notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.<br/><br/>{3}<br/>
 notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.<br/><br/>{3}<br/>
+notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.<br/><br/>{3}<br/>
+notification.mail.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.<br/><br/>{3}<br/>
 notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.<br/><br/>{3}<br/>
 notification.import.filesystem=Dataset {1} has been successfully uploaded and verified.
 notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified.
 notification.globus.download.completed=Globus transfer from the dataset {1} was successful.
 notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors.
+notification.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.<br/><br/>{3}<br/>
+notification.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.<br/><br/>{3}<br/>
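+# (per the Arrays.asList(...) arguments in MailServiceBean above: {0} is the site URL,
+# {1} the dataset PID, {2} the dataset display name, and {3} the comment text)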
+ notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification @@ -823,8 +828,8 @@ notification.email.datasetWasMentioned.subject={0}: A Dataset Relationship has b notification.email.globus.uploadCompleted.subject={0}: Files uploaded successfully via Globus and verified notification.email.globus.downloadCompleted.subject={0}: Files downloaded successfully via Globus notification.email.globus.uploadCompletedWithErrors.subject={0}: Uploaded files via Globus with errors -notification.email.globus.downloadCompletedWithErrors.subject={0}: Downloaded files via Globus with errors - +notification.email.globus.uploadFailedRemotely.subject={0}: Failed to upload files via Globus +notification.email.globus.uploadFailedLocally.subject={0}: Failed to add files uploaded via Globus to dataset # dataverse.xhtml dataverse.name=Dataverse Name dataverse.name.title=The project, department, university, professor, or journal this dataverse will contain data for. diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 9ed8b5209b6..d061348ad87 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -367,6 +367,20 @@
+ [the fourteen added lines of XHTML markup for displaying the two new notification types did not survive this text rendering]

From 531e25c05df0ec507b5e3c0650033b0691c96df2 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Tue, 20 Aug 2024 09:23:36 -0400
Subject: [PATCH 25/84] Config guide entry. #10623

---
 doc/sphinx-guides/source/installation/config.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index 0038c188ea5..01bf1419a52 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -3259,6 +3259,13 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r

 Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``.

+.. _dataverse.bagit.sourceorg.email:
+
+dataverse.globus.taskMonitoringServer
++++++++++++++++++++++++++++++++++++++
+
+This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag. Setting it to true designates this Dataverse instance as the dedicated polling server; this is needed so that the new framework can be used in a multi-node installation.
+
 .. _feature-flags:

 Feature Flags
@@ -3294,6 +3301,9 @@ please find all known feature flags below. Any of these flags can be activated u
    * - disable-return-to-author-reason
      - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call.
      - ``Off``
+   * - globus-use-experimental-async-framework
+     - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Note that the JVM option ``dataverse.globus.taskMonitoringServer`` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance.
+     - ``Off``

 **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable

From 007d7150763b55db95c35d3007900cfe46d0f50d Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Tue, 20 Aug 2024 09:47:41 -0400
Subject: [PATCH 26/84] Added a few more doc notes. #10623

---
 doc/release-notes/10623-globus-improvements.md           | 1 +
 doc/sphinx-guides/source/developers/big-data-support.rst | 2 ++
 doc/sphinx-guides/source/developers/globus-api.rst       | 2 ++
 3 files changed, 5 insertions(+)
 create mode 100644 doc/release-notes/10623-globus-improvements.md

diff --git a/doc/release-notes/10623-globus-improvements.md b/doc/release-notes/10623-globus-improvements.md
new file mode 100644
index 00000000000..03579b59631
--- /dev/null
+++ b/doc/release-notes/10623-globus-improvements.md
@@ -0,0 +1 @@
+A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See the `globus-use-experimental-async-framework` feature flag in the Configuration guide.
\ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 4aaed10512e..99ea4d2e0ba 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -187,3 +187,5 @@ As described in that document, Globus transfers can be initiated by choosing the An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. See also :ref:`Globus settings <:GlobusSettings>`. + +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 902fc9db2ee..0f16dc704ef 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -185,6 +185,8 @@ As the transfer can take significant time and the API call is asynchronous, the Once the transfer completes, Dataverse will remove the write permission for the principal. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. + Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. 
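For illustration, here is a condensed sketch of the flow that the new framework described above implements, stitched together from the diffs in this series (the method and entity names are real; the surrounding glue is paraphrased, not verbatim code):

    // 1. In /addGlobusFiles (any node, with the feature flag enabled): persist
    //    the task state and return immediately, instead of looping until the
    //    transfer completes.
    GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier,
            GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(),
            token.getTokenString(), ruleId, new Timestamp(startDate.getTime()));
    em.persist(taskInProgress);
    // (plus one ExternalFileUploadInProgress record per file entry)

    // 2. In TaskMonitoringServiceBean (only on the node where
    //    dataverse.globus.taskMonitoringServer is true), on a fixed-delay schedule:
    GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), taskLogger);
    if (GlobusUtil.isTaskCompleted(retrieved)) {
        globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved),
                GlobusUtil.getTaskStatus(retrieved), taskLogger);
        globusService.deleteTask(t); // success or failure, no need to keep monitoring
    }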
From 6fcb2856f15ff88b9b0ae4f82dd3caf40b5cb2c3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 10:47:55 -0400 Subject: [PATCH 27/84] typo #10623 --- src/main/webapp/dataverseuser.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index d061348ad87..bcb71733716 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -374,7 +374,7 @@ - + From f6882df1ea49acb322cd5091a6bf1f6d8e075cfd Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 11:17:14 -0400 Subject: [PATCH 28/84] cut-and-paste error #10623 --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 01bf1419a52..731eef71c57 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3259,7 +3259,7 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. -.. _dataverse.bagit.sourceorg.email: +.. _dataverse.globus.taskMonitoringServer: dataverse.globus.taskMonitoringServer +++++++++++++++++++++++++++++++++++++ From 4ae3ee6e4c268fa5911bff8e05377d85f45e5a94 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 11:31:10 -0400 Subject: [PATCH 29/84] (#10623) --- .../java/edu/harvard/iq/dataverse/settings/FeatureFlags.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 746e6e3b75d..c8983b80df1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -92,8 +92,7 @@ public enum FeatureFlags { */ DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason"), /** - * TEMPORARY feature flag for the new Globus upload framework (will only be - * used for testing). + * Feature flag for the new Globus upload framework. 
*/ GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; From 9cf4e1b90888cf0d78f3659c2098bf142b5192a0 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:26:24 -0400 Subject: [PATCH 30/84] some minor cleanup changes #10623 --- .../ExternalFileUploadInProgress.java | 4 ---- .../datasetutility/AddReplaceFileHelper.java | 4 ++-- .../impl/UpdateDatasetVersionCommand.java | 22 +------------------ .../globus/GlobusTaskInProgress.java | 4 ---- 4 files changed, 3 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java index ab6a1798307..96bfd3f63f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse; import jakarta.persistence.Column; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 336fa9b5b7a..8e7c07b80bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2139,7 +2139,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "+eipLock.getId()); + logger.log(Level.FINE, "Removed EditInProgress lock"); } try { @@ -2306,7 +2306,7 @@ public Response replaceFiles(String jsonData, Dataset ds, User authUser) { logger.warning("Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.info("Removed EditInProgress lock "); + logger.fine("Removed EditInProgress lock "); } try { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index faf8884b08d..cad61000e69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -103,9 +103,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - //ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); - // this is an experiment (probably temporary) - checkUpdateDatasetVersionLock(ctxt); + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); Dataset savedDataset = null; @@ -302,22 +300,4 @@ public boolean onSuccess(CommandContext ctxt, Object r) { return true; } - private void checkUpdateDatasetVersionLock(CommandContext ctxt) throws IllegalCommandException { - List locks = ctxt.datasets().getLocksByDatasetId(getDataset().getId()); - //locks.forEach(lock -> { - for (DatasetLock lock : locks) { - // Ingest lock is ok: - if (DatasetLock.Reason.Ingest != 
lock.getReason()) { - // with Workflow lock *some* users can edit; - // any other kind of lock - nope - if (DatasetLock.Reason.Workflow != lock.getReason() - || !ctxt.permissions().isMatchingWorkflowLock(getDataset(), - getUser().getIdentifier(), - getRequest().getWFInvocationId())) { - throw new IllegalCommandException( - BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), this); - } - } - } - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index d3d06d38151..0a56b4933a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; From 45fb938acb3fcd027920d8f5308a026713dc942c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:30:49 -0400 Subject: [PATCH 31/84] cosmetic #10623 --- .../engine/command/impl/UpdateDatasetVersionCommand.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index cad61000e69..e04cae13b35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -14,7 +14,6 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -104,7 +103,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { Dataset theDataset = getDataset(); ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); - Dataset savedDataset = null; DatasetVersion persistedVersion = clone; From b3f79fe3102a308df1c8354465adb1f3b9bd2244 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:40:39 -0400 Subject: [PATCH 32/84] extra L in SUCCESSFUL (#10623) --- src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java | 2 +- .../iq/dataverse/datasetutility/AddReplaceFileHelper.java | 2 +- .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index a2faaf3637c..15114085c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -20,5 +20,5 @@ private ApiConstants() { // addFiles call public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files"; - public static final String API_ADD_FILES_COUNT_SUCCESSFULL = "Number of files successfully added"; + public static final String API_ADD_FILES_COUNT_SUCCESSFUL = "Number of files successfully 
added"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7c07b80bb..a470f08f736 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2168,7 +2168,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { JsonObjectBuilder result = Json.createObjectBuilder() .add(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, totalNumberofFiles) - .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, successNumberofFiles); + .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, successNumberofFiles); return Response.ok().entity(Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 4bb478d26ed..ae84cad545c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1052,7 +1052,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut if (addFilesJsonObject.containsKey("data") && addFilesJsonObject.getJsonObject("data").containsKey("Result")) { //Integer countAddFilesTotal = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, -1); - countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, -1); + countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, -1); myLogger.info("Files successfully added by addFiles(): " + countAddFilesSuccess); } else { From 1acae684c31542591ba9e407e8c50cb80de1993a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:58:08 -0400 Subject: [PATCH 33/84] better Globus service availability checks #10623 --- .../harvard/iq/dataverse/api/Datasets.java | 22 ++++++++++++++----- src/main/java/propertyFiles/Bundle.properties | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4c547f5295f..aa6989d365a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3908,7 +3908,7 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP if (!systemConfig.isGlobusUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, - BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); } // ------------------------------------- @@ -4008,11 +4008,6 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - // @todo why isHTTPUpload()? - shouldn't it be checking isGlobusUpload() here? 
- return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- @@ -4035,6 +4030,21 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return wr.getResponse(); } + // Is Globus upload service available? + + // ... on this Dataverse instance? + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); + } + + // ... and on this specific Dataset? + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + // @todo check if the dataset is already locked! JsonObject jsonObject = null; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e0488f95e2c..25ae8ad00c0 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1765,6 +1765,7 @@ file.fromWebloaderAfterCreate.tip=An option to upload a folder of files will be file.fromWebloader=Upload a Folder file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. +file.api.globusUploadDisabled=File upload via Globus is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. 
file.replace.original=Original File file.editFiles=Edit Files From 5ba28883e2cdce99da768ebaa07ddd055f1b1229 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:03:18 -0400 Subject: [PATCH 34/84] better Globus service availability checks #10623 --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aa6989d365a..ac5b9147dff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3908,7 +3908,7 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP if (!systemConfig.isGlobusUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, - BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); } // ------------------------------------- From 2512eab74e82284a4eae6fe29c172a869dbb64a8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:12:43 -0400 Subject: [PATCH 35/84] removed an unnecessary @todo (#10623) --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ae84cad545c..6d3138856f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -687,19 +687,16 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded = false; + try { fileHandler = new FileHandler(logFileName); globusLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - //return; // @todo I don't think we need to return here? fileHandler = null; - } - if (fileHandlerSuceeded) { + if (fileHandler != null) { globusLogger.addHandler(fileHandler); } else { globusLogger = logger; From 6b06d9472326b28c3d47e549e14e5ee2af7e6bd3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:20:22 -0400 Subject: [PATCH 36/84] cosmetic #10623 --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 -- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 6d3138856f2..133da0a1cab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -810,8 +810,6 @@ private void processCompletedUploadTask(Dataset dataset, // Keeping a lock through the add datafiles API call avoids a conflicting edit // and keeps any open dataset page refreshing until the datafile appears. - // @todo is it necessary? what is wrong exactly with keeping the Globus - // lock on for the duration of the process? 
if (taskSuccess) { myLogger.info("Finished upload via Globus job."); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index c956831317c..5dd4ce312b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; From 6d06927169f0c7780ba02791ce3b77d90b9121b3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 22 Aug 2024 10:54:06 -0400 Subject: [PATCH 37/84] more changes per feedback. (saving the api token in the GlobusTask entity was a BAD idea!) #10623 --- .../harvard/iq/dataverse/api/Datasets.java | 6 +--- .../dataverse/globus/GlobusServiceBean.java | 9 +++--- .../globus/GlobusTaskInProgress.java | 29 +++++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ac5b9147dff..960cc408ee5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4077,19 +4077,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); } - - ApiToken token = authSvc.findApiTokenByUser(authUser); - if(uriInfo != null) { logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); } - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call try { - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonObject, dataset, requestUrl, authUser); } catch (IllegalArgumentException ex) { return badRequest("Invalid parameters: "+ex.getMessage()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 133da0a1cab..ac3c81622fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -667,7 +667,7 @@ private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List< @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws IllegalArgumentException, ExecutionException, InterruptedException, MalformedURLException { // Before we do anything else, let's do some basic validation of what @@ -728,7 +728,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. 
- GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(startDate.getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), authUser, ruleId, new Timestamp(startDate.getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -1443,10 +1443,9 @@ public List findExternalUploadsByTaskId(String tas public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) { String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); - AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); + AuthenticatedUser authUser = globusTask.getLocalUser(); if (authUser == null) { - // @todo log error message; do nothing - // (the fields in GlobusTaskInProgress are not nullable though - ?) + // @todo log error message; do nothing return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 0a56b4933a1..8644bca6143 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import jakarta.persistence.Column; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; @@ -14,6 +15,7 @@ import jakarta.persistence.GeneratedValue; import jakarta.persistence.GenerationType; import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; /** * @@ -67,24 +69,27 @@ public String toString() { } } - @Column + @Column(nullable=false) @Enumerated(EnumType.STRING) private TaskType taskType; /** * Globus API token that should be used to monitor the status of the task */ - @Column + @Column(nullable=false) private String globusToken; /** - * This is the Dataverse API token of the user who initiated the Globus task - */ - private String apiToken; + * This is the the user who initiated the Globus task + */ + @ManyToOne + @JoinColumn + private AuthenticatedUser user; - @Column + @Column(nullable=false) private String ruleId; + @JoinColumn(nullable = false) @ManyToOne private Dataset dataset; @@ -94,12 +99,12 @@ public String toString() { public GlobusTaskInProgress() { } - GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, String ruleId, Timestamp startTime) { + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, AuthenticatedUser authUser, String ruleId, Timestamp startTime) { this.taskId = taskId; this.taskType = taskType; this.dataset = dataset; this.globusToken = globusToken; - this.apiToken = apiToken; + this.user = authUser; this.ruleId = ruleId; this.startTime = startTime; } @@ -138,12 +143,12 @@ public void setGlobusToken(String clientToken) { this.globusToken = clientToken; } - public String getApiToken() { - return apiToken; + public AuthenticatedUser getLocalUser() { + return user; } - public void setApiToken(String apiToken) { - this.apiToken = apiToken; + public void setLocalUser(AuthenticatedUser authUser) { + 
this.user = authUser; } public String getRuleId() { From 69cfe2909deb099a201442d36101d6686204ad70 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 12:40:58 -0400 Subject: [PATCH 38/84] changed the polling interval default in the new TaskMonitoringServiceBean #10623 --- doc/sphinx-guides/source/installation/config.rst | 2 +- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 731eef71c57..fe438504f06 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4784,7 +4784,7 @@ The URL where the `dataverse-globus tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { From d223a8f69166778b5015d7d9ad4f8e733ce22394 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 13:31:02 -0400 Subject: [PATCH 39/84] more changes/refinements per review feedback (#10623) --- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 3 ++- .../harvard/iq/dataverse/ingest/IngestServiceBean.java | 4 ++-- .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 1 + .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 9 --------- src/main/java/propertyFiles/Bundle.properties | 2 +- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index 341714539cf..fdb2b222804 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.globus; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.annotation.PostConstruct; @@ -48,7 +49,7 @@ public class TaskMonitoringServiceBean { @PostConstruct public void init() { - if (systemConfig.isGlobusTaskMonitoringServer()) { + if (JvmSettings.GLOBUS_TASK_MONITORING_SERVER.lookupOptional(Boolean.class).orElse(false)) { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 600); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 3f76a319902..b42fd950528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -345,8 +345,8 @@ public List saveAndAddFilesToDataset(DatasetVersion version, StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); - // (this will make a remote call to check if the file exists - // and obtain its size) + // (the .open() above makes a remote call to check if + // the file exists and obtains its size) confirmedFileSize = dataAccess.getSize(); // For directly-uploaded files, we will perform the file size diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 9d13be005c9..262d488acab 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -51,6 +51,7 @@ public enum JvmSettings { DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), + GLOBUS_TASK_MONITORING_SERVER(SCOPE_FILES, "globus-monitoring-server"), //STORAGE DRIVER SETTINGS SCOPE_DRIVER(SCOPE_FILES), diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 7417a5db4d4..c664d7d6730 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -82,7 +82,6 @@ public class SystemConfig { private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; - private static final String JVM_GLOBUS_TASK_MONITORING_OPTION = "dataverse.globus.taskMonitoringServer"; private static final long DEFAULT_GUESTBOOK_RESPONSES_DISPLAY_LIMIT = 5000L; private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_IMAGE = 3000000L; // 3 MB @@ -547,14 +546,6 @@ public boolean isTimerServer() { return false; } - public boolean isGlobusTaskMonitoringServer() { - String optionValue = System.getProperty(JVM_GLOBUS_TASK_MONITORING_OPTION); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - public String getFooterCopyrightAndYear() { return BundleUtil.getStringFromBundle("footer.copyright", Arrays.asList(Year.now().getValue() + "")); } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 25ae8ad00c0..d7e30c5e0e3 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -253,7 +253,7 @@ notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId= notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}
notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.

{3}
notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.

{3}
-notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.

{3}
+notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus endpoints for Dataset {2} failed, as reported via Globus API.

{3}
notification.mail.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.

{3}
notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.

{3}
notification.import.filesystem=Dataset {1} has been successfully uploaded and verified. From 0ca5e621002711befaba23ba21208e2c19f08a92 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 14:32:19 -0400 Subject: [PATCH 40/84] added an upfront locks check to the /addGlobusFiles api #10623 --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 960cc408ee5..47505ef3879 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -99,6 +99,7 @@ import java.util.stream.Collectors; import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; @@ -4045,7 +4046,16 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); } - // @todo check if the dataset is already locked! + // Check if the dataset is already locked + // We are reusing the code and logic used by various command to determine + // if there are any locks on the dataset that would prevent the current + // users from modifying it: + try { + DataverseRequest dataverseRequest = createDataverseRequest(authUser); + permissionService.checkEditDatasetLock(dataset, dataverseRequest, new UpdateDatasetVersionCommand(dataset, dataverseRequest)); + } catch (IllegalCommandException icex) { + return error(Response.Status.FORBIDDEN, "Dataset " + datasetId + " is locked: " + icex.getLocalizedMessage()); + } JsonObject jsonObject = null; try { From 23d0f6c8db678c2f87e7fc2ff4bffabbc3b44d94 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 14:44:26 -0400 Subject: [PATCH 41/84] added an upfront locks check to the /addGlobusFiles api #10623 --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 47505ef3879..d8e010940cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4052,7 +4052,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, // users from modifying it: try { DataverseRequest dataverseRequest = createDataverseRequest(authUser); - permissionService.checkEditDatasetLock(dataset, dataverseRequest, new UpdateDatasetVersionCommand(dataset, dataverseRequest)); + permissionService.checkEditDatasetLock(dataset, dataverseRequest, null); } catch (IllegalCommandException icex) { return error(Response.Status.FORBIDDEN, "Dataset " + datasetId + " is locked: " + icex.getLocalizedMessage()); } From b8878b00f3ece6b9b595fa4367e559fe7db2e471 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 6 Sep 2024 10:32:05 -0400 Subject: [PATCH 42/84] fix labels when cvoc is used --- src/main/webapp/metadataFragment.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml 
index 0da2953a0d0..723f95148cd 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -251,7 +251,7 @@
- + #{dsf.datasetFieldType.localeTitle}

-