From 709b4da87c0186efc049a9b7625fd04fdddd9797 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 24 Nov 2025 16:16:15 -0500 Subject: [PATCH 01/31] try async command for archiving --- .../edu/harvard/iq/dataverse/DatasetPage.java | 25 ++++++++---------- .../iq/dataverse/EjbDataverseEngine.java | 26 +++++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 1 + 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 20617160a1c..b97b8ec6578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,20 +6101,17 @@ public void archiveVersion(Long id) { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - DatasetVersion version = commandEngine.submit(cmd); - if (!version.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - logger.info( - "DatasetVersion id=" + version.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); - } else { - logger.severe("Error submitting version " + version.getId() + " due to conflict/error at Archive"); - } - if (version.getArchivalCopyLocation() != null) { - setVersionTabList(resetVersionTabList()); - this.setVersionTabListForPostLoad(getVersionTabList()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.success")); - } else { - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); - } + commandEngine.submitAsync(cmd); + + // Set initial pending status + dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + + logger.info( + "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + setVersionTabList(resetVersionTabList()); + this.setVersionTabListForPostLoad(getVersionTabList()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); + } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 4d6d59cb013..5a3f105497d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -31,6 +31,9 @@ import java.util.Map; import java.util.Set; + +import jakarta.ejb.AsyncResult; +import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.inject.Named; @@ -45,6 +48,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.Stack; +import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.annotation.Resource; @@ -348,6 +352,28 @@ public R submit(Command aCommand) throws CommandException { logSvc.log(logRec); } } + + /** + * Submits a command for asynchronous execution. + * The command will be executed in a separate thread and won't block the caller. 
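+ * A typical call site (a sketch only; the engine and command variable names are assumptions, not part of this patch) would be: Future&lt;DatasetVersion&gt; pending = commandEngine.submitAsync(cmd); — the call returns as soon as the container queues it, and the caller can poll pending.isDone() or simply rely on the archival status the command persists.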
+ * + * @param <R> The return type of the command + * @param aCommand The command to execute + * @return A Future representing the pending result + * @throws CommandException if the command cannot be submitted + */ + @Asynchronous + public <R> Future<R> submitAsync(Command<R> aCommand) throws CommandException { + try { + logger.log(Level.INFO, "Submitting async command: {0}", aCommand.getClass().getSimpleName()); + R result = submit(aCommand); + return new AsyncResult<>(result); + } catch (Exception e) { + logger.log(Level.SEVERE, "Async command execution failed: " + aCommand.getClass().getSimpleName(), e); + throw e; + } + } protected void completeCommand(Command command, Object r, Stack called) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index f6c0054a43a..d9b9fd7bc48 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2699,6 +2699,7 @@ dataset.notlinked.msg=There was a problem linking this dataset to yours: dataset.linking.popop.already.linked.note=Note: This dataset is already linked to the following dataverse(s): dataset.linking.popup.not.linked.note=Note: This dataset is not linked to any of your accessible dataverses datasetversion.archive.success=Archival copy of Version successfully submitted +datasetversion.archive.inprogress=Dataset archiving has been started datasetversion.archive.failure=Error in submitting an archival copy datasetversion.update.failure=Dataset Version Update failed. Changes are still in the DRAFT version. datasetversion.update.archive.failure=Dataset Version Update succeeded, but the attempt to update the archival copy failed. From 6487c1433f1c960d645250cea421c1659120d3c9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 24 Nov 2025 17:07:48 -0500 Subject: [PATCH 02/31] save status --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index b97b8ec6578..0bf0db42728 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,10 +6101,12 @@ public void archiveVersion(Long id) { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - commandEngine.submitAsync(cmd); - + // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + dv = datasetVersionService.merge(dv); + + commandEngine.submitAsync(cmd); logger.info( "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); From 9d32051fe76d0914fc35d21f693211054fc0c38a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 8 Jan 2026 13:07:23 -0500 Subject: [PATCH 03/31] refactor, use persistArchivalCopyLocation everywhere --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../iq/dataverse/DatasetVersionServiceBean.java | 17 +++++++++++++++++ .../edu/harvard/iq/dataverse/api/Datasets.java | 1 + .../impl/AbstractSubmitToArchiveCommand.java | 3 ++- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0bf0db42728..281734cd66e 100644 ---
a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6104,7 +6104,7 @@ public void archiveVersion(Long id) { // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - dv = datasetVersionService.merge(dv); + datasetVersionService.persistArchivalCopyLocation(dv); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 60df1fd3dfd..7656f975d2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1333,4 +1333,21 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer return em.createQuery(cq).getSingleResult(); } + + + /** + * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * + * @param dv + * The dataset version whose archival copy location we want to update. Must not be {@code null}. + * @param archivalStatusPending + * the JSON status string, may be {@code null}. + */ + public void persistArchivalCopyLocation(DatasetVersion dv) { + em.createNativeQuery( + "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2") + .setParameter(1, dv.getArchivalCopyLocation()) + .setParameter(2, dv.getId()) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 1b3016ec2f4..c8e66115575 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1280,6 +1280,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. 
Note that running this * command within the CuratePublishedDatasetVersionCommand was causing an error: diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 29c27d0396d..7e39a8e7b85 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -63,7 +63,8 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { token = ctxt.authentication().generateApiTokenForUser(user); } performArchiveSubmission(version, token, requestedSettings); - return ctxt.em().merge(version); + ctxt.datasetVersion().persistArchivalCopyLocation(version); + return version; } /** From ec5046cc161193fd102481a9a53cb439c5768f94 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 12 Jan 2026 10:55:48 -0500 Subject: [PATCH 04/31] catch OLE when persisting archivalcopylocation --- .../dataverse/DatasetVersionServiceBean.java | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 7656f975d2a..b5e964e5673 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -33,6 +33,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; +import jakarta.persistence.OptimisticLockException; import jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; import jakarta.persistence.TypedQuery; @@ -1336,18 +1337,25 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer /** - * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred + * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred so this method will check + * for OptimisticLockExceptions and retry the update with the latest version. * * @param dv * The dataset version whose archival copy location we want to update. Must not be {@code null}. - * @param archivalStatusPending - * the JSON status string, may be {@code null}. */ public void persistArchivalCopyLocation(DatasetVersion dv) { - em.createNativeQuery( - "UPDATE datasetversion SET archivalcopylocation = ?1 WHERE id = ?2") - .setParameter(1, dv.getArchivalCopyLocation()) - .setParameter(2, dv.getId()) - .executeUpdate(); + try { + em.merge(dv); + em.flush(); // Force the update and version check immediately + } catch (OptimisticLockException ole) { + logger.log(Level.INFO, "OptimisticLockException while persisting archival copy location for DatasetVersion id={0}. 
Retrying on latest version.", dv.getId()); + DatasetVersion currentVersion = find(dv.getId()); + if (currentVersion != null) { + currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation()); + em.merge(currentVersion); + } else { + logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId()); + } + } } } From c1055b87cd3445adc0a21f4248c1ec2fb4442774 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:23:22 -0500 Subject: [PATCH 05/31] Add obsolete state, update display, add supportsDelete --- .../edu/harvard/iq/dataverse/DatasetPage.java | 81 ++++++++++++------- .../harvard/iq/dataverse/DatasetVersion.java | 1 + .../impl/AbstractSubmitToArchiveCommand.java | 4 + .../GoogleCloudSubmitToArchiveCommand.java | 39 ++++++++- src/main/java/propertyFiles/Bundle.properties | 1 + src/main/webapp/dataset-versions.xhtml | 8 +- 6 files changed, 103 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 281734cd66e..0832560eafb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -42,6 +42,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.ingest.IngestRequest; @@ -105,6 +106,9 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.persistence.OptimisticLockException; import org.apache.commons.lang3.StringUtils; @@ -2992,27 +2996,40 @@ public String updateCurrentVersion() { String className = settingsService.get(SettingsServiceBean.Key.ArchiverClassName.toString()); AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), updateVersion); if (archiveCommand != null) { - // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); - /* - * Then try to generate and submit an archival copy. Note that running this - * command within the CuratePublishedDatasetVersionCommand was causing an error: - * "The attribute [id] of class - * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary - * key column in the database. Updates are not allowed." To avoid that, and to - * simplify reporting back to the GUI whether this optional step succeeded, I've - * pulled this out as a separate submit(). 
*/ - try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + //There is an archiver configured, so now decide what to do: + // If a successful copy exists, don't automatically update, just note the old copy is obsolete (and enable the superadmin button in the display to allow a manual update if desired) + // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare) + // If a failure or null, rerun archiving now. If a failure is due to an existing copy in the repo, we'll fail again + String status = updateVersion.getArchivalCopyLocationStatus(); + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){ + // Delete the record of any existing copy since it is now out of date/incorrect + updateVersion.setArchivalCopyLocation(null); + /* + * Then try to generate and submit an archival copy. Note that running this + * command within the CuratePublishedDatasetVersionCommand was causing an error: + * "The attribute [id] of class + * [edu.harvard.iq.dataverse.DatasetFieldCompoundValue] is mapped to a primary + * key column in the database. Updates are not allowed." To avoid that, and to + * simplify reporting back to the GUI whether this optional step succeeded, I've + * pulled this out as a separate submit(). + */ + try { + updateVersion = commandEngine.submit(archiveCommand); + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); + } else { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); + } + } catch (CommandException ex) { + errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); + logger.severe(ex.getMessage()); } - } catch (CommandException ex) { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); - logger.severe(ex.getMessage()); + } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); + JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); + job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.merge(updateVersion); } } } @@ -6094,14 +6111,16 @@ public void refreshPaginator() { * * @param id - the id of the datasetversion to archive.
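+ * @param force - if true, resubmit even though an archival status already exists (honored below only when the archiver's supportsDelete() is true)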
*/ - public void archiveVersion(Long id) { + public void archiveVersion(Long id, boolean force) { if (session.getUser() instanceof AuthenticatedUser) { DatasetVersion dv = datasetVersionService.retrieveDatasetVersionByVersionId(id).getDatasetVersion(); String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); if (cmd != null) { try { - + String status = dv.getArchivalCopyLocationStatus(); + if(status == null || (force && cmd.supportsDelete())){ + // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); datasetVersionService.persistArchivalCopyLocation(dv); @@ -6113,7 +6132,7 @@ setVersionTabList(resetVersionTabList()); this.setVersionTabListForPostLoad(getVersionTabList()); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); - + } } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("datasetversion.archive.failure")); @@ -6146,21 +6165,26 @@ public boolean isArchivable() { return archivable; } + /** Method to decide if a 'Submit' button should be enabled for archiving a dataset version. */ public boolean isVersionArchivable() { if (versionArchivable == null) { // If this dataset isn't in an archivable collection return false versionArchivable = false; if (isArchivable()) { - boolean checkForArchivalCopy = false; + // Otherwise, we need to know if the archiver is single-version-only // If it is, we have to check for an existing archived version to answer the // question String className = settingsWrapper.getValueForKey(SettingsServiceBean.Key.ArchiverClassName, null); if (className != null) { try { + boolean checkForArchivalCopy = false; Class clazz = Class.forName(className); Method m = clazz.getMethod("isSingleVersion", SettingsWrapper.class); + Method m2 = clazz.getMethod("supportsDelete"); + Object[] params = { settingsWrapper }; + boolean supportsDelete = (Boolean) m2.invoke(null); checkForArchivalCopy = (Boolean) m.invoke(null, params); if (checkForArchivalCopy) { // one version is already archived (or attempted - any non-null status) versionArchivable = !isSomeVersionArchived(); } else { - // If we allow multiple versions or didn't find one that has had archiving run - // on it, we can archive, so return true - versionArchivable = true; + // Archiving per version is supported: we can archive when this version has + // no archival status yet, or when the archiver can delete prior runs and the + // status is neither success nor pending, so return true in those cases + String status = workingVersion.getArchivalCopyLocationStatus(); + versionArchivable = (status == null) || ((!status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS) && (!status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING)) && supportsDelete)); } } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 93b0ccfef61..0de0dedc860 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -132,6 +132,7 @@ public enum VersionState { public static final String ARCHIVAL_STATUS_PENDING = "pending"; public static final String ARCHIVAL_STATUS_SUCCESS = "success"; public static final String ARCHIVAL_STATUS_FAILURE = "failure"; + public static final String ARCHIVAL_STATUS_OBSOLETE = "obsolete"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 7e39a8e7b85..f7716534b7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -184,4 +184,8 @@ public static boolean isSingleVersion(SettingsWrapper settingsWrapper) { public static boolean isSingleVersion(SettingsServiceBean settingsService) { return false; } + + public static boolean supportsDelete() { + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 7dfb9f07e19..97ca104f01c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -44,6 +44,11 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi super(aRequest, version); } + @Override + public static boolean supportsDelete() { + return true; + } + @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); @@ -73,6 +78,34 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); + // Check for and delete existing files for this version + String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + + logger.fine("Checking for existing files in archive..."); + + try { + Blob existingDatacite = bucket.get(dataciteFileName); + if (existingDatacite != null && existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + existingDatacite.delete(); + logger.fine("Deleted existing datacite.xml"); + } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + } + + try { + Blob existingBag = bucket.get(bagFileName); + if (existingBag != null && existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + bagFileName); + existingBag.delete(); + logger.fine("Deleted existing bag file"); + } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + } + String dataciteXml = getDataCiteXml(dv); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); try (PipedInputStream dataciteIn = new PipedInputStream(); @@ -102,7 +135,7 @@ public void run() { Thread.sleep(10); i++; } - Blob dcXml = bucket.create(spaceName 
+ "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml", digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + Blob dcXml = bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); dcThread.join(); String checksum = dcXml.getMd5ToHexString(); @@ -131,7 +164,7 @@ public void run() { try (PipedInputStream in = new PipedInputStream(100000); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - Blob bag = bucket.create(spaceName + "/" + fileName, digestInputStream2, "application/zip", + Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", Bucket.BlobWriteOption.doesNotExist()); if (bag.getSize() == 0) { throw new IOException("Empty Bag"); @@ -139,7 +172,7 @@ public void run() { bagThread.join(); checksum = bag.getMd5ToHexString(); - logger.fine("Bag: " + fileName + " added with checksum: " + checksum); + logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); if (!success || !checksum.equals(localchecksum)) { logger.severe(success ? checksum + " not equal to " + localchecksum diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index d9b9fd7bc48..dbc2ce40657 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2141,6 +2141,7 @@ file.dataFilesTab.versions.headers.contributors.withheld=Contributor name(s) wit file.dataFilesTab.versions.headers.published=Published on file.dataFilesTab.versions.headers.archived=Archival Status file.dataFilesTab.versions.headers.archived.success=Archived +file.dataFilesTab.versions.headers.archived.obsolete=Original Version Archived file.dataFilesTab.versions.headers.archived.pending=Pending file.dataFilesTab.versions.headers.archived.failure=Failed file.dataFilesTab.versions.headers.archived.notarchived=Not Archived diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 9e5f0a9b24d..1f33675bd3d 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -169,9 +169,15 @@ + + + + + + - From f912fd043945850ac87d396833cdc9c94d62f56c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:34:32 -0500 Subject: [PATCH 06/31] doc that api doesn't handls supportsDelete yet --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 18f28569d7d..10aadde57b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2067,6 +2067,7 @@ public Response submitDatasetVersionToArchive(@Context ContainerRequestContext c if(dv==null) { return error(Status.BAD_REQUEST, "Requested version not found."); } + //ToDo - allow forcing with a non-success status for archivers that supportsDelete() if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); // Note - the user is being sent via the createDataverseRequest(au) call to the @@ -2132,7 +2133,7 @@ public Response archiveAllUnarchivedDatasetVersions(@Context ContainerRequestCon try { 
AuthenticatedUser au = getRequestAuthenticatedUserOrDie(crc); - + //ToDo - allow forcing with a non-success status for archivers that supportsDelete() List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { if (listonly) { From 00f115e23e50f8d70338256fbd34d8270a9900a1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 25 Nov 2025 14:55:51 -0500 Subject: [PATCH 07/31] support reflective and instance calls re: delete capability --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../impl/AbstractSubmitToArchiveCommand.java | 14 +++++++++++--- .../impl/GoogleCloudSubmitToArchiveCommand.java | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0832560eafb..09669fb789e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6119,7 +6119,7 @@ public void archiveVersion(Long id, boolean force) { if (cmd != null) { try { String status = dv.getArchivalCopyLocationStatus(); - if(status == null || (force && cmd.supportsDelete())){ + if(status == null || (force && cmd.canDelete())){ // Set initial pending status dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index f7716534b7f..aaeef193ff4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -185,7 +185,15 @@ public static boolean isSingleVersion(SettingsServiceBean settingsService) { return false; } - public static boolean supportsDelete() { - return false; - } + /** Whether the archiver can delete existing archival files (and thus can retry when the existing files are incomplete/obsolete) + * A static version supports calls via reflection while the instance method supports inheritance for use on actual command instances (see DatasetPage for both use cases). 
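* For example (illustrative): a caller holding only the archiver class name can use (Boolean) Class.forName(className).getMethod("supportsDelete").invoke(null), while a caller with a command instance can simply call cmd.canDelete().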
+ * @return + */ + public static boolean supportsDelete() { + return false; + } + + public boolean canDelete() { + return supportsDelete(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 97ca104f01c..61a38cffc99 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -44,10 +44,13 @@ public GoogleCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersi super(aRequest, version); } - @Override public static boolean supportsDelete() { return true; } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { From bc403703ab672d1ac30ba16d928a3eaa1de87214 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 10 Dec 2025 16:14:30 -0500 Subject: [PATCH 08/31] use query to update status, async everywhere --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 14 +++++--------- .../edu/harvard/iq/dataverse/api/Datasets.java | 10 +++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 09669fb789e..db9e9caa671 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3014,21 +3014,18 @@ public String updateCurrentVersion() { * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } catch (CommandException ex) { errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { + //Not automatically replacing the old archival copy as creating it is expensive JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); datasetVersionService.merge(updateVersion); } } @@ -6122,8 +6119,7 @@ public void archiveVersion(Long id, boolean force) { if(status == null || (force && cmd.canDelete())){ // Set initial pending status - dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); - datasetVersionService.persistArchivalCopyLocation(dv); + datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c8e66115575..bf0f7c6668a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + datasetVersionSvc.setArchivalCopyLocation(updateVersion, null); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this @@ -1291,12 +1291,8 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( * pulled this out as a separate submit(). */ try { - updateVersion = commandEngine.submit(archiveCommand); - if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); - } else { - successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); - } + commandEngine.submitAsync(archiveCommand); + successMsg = BundleUtil.getStringFromBundle("datasetversion.archive.inprogress"); } catch (CommandException ex) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure") + " - " + ex.toString(); logger.severe(ex.getMessage()); From df9b5cec3c83ec066dc274d35edea9ee9f9e98a6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 Dec 2025 18:23:56 -0500 Subject: [PATCH 09/31] fixes for dataset page re: archiving --- src/main/webapp/dataset-versions.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/dataset-versions.xhtml b/src/main/webapp/dataset-versions.xhtml index 1f33675bd3d..89a8162c135 100644 --- a/src/main/webapp/dataset-versions.xhtml +++ b/src/main/webapp/dataset-versions.xhtml @@ -170,14 +170,14 @@ - + - From a64e1f749c2f44c14b4386e1c22195e1c65d8ea8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 16 Jan 2026 13:33:19 -0500 Subject: [PATCH 10/31] merge issues --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index db9e9caa671..4b559af3878 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6101,10 +6101,7 @@ public void refreshPaginator() { /** * This method can be called from *.xhtml files to allow archiving of a dataset - * version from the user interface. It is not currently (11/18) used in the IQSS/develop - * branch, but is used by QDR and is kept here in anticipation of including a - * GUI option to archive (already published) versions after other dataset page - * changes have been completed. + * version from the user interface. * * @param id - the id of the datasetversion to archive. 
*/ From c55230ee81481b465323b16800e98679fe5fa36c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jan 2026 17:38:37 -0500 Subject: [PATCH 11/31] merge fix of persistArchivalCopy method refactors --- .../edu/harvard/iq/dataverse/DatasetPage.java | 30 +++++++++---------- .../harvard/iq/dataverse/DatasetVersion.java | 24 ++++++++++----- .../harvard/iq/dataverse/api/Datasets.java | 2 +- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 4b559af3878..fe17a137361 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3022,10 +3022,8 @@ public String updateCurrentVersion() { } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { //Not automatically replacing the old archival copy as creating it is expensive - JsonObject archivalLocation = JsonUtil.getJsonObject(updateVersion.getArchivalCopyLocation()); - JsonObjectBuilder job = Json.createObjectBuilder(archivalLocation); - job.add(DatasetVersion.ARCHIVAL_STATUS,DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); - datasetVersionService.setArchivalCopyLocation(updateVersion, JsonUtil.prettyPrint(job.build())); + updateVersion.setArchivalStatus(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + datasetVersionService.persistArchivalCopyLocation(updateVersion); datasetVersionService.merge(updateVersion); } } @@ -6113,18 +6111,18 @@ public void archiveVersion(Long id, boolean force) { if (cmd != null) { try { String status = dv.getArchivalCopyLocationStatus(); - if(status == null || (force && cmd.canDelete())){ - - // Set initial pending status - datasetVersionService.setArchivalCopyLocation(dv, DatasetVersion.ARCHIVAL_STATUS_PENDING); - - commandEngine.submitAsync(cmd); - - logger.info( - "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); - setVersionTabList(resetVersionTabList()); - this.setVersionTabListForPostLoad(getVersionTabList()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); + if (status == null || (force && cmd.canDelete())) { + + // Set initial pending status + dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + datasetVersionService.persistArchivalCopyLocation(dv); + commandEngine.submitAsync(cmd); + + logger.info( + "DatasetVersion id=" + dv.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + setVersionTabList(resetVersionTabList()); + this.setVersionTabListForPostLoad(getVersionTabList()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.inprogress")); } } catch (CommandException ex) { logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 0de0dedc860..1248a8266ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -232,8 +232,9 @@ public enum VersionState { @Transient private DatasetVersionDifference dvd; + //The Json version of the archivalCopyLocation string @Transient - private JsonObject archivalStatus; + private JsonObject archivalCopyLocationJson; public Long getId() { return this.id; @@ -384,24 +385,24 @@ public String getArchivalCopyLocation() { public String 
getArchivalCopyLocationStatus() { populateArchivalStatus(false); - if(archivalStatus!=null) { - return archivalStatus.getString(ARCHIVAL_STATUS); + if(archivalCopyLocationJson!=null) { + return archivalCopyLocationJson.getString(ARCHIVAL_STATUS); } return null; } public String getArchivalCopyLocationMessage() { populateArchivalStatus(false); - if(archivalStatus!=null) { - return archivalStatus.getString(ARCHIVAL_STATUS_MESSAGE); + if(archivalCopyLocationJson!=null) { + return archivalCopyLocationJson.getString(ARCHIVAL_STATUS_MESSAGE); } return null; } private void populateArchivalStatus(boolean force) { - if(archivalStatus ==null || force) { + if(archivalCopyLocationJson ==null || force) { if(archivalCopyLocation!=null) { try { - archivalStatus = JsonUtil.getJsonObject(archivalCopyLocation); + archivalCopyLocationJson = JsonUtil.getJsonObject(archivalCopyLocation); } catch(Exception e) { logger.warning("DatasetVersion id: " + id + " has a non-JsonObject value, parsing error: " + e.getMessage()); logger.fine(archivalCopyLocation); @@ -415,6 +416,15 @@ public void setArchivalCopyLocation(String location) { populateArchivalStatus(true); } + // Convenience method to set only the status + public void setArchivalStatus(String status) { + populateArchivalStatus(false); + JsonObjectBuilder job = Json.createObjectBuilder(archivalCopyLocationJson); + job.add(DatasetVersion.ARCHIVAL_STATUS, status); + archivalCopyLocationJson = job.build(); + archivalCopyLocation = JsonUtil.prettyPrint(archivalCopyLocationJson); + } + public String getDeaccessionLink() { return deaccessionLink; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index bf0f7c6668a..dba4b36d4da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1279,7 +1279,7 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam( AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion); if (archiveCommand != null) { // Delete the record of any existing copy since it is now out of date/incorrect - datasetVersionSvc.setArchivalCopyLocation(updateVersion, null); + updateVersion.setArchivalCopyLocation(null); datasetVersionSvc.persistArchivalCopyLocation(updateVersion); From 905570a81563b8428042398ac1778fd4d380b61d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 22 Jan 2026 12:57:38 -0500 Subject: [PATCH 12/31] add flag, docs --- doc/sphinx-guides/source/installation/config.rst | 10 ++++++++++ .../java/edu/harvard/iq/dataverse/DatasetPage.java | 3 ++- .../harvard/iq/dataverse/settings/FeatureFlags.java | 13 +++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a9d5c7c0041..68982881d77 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2263,6 +2263,9 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers).
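+For context, a ``dataverse.feature.*`` flag such as the one introduced below is enabled with a JVM option of the form ``-Ddataverse.feature.archive-on-version-update=true`` or the matching environment variable ``DATAVERSE_FEATURE_ARCHIVE_ON_VERSION_UPDATE=true`` (a sketch of the usual conventions; the exact spellings here are assumptions, not part of this patch).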
+Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. + .. _Duracloud Configuration: Duracloud Configuration @@ -4031,6 +4034,13 @@ dataverse.feature.only-update-datacite-when-needed Only contact DataCite to update a DOI after checking to see if DataCite has outdated information (for efficiency, lighter load on DataCite, especially when using file DOIs). +.. _dataverse.feature.archive-on-version-update: + +dataverse.feature.archive-on-version-update ++++++++++++++++++++++++++++++++++++++++++++ + +Indicates whether archival bag creation should be triggered (if configured) when a version is updated and was already successfully archived, +i.e. via the Update-Current-Version publication option. Setting the flag true only works if the archiver being used supports deleting existing archival bags. diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index fe17a137361..a091005b392 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -164,6 +164,7 @@ import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrClientService; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -3001,7 +3002,7 @@ public String updateCurrentVersion() { // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare) // If a failure or null, rerun archiving now. If a failure is due to an existing copy in the repo, we'll fail again String status = updateVersion.getArchivalCopyLocationStatus(); - if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)){ + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){ diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 2e86fae610e..fdbdb257dbe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -249,6 +249,19 @@ public enum FeatureFlags { * @since Dataverse 6.9 */ ONLY_UPDATE_DATACITE_WHEN_NEEDED("only-update-datacite-when-needed"), + /** + * Indicates whether archival bag creation should be triggered (if configured) when a version + * is updated and was already successfully archived, i.e. via the Update-Current-Version publication option.
+ * Since archiving can be resource intensive, it may not be worthwhile to automatically re-archive for the + * types of minor changes "Update-Current-Version" is intended for. Note that this flag is only effective + * for archivers that support deletion of existing files. When the flag is false, or the archiver cannot + * delete, the existing archival status will be changed to "Obsolete". + * + * * @apiNote Raise flag by setting "dataverse.feature.archive-on-version-update" + * + * @since Dataverse 6.10 + */ + ARCHIVE_ON_VERSION_UPDATE("archive-on-version-update"), ; From 521fbf68f2d6ba72b06343c32cf6154b027c899f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 22 Jan 2026 15:01:50 -0500 Subject: [PATCH 13/31] add delete to local and S3 --- .../impl/LocalSubmitToArchiveCommand.java | 49 +++++++++- .../impl/S3SubmitToArchiveCommand.java | 94 ++++++++++++++++--- 2 files changed, 129 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 462879f2ec9..34fadbed703 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -34,6 +34,14 @@ public class LocalSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand public LocalSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } + + public static boolean supportsDelete() { + return true; + } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, @@ -57,15 +65,52 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); + // Define file paths + String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + File existingDatacite = new File(dataciteFileName); + if (existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + if (existingDatacite.delete()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName); + } + } + + File existingBag = new File(zipName); + if (existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + zipName); + if (existingBag.delete()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + zipName); + } + } + + // Also check for and delete the .partial file if it exists + File existingPartial = new File(zipName + ".partial"); + if (existingPartial.exists()) { + logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial"); + if (existingPartial.delete()) { + logger.fine("Deleted existing partial bag file"); + } else { + logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial"); + } + } + String dataciteXml = getDataCiteXml(dv); FileUtils.writeStringToFile( - new File(localPath + 
"/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), + new File(dataciteFileName), dataciteXml, StandardCharsets.UTF_8); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); - zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code? bagger.generateBag(new FileOutputStream(zipName + ".partial")); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 65531d775c8..768d5d03e1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -14,9 +14,7 @@ import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; -import java.io.ByteArrayInputStream; import java.io.File; -import java.io.FileInputStream; import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.concurrent.CompletableFuture; @@ -38,18 +36,15 @@ import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.core.async.AsyncRequestBody; -import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; -import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesRequest; -import software.amazon.awssdk.services.s3.model.GetObjectAttributesResponse; -import software.amazon.awssdk.services.s3.model.ObjectAttributes; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.S3ClientBuilder; -import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.http.async.SdkAsyncHttpClient; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; import software.amazon.awssdk.utils.StringUtils; @@ -76,6 +71,14 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } + + public static boolean supportsDelete() { + return true; + } + @Override + public boolean canDelete() { + return supportsDelete(); + } @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, @@ -105,10 +108,78 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (dataset.getLockFor(Reason.finalizePublication) == null) { spaceName = getSpaceName(dataset); - String dataciteXml = getDataCiteXml(dv); - // Add 
datacite.xml file + + // Define keys for datacite.xml and bag file String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + try { + HeadObjectRequest headDcRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + s3.headObject(headDcRequest).join(); + + // If we get here, the object exists, so delete it + logger.fine("Found existing datacite.xml, deleting: " + dcKey); + DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + CompletableFuture deleteDcFuture = s3.deleteObject(deleteDcRequest); + DeleteObjectResponse deleteDcResponse = deleteDcFuture.join(); + + if (deleteDcResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing datacite.xml"); + } else { + logger.warning("Failed to delete existing datacite.xml: " + dcKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing datacite.xml found"); + } else { + logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage()); + } + } + try { + HeadObjectRequest headBagRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); + + s3.headObject(headBagRequest).join(); + + // If we get here, the object exists, so delete it + logger.fine("Found existing bag file, deleting: " + bagKey); + DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder() + .bucket(bucketName) + .key(bagKey) + .build(); + + CompletableFuture deleteBagFuture = s3.deleteObject(deleteBagRequest); + DeleteObjectResponse deleteBagResponse = deleteBagFuture.join(); + + if (deleteBagResponse.sdkHttpResponse().isSuccessful()) { + logger.fine("Deleted existing bag file"); + } else { + logger.warning("Failed to delete existing bag file: " + bagKey); + } + } catch (Exception e) { + if (e.getCause() instanceof NoSuchKeyException) { + logger.fine("No existing bag file found"); + } else { + logger.warning("Error checking/deleting existing bag file: " + e.getMessage()); + } + } + + String dataciteXml = getDataCiteXml(dv); + // Add datacite.xml file PutObjectRequest putRequest = PutObjectRequest.builder() .bucket(bucketName) .key(dcKey) @@ -128,7 +199,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Store BagIt file String fileName = getFileName(spaceName, dv); - String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the // transfer From ba04ba2455529ed7f8f5bba5cf5818fc255f364e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 27 Jan 2026 16:50:42 -0500 Subject: [PATCH 14/31] fix doc ref --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 68982881d77..d0b4eac6ab2 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with 
more detail available to superusers). Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. -If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. .. _Duracloud Configuration: From 7a186693a02683b752f898b18eb425d3ea84134d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 27 Jan 2026 17:11:32 -0500 Subject: [PATCH 15/31] remove errant : char --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d0b4eac6ab2..d6cea5b16e3 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version. -If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`:dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. +If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used. .. 
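The two doc commits above only repair the cross-reference to the feature flag; the behavior itself is decided at publish time. As a rough sketch of how such a flag would gate re-archiving - assuming the FeatureFlags enum pattern Dataverse uses for dataverse.feature.* JVM options; the ARCHIVE_ON_VERSION_UPDATE constant and the cmd/commandEngine names below are illustrative, not taken from this patch:

    // Hypothetical sketch only - gates automatic re-archival on the flag.
    // Only archivers that can delete an existing bag are eligible.
    if (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && cmd.canDelete()) {
        commandEngine.submitAsync(cmd); // replaces the stale archival bag
    }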
From ae91b78dbbec09899c9040730567e978698d406c Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Fri, 23 Jan 2026 15:05:21 -0500
Subject: [PATCH 16/31] no transaction time limit during bagging from command
 (not workflow)

---
 .../impl/AbstractSubmitToArchiveCommand.java  |  63 +++-
 .../impl/DRSSubmitToArchiveCommand.java       |  78 ++++-
 .../impl/DuraCloudSubmitToArchiveCommand.java | 295 +++++++++---------
 .../GoogleCloudSubmitToArchiveCommand.java    | 215 +++++++------
 .../impl/LocalSubmitToArchiveCommand.java     | 129 ++++----
 .../impl/S3SubmitToArchiveCommand.java        | 219 ++++++------
 .../iq/dataverse/util/bagit/BagGenerator.java |  32 +-
 .../workflow/WorkflowServiceBean.java         |   2 +-
 .../ArchivalSubmissionWorkflowStep.java       |  40 ++-
 9 files changed, 600 insertions(+), 473 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index aaeef193ff4..ffa79456902 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -2,8 +2,9 @@
 
 import edu.harvard.iq.dataverse.DataCitation;
 import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetFieldConstant;
+import edu.harvard.iq.dataverse.DatasetLock.Reason;
 import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.DvObject;
 import edu.harvard.iq.dataverse.SettingsWrapper;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
@@ -17,7 +18,11 @@
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.bagit.BagGenerator;
 import edu.harvard.iq.dataverse.util.bagit.OREMap;
+import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
+import jakarta.ejb.TransactionAttribute;
+import jakarta.ejb.TransactionAttributeType;
+import jakarta.json.JsonObject;
 
 import java.io.IOException;
 import java.io.PipedInputStream;
@@ -30,8 +35,8 @@ @RequiredPermissions(Permission.PublishDataset)
 public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand<DatasetVersion> {
 
-    private final DatasetVersion version;
-    private final Map<String, String> requestedSettings = new HashMap<String, String>();
+    protected final DatasetVersion version;
+    protected final Map<String, String> requestedSettings = new HashMap<String, String>();
     protected boolean success=false;
     private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName());
     private static final int MAX_ZIP_WAIT = 20000;
@@ -43,8 +48,16 @@ public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) {
     }
 
     @Override
+    @TransactionAttribute(TransactionAttributeType.REQUIRED)
     public DatasetVersion execute(CommandContext ctxt) throws CommandException {
 
+        // Check for locks while we're still in a transaction
+        Dataset dataset = version.getDataset();
+        if (dataset.getLockFor(Reason.finalizePublication) != null
+                || dataset.getLockFor(Reason.FileValidationFailed) != null) {
+            throw new CommandException("Dataset is locked and cannot be archived", this);
+        }
+
         String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings);
         String[] settingsArray = settings.split(",");
         for (String setting : settingsArray) {
@@ -62,11 +75,40 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException {
             //No un-expired token
             token = ctxt.authentication().generateApiTokenForUser(user);
         }
-        performArchiveSubmission(version, token, requestedSettings);
-        ctxt.datasetVersion().persistArchivalCopyLocation(version);
+        String dataCiteXml = getDataCiteXml(version);
+        OREMap oreMap = new OREMap(version, false);
+        JsonObject ore = oreMap.getOREMap();
+        Map<String, JsonLDTerm> terms = getJsonLDTerms(oreMap);
+        performArchivingAndPersist(ctxt, version, dataCiteXml, ore, terms, token, requestedSettings);
         return version;
     }
 
+    // While we have a transaction context, get the terms needed to create the baginfo file
+    public Map<String, JsonLDTerm> getJsonLDTerms(OREMap oreMap) {
+        Map<String, JsonLDTerm> terms = new HashMap<String, JsonLDTerm>();
+        terms.put(DatasetFieldConstant.datasetContact, oreMap.getContactTerm());
+        terms.put(DatasetFieldConstant.datasetContactName, oreMap.getContactNameTerm());
+        terms.put(DatasetFieldConstant.datasetContactEmail, oreMap.getContactEmailTerm());
+        terms.put(DatasetFieldConstant.description, oreMap.getDescriptionTerm());
+        terms.put(DatasetFieldConstant.descriptionText, oreMap.getDescriptionTextTerm());
+
+        return terms;
+    }
+
+    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
+    public WorkflowStepResult performArchivingAndPersist(CommandContext ctxt, DatasetVersion version, String dataCiteXml, JsonObject ore, Map<String, JsonLDTerm> terms, ApiToken token, Map<String, String> requestedSettings) {
+        // This runs OUTSIDE any transaction
+        BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads());
+        WorkflowStepResult wfsr = performArchiveSubmission(version, dataCiteXml, ore, terms, token, requestedSettings);
+        persistResult(ctxt, version);
+        return wfsr;
+    }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    private void persistResult(CommandContext ctxt, DatasetVersion versionWithStatus) {
+        // New transaction just for this quick operation
+        ctxt.datasetVersion().persistArchivalCopyLocation(versionWithStatus);
+    }
+
     /**
      * This method is the only one that should be overwritten by other classes. Note
      * that this method may be called from the execute method above OR from a
      * constructor and could be dropped from the parameter list.)
     *
     * @param version - the DatasetVersion to archive
+    * @param ore - the ORE map JSON for the version
+    * @param dataCiteXml - the DataCite XML metadata for the version
+    * @param terms - the JSON-LD terms needed for the bag-info file
     * @param token - an API Token for the user performing this action
     * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans).
     */
-    abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, Map<String, String> requestedSettings);
+    abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, String dataCiteXml, JsonObject ore, Map<String, JsonLDTerm> terms, ApiToken token, Map<String, String> requestedSettings);
 
     protected int getNumberOfBagGeneratorThreads() {
         if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) {
@@ -98,7 +143,7 @@ public String describe() {
                 + version.getFriendlyVersionNumber()+")]";
     }
 
-    String getDataCiteXml(DatasetVersion dv) {
+    public String getDataCiteXml(DatasetVersion dv) {
         DataCitation dc = new DataCitation(dv);
         Map<String, String> metadata = dc.getDataCiteMetadata();
         return DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), metadata,
@@ -106,13 +151,13 @@ String getDataCiteXml(DatasetVersion dv) {
     public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInputStream digestInputStream2,
-            String dataciteXml, ApiToken token) throws IOException, InterruptedException {
+            String dataciteXml, JsonObject ore, Map<String, JsonLDTerm> terms, ApiToken token) throws IOException, InterruptedException {
         Thread bagThread = new Thread(new Runnable() {
             public void run() {
                 try (PipedOutputStream out = new PipedOutputStream(in)) {
                     // Generate bag
-                    BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml);
-                    bagger.setNumConnections(getNumberOfBagGeneratorThreads());
+                    BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms);
                     bagger.setAuthenticationKey(token.getTokenString());
                     bagger.generateBag(out);
                     success = true;
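The heart of this commit is the transaction split in AbstractSubmitToArchiveCommand: quick lock checks and metadata capture run under REQUIRED, the slow bag generation and upload run under NOT_SUPPORTED (so the container's transaction timeout cannot abort them), and the status write gets its own short REQUIRES_NEW transaction. A minimal, self-contained sketch of the pattern, with illustrative names:

    import jakarta.ejb.Stateless;
    import jakarta.ejb.TransactionAttribute;
    import jakarta.ejb.TransactionAttributeType;

    @Stateless
    public class ArchivingFlowSketch {

        @TransactionAttribute(TransactionAttributeType.REQUIRED)
        public void run() {
            // 1) quick checks and metadata capture inside a normal transaction
            archive();
        }

        // 2) the long-running work is suspended out of any transaction, so the
        // container's transaction timeout no longer applies to it
        @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
        public void archive() {
            // ... stream and upload the bag, however long it takes ...
            persistStatus();
        }

        // 3) a short, fresh transaction just to record the outcome
        @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
        public void persistStatus() {
            // merge the archival status back into the database
        }
    }

One caveat worth keeping in mind: container-managed transaction attributes apply only on calls that pass through the EJB proxy, so a plain this.archive() self-call keeps the caller's transaction context; whether the suspension takes effect here depends on how the command engine invokes these methods.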
TRUST_CERT = "trust_cert"; private static final String TIMEOUT = "timeout"; + private String archivableAncestorAlias; + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + @TransactionAttribute(TransactionAttributeType.REQUIRED) + public DatasetVersion execute(CommandContext ctxt) throws CommandException { + + + // Check for locks while we're still in a transaction + Dataset dataset = version.getDataset(); + if (dataset.getLockFor(Reason.finalizePublication) != null + || dataset.getLockFor(Reason.FileValidationFailed) != null) { + throw new CommandException("Dataset is locked and cannot be archived", this); + } + + String settings = ctxt.settings().getValueForKey(SettingsServiceBean.Key.ArchiverSettings); + String[] settingsArray = settings.split(","); + for (String setting : settingsArray) { + setting = setting.trim(); + if (!setting.startsWith(":")) { + logger.warning("Invalid Archiver Setting: " + setting); + } else { + requestedSettings.put(setting, ctxt.settings().get(setting)); + } + } + + // Compute archivable ancestor while we're in a transaction and entities are managed + JsonObject drsConfigObject = null; + try { + drsConfigObject = JsonUtil.getJsonObject(requestedSettings.get(DRS_CONFIG)); + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + + if (drsConfigObject != null) { + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + if (adminMetadata != null) { + JsonObject collectionsObj = adminMetadata.getJsonObject(COLLECTIONS); + if (collectionsObj != null) { + Set collections = collectionsObj.keySet(); + Dataverse ancestor = dataset.getOwner(); + // Compute this while entities are still managed + archivableAncestorAlias = getArchivableAncestor(ancestor, collections); + } + } + } + + AuthenticatedUser user = getRequest().getAuthenticatedUser(); + ApiToken token = ctxt.authentication().findApiTokenByUser(user); + if (token == null) { + //No un-expired token + token = ctxt.authentication().generateApiTokenForUser(user); + } + String dataCiteXml = getDataCiteXml(version); + OREMap oreMap = new OREMap(version, false); + JsonObject ore = oreMap.getOREMap(); + Map terms = getJsonLDTerms(oreMap); + performArchivingAndPersist(ctxt, version, dataCiteXml, ore, terms, token, requestedSettings); + return version; + } + + @Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { logger.fine("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; @@ -97,7 +165,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Set collections = adminMetadata.getJsonObject(COLLECTIONS).keySet(); Dataset dataset = dv.getDataset(); Dataverse ancestor = dataset.getOwner(); - String alias = getArchivableAncestor(ancestor, collections); + String alias = archivableAncestorAlias; // Use the pre-computed alias instead of calling getArchivableAncestor again String spaceName = getSpaceName(dataset); String packageId = getFileName(spaceName, dv); @@ -113,7 +181,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); - WorkflowStepResult s3Result = 
super.performArchiveSubmission(dv, token, requestedSettings); + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, dataciteXml, ore, terms, token, requestedSettings); JsonObjectBuilder statusObject = Json.createObjectBuilder(); statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); @@ -242,7 +310,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.severe("DRS Ingest Failed for: " + packageId + " - response does not include status and message"); return new Failure( - "DRS Archiver fail in Ingest call \" - response does not include status and message"); + "DRS Archiver fail in Ingest call - response does not include status and message"); } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index fe4a25091d7..b1fa777478b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -10,6 +9,8 @@ import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudContext; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudHost; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudPort; + +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; @@ -49,8 +50,8 @@ public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, jakarta.json.JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { String port = requestedSettings.get(DURACLOUD_PORT) != null ? 
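The DRS override duplicates the parent's execute() so that everything the non-transactional phase needs - settings, the archivable ancestor alias, the DataCite XML, the ORE map - is computed while the JPA entities are still managed; walking dataset.getOwner() ancestors after the transaction ends would risk lazy-initialization failures. A small illustrative fragment of the idea (the record and the helper names are hypothetical, not from this patch):

    // Hypothetical sketch: copy what the non-transactional phase needs into
    // plain values while entities are still attached to the persistence context.
    record ArchiveInputs(String dataCiteXml, String ancestorAlias) {}

    static ArchiveInputs captureInputs(DatasetVersion version, Set<String> collections) {
        String xml = buildDataCiteXml(version);   // hypothetical helper, runs in-transaction
        String alias = findArchivableAncestor(version.getDataset().getOwner(), collections); // hypothetical
        return new ArchiveInputs(xml, alias);     // plain Strings survive detachment
    }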
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
index fe4a25091d7..b1fa777478b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
@@ -2,7 +2,6 @@
 
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.DatasetLock.Reason;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
@@ -10,6 +9,8 @@
 import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudContext;
 import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudHost;
 import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DuraCloudPort;
+
+import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
 import edu.harvard.iq.dataverse.workflow.step.Failure;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
 
@@ -49,8 +50,8 @@ public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion
     }
 
     @Override
-    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token,
-            Map<String, String> requestedSettings) {
+    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, jakarta.json.JsonObject ore,
+            Map<String, JsonLDTerm> terms, ApiToken token, Map<String, String> requestedSettings) {
 
         String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT;
@@ -64,173 +65,165 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
         // This will make the archivalCopyLocation non-null after a failure which should
         // stop retries
 
-        if (dataset.getLockFor(Reason.finalizePublication) == null
-                && dataset.getLockFor(Reason.FileValidationFailed) == null) {
-            // Use Duracloud client classes to login
-            ContentStoreManager storeManager = new ContentStoreManagerImpl(host, port, dpnContext);
-            Credential credential = new Credential(System.getProperty("duracloud.username"),
-                    System.getProperty("duracloud.password"));
-            storeManager.login(credential);
+        // Use Duracloud client classes to login
+        ContentStoreManager storeManager = new ContentStoreManagerImpl(host, port, dpnContext);
+        Credential credential = new Credential(System.getProperty("duracloud.username"),
+                System.getProperty("duracloud.password"));
+        storeManager.login(credential);
 
+        /*
+         * Aliases can contain upper case characters which are not allowed in space
+         * names. Similarly, aliases can contain '_' which isn't allowed in a space
+         * name. The line below replaces any upper case chars with lowercase and
+         * replaces any '_' with '.-' . The '-' after the dot assures we don't break the
+         * rule that
+         * "The last period in a space may not immediately be followed by a number".
+         * (Although we could check, it seems better to just add '.-' all the time. As
+         * written the replaceAll will also change any chars not valid in a spaceName to
+         * '.' which would avoid code breaking if the alias constraints change. That
+         * said, this line may map more than one alias to the same spaceName, e.g.
+         * "test" and "Test" aliases both map to the "test" space name. This does not
+         * break anything but does potentially put bags from more than one collection in
+         * the same space.
+         */
+        String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe");
+        String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
+                .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber();
+
+        ContentStore store;
+        //Set a failure status that will be updated if we succeed
+        JsonObjectBuilder statusObject = Json.createObjectBuilder();
+        statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
+        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");
+
+        try {
             /*
-             * Aliases can contain upper case characters which are not allowed in space
-             * names. Similarly, aliases can contain '_' which isn't allowed in a space
-             * name. The line below replaces any upper case chars with lowercase and
-             * replaces any '_' with '.-' . The '-' after the dot assures we don't break the
-             * rule that
-             * "The last period in a aspace may not immediately be followed by a number".
-             * (Although we could check, it seems better to just add '.-' all the time.As
-             * written the replaceAll will also change any chars not valid in a spaceName to
-             * '.' which would avoid code breaking if the alias constraints change. That
-             * said, this line may map more than one alias to the same spaceName, e.g.
-             * "test" and "Test" aliases both map to the "test" space name. This does not
-             * break anything but does potentially put bags from more than one collection in
-             * the same space.
+             * If there is a failure in creating a space, it is likely that a prior version
+             * has not been fully processed (snapshot created, archiving completed and files
+             * and space deleted - currently manual operations done at the project's
+             * duracloud website)
              */
-            String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe");
-            String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
-                    .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber();
-
-            ContentStore store;
-            //Set a failure status that will be updated if we succeed
-            JsonObjectBuilder statusObject = Json.createObjectBuilder();
-            statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
-            statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");
-
-            try {
-                /*
-                 * If there is a failure in creating a space, it is likely that a prior version
-                 * has not been fully processed (snapshot created, archiving completed and files
-                 * and space deleted - currently manual operations done at the project's
-                 * duracloud website)
-                 */
-                store = storeManager.getPrimaryContentStore();
-                // Create space to copy archival files to
-                if (!store.spaceExists(spaceName)) {
-                    store.createSpace(spaceName);
-                }
-                String dataciteXml = getDataCiteXml(dv);
-
-                MessageDigest messageDigest = MessageDigest.getInstance("MD5");
-                try (PipedInputStream dataciteIn = new PipedInputStream();
-                        DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {
-                    // Add datacite.xml file
-
-                    Thread dcThread = new Thread(new Runnable() {
-                        public void run() {
-                            try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) {
-
-                                dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8));
-                                dataciteOut.close();
-                                success=true;
-                            } catch (Exception e) {
-                                logger.severe("Error creating datacite.xml: " + e.getMessage());
-                                // TODO Auto-generated catch block
-                                e.printStackTrace();
-                            }
+            store = storeManager.getPrimaryContentStore();
+            // Create space to copy archival files to
+            if (!store.spaceExists(spaceName)) {
+                store.createSpace(spaceName);
+            }
+
+            MessageDigest messageDigest = MessageDigest.getInstance("MD5");
+            try (PipedInputStream dataciteIn = new PipedInputStream();
+                    DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) {
+                // Add datacite.xml file
+
+                Thread dcThread = new Thread(new Runnable() {
+                    public void run() {
+                        try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) {
+
+                            dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8));
+                            dataciteOut.close();
+                            success=true;
+                        } catch (Exception e) {
+                            logger.severe("Error creating datacite.xml: " + e.getMessage());
+                            // TODO Auto-generated catch block
+                            e.printStackTrace();
                         }
-                    });
-                    dcThread.start();
-                    // Have seen Pipe Closed errors for other archivers when used as a workflow
-                    // without this delay loop
-                    int i = 0;
-                    while (digestInputStream.available() <= 0 && i < 100) {
-                        Thread.sleep(10);
-                        i++;
                     }
-                    String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream,
-                            -1l, null, null, null);
-                    logger.fine("Content: datacite.xml added with checksum: " + checksum);
-                    dcThread.join();
-                    String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                });
+                dcThread.start();
+                // Have seen Pipe Closed errors for other archivers when used as a workflow
+                // without this delay loop
+                int i = 0;
+                while (digestInputStream.available() <= 0 && i < 100) {
+                    Thread.sleep(10);
+                    i++;
+                }
+                String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream,
+                        -1l, null, null, null);
+                logger.fine("Content: datacite.xml added with checksum: " + checksum);
+                dcThread.join();
+                String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
+                if (!success || !checksum.equals(localchecksum)) {
+                    logger.severe("Failure on " + baseFileName);
+                    logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud");
+                    try {
+                        store.deleteContent(spaceName, baseFileName + "_datacite.xml");
+                    } catch (ContentStoreException cse) {
+                        logger.warning(cse.getMessage());
+                    }
+                    return new Failure("Error in transferring DataCite.xml file to DuraCloud",
+                            "DuraCloud Submission Failure: incomplete metadata transfer");
+                }
+
+                // Store BagIt file
+                success = false;
+                String fileName = baseFileName + ".zip";
+
+                // Add BagIt ZIP file
+                // Although DuraCloud uses SHA-256 internally, its API uses MD5 to verify the
+                // transfer
+
+                messageDigest = MessageDigest.getInstance("MD5");
+                try (PipedInputStream in = new PipedInputStream(100000);
+                        DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) {
+                    Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, ore, terms, token);
+                    checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null);
+                    bagThread.join();
+                    if (success) {
+                        logger.fine("Content: " + fileName + " added with checksum: " + checksum);
+                        localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest());
+                    }
                     if (!success || !checksum.equals(localchecksum)) {
-                        logger.severe("Failure on " + baseFileName);
+                        logger.severe("Failure on " + fileName);
                         logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud");
                         try {
+                            store.deleteContent(spaceName, fileName);
                             store.deleteContent(spaceName, baseFileName + "_datacite.xml");
                         } catch (ContentStoreException cse) {
                             logger.warning(cse.getMessage());
                         }
-                        return new Failure("Error in transferring DataCite.xml file to DuraCloud",
-                                "DuraCloud Submission Failure: incomplete metadata transfer");
-                    }
-
-                    // Store BagIt file
-                    success = false;
-                    String fileName = baseFileName + ".zip";
-
-                    // Add BagIt ZIP file
-                    // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the
-                    // transfer
-
-                    messageDigest = MessageDigest.getInstance("MD5");
-                    try (PipedInputStream in = new PipedInputStream(100000);
-                            DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) {
-                        Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token);
-                        checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null);
-                        bagThread.join();
-                        if (success) {
-                            logger.fine("Content: " + fileName + " added with checksum: " + checksum);
-                            localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest());
-                        }
-                        if (!success || !checksum.equals(localchecksum)) {
-                            logger.severe("Failure on " + fileName);
-                            logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud");
-                            try {
-                                store.deleteContent(spaceName, fileName);
-                                store.deleteContent(spaceName, baseFileName + "_datacite.xml");
-                            } catch (ContentStoreException cse) {
-                                logger.warning(cse.getMessage());
-                            }
-                            return new Failure("Error in transferring Zip file to DuraCloud",
-                                    "DuraCloud Submission Failure: incomplete archive transfer");
-                        }
+                        return new Failure("Error in transferring Zip file to DuraCloud",
+                                "DuraCloud Submission Failure: incomplete archive transfer");
                     }
+                }
 
-                    logger.fine("DuraCloud Submission step: Content Transferred");
+                logger.fine("DuraCloud Submission step: Content Transferred");
 
-                    // Document the location of dataset archival copy location (actually the URL
-                    // where you can
-                    // view it as an admin)
-                    StringBuffer sb = new StringBuffer("https://");
-                    sb.append(host);
-                    if (!port.equals("443")) {
-                        sb.append(":" + port);
-                    }
-                    sb.append("/duradmin/spaces/sm/");
-                    sb.append(store.getStoreId());
-                    sb.append("/" + spaceName + "/" + fileName);
-                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
-                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString());
-
-                    logger.fine("DuraCloud Submission step complete: " + sb.toString());
-                } catch (ContentStoreException | IOException e) {
-                    // TODO Auto-generated catch block
-                    logger.warning(e.getMessage());
-                    e.printStackTrace();
-                    return new Failure("Error in transferring file to DuraCloud",
-                            "DuraCloud Submission Failure: archive file not transferred");
-                } catch (InterruptedException e) {
-                    logger.warning(e.getLocalizedMessage());
-                    e.printStackTrace();
+                // Document the location of dataset archival copy location (actually the URL
+                // where you can
+                // view it as an admin)
+                StringBuffer sb = new StringBuffer("https://");
+                sb.append(host);
+                if (!port.equals("443")) {
+                    sb.append(":" + port);
                 }
-            } catch (ContentStoreException e) {
+                sb.append("/duradmin/spaces/sm/");
+                sb.append(store.getStoreId());
+                sb.append("/" + spaceName + "/" + fileName);
+                statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
+                statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString());
+
+                logger.fine("DuraCloud Submission step complete: " + sb.toString());
+            } catch (ContentStoreException | IOException e) {
+                // TODO Auto-generated catch block
                 logger.warning(e.getMessage());
                 e.printStackTrace();
-                String mesg = "DuraCloud Submission Failure";
-                if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) {
-                    mesg = mesg + ": Prior Version archiving not yet complete?";
-                }
-                return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg);
-            } catch (NoSuchAlgorithmException e) {
-                logger.severe("MD5 MessageDigest not available!");
+                return new Failure("Error in transferring file to DuraCloud",
+                        "DuraCloud Submission Failure: archive file not transferred");
+            } catch (InterruptedException e) {
+                logger.warning(e.getLocalizedMessage());
+                e.printStackTrace();
             }
-            finally {
-                dv.setArchivalCopyLocation(statusObject.build().toString());
+        } catch (ContentStoreException e) {
+            logger.warning(e.getMessage());
+            e.printStackTrace();
+            String mesg = "DuraCloud Submission Failure";
+            if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) {
+                mesg = mesg + ": Prior Version archiving not yet complete?";
             }
-        } else {
-            logger.warning(
-                    "DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed");
-            return new Failure("Dataset locked");
+            return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg);
+        } catch (NoSuchAlgorithmException e) {
+            logger.severe("MD5 MessageDigest not available!");
+        }
+        finally {
+            dv.setArchivalCopyLocation(statusObject.build().toString());
         }
         return WorkflowStepResult.OK;
     } else {
failed"); - return new Failure("Dataset locked"); + return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); + } catch (NoSuchAlgorithmException e) { + logger.severe("MD5 MessageDigest not available!"); + } + finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); } return WorkflowStepResult.OK; } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index 61a38cffc99..f662de36792 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -7,7 +7,6 @@ import com.google.cloud.storage.StorageException; import com.google.cloud.storage.StorageOptions; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -16,11 +15,15 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudBucket; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.GoogleCloudProject; +import edu.harvard.iq.dataverse.util.bagit.BagGenerator; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + import org.apache.commons.codec.binary.Hex; import jakarta.json.Json; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import java.io.File; import java.io.FileInputStream; @@ -53,7 +56,7 @@ public boolean canDelete() { } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, Map terms, ApiToken token, Map requestedSettings) { logger.fine("In GoogleCloudSubmitToArchiveCommand..."); String bucketName = requestedSettings.get(GOOGLECLOUD_BUCKET); String projectName = requestedSettings.get(GOOGLECLOUD_PROJECT); @@ -76,135 +79,127 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Bucket bucket = storage.get(bucketName); Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null) { - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); - // Check for and delete existing files for this version - String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; - String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + // Check for and delete existing files for this version + String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; + String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - logger.fine("Checking for existing files in archive..."); + logger.fine("Checking for existing files in archive..."); - try { - Blob existingDatacite = 
bucket.get(dataciteFileName); - if (existingDatacite != null && existingDatacite.exists()) { - logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); - existingDatacite.delete(); - logger.fine("Deleted existing datacite.xml"); - } - } catch (StorageException se) { - logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + try { + Blob existingDatacite = bucket.get(dataciteFileName); + if (existingDatacite != null && existingDatacite.exists()) { + logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName); + existingDatacite.delete(); + logger.fine("Deleted existing datacite.xml"); } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing datacite.xml: " + se.getMessage()); + } - try { - Blob existingBag = bucket.get(bagFileName); - if (existingBag != null && existingBag.exists()) { - logger.fine("Found existing bag file, deleting: " + bagFileName); - existingBag.delete(); - logger.fine("Deleted existing bag file"); - } - } catch (StorageException se) { - logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + try { + Blob existingBag = bucket.get(bagFileName); + if (existingBag != null && existingBag.exists()) { + logger.fine("Found existing bag file, deleting: " + bagFileName); + existingBag.delete(); + logger.fine("Deleted existing bag file"); } + } catch (StorageException se) { + logger.warning("Error checking/deleting existing bag file: " + se.getMessage()); + } - String dataciteXml = getDataCiteXml(dv); - MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); - DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { - // Add datacite.xml file - - Thread dcThread = new Thread(new Runnable() { - public void run() { - try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { - - dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); - dataciteOut.close(); - success = true; - } catch (Exception e) { - logger.severe("Error creating datacite.xml: " + e.getMessage()); - // TODO Auto-generated catch block - e.printStackTrace(); - // throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); - } + // Upload datacite.xml + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + // Add datacite.xml file + + Thread dcThread = new Thread(new Runnable() { + public void run() { + try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { + + dataciteOut.write(dataciteXml.getBytes(StandardCharsets.UTF_8)); + dataciteOut.close(); + success = true; + } catch (Exception e) { + logger.severe("Error creating datacite.xml: " + e.getMessage()); + e.printStackTrace(); } - }); - dcThread.start(); - // Have seen Pipe Closed errors for other archivers when used as a workflow - // without this delay loop - int i = 0; - while (digestInputStream.available() <= 0 && i < 100) { - Thread.sleep(10); - i++; } - Blob dcXml = bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used as a workflow + // without this delay loop + int i = 0; + while (digestInputStream.available() <= 0 && i < 100) { + Thread.sleep(10); + i++; + } + Blob dcXml = 
bucket.create(dataciteFileName, digestInputStream, "text/xml", Bucket.BlobWriteOption.doesNotExist()); + + dcThread.join(); + String checksum = dcXml.getMd5ToHexString(); + logger.fine("Content: datacite.xml added with checksum: " + checksum); + String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + spaceName); + logger.severe(success ? checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed"); + try { + dcXml.delete(Blob.BlobSourceOption.generationMatch()); + } catch (StorageException se) { + logger.warning(se.getMessage()); + } + return new Failure("Error in transferring DataCite.xml file to GoogleCloud", + "GoogleCloud Submission Failure: incomplete metadata transfer"); + } - dcThread.join(); - String checksum = dcXml.getMd5ToHexString(); - logger.fine("Content: datacite.xml added with checksum: " + checksum); - String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); + // Store BagIt file + success = false; + + // Add BagIt ZIP file + // Google uses MD5 as one way to verify the + // transfer + messageDigest = MessageDigest.getInstance("MD5"); + try (PipedInputStream in = new PipedInputStream(100000); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, ore, terms, token); + Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", + Bucket.BlobWriteOption.doesNotExist()); + if (bag.getSize() == 0) { + throw new IOException("Empty Bag"); + } + bagThread.join(); + + checksum = bag.getMd5ToHexString(); + logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); + localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + spaceName); - logger.severe(success ? checksum + " not equal to " + localchecksum : "datacite.xml transfer did not succeed"); + logger.severe(success ? 
checksum + " not equal to " + localchecksum + : "bag transfer did not succeed"); try { - dcXml.delete(Blob.BlobSourceOption.generationMatch()); + bag.delete(Blob.BlobSourceOption.generationMatch()); } catch (StorageException se) { logger.warning(se.getMessage()); } - return new Failure("Error in transferring DataCite.xml file to GoogleCloud", - "GoogleCloud Submission Failure: incomplete metadata transfer"); + return new Failure("Error in transferring Zip file to GoogleCloud", + "GoogleCloud Submission Failure: incomplete archive transfer"); } + } - // Store BagIt file - success = false; - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; - - // Add BagIt ZIP file - // Google uses MD5 as one way to verify the - // transfer - messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(100000); - DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - Thread bagThread = startBagThread(dv, in, digestInputStream2, dataciteXml, token); - Blob bag = bucket.create(bagFileName, digestInputStream2, "application/zip", - Bucket.BlobWriteOption.doesNotExist()); - if (bag.getSize() == 0) { - throw new IOException("Empty Bag"); - } - bagThread.join(); - - checksum = bag.getMd5ToHexString(); - logger.fine("Bag: " + bagFileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - if (!success || !checksum.equals(localchecksum)) { - logger.severe(success ? checksum + " not equal to " + localchecksum - : "bag transfer did not succeed"); - try { - bag.delete(Blob.BlobSourceOption.generationMatch()); - } catch (StorageException se) { - logger.warning(se.getMessage()); - } - return new Failure("Error in transferring Zip file to GoogleCloud", - "GoogleCloud Submission Failure: incomplete archive transfer"); - } - } + logger.fine("GoogleCloud Submission step: Content Transferred"); - logger.fine("GoogleCloud Submission step: Content Transferred"); + // Document the location of dataset archival copy location (actually the URL + // where you can view it as an admin) + // Changed to point at bucket where the zip and datacite.xml are visible - // Document the location of dataset archival copy location (actually the URL - // where you can view it as an admin) - // Changed to point at bucket where the zip and datacite.xml are visible + StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/"); + sb.append(bucketName + "/" + spaceName); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); - StringBuffer sb = new StringBuffer("https://console.cloud.google.com/storage/browser/"); - sb.append(bucketName + "/" + spaceName); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); - statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, sb.toString()); - - } - } else { - logger.warning("GoogleCloud Submision Workflow aborted: Dataset locked for pidRegister"); - return new Failure("Dataset locked"); } } catch (Exception e) { logger.warning(e.getLocalizedMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 34fadbed703..38951c8a218 100644 --- 
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
index 34fadbed703..38951c8a218 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java
@@ -2,7 +2,6 @@
 
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.DatasetLock.Reason;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.engine.command.Command;
@@ -10,7 +9,7 @@
 import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
 import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.BagItLocalPath;
 import edu.harvard.iq.dataverse.util.bagit.BagGenerator;
-import edu.harvard.iq.dataverse.util.bagit.OREMap;
+import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
 import edu.harvard.iq.dataverse.workflow.step.Failure;
 import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult;
 
@@ -19,6 +18,7 @@
 import java.util.logging.Logger;
 
 import jakarta.json.Json;
+import jakarta.json.JsonObject;
 import jakarta.json.JsonObjectBuilder;
 
 import java.io.File;
@@ -44,94 +44,91 @@ public boolean canDelete() {
     }
 
     @Override
-    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token,
-            Map<String, String> requestedSettings) {
-        logger.fine("In LocalCloudSubmitToArchive...");
+    public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore,
+            Map<String, JsonLDTerm> terms, ApiToken token, Map<String, String> requestedSettings) {
+        logger.fine("In LocalSubmitToArchive...");
 
         String localPath = requestedSettings.get(BagItLocalPath.toString());
         String zipName = null;
 
-        //Set a failure status that will be updated if we succeed
+        // Set a failure status that will be updated if we succeed
         JsonObjectBuilder statusObject = Json.createObjectBuilder();
         statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE);
         statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred");
 
         try {
             Dataset dataset = dv.getDataset();
-            if (dataset.getLockFor(Reason.finalizePublication) == null
-                    && dataset.getLockFor(Reason.FileValidationFailed) == null) {
-
-                String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
-                        .replace('.', '-').toLowerCase();
+            String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-')
+                    .replace('.', '-').toLowerCase();
 
-                // Define file paths
-                String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
-                zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip";
+            // Define file paths
+            String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml";
+            zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip";
 
-                // Check for and delete existing files for this version
-                logger.fine("Checking for existing files in archive...");
-
-                File existingDatacite = new File(dataciteFileName);
-                if (existingDatacite.exists()) {
-                    logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName);
-                    if (existingDatacite.delete()) {
-                        logger.fine("Deleted existing datacite.xml");
-                    } else {
-                        logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName);
-                    }
-                }
+            // Check for and delete existing files for this version
+            logger.fine("Checking for existing files in archive...");
 
-                File existingBag = new File(zipName);
-                if (existingBag.exists()) {
-                    logger.fine("Found existing bag file, deleting: " + zipName);
-                    if (existingBag.delete()) {
-                        logger.fine("Deleted existing bag file");
-                    } else {
-                        logger.warning("Failed to delete existing bag file: " + zipName);
-                    }
+            File existingDatacite = new File(dataciteFileName);
+            if (existingDatacite.exists()) {
+                logger.fine("Found existing datacite.xml, deleting: " + dataciteFileName);
+                if (existingDatacite.delete()) {
+                    logger.fine("Deleted existing datacite.xml");
+                } else {
+                    logger.warning("Failed to delete existing datacite.xml: " + dataciteFileName);
                 }
+            }
 
-                // Also check for and delete the .partial file if it exists
-                File existingPartial = new File(zipName + ".partial");
-                if (existingPartial.exists()) {
-                    logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial");
-                    if (existingPartial.delete()) {
-                        logger.fine("Deleted existing partial bag file");
-                    } else {
-                        logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial");
-                    }
+            File existingBag = new File(zipName);
+            if (existingBag.exists()) {
+                logger.fine("Found existing bag file, deleting: " + zipName);
+                if (existingBag.delete()) {
+                    logger.fine("Deleted existing bag file");
+                } else {
+                    logger.warning("Failed to delete existing bag file: " + zipName);
                 }
+            }
+
+            // Also check for and delete the .partial file if it exists
+            File existingPartial = new File(zipName + ".partial");
+            if (existingPartial.exists()) {
+                logger.fine("Found existing partial bag file, deleting: " + zipName + ".partial");
+                if (existingPartial.delete()) {
+                    logger.fine("Deleted existing partial bag file");
+                } else {
+                    logger.warning("Failed to delete existing partial bag file: " + zipName + ".partial");
+                }
+            }
+
+            // Write datacite.xml file
+            FileUtils.writeStringToFile(new File(dataciteFileName), dataciteXml, StandardCharsets.UTF_8);
+            logger.fine("Datacite XML written to: " + dataciteFileName);
+
+            // Generate bag
+            BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms);
+            bagger.setAuthenticationKey(token.getTokenString());
+
+            boolean bagSuccess = bagger.generateBag(new FileOutputStream(zipName + ".partial"));
+
+            if (!bagSuccess) {
+                logger.severe("Bag generation failed for " + zipName);
+                return new Failure("Local Submission Failure", "Bag generation failed");
+            }
 
-                String dataciteXml = getDataCiteXml(dv);
-
-                FileUtils.writeStringToFile(
-                        new File(dataciteFileName),
-                        dataciteXml, StandardCharsets.UTF_8);
-                BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml);
-                bagger.setNumConnections(getNumberOfBagGeneratorThreads());
-                bagger.setAuthenticationKey(token.getTokenString());
-                //ToDo: generateBag(File f, true) seems to do the same thing (with a .tmp extension) - since we don't have to use a stream here, could probably just reuse the existing code?
-                bagger.generateBag(new FileOutputStream(zipName + ".partial"));
-
-                File srcFile = new File(zipName + ".partial");
-                File destFile = new File(zipName);
-
-                if (srcFile.renameTo(destFile)) {
-                    logger.fine("Localhost Submission step: Content Transferred");
-                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
-                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "file://" + zipName);
-                } else {
-                    logger.warning("Unable to move " + zipName + ".partial to " + zipName);
-                }
+            File srcFile = new File(zipName + ".partial");
+            File destFile = new File(zipName);
+
+            if (srcFile.renameTo(destFile)) {
+                logger.fine("Localhost Submission step: Content Transferred to " + zipName);
+                statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
+                statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "file://" + zipName);
             } else {
-                logger.warning(
-                        "Localhost Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed");
-                return new Failure("Dataset locked");
+                logger.severe("Unable to move " + zipName + ".partial to " + zipName);
+                return new Failure("Local Submission Failure", "Unable to rename partial file to final file");
             }
         } catch (Exception e) {
             logger.warning("Failed to archive " + zipName + " : " + e.getLocalizedMessage());
             e.printStackTrace();
+            return new Failure("Local Submission Failure", e.getLocalizedMessage() + ": check log for details");
         } finally {
             dv.setArchivalCopyLocation(statusObject.build().toString());
         }
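The .partial-then-rename step above publishes the bag only once it is complete, but File.renameTo() reports failure solely through its boolean return. A sketch of the same publish step with NIO, which can request an atomic move explicitly and throws a descriptive exception on failure (the method name is illustrative):

    import java.io.IOException;
    import java.nio.file.AtomicMoveNotSupportedException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.StandardCopyOption;

    static void publishBag(Path partial, Path finalZip) throws IOException {
        try {
            // atomic where the filesystem supports it
            Files.move(partial, finalZip, StandardCopyOption.ATOMIC_MOVE);
        } catch (AtomicMoveNotSupportedException e) {
            // fall back to a plain replace
            Files.move(partial, finalZip, StandardCopyOption.REPLACE_EXISTING);
        }
    }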
performArchiveSubmission(DatasetVersion dv, String dataciteXml, JsonObject ore, + Map terms, ApiToken token, Map requestedSettings) { logger.fine("In S3SubmitToArchiveCommand..."); JsonObject configObject = null; @@ -105,139 +106,127 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t try { Dataset dataset = dv.getDataset(); - if (dataset.getLockFor(Reason.finalizePublication) == null) { - - spaceName = getSpaceName(dataset); - - // Define keys for datacite.xml and bag file - String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; - String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; - - // Check for and delete existing files for this version - logger.fine("Checking for existing files in archive..."); - - try { - HeadObjectRequest headDcRequest = HeadObjectRequest.builder() - .bucket(bucketName) - .key(dcKey) - .build(); - - s3.headObject(headDcRequest).join(); - - // If we get here, the object exists, so delete it - logger.fine("Found existing datacite.xml, deleting: " + dcKey); - DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() - .bucket(bucketName) - .key(dcKey) - .build(); - - CompletableFuture deleteDcFuture = s3.deleteObject(deleteDcRequest); - DeleteObjectResponse deleteDcResponse = deleteDcFuture.join(); - - if (deleteDcResponse.sdkHttpResponse().isSuccessful()) { - logger.fine("Deleted existing datacite.xml"); - } else { - logger.warning("Failed to delete existing datacite.xml: " + dcKey); - } - } catch (Exception e) { - if (e.getCause() instanceof NoSuchKeyException) { - logger.fine("No existing datacite.xml found"); - } else { - logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage()); - } - } + spaceName = getSpaceName(dataset); - try { - HeadObjectRequest headBagRequest = HeadObjectRequest.builder() - .bucket(bucketName) - .key(bagKey) - .build(); - - s3.headObject(headBagRequest).join(); - - // If we get here, the object exists, so delete it - logger.fine("Found existing bag file, deleting: " + bagKey); - DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder() - .bucket(bucketName) - .key(bagKey) - .build(); - - CompletableFuture deleteBagFuture = s3.deleteObject(deleteBagRequest); - DeleteObjectResponse deleteBagResponse = deleteBagFuture.join(); - - if (deleteBagResponse.sdkHttpResponse().isSuccessful()) { - logger.fine("Deleted existing bag file"); - } else { - logger.warning("Failed to delete existing bag file: " + bagKey); - } - } catch (Exception e) { - if (e.getCause() instanceof NoSuchKeyException) { - logger.fine("No existing bag file found"); - } else { - logger.warning("Error checking/deleting existing bag file: " + e.getMessage()); - } - } + // Define keys for datacite.xml and bag file + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + String bagKey = spaceName + "/" + getFileName(spaceName, dv) + ".zip"; + + // Check for and delete existing files for this version + logger.fine("Checking for existing files in archive..."); + + try { + HeadObjectRequest headDcRequest = HeadObjectRequest.builder() + .bucket(bucketName) + .key(dcKey) + .build(); + + s3.headObject(headDcRequest).join(); - String dataciteXml = getDataCiteXml(dv); - // Add datacite.xml file - PutObjectRequest putRequest = PutObjectRequest.builder() + // If we get here, the object exists, so delete it + logger.fine("Found existing datacite.xml, deleting: " + dcKey); + DeleteObjectRequest deleteDcRequest = DeleteObjectRequest.builder() 
.bucket(bucketName)
                         .key(dcKey)
                         .build();
 
-                CompletableFuture<PutObjectResponse> putFuture = s3.putObject(putRequest,
-                        AsyncRequestBody.fromString(dataciteXml, StandardCharsets.UTF_8));
+                CompletableFuture<DeleteObjectResponse> deleteDcFuture = s3.deleteObject(deleteDcRequest);
+                DeleteObjectResponse deleteDcResponse = deleteDcFuture.join();
+
+                if (deleteDcResponse.sdkHttpResponse().isSuccessful()) {
+                    logger.fine("Deleted existing datacite.xml");
+                } else {
+                    logger.warning("Failed to delete existing datacite.xml: " + dcKey);
+                }
+            } catch (Exception e) {
+                if (e.getCause() instanceof NoSuchKeyException) {
+                    logger.fine("No existing datacite.xml found");
+                } else {
+                    logger.warning("Error checking/deleting existing datacite.xml: " + e.getMessage());
+                }
+            }
+
+            try {
+                HeadObjectRequest headBagRequest = HeadObjectRequest.builder()
+                        .bucket(bucketName)
+                        .key(bagKey)
+                        .build();
+
+                s3.headObject(headBagRequest).join();
+
+                // If we get here, the object exists, so delete it
+                logger.fine("Found existing bag file, deleting: " + bagKey);
+                DeleteObjectRequest deleteBagRequest = DeleteObjectRequest.builder()
+                        .bucket(bucketName)
+                        .key(bagKey)
+                        .build();
 
-                // Wait for the put operation to complete
-                PutObjectResponse putResponse = putFuture.join();
+                CompletableFuture<DeleteObjectResponse> deleteBagFuture = s3.deleteObject(deleteBagRequest);
+                DeleteObjectResponse deleteBagResponse = deleteBagFuture.join();
 
-                if (!putResponse.sdkHttpResponse().isSuccessful()) {
-                    logger.warning("Could not write datacite xml to S3");
-                    return new Failure("S3 Archiver failed writing datacite xml file");
+                if (deleteBagResponse.sdkHttpResponse().isSuccessful()) {
+                    logger.fine("Deleted existing bag file");
+                } else {
+                    logger.warning("Failed to delete existing bag file: " + bagKey);
+                }
+            } catch (Exception e) {
+                if (e.getCause() instanceof NoSuchKeyException) {
+                    logger.fine("No existing bag file found");
+                } else {
+                    logger.warning("Error checking/deleting existing bag file: " + e.getMessage());
                 }
+            }
+
+            // Add datacite.xml file
+            PutObjectRequest putRequest = PutObjectRequest.builder()
+                    .bucket(bucketName)
+                    .key(dcKey)
+                    .build();
+
+            CompletableFuture<PutObjectResponse> putFuture = s3.putObject(putRequest,
+                    AsyncRequestBody.fromString(dataciteXml, StandardCharsets.UTF_8));
+
+            // Wait for the put operation to complete
+            PutObjectResponse putResponse = putFuture.join();
 
-                // Store BagIt file
-                String fileName = getFileName(spaceName, dv);
+            if (!putResponse.sdkHttpResponse().isSuccessful()) {
+                logger.warning("Could not write datacite xml to S3");
+                return new Failure("S3 Archiver failed writing datacite xml file");
+            }
 
-                // Add BagIt ZIP file
-                // Google uses MD5 as one way to verify the
-                // transfer
+            // Store BagIt file
+            String fileName = getFileName(spaceName, dv);
 
-                // Generate bag
-                BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml);
-                bagger.setAuthenticationKey(token.getTokenString());
-                if (bagger.generateBag(fileName, false)) {
-                    File bagFile = bagger.getBagFile(fileName);
+            // Generate bag
+            BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms);
+            bagger.setAuthenticationKey(token.getTokenString());
+            if (bagger.generateBag(fileName, false)) {
+                File bagFile = bagger.getBagFile(fileName);
 
-                    UploadFileRequest uploadFileRequest = UploadFileRequest.builder()
-                            .putObjectRequest(req -> req.bucket(bucketName).key(bagKey)).source(bagFile.toPath())
-                            .build();
+                UploadFileRequest uploadFileRequest = UploadFileRequest.builder()
+                        .putObjectRequest(req -> req.bucket(bucketName).key(bagKey)).source(bagFile.toPath())
+                        .build();
 
-                    FileUpload 
fileUpload = tm.uploadFile(uploadFileRequest);
+                FileUpload fileUpload = tm.uploadFile(uploadFileRequest);
 
-                    CompletedFileUpload uploadResult = fileUpload.completionFuture().join();
+                CompletedFileUpload uploadResult = fileUpload.completionFuture().join();
 
-                    if (uploadResult.response().sdkHttpResponse().isSuccessful()) {
-                        logger.fine("S3 Submission step: Content Transferred");
+                if (uploadResult.response().sdkHttpResponse().isSuccessful()) {
+                    logger.fine("S3 Submission step: Content Transferred");
 
-                        statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
-                        statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE,
-                                String.format("https://%s.s3.amazonaws.com/%s", bucketName, bagKey));
-                    } else {
-                        logger.severe("Error sending file to S3: " + fileName);
-                        return new Failure("Error in transferring Bag file to S3",
-                                "S3 Submission Failure: incomplete transfer");
-                    }
+                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS);
+                    statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE,
+                            String.format("https://%s.s3.amazonaws.com/%s", bucketName, bagKey));
                 } else {
-                    logger.warning("Could not write local Bag file " + fileName);
-                    return new Failure("S3 Archiver fail writing temp local bag");
+                    logger.severe("Error sending file to S3: " + fileName);
+                    return new Failure("Error in transferring Bag file to S3",
+                            "S3 Submission Failure: incomplete transfer");
                 }
             } else {
-                logger.warning(
-                        "S3 Archiver Submision Workflow aborted: Dataset locked for publication/pidRegister");
-                return new Failure("Dataset locked");
+                logger.warning("Could not write local Bag file " + fileName);
+                return new Failure("S3 Archiver fail writing temp local bag");
             }
+
         } catch (Exception e) {
             logger.warning(e.getLocalizedMessage());
             e.printStackTrace();
@@ -253,7 +242,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t
             return WorkflowStepResult.OK;
         } else {
             return new Failure(
-                    "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config.");
+                    "S3 Submission not configured - no \":S3ArchivalProfile\" and/or \":S3ArchivalConfig\" or no bucket-name defined in config.");
         }
     }
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
index f24ebdb8655..12501d170d6 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
@@ -24,6 +24,7 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.Map.Entry;
@@ -72,6 +73,7 @@
 import com.google.gson.JsonSyntaxException;
 
 import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.DatasetFieldConstant;
 import edu.harvard.iq.dataverse.DataFile.ChecksumType;
 import edu.harvard.iq.dataverse.pidproviders.PidUtil;
 import edu.harvard.iq.dataverse.settings.JvmSettings;
@@ -120,10 +122,10 @@ public class BagGenerator {
 
     private boolean usetemp = false;
 
-    private int numConnections = 8;
-    public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString();
+    private Map<String, JsonLDTerm> terms;
 
-    private OREMap oremap;
+    private static int numConnections = 8;
+    public static final String BAG_GENERATOR_THREADS = BagGeneratorThreads.toString();
 
     static PrintWriter pw = null;
 
@@ -139,15 +141,15 @@ public class BagGenerator 
{
      * and zipping are done in parallel, using a connection pool. The required space
      * on disk is ~ n+1/n of the final bag size, e.g. 125% of the bag size for a
      * 4-way parallel zip operation.
+     * @param terms
      * @throws Exception
      * @throws JsonSyntaxException
      */
-    public BagGenerator(OREMap oreMap, String dataciteXml) throws JsonSyntaxException, Exception {
-        this.oremap = oreMap;
-        this.oremapObject = oreMap.getOREMap();
-        //(JsonObject) new JsonParser().parse(oreMap.getOREMap().toString());
+    public BagGenerator(jakarta.json.JsonObject oremapObject, String dataciteXml, Map<String, JsonLDTerm> terms) throws JsonSyntaxException, Exception {
+        this.oremapObject = oremapObject;
         this.dataciteXml = dataciteXml;
+        this.terms = terms;
 
         try {
             // Using Dataverse, all the URLs to be retrieved should be on the current server, so allowing self-signed certs and not verifying hostnames are useful in testing and
@@ -768,12 +770,12 @@ private String generateInfoFile() {
         /* Contact, and it's subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change
          * so we need to find the labels used.
          */
-        JsonLDTerm contactTerm = oremap.getContactTerm();
+        JsonLDTerm contactTerm = terms.get(DatasetFieldConstant.datasetContact);
         if ((contactTerm != null) && aggregation.has(contactTerm.getLabel())) {
 
             JsonElement contacts = aggregation.get(contactTerm.getLabel());
-            JsonLDTerm contactNameTerm = oremap.getContactNameTerm();
-            JsonLDTerm contactEmailTerm = oremap.getContactEmailTerm();
+            JsonLDTerm contactNameTerm = terms.get(DatasetFieldConstant.datasetContactName);
+            JsonLDTerm contactEmailTerm = terms.get(DatasetFieldConstant.datasetContactEmail);
 
             if (contacts.isJsonArray()) {
                 for (int i = 0; i < contactsArray.size(); i++) {
@@ -841,8 +843,8 @@ private String generateInfoFile() {
         /* Description, and it's subfields, are terms from citation.tsv whose mapping to a formal vocabulary and label in the oremap may change
          * so we need to find the labels used. 
*/ - JsonLDTerm descriptionTerm = oremap.getDescriptionTerm(); - JsonLDTerm descriptionTextTerm = oremap.getDescriptionTextTerm(); + JsonLDTerm descriptionTerm = terms.get(DatasetFieldConstant.description); + JsonLDTerm descriptionTextTerm = terms.get(DatasetFieldConstant.descriptionText); if (descriptionTerm == null) { logger.warning("No description available for BagIt Info file"); } else { @@ -1124,9 +1126,9 @@ public void setAuthenticationKey(String tokenString) { apiKey = tokenString; } - public void setNumConnections(int numConnections) { - this.numConnections = numConnections; - logger.fine("BagGenerator will use " + numConnections + " threads"); + public static void setNumConnections(int numConnections) { + BagGenerator.numConnections = numConnections; + logger.fine("All BagGenerators will now use " + numConnections + " threads"); } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index ae1175f0e1d..d7fc3f96b02 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -290,7 +290,7 @@ private void executeSteps(Workflow wf, WorkflowContext ctxt, int initialStepIdx try { if (res == WorkflowStepResult.OK) { logger.log(Level.INFO, "Workflow {0} step {1}: OK", new Object[]{ctxt.getInvocationId(), stepIdx}); - em.merge(ctxt.getDataset()); + // The dataset is merged in refresh(ctxt) ctxt = refresh(ctxt); } else if (res instanceof Failure) { logger.log(Level.WARNING, "Workflow {0} failed: {1}", new Object[]{ctxt.getInvocationId(), ((Failure) res).getReason()}); diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java index b0567bff107..c6a5c8626ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java @@ -1,9 +1,14 @@ package edu.harvard.iq.dataverse.workflow.internalspi; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetLock.Reason; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.ArchiverUtil; +import edu.harvard.iq.dataverse.util.bagit.OREMap; +import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.WorkflowContext; import edu.harvard.iq.dataverse.workflow.step.Failure; import edu.harvard.iq.dataverse.workflow.step.WorkflowStep; @@ -14,6 +19,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import jakarta.json.JsonObject; import jakarta.servlet.http.HttpServletRequest; /** @@ -45,11 +51,43 @@ public WorkflowStepResult run(WorkflowContext context) { } } + Dataset d = context.getDataset(); + if (d.isLockedFor(Reason.FileValidationFailed)) { + logger.severe("Dataset locked for file validation failure - will not archive"); + return new Failure("File Validation Lock", "Dataset has file validation problem - will not archive"); + } DataverseRequest dvr = new 
DataverseRequest(context.getRequest().getAuthenticatedUser(),
                 (HttpServletRequest) null);
         String className = requestedSettings.get(SettingsServiceBean.Key.ArchiverClassName.toString());
         AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, dvr,
                 context.getDataset().getReleasedVersion());
         if (archiveCommand != null) {
-            return (archiveCommand.performArchiveSubmission(context.getDataset().getReleasedVersion(), context.getApiToken(), requestedSettings));
+            // Generate the required components for archiving
+            DatasetVersion version = context.getDataset().getReleasedVersion();
+
+            // Generate DataCite XML
+            String dataCiteXml = archiveCommand.getDataCiteXml(version);
+
+            // Generate OREMap
+            OREMap oreMap = new OREMap(version, false);
+            JsonObject ore = oreMap.getOREMap();
+
+            // Get JSON-LD terms
+            Map<String, JsonLDTerm> terms = archiveCommand.getJsonLDTerms(oreMap);
+
+            // Call the updated method with all required parameters
+            /*
+             * Note: because this must complete before the workflow can complete and update the version status in the db a long-running archive submission via workflow could hit a transaction timeout and fail.
+             * The commands themselves have been updated to run archive submission outside of any transaction and update the status in a separate transaction, so archiving a given version that way could succeed
+             * where this workflow failed.
+             */
+            return archiveCommand.performArchiveSubmission(
+                    version,
+                    dataCiteXml,
+                    ore,
+                    terms,
+                    context.getApiToken(),
+                    requestedSettings
+            );
+
         } else {
             logger.severe("No Archiver instance could be created for name: " + className);
             return new Failure("No Archiver", "Could not create instance of class: " + className);

From d2a25c392c4434d960871f13a6ed8f86458fc3f0 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Sat, 24 Jan 2026 09:58:33 -0500
Subject: [PATCH 17/31] use new transaction to start

---
 .../dataverse/DatasetVersionServiceBean.java | 26 +++++++++----------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index b5e964e5673..9c04acd6c5e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -28,6 +28,8 @@
 import jakarta.ejb.EJB;
 import jakarta.ejb.EJBException;
 import jakarta.ejb.Stateless;
+import jakarta.ejb.TransactionAttribute;
+import jakarta.ejb.TransactionAttributeType;
 import jakarta.inject.Named;
 import jakarta.json.Json;
 import jakarta.json.JsonObjectBuilder;
@@ -1337,25 +1339,21 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer
 
 
     /**
-     * Update the archival copy location for a specific version of a dataset. Archiving can be long-running and other parallel updates to the datasetversion have likely occurred so this method will check
-     * for OptimisticLockExceptions and retry the update with the latest version.
+     * Update the archival copy location for a specific version of a dataset.
+     * Archiving can be long-running and other parallel updates to the datasetversion have likely occurred
+     * so this method will just re-find the version rather than risking an
+     * OptimisticLockException and then having to retry in yert another transaction (since the OLE rolls this one back).
      *
      * @param dv
     *            The dataset version whose archival copy location we want to update. Must not be {@code null}. 
*/
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
     public void persistArchivalCopyLocation(DatasetVersion dv) {
-        try {
-            em.merge(dv);
-            em.flush(); // Force the update and version check immediately
-        } catch (OptimisticLockException ole) {
-            logger.log(Level.INFO, "OptimisticLockException while persisting archival copy location for DatasetVersion id={0}. Retrying on latest version.", dv.getId());
-            DatasetVersion currentVersion = find(dv.getId());
-            if (currentVersion != null) {
-                currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation());
-                em.merge(currentVersion);
-            } else {
-                logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} to retry persisting archival copy location after OptimisticLockException.", dv.getId());
-            }
+        DatasetVersion currentVersion = find(dv.getId());
+        if (currentVersion != null) {
+            currentVersion.setArchivalCopyLocation(dv.getArchivalCopyLocation());
+        } else {
+            logger.log(Level.SEVERE, "Could not find DatasetVersion with id={0} when persisting archival copy location.", dv.getId());
         }
     }
 }

From a45b76b2cc4ad1d6e6ea324eeb51cb3cfcc37189 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Sat, 24 Jan 2026 09:59:00 -0500
Subject: [PATCH 18/31] typo

---
 .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index 9c04acd6c5e..a5dd724104f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -1342,7 +1342,7 @@ public Long getDatasetVersionCount(Long datasetId, boolean canViewUnpublishedVer
      * Update the archival copy location for a specific version of a dataset.
      * Archiving can be long-running and other parallel updates to the datasetversion have likely occurred
      * so this method will just re-find the version rather than risking an
-     * OptimisticLockException and then having to retry in yert another transaction (since the OLE rolls this one back).
+     * OptimisticLockException and then having to retry in yet another transaction (since the OLE rolls this one back).
      *
      * @param dv
      *            The dataset version whose archival copy location we want to update. Must not be {@code null}. 
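A note on the REQUIRES_NEW choice in patch 17: the status write has to commit on its own, so that neither a rollback of the caller's transaction nor a transaction timeout during a long-running archive submission can undo the recorded status. A minimal caller sketch under that assumption follows; the bean class and method below are hypothetical illustrations, while DatasetVersionServiceBean.persistArchivalCopyLocation and the JSON status-building calls are taken from the patches in this series.

    import edu.harvard.iq.dataverse.util.json.JsonUtil;
    import jakarta.ejb.EJB;
    import jakarta.ejb.Stateless;
    import jakarta.json.Json;
    import jakarta.json.JsonObjectBuilder;

    @Stateless
    public class ArchivingStatusSketch {

        @EJB
        DatasetVersionServiceBean datasetVersionService;

        public void recordPendingAndArchive(DatasetVersion dv) {
            // Build the JSON status object the same way the later patches do
            JsonObjectBuilder job = Json.createObjectBuilder();
            job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING);
            dv.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build()));

            // REQUIRES_NEW suspends this method's transaction and commits the
            // status immediately in a fresh one; even if the long-running
            // archiving work below fails or times out, "pending" stays recorded.
            datasetVersionService.persistArchivalCopyLocation(dv);

            // ... long-running bag generation / upload would happen here ...
        }
    }
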
From a4c583e1e6ab3db3cb5c8e570ca7ffbd1867d567 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 24 Jan 2026 10:38:13 -0500 Subject: [PATCH 19/31] Use pending, use JSON --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 14 ++++++++++---- .../edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- .../edu/harvard/iq/dataverse/api/Datasets.java | 4 +++- .../ArchivalSubmissionWorkflowStep.java | 12 +++++++++--- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index a091005b392..7e168047f05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3004,7 +3004,11 @@ public String updateCurrentVersion() { String status = updateVersion.getArchivalCopyLocationStatus(); if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){ // Delete the record of any existing copy since it is now out of date/incorrect - updateVersion.setArchivalCopyLocation(null); + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); + updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + //Persist to db now + datasetVersionService.persistArchivalCopyLocation(updateVersion); /* * Then try to generate and submit an archival copy. Note that running this * command within the CuratePublishedDatasetVersionCommand was causing an error: @@ -3023,9 +3027,8 @@ public String updateCurrentVersion() { } } else if(status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { //Not automatically replacing the old archival copy as creating it is expensive - updateVersion.setArchivalStatus(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); + updateVersion.setArchivalStatusOnly(DatasetVersion.ARCHIVAL_STATUS_OBSOLETE); datasetVersionService.persistArchivalCopyLocation(updateVersion); - datasetVersionService.merge(updateVersion); } } } @@ -6115,7 +6118,10 @@ public void archiveVersion(Long id, boolean force) { if (status == null || (force && cmd.canDelete())) { // Set initial pending status - dv.setArchivalCopyLocation(DatasetVersion.ARCHIVAL_STATUS_PENDING); + JsonObjectBuilder job = Json.createObjectBuilder(); + job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); + dv.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build())); + //Persist now datasetVersionService.persistArchivalCopyLocation(dv); commandEngine.submitAsync(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 1248a8266ab..4ff6ae5e723 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -416,8 +416,8 @@ public void setArchivalCopyLocation(String location) { populateArchivalStatus(true); } - // COnvenience method to set only the status - public void setArchivalStatus(String status) { + // Convenience method to just change the status without changing the location + public void setArchivalStatusOnly(String status) { populateArchivalStatus(false); JsonObjectBuilder job = Json.createObjectBuilder(archivalCopyLocationJson); job.add(DatasetVersion.ARCHIVAL_STATUS, status); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index dba4b36d4da..155522bbb5b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -1279,7 +1279,9 @@ public Response publishDataset(@Context ContainerRequestContext crc, @PathParam(
         AbstractSubmitToArchiveCommand archiveCommand = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(user), updateVersion);
         if (archiveCommand != null) {
             // Delete the record of any existing copy since it is now out of date/incorrect
-            updateVersion.setArchivalCopyLocation(null);
+            JsonObjectBuilder job = Json.createObjectBuilder();
+            job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING);
+            updateVersion.setArchivalCopyLocation(JsonUtil.prettyPrint(job.build()));
             datasetVersionSvc.persistArchivalCopyLocation(updateVersion);
             /*
              * Then try to generate and submit an archival copy. Note that running this
diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java
index c6a5c8626ae..9e9b434ba03 100644
--- a/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java
+++ b/src/main/java/edu/harvard/iq/dataverse/workflow/internalspi/ArchivalSubmissionWorkflowStep.java
@@ -75,9 +75,15 @@ public WorkflowStepResult run(WorkflowContext context) {
 
             // Call the updated method with all required parameters
             /*
-             * Note: because this must complete before the workflow can complete and update the version status in the db a long-running archive submission via workflow could hit a transaction timeout and fail.
-             * The commands themselves have been updated to run archive submission outside of any transaction and update the status in a separate transaction, so archiving a given version that way could succeed
-             * where this workflow failed.
+             * Note: because this must complete before the workflow can complete and update the version status
+             * in the db, a long-running archive submission via workflow could hit a transaction timeout and fail.
+             * The commands themselves have been updated to run archive submission outside of any transaction
+             * and update the status in a separate transaction, so archiving a given version that way could
+             * succeed where this workflow failed.
+             *
+             * Another difference when running in a workflow - this step has no way to set the archiving status to
+             * pending as is done when running archiving from the UI/API. Instead, there is a generic workflow
+             * lock on the dataset. 
*/
             return archiveCommand.performArchiveSubmission(
                     version,
                     dataCiteXml,
                     ore,
                     terms,
                     context.getApiToken(),
                     requestedSettings
             );
 
From 305f7e3b73f7ec299bb25a86bddf645f34f23607 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 21 Jan 2026 17:38:37 -0500
Subject: [PATCH 20/31] merge fix of persistArchivalCopy method refactors

---
 src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index 4ff6ae5e723..8a4a0cf3f53 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -392,7 +392,7 @@ public String getArchivalCopyLocationStatus() {
     }
     public String getArchivalCopyLocationMessage() {
         populateArchivalStatus(false);
-        if(archivalCopyLocationJson!=null) {
+        if(archivalCopyLocationJson!=null && archivalCopyLocationJson.containsKey(ARCHIVAL_STATUS_MESSAGE)) {
             return archivalCopyLocationJson.getString(ARCHIVAL_STATUS_MESSAGE);
         }
         return null;

From d2282d9d02280f27491f3a87dbb9ac39ee6794ac Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 28 Jan 2026 15:20:14 -0500
Subject: [PATCH 21/31] combined release note

---
 doc/release-notes/12122-archiving updates.md | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 doc/release-notes/12122-archiving updates.md

diff --git a/doc/release-notes/12122-archiving updates.md b/doc/release-notes/12122-archiving updates.md
new file mode 100644
index 00000000000..2dd4eb6909f
--- /dev/null
+++ b/doc/release-notes/12122-archiving updates.md
@@ -0,0 +1,7 @@
+## Archiving Updates
+
+This release includes multiple updates to the process of creating archival bags, including:
+- performance/scaling improvements for large datasets (multiple changes)
+- bug fixes affecting when superusers see the "Submit" button that launches archiving from the dataset page version table
+- new functionality to optionally suppress an archiving workflow when using the Update Current Version functionality and to mark the current archive as out of date
+- new functionality to support recreating an archival bag when Update Current Version has been used, available for archivers that can delete existing files
\ No newline at end of file

From 236fca47f9f5e57792c8201fd34fadc992f2c6ec Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Wed, 28 Jan 2026 15:30:21 -0500
Subject: [PATCH 22/31] missed change to static

---
 .../engine/command/impl/AbstractSubmitToArchiveCommand.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index ffa79456902..2b049f1c42a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -156,7 +156,7 @@ public Thread startBagThread(DatasetVersion dv, PipedInputStream in, DigestInput
             public void run() {
                 try (PipedOutputStream out = new PipedOutputStream(in)) {
                     // Generate bag
-                    bagger.setNumConnections(getNumberOfBagGeneratorThreads());
+                    BagGenerator.setNumConnections(getNumberOfBagGeneratorThreads());
                     BagGenerator bagger = new BagGenerator(ore, dataciteXml, terms);
                     bagger.setAuthenticationKey(token.getTokenString());
                     bagger.generateBag(out);

From f3fb3dbb23f5251643d1ec224a6521d69dd26f0e Mon Sep 17 00:00:00 2001
From: 
Jim Myers
Date: Thu, 19 Feb 2026 13:32:58 -0500
Subject: [PATCH 23/31] switch to jvm setting

---
 .../source/installation/config.rst            | 20 +++++++++----------
 .../edu/harvard/iq/dataverse/DatasetPage.java |  2 +-
 .../iq/dataverse/settings/JvmSettings.java    |  1 +
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index a3c32c99a63..d9c2621d9a0 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -2264,7 +2264,7 @@ At present, archiving classes include the DuraCloudSubmitToArchiveCommand, Local
 All current options support the :ref:`Archival Status API` calls and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers).
 
 Archival Bags are created per dataset version. By default, if a version is republished (via the superuser-only 'Update Current Version' publication option in the UI/API), a new archival bag is not created for the version.
-If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`dataverse.feature.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
+If the archiver used is capable of deleting existing bags (Google, S3, and File Archivers) superusers can trigger a manual update of the archival bag, and, if the :ref:`dataverse.bagit.archive-on-version-update` flag is set to true, this will be done automatically when 'Update Current Version' is used.
 
 .. _Duracloud Configuration:
 
@@ -3718,6 +3718,14 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r
 
 Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``.
 
+.. _dataverse.bagit.archive-on-version-update:
+
+dataverse.bagit.archive-on-version-update
++++++++++++++++++++++++++++++++++++++++++
+
+Indicates whether archival bag creation should be triggered (if configured) when a version is updated and was already successfully archived,
+i.e. via the Update-Current-Version publication option. Setting the flag to true only works if the archiver being used supports deleting existing archival bags.
+
 .. _dataverse.files.globus-monitoring-server:
 
 dataverse.files.globus-monitoring-server
@@ -4034,16 +4042,6 @@ dataverse.feature.only-update-datacite-when-needed
 
 Only contact DataCite to update a DOI after checking to see if DataCite has outdated information (for efficiency, lighter load on DataCite, especially when using file DOIs).
 
-.. _dataverse.feature.archive-on-version-update:
-
-dataverse.feature.archive-on-version-update
-+++++++++++++++++++++++++++++++++++++++++++
-
-Indicates whether archival bag creation should be triggered (if configured) when a version is updated and was already successfully archived,
-i.e via the Update-Current-Version publication option. Setting the flag true only works if the archiver being used supports deleting existing archival bags.
-
-
-
 .. 
_:ApplicationServerSettings: Application Server Settings diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index c48468b63d0..b34dd89f5c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2999,7 +2999,7 @@ public String updateCurrentVersion() { // If pending or an obsolete copy exists, do nothing (nominally if a pending run succeeds and we're updating the current version here, it should be marked as obsolete - ignoring for now since updates within the time an archiving run is pending should be rare // If a failure or null, rerun archiving now. If a failure is due to an exiting copy in the repo, we'll fail again String status = updateVersion.getArchivalCopyLocationStatus(); - if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (FeatureFlags.ARCHIVE_ON_VERSION_UPDATE.enabled() && archiveCommand.canDelete())){ + if((status==null) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) || (JvmSettings.BAGIT_ARCHIVE_ON_VERSION_UPDATE.lookupOptional(Boolean.class).orElse(false) && archiveCommand.canDelete())){ // Delete the record of any existing copy since it is now out of date/incorrect JsonObjectBuilder job = Json.createObjectBuilder(); job.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_PENDING); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 05390ba8a8c..468bfcf1983 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -276,6 +276,7 @@ public enum JvmSettings { BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + BAGIT_ARCHIVE_ON_VERSION_UPDATE(SCOPE_BAGIT, "archive-on-version-update"), // STORAGE USE SETTINGS SCOPE_STORAGEUSE(PREFIX, "storageuse"), From e642ef229b57c9f2e769547a62d0ed25d93e66e0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Feb 2026 13:34:23 -0500 Subject: [PATCH 24/31] fix param order per review --- .../engine/command/impl/AbstractSubmitToArchiveCommand.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 2b049f1c42a..a4c67b54428 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -117,8 +117,8 @@ private void persistResult(CommandContext ctxt, DatasetVersion versionWithStatus * constructor and could be dropped from the parameter list.) * * @param version - the DatasetVersion to archive - * @param ore - * @param dataCiteXml + * @param dataCiteXml + * @param ore * @param terms * @param token - an API Token for the user performing this action * @param requestedSettings - a map of the names/values for settings required by this archiver (sent because this class is not part of the EJB context (by design) and has no direct access to service beans). 
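For reference, the new JVM option introduced in patch 23 follows the same MicroProfile Config pattern as the other dataverse.bagit settings documented above. Below is a minimal sketch of reading it, assuming only the JvmSettings enum entry and the lookupOptional(Class) call shown in the patch; the wrapper class and method are hypothetical, and the environment-variable name follows the MicroProfile convention used for the sibling settings.

    import edu.harvard.iq.dataverse.settings.JvmSettings;

    public final class ArchiveOnUpdateFlagSketch {

        // Reads dataverse.bagit.archive-on-version-update; by the MicroProfile
        // convention used elsewhere in the guide, this could also be supplied as
        // the environment variable DATAVERSE_BAGIT_ARCHIVE_ON_VERSION_UPDATE.
        // Defaults to false, so the old behavior (no automatic re-archiving on
        // Update Current Version) is kept unless an installation opts in.
        public static boolean archiveOnVersionUpdate() {
            return JvmSettings.BAGIT_ARCHIVE_ON_VERSION_UPDATE
                    .lookupOptional(Boolean.class)
                    .orElse(false);
        }
    }
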
From 9f5bb3f4b902a7f55a1dc44d1bd2a288a14e0dc Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 19 Feb 2026 13:45:44 -0500
Subject: [PATCH 25/31] 443 fix per review

---
 .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
index b1fa777478b..aebbbffb4a2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
@@ -191,7 +191,7 @@ public void run() {
                             // view it as an admin)
                             StringBuffer sb = new StringBuffer("https://");
                             sb.append(host);
-                            if (!port.equals("443")) {
+                            if (!port.equals(DEFAULT_PORT)) {
                                 sb.append(":" + port);
                             }
                             sb.append("/duradmin/spaces/sm/");

From 47e199fc933c07b69a5a8e313ec645c546996cb0 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 19 Feb 2026 14:10:37 -0500
Subject: [PATCH 26/31] refactor per review

---
 .../impl/AbstractSubmitToArchiveCommand.java  | 17 ++++++++++++++++
 .../impl/DuraCloudSubmitToArchiveCommand.java |  4 ++--
 .../GoogleCloudSubmitToArchiveCommand.java    |  7 +++----
 .../impl/LocalSubmitToArchiveCommand.java     |  7 +++----
 .../impl/S3SubmitToArchiveCommand.java        | 20 +------------------
 5 files changed, 26 insertions(+), 29 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
index a4c67b54428..f02b562c0b4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java
@@ -37,6 +37,7 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand<Dat
     private final Map<String, String> requestedSettings = new HashMap<String, String>();
+    protected String spaceName = null;
     protected boolean success=false;
     private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName());
     private static final int MAX_ZIP_WAIT = 20000;
@@ -241,4 +242,20 @@ public static boolean supportsDelete() {
     public boolean canDelete() {
         return supportsDelete();
     }
+
+    protected String getDataCiteFileName(String spaceName, DatasetVersion dv) {
+        return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber();
+    }
+
+    protected String getFileName(String spaceName, DatasetVersion dv) {
+        return spaceName + ".v" + dv.getFriendlyVersionNumber();
+    }
+
+    protected String getSpaceName(Dataset dataset) {
+        if (spaceName == null) {
+            spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-')
+                    .toLowerCase();
+        }
+        return spaceName;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
index aebbbffb4a2..033fd96be74 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java
@@ -86,8 +86,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dat
              * the same space. 
*/ String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); - String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber(); + //This archiver doesn't use the standard spaceName, but does use it to generate the file name + String baseFileName = getFileName(getSpaceName(dataset), dv); ContentStore store; //Set a failure status that will be updated if we succeed diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index f662de36792..bfb4ed8685b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -80,12 +80,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dat Dataset dataset = dv.getDataset(); - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + String spaceName = getSpaceName(dataset); // Check for and delete existing files for this version - String dataciteFileName = spaceName + "/datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; - String bagFileName = spaceName + "/" + spaceName + ".v" + dv.getFriendlyVersionNumber() + ".zip"; + String dataciteFileName = getDataCiteFileName(spaceName, dv) + ".xml"; + String bagFileName = spaceName + "/" + getFileName(spaceName,dv) + ".zip"; logger.fine("Checking for existing files in archive..."); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 38951c8a218..18042894246 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -58,12 +58,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dat try { Dataset dataset = dv.getDataset(); - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + String spaceName = getSpaceName(dataset); // Define file paths - String dataciteFileName = localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"; - zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + String dataciteFileName = localPath + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; + zipName = localPath + "/" + getFileName(spaceName, dv) + ".zip"; // Check for and delete existing files for this version logger.fine("Checking for existing files in archive..."); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 4198cb19fe9..8d098d9193f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -2,14 +2,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetLock.Reason; 
import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.S3ArchiverConfig; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; -import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.workflow.step.Failure; @@ -66,7 +64,7 @@ public class S3SubmitToArchiveCommand extends AbstractSubmitToArchiveCommand { private static final Config config = ConfigProvider.getConfig(); protected S3AsyncClient s3 = null; private S3TransferManager tm = null; - private String spaceName = null; + protected String bucketName = null; public S3SubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { @@ -246,22 +244,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dat } } - protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { - return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber(); - } - - protected String getFileName(String spaceName, DatasetVersion dv) { - return spaceName + ".v" + dv.getFriendlyVersionNumber(); - } - - protected String getSpaceName(Dataset dataset) { - if (spaceName == null) { - spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') - .toLowerCase(); - } - return spaceName; - } - private S3AsyncClient createClient(JsonObject configObject) { // Create a builder for the S3AsyncClient From a21f6a393d6fd04db3387c4570f0f58ae9f883d6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Feb 2026 14:22:26 -0500 Subject: [PATCH 27/31] fix indent per review --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 8a4a0cf3f53..92bab58e8d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -402,8 +402,8 @@ private void populateArchivalStatus(boolean force) { if(archivalCopyLocationJson ==null || force) { if(archivalCopyLocation!=null) { try { - archivalCopyLocationJson = JsonUtil.getJsonObject(archivalCopyLocation); - } catch(Exception e) { + archivalCopyLocationJson = JsonUtil.getJsonObject(archivalCopyLocation); + } catch (Exception e) { logger.warning("DatasetVersion id: " + id + "has a non-JsonObject value, parsing error: " + e.getMessage()); logger.fine(archivalCopyLocation); } From e0dad2cbd447edad6ec5218ec0eca26cec5865f4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Feb 2026 14:22:38 -0500 Subject: [PATCH 28/31] fix javadoc per review --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 12501d170d6..38b825bd3fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -141,7 +141,9 @@ public class BagGenerator { * and zipping are done in parallel, using a connection 
pool. The required space
      * on disk is ~ n+1/n of the final bag size, e.g. 125% of the bag size for a
      * 4-way parallel zip operation.
-     * @param terms
+     * @param oremapObject - OAI-ORE Map file as a JSON object
+     * @param dataciteXml - DataCite XML file as a string
+     * @param terms - Map of schema.org/terms to their corresponding JsonLDTerm objects
      * @throws Exception
      * @throws JsonSyntaxException
      */

From f0282f33293ea0b18eb7d3c6e457157418ebeadc Mon Sep 17 00:00:00 2001
From: qqmyers
Date: Thu, 19 Feb 2026 14:29:22 -0500
Subject: [PATCH 29/31] remove param in doc per review

---
 src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
index 5a3f105497d..4fa85a543d8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
@@ -359,7 +359,6 @@ public <R> R submit(Command<R> aCommand) throws CommandException {
      *
      * @param <R> The return type of the command
      * @param aCommand The command to execute
-     * @param user The user executing the command
      * @return A Future representing the pending result
      * @throws CommandException if the command cannot be submitted
      */

From 0d0afe7bb8d0b52d2da7eb39b19d1f9524c67229 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 19 Feb 2026 16:09:28 -0500
Subject: [PATCH 30/31] add spacename to datacite file

---
 .../engine/command/impl/GoogleCloudSubmitToArchiveCommand.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
index bfb4ed8685b..fddaa445368 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java
@@ -83,7 +83,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, String dat
         String spaceName = getSpaceName(dataset);
 
         // Check for and delete existing files for this version
-        String dataciteFileName = getDataCiteFileName(spaceName, dv) + ".xml";
+        String dataciteFileName = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml";
         String bagFileName = spaceName + "/" + getFileName(spaceName,dv) + ".zip";
 
         logger.fine("Checking for existing files in archive...");

From 12944a54c9aed3fda1dda570be34a92d314fd237 Mon Sep 17 00:00:00 2001
From: Jim Myers
Date: Thu, 19 Feb 2026 18:16:03 -0500
Subject: [PATCH 31/31] update test to match new name

---
 src/test/java/edu/harvard/iq/dataverse/api/BagIT.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java
index 16c44003f35..b649ad6bb95 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java
@@ -87,7 +87,7 @@ public void testBagItExport() throws IOException {
                 .replace('.', '-').toLowerCase();
         // spacename: doi-10-5072-fk2-fosg5q
 
-        String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip";
+        String pathToZip = bagitExportDir + "/" + spaceName + ".v1.0" + ".zip";
         try {
             // give the bag time to generate