From 4c0d891c52a08d366638fe562d466e12092277fc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Jul 2022 13:16:51 -0400 Subject: [PATCH 01/16] restore batch command --- .../dataverse/DatasetVersionServiceBean.java | 32 +++++ .../edu/harvard/iq/dataverse/api/Admin.java | 119 ++++++++++++++++-- 2 files changed, 140 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 580d95b4b1d..b4d12d33a04 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1187,4 +1187,36 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) return null; } + /** + * Execute a query to return DatasetVersion + * + * @param queryString + * @return + */ + public List getUnarchivedDatasetVersions(){ + + String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"; + + try { + TypedQuery query = em.createQuery(queryString, DatasetVersion.class); + List dsl = query.getResultList(); + return dsl; + + } catch (javax.persistence.NoResultException e) { + logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}", queryString); + return null; + } catch (EJBException e) { + logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); + return null; + } + } // end getUnarchivedDatasetVersions + + /** + * Merges the passed datasetversion to the persistence context. + * @param ver the DatasetVersion whose new state we want to persist. + * @return The managed entity representing {@code ver}. 
+ */ + public DatasetVersion merge( DatasetVersion ver ) { + return em.merge(ver); + } } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 78ec4a6edb5..e400c5a7659 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1805,31 +1805,37 @@ public Response validateDataFileHashValue(@PathParam("fileId") String fileId) { } - @GET - @Path("/submitDataVersionToArchive/{id}/{version}") - public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, @PathParam("version") String versionNumber) { + @POST + @Path("/submitDatasetVersionToArchive/{id}/{version}") + public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, + @PathParam("version") String versionNumber) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. via S3 direct - // downloads) to create the Bag + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + // Note - the user is being set in the session so it becomes part of the + // DataverseRequest and is sent to the back-end command where it is used to get + // the API Token which is then used to retrieve files (e.g. via S3 direct + // downloads) to create the Bag session.setUser(au); // TODO: Stop using session. Use createDataverseRequest instead. 
Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, + dvRequestService.getDataverseRequest(), dv); if (cmd != null) { new Thread(new Runnable() { public void run() { try { DatasetVersion dv = commandEngine.submit(cmd); if (dv.getArchivalCopyLocation() != null) { - logger.info("DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); + logger.info( + "DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber + + " submitted to Archive at: " + dv.getArchivalCopyLocation()); } else { logger.severe("Error submitting version due to conflict/error at Archive"); } @@ -1838,7 +1844,8 @@ public void run() { } } }).start(); - return ok("Archive submission using " + cmd.getClass().getCanonicalName() + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); + return ok("Archive submission using " + cmd.getClass().getCanonicalName() + + " started. Processing can take significant time for large datasets. 
View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); @@ -1850,6 +1857,96 @@ public void run() { return error(Status.UNAUTHORIZED, "api key required"); } } + + + /** + * Iteratively archives all unarchived dataset versions + * @param + * listonly - don't archive, just list unarchived versions + * limit - max number to process + * latestonly - only archive the latest versions + * @return + */ + @POST + @Path("/archiveAllUnarchivedDatasetVersions") + public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + if (!au.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + // Note - the user is being set in the session so it becomes part of the + // DataverseRequest and is sent to the back-end command where it is used to get + // the API Token which is then used to retrieve files (e.g.
via S3 direct + // downloads) to create the Bag + session.setUser(au); + List dsl = datasetversionService.getUnarchivedDatasetVersions(); + if (dsl != null) { + if (listonly) { + JsonArrayBuilder jab = Json.createArrayBuilder(); + logger.info("Unarchived versions found: "); + int current = 0; + for (DatasetVersion dv : dsl) { + if (limit != null && current >= limit) { + break; + } + if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { + jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + current++; + } + } + return ok(jab); + } + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); + final DataverseRequest request = dvRequestService.getDataverseRequest(); + if (cmd != null) { + new Thread(new Runnable() { + public void run() { + int total = dsl.size(); + int successes = 0; + int failures = 0; + for (DatasetVersion dv : dsl) { + if (limit != null && (successes + failures) >= limit) { + break; + } + if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { + try { + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); + + dv = commandEngine.submit(cmd); + if (!dv.getArchivalCopyLocation().equals("Attempted")) { + successes++; + logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " + + dv.getArchivalCopyLocation()); + } else { + failures++; + logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); + } + } catch (CommandException ex) { + failures++; + 
logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); + } + } + logger.fine(successes + failures + " of " + total + " archive submissions complete"); + } + logger.info("Archiving complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); + } + }).start(); + return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. View log and/or check archive for results."); + } else { + logger.log(Level.SEVERE, "Could not find Archiver class: " + className); + return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); + } + } else { + return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } @DELETE @Path("/clearMetricsCache") From 6ea48782fed8938e4eb856b0f2148b54c926e6ed Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Jul 2022 13:18:48 -0400 Subject: [PATCH 02/16] drop line removal --- .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 17f5b252930..f30183663e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -42,6 +42,7 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final String DURACLOUD_HOST = ":DuraCloudHost"; private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; + public 
DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } From 5f4d9656305c1cb50748fa382e87eb0c38eb2e18 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Jul 2022 13:31:02 -0400 Subject: [PATCH 03/16] drop superuser req as this is admin and command already requires perm --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index e400c5a7659..88295d83193 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1812,9 +1812,6 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } // Note - the user is being set in the session so it becomes part of the // DataverseRequest and is sent to the back-end command where it is used to get // the API Token which is then used to retrieve files (e.g. via S3 direct @@ -1873,9 +1870,7 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - if (!au.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "Superusers only."); - } + // Note - the user is being set in the session so it becomes part of the // DataverseRequest and is sent to the back-end command where it is used to get // the API Token which is then used to retrieve files (e.g. 
via S3 direct From 2357dd2afb1d808e431deb424790ad5bd98f4cdc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Jul 2022 13:37:28 -0400 Subject: [PATCH 04/16] add doc for batch archiving command --- .../source/installation/config.rst | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5c227417271..b85a02aa71c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1156,12 +1156,12 @@ For example: .. _Archiving API Call: -API Call -++++++++ +API Calls ++++++++++ -Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the API call to manually submit a DatasetVersion for processing: +Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the admin API call to manually submit a DatasetVersion for processing: -``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/submitDataVersionToArchive/{id}/{version}`` +``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/submitDatasetVersionToArchive/{id}/{version}`` where: @@ -1169,10 +1169,20 @@ where: ``{version}`` is the friendly version number, e.g. "1.2". -The submitDataVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) 
+The submitDatasetVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse Software API call can be used to submit a Bag for other versions of the same Dataset. (The space is reused, so that archival copies of different Dataset versions correspond to different snapshots of the same DuraCloud space.). +A batch version of this admin api call is also available: + +``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions?listonly=true&limit=10&latestonly=true`` + +The archiveAllUnarchivedDatasetVersions call takes 3 optional configuration parameters. +* listonly=true will cause the API to list dataset versions that would be archived but will not take any action. +* limit= will limit the number of dataset versions archived in one api call to <= . +* latestonly=true will limit archiving to only the latest published versions of datasets instead of archiving all unarchived versions. + + PostPublication Workflow ++++++++++++++++++++++++ @@ -2566,7 +2576,7 @@ Number of errors to display to the user when creating DataFiles from a file uplo .. 
_:BagItHandlerEnabled: :BagItHandlerEnabled -+++++++++++++++++++++ +++++++++++++++++++++ Part of the database settings to configure the BagIt file handler. Enables the BagIt file handler. By default, the handler is disabled. From e2bb433a117935f84f9a9d3655b7b78fe6514995 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 15 Jul 2022 15:17:37 -0400 Subject: [PATCH 05/16] clarify archival bag language --- doc/sphinx-guides/source/installation/config.rst | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index b85a02aa71c..f54028fa345 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1063,7 +1063,9 @@ BagIt file handler configuration settings: BagIt Export ------------ -Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. +Your Dataverse installation may be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ archival Bags (sometimes called BagPacks) to `Chronopolis `_ via `DuraCloud `_ or alternately to any folder on the local filesystem. + +These archival Bags include all of the files and metadata in a given dataset version and are sufficient to recreate the dataset, e.g. in a new Dataverse instance, or postentially in another RDA-conformant repository. The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. 
The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. @@ -1074,7 +1076,7 @@ At present, the DPNSubmitToArchiveCommand, LocalSubmitToArchiveCommand, and Goog Duracloud Configuration +++++++++++++++++++++++ -Also note that while the current Chronopolis implementation generates the bag and submits it to the archive's DuraCloud interface, the step to make a 'snapshot' of the space containing the Bag (and verify it's successful submission) are actions a curator must take in the DuraCloud interface. +Also note that while the current Chronopolis implementation generates the archival Bag and submits it to the archive's DuraCloud interface, the step to make a 'snapshot' of the space containing the archival Bag (and verify it's successful submission) are actions a curator must take in the DuraCloud interface. The minimal configuration to support an archiver integration involves adding a minimum of two Dataverse Software Keys and any required Payara jvm options. The example instructions here are specific to the DuraCloud Archiver\: @@ -1098,7 +1100,7 @@ It also can use one setting that is common to all Archivers: :BagGeneratorThread ``curl http://localhost:8080/api/admin/settings/:BagGeneratorThreads -X PUT -d '8'`` -By default, the Bag generator zips two datafiles at a time when creating the Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. +By default, the Bag generator zips two datafiles at a time when creating the archival Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. Archivers may require JVM options as well. 
For the Chronopolis archiver, the username and password associated with your organization's Chronopolis/DuraCloud account should be configured in Payara: @@ -1115,7 +1117,7 @@ ArchiverClassName - the fully qualified class to be used for archiving. For exam ``curl -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand" http://localhost:8080/api/admin/settings/:ArchiverClassName`` -\:BagItLocalPath - the path to where you want to store BagIt. For example\: +\:BagItLocalPath - the path to where you want to store the archival Bags. For example\: ``curl -X PUT -d /home/path/to/storage http://localhost:8080/api/admin/settings/:BagItLocalPath`` @@ -1130,7 +1132,7 @@ ArchiverClassName - the fully qualified class to be used for archiving. For exam Google Cloud Configuration ++++++++++++++++++++++++++ -The Google Cloud Archiver can send Dataverse Project Bags to a bucket in Google's cloud, including those in the 'Coldline' storage class (cheaper, with slower access) +The Google Cloud Archiver can send archival Bags to a bucket in Google's cloud, including those in the 'Coldline' storage class (cheaper, with slower access) ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.GoogleCloudSubmitToArchiveCommand"`` From 5d3f6f564b62ff307719d8b80c7c0a7457a16f1e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 21 Jul 2022 19:21:54 -0400 Subject: [PATCH 06/16] Change to named query --- .../java/edu/harvard/iq/dataverse/DatasetVersion.java | 9 +++++++++ .../harvard/iq/dataverse/DatasetVersionServiceBean.java | 9 +++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index faa91b87e12..254340116d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -39,6 +39,8 @@ 
import javax.persistence.Index; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; +import javax.persistence.NamedQueries; +import javax.persistence.NamedQuery; import javax.persistence.OneToMany; import javax.persistence.OneToOne; import javax.persistence.OrderBy; @@ -59,6 +61,13 @@ * * @author skraffmiller */ + +@NamedQueries({ + @NamedQuery(name = "DatasetVersion.findUnarchivedReleasedVersion", + query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" + )}) + + @Entity @Table(indexes = {@Index(columnList="dataset_id")}, uniqueConstraints = @UniqueConstraint(columnNames = {"dataset_id,versionnumber,minorversionnumber"})) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index b4d12d33a04..24f0b273d84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1195,15 +1195,12 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) */ public List getUnarchivedDatasetVersions(){ - String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"; - try { - TypedQuery query = em.createQuery(queryString, DatasetVersion.class); - List dsl = query.getResultList(); + @SuppressWarnings("unchecked") + List dsl = em.createNamedQuery("DatasetVersion.findUnarchivedReleasedVersion").getResultList(); return dsl; - } catch (javax.persistence.NoResultException e) { - logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}", queryString); + logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}"); return null; } catch (EJBException e) { logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); return null; } From 5fe18a7fd38ff9749aa686e73677c62016613728 Mon Sep 17 00:00:00 2001
From: qqmyers Date: Thu, 21 Jul 2022 19:22:14 -0400 Subject: [PATCH 07/16] fix toDos re: createDataverseRequest() --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 88295d83193..f74c75b974a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -105,9 +105,6 @@ import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; -import java.math.BigDecimal; - - import java.util.ArrayList; import java.util.Arrays; import java.util.Date; @@ -1816,14 +1813,13 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, // DataverseRequest and is sent to the back-end command where it is used to get // the API Token which is then used to retrieve files (e.g. via S3 direct // downloads) to create the Bag - session.setUser(au); // TODO: Stop using session. Use createDataverseRequest instead. 
Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, - dvRequestService.getDataverseRequest(), dv); + createDataverseRequest(au), dv); if (cmd != null) { new Thread(new Runnable() { public void run() { @@ -1895,8 +1891,8 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool return ok(jab); } String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); - final DataverseRequest request = dvRequestService.getDataverseRequest(); + final DataverseRequest request = createDataverseRequest(au); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dsl.get(0)); if (cmd != null) { new Thread(new Runnable() { public void run() { From 86162de200ce6854cdcee6a5f65942856cf16ab6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 21 Jul 2022 19:23:16 -0400 Subject: [PATCH 08/16] remove call to set session user --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index f74c75b974a..0fb36049402 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1871,7 +1871,6 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool // DataverseRequest and is sent to the back-end command where it is used to get // the API Token which is then used to retrieve files (e.g. 
via S3 direct // downloads) to create the Bag - session.setUser(au); List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { if (listonly) { From ccb86538f1f913493b0828bdb9af2017d7323dfe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 25 Jul 2022 16:00:51 -0400 Subject: [PATCH 09/16] use class --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index f9a6014c45a..23fc1961b7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1205,8 +1205,7 @@ public DatasetVersion merge( DatasetVersion ver ) { public List getUnarchivedDatasetVersions(){ try { - @SuppressWarnings("unchecked") - List dsl = em.createNamedQuery("DatasetVersion.findUnarchivedReleasedVersion").getResultList(); + List dsl = em.createNamedQuery("DatasetVersion.findUnarchivedReleasedVersion", DatasetVersion.class).getResultList(); return dsl; } catch (javax.persistence.NoResultException e) { logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}"); From 17baefc21e114501495169a1fee108ac84e8f178 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 25 Jul 2022 16:08:14 -0400 Subject: [PATCH 10/16] Add comment per review --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 0fb36049402..87a8663f017 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1891,8 +1891,13 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool } String className = 
settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); final DataverseRequest request = createDataverseRequest(au); + // createSubmitToArchiveCommand() tries to find and instantiate an non-abstract + // implementation of AbstractSubmitToArchiveCommand based on the provided + // className. If a class with that name isn't found (or can't be instatiated, it + // will return null AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dsl.get(0)); if (cmd != null) { + //Found an archiver to use new Thread(new Runnable() { public void run() { int total = dsl.size(); From 82d9e457ef25526b5a3a8077a3305e983ffe7384 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 26 Jul 2022 16:18:10 -0400 Subject: [PATCH 11/16] update/remove obsolete comments --- .../edu/harvard/iq/dataverse/api/Admin.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 87a8663f017..23c65a0f67c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1809,17 +1809,21 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. 
via S3 direct - // downloads) to create the Bag + Dataset ds = findDatasetOrDie(dsid); DatasetVersion dv = datasetversionService.findByFriendlyVersionNumber(ds.getId(), versionNumber); if (dv.getArchivalCopyLocation() == null) { String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + // Note - the user is being sent via the createDataverseRequest(au) call to the + // back-end command where it is used to get the API Token which is + // then used to retrieve files (e.g. via S3 direct downloads) to create the Bag AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, createDataverseRequest(au), dv); + // createSubmitToArchiveCommand() tries to find and instantiate an non-abstract + // implementation of AbstractSubmitToArchiveCommand based on the provided + // className. If a class with that name isn't found (or can't be instatiated), it + // will return null if (cmd != null) { new Thread(new Runnable() { public void run() { @@ -1867,10 +1871,6 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool try { AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. via S3 direct - // downloads) to create the Bag List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { if (listonly) { @@ -1890,6 +1890,9 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool return ok(jab); } String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + // Note - the user is being sent via the createDataverseRequest(au) call to the + // back-end command where it is used to get the API Token which is + // then used to retrieve files (e.g. 
via S3 direct downloads) to create the Bag final DataverseRequest request = createDataverseRequest(au); // createSubmitToArchiveCommand() tries to find and instantiate a non-abstract // implementation of AbstractSubmitToArchiveCommand based on the provided // className. If a class with that name isn't found (or can't be instantiated), it // will return null From abd392319709024396e3f0a546b7239e7ae66953 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Jul 2022 13:06:01 -0400 Subject: [PATCH 12/16] updates for archival status (missed/lost) --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 11 ++++++++--- .../java/edu/harvard/iq/dataverse/api/Admin.java | 12 ++++++------ .../java/edu/harvard/iq/dataverse/api/Datasets.java | 3 +-- .../impl/DuraCloudSubmitToArchiveCommand.java | 2 +- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index b56c3197ea8..bce2cd9d59a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2754,7 +2754,7 @@ public String updateCurrentVersion() { */ try { updateVersion = commandEngine.submit(archiveCommand); - if (updateVersion.getArchivalCopyLocation() != null) { + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); } else { errorMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); @@ -5555,9 +5555,14 @@ public void archiveVersion(Long id) { if (cmd != null) { try { DatasetVersion version = commandEngine.submit(cmd); - logger.info("Archived to " + version.getArchivalCopyLocation()); + if (!version.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { + logger.info( + "DatasetVersion id=" + version.getId() + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); + } else { + logger.severe("Error submitting version " + 
version.getId() + " due to conflict/error at Archive"); + } if (version.getArchivalCopyLocation() != null) { - resetVersionTabList(); + setVersionTabList(resetVersionTabList()); this.setVersionTabListForPostLoad(getVersionTabList()); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("datasetversion.archive.success")); } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index f4f53b6ca17..d6dc93f2199 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1836,10 +1836,10 @@ public Response submitDatasetVersionToArchive(@PathParam("id") String dsid, public void run() { try { DatasetVersion dv = commandEngine.submit(cmd); - if (dv.getArchivalCopyLocation() != null) { + if (!dv.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { logger.info( "DatasetVersion id=" + ds.getGlobalId().toString() + " v" + versionNumber - + " submitted to Archive at: " + dv.getArchivalCopyLocation()); + + " submitted to Archive, status: " + dv.getArchivalCopyLocationStatus()); } else { logger.severe("Error submitting version due to conflict/error at Archive"); } @@ -1855,7 +1855,7 @@ public void run() { return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); } } else { - return error(Status.BAD_REQUEST, "Version already archived at: " + dv.getArchivalCopyLocation()); + return error(Status.BAD_REQUEST, "Version was already submitted for archiving."); } } catch (WrappedResponse e1) { return error(Status.UNAUTHORIZED, "api key required"); @@ -1922,10 +1922,10 @@ public void run() { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); dv = commandEngine.submit(cmd); - if (!dv.getArchivalCopyLocation().equals("Attempted")) { + if (!dv.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { 
successes++; - logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); + logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive, status: " + + dv.getArchivalCopyLocationStatus()); } else { failures++; logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 15b3cd2b9db..7941dfd70c8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1153,7 +1153,7 @@ public Response publishDataset(@PathParam("id") String id, @QueryParam("type") S */ try { updateVersion = commandEngine.submit(archiveCommand); - if (updateVersion.getArchivalCopyLocation() != null) { + if (!updateVersion.getArchivalCopyLocationStatus().equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE)) { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.success"); } else { successMsg = BundleUtil.getStringFromBundle("datasetversion.update.archive.failure"); @@ -3352,7 +3352,6 @@ public Response setDatasetVersionArchivalStatus(@PathParam("id") String datasetI dsv.setArchivalCopyLocation(JsonUtil.prettyPrint(update)); dsv = datasetversionService.merge(dsv); - logger.fine("location now: " + dsv.getArchivalCopyLocation()); logger.fine("status now: " + dsv.getArchivalCopyLocationStatus()); logger.fine("message now: " + dsv.getArchivalCopyLocationMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 
d37d9e655b0..c7da2247a31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -63,7 +63,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // ToDo - change after HDC 3A changes to status reporting // This will make the archivalCopyLocation non-null after a failure which should // stop retries - dv.setArchivalCopyLocation("Attempted"); + if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { // Use Duracloud client classes to login From 53dc116ea49c548c4dfdc379f131bb08e06ace1c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 29 Jul 2022 14:26:23 -0400 Subject: [PATCH 13/16] typo --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 9c07ebdb36d..b51e298efea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -64,7 +64,7 @@ */ @NamedQueries({ - @NamedQuery(name = "DatasetVersion.findUnarchivedReleasedVersions", + @NamedQuery(name = "DatasetVersion.findUnarchivedReleasedVersion", query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" )}) From 2eed04b6f5f604b4e43699ef0f587d97681df364 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 14:13:41 -0400 Subject: [PATCH 14/16] don't archive harvested datasets --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 
b51e298efea..30815c43381 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -65,7 +65,7 @@ @NamedQueries({ @NamedQuery(name = "DatasetVersion.findUnarchivedReleasedVersion", - query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" + query = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.dataset.harvestedFrom IS NULL and o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL" )}) From bae701104aef4a3b0f606c0dc0b675023188bb31 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Aug 2022 17:03:03 -0400 Subject: [PATCH 15/16] lower list-only logging to fine --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d6dc93f2199..35cc1a1c501 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1882,7 +1882,7 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool if (dsl != null) { if (listonly) { JsonArrayBuilder jab = Json.createArrayBuilder(); - logger.info("Unarchived versions found: "); + logger.fine("Unarchived versions found: "); int current = 0; for (DatasetVersion dv : dsl) { if (limit != null && current >= limit) { @@ -1890,7 +1890,7 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool } if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); - logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + logger.fine(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); current++; } } From 
4215ec5a32d69089f5ed018f8b176e2064d3d252 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 8 Aug 2022 14:50:55 -0400 Subject: [PATCH 16/16] doc /api response changes per QA --- doc/sphinx-guides/source/installation/config.rst | 6 ++++-- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index dfa49aa4011..5725cafa193 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -943,7 +943,7 @@ Some external tools are also ready to be translated, especially if they are usin Tools for Translators -+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++ The list below depicts a set of tools that can be used to ease the amount of work necessary for translating the Dataverse software by facilitating this collaborative effort and enabling the reuse of previous work: @@ -1189,13 +1189,15 @@ In the Chronopolis case, since the transfer from the DuraCloud front-end to arch A batch version of this admin api call is also available: -``curl -X POST -H "X-Dataverse-key: $API_TOKEN" http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions?listonly=true&limit=10&latestonly=true`` +``curl -X POST -H "X-Dataverse-key: $API_TOKEN" 'http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions?listonly=true&limit=10&latestonly=true'`` The archiveAllUnarchivedDatasetVersions call takes 3 optional configuration parameters. * listonly=true will cause the API to list dataset versions that would be archived but will not take any action. * limit=<n> will limit the number of dataset versions archived in one api call to <= <n>. * latestonly=true will limit archiving to only the latest published versions of datasets instead of archiving all unarchived versions. 
+Note that because archiving is done asynchronously, the calls above will return OK even if the user does not have the *PublishDataset* permission on the dataset(s) involved. Failures are indicated in the log and the archivalStatus calls in the native api can be used to check the status as well. + PostPublication Workflow ++++++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 35cc1a1c501..ef08444af69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1849,7 +1849,7 @@ public void run() { } }).start(); return ok("Archive submission using " + cmd.getClass().getCanonicalName() - + " started. Processing can take significant time for large datasets. View log and/or check archive for results."); + + " started. Processing can take significant time for large datasets and requires that the user have permission to publish the dataset. View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); @@ -1940,7 +1940,7 @@ public void run() { logger.info("Archiving complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); } }).start(); - return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. View log and/or check archive for results."); + return ok("Starting to archive all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions and requires that the user have permission to publish the dataset(s). 
View log and/or check archive for results."); } else { logger.log(Level.SEVERE, "Could not find Archiver class: " + className); return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className);