From 6eae5e4fec9be0435a91921881e0a64fab46dffd Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 15:12:32 -0500 Subject: [PATCH 01/47] implement batch processing of new versions to archive --- .../dataverse/DatasetVersionServiceBean.java | 26 ++++++++- .../edu/harvard/iq/dataverse/api/Admin.java | 57 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index e4eb6aac88e..ea6a05a2c3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -987,7 +987,7 @@ public List> getBasicDatasetVersionInfo(Dataset dataset) - public HashMap getFileMetadataHistory(DataFile df){ + public HashMap getFileMetadataHistory(DataFile df){ if (df == null){ throw new NullPointerException("DataFile 'df' cannot be null"); @@ -1165,4 +1165,28 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) return null; } + /** + * Execute a query to return DatasetVersion + * + * @param queryString + * @return + */ + public List getUnarchivedDatasetVersions(){ + + String queryString = "select * from datasetversion where releasetime is not null and archivalcopylocation is null;"; + + try{ + TypedQuery query = em.createQuery(queryString, DatasetVersion.class); + List dsl = query.getResultList(); + return dsl; + + } catch (javax.persistence.NoResultException e) { + logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}", queryString); + return null; + } catch (EJBException e) { + logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); + return null; + } + } // end getUnarchivedDatasetVersions + } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index b52665a7747..81fe1ecd2a9 100644 
--- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1722,6 +1722,63 @@ public void run() { } } + + @GET + @Path("/archiveAllUnarchivedDataVersions") + public Response archiveAllUnarchivedDatasetVersions() { + + try { + AuthenticatedUser au = findAuthenticatedUserOrDie(); + // Note - the user is being set in the session so it becomes part of the + // DataverseRequest and is sent to the back-end command where it is used to get + // the API Token which is then used to retrieve files (e.g. via S3 direct + // downloads) to create the Bag + session.setUser(au); + List dsl = datasetversionService.getUnarchivedDatasetVersions(); + if (dsl != null) { + String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); + + if (cmd != null) { + new Thread(new Runnable() { + public void run() { + int total = dsl.size(); + int successes = 0; + int failures = 0; + for (DatasetVersion dv : dsl) { + try { + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + + dv = commandEngine.submit(cmd); + if (dv.getArchivalCopyLocation() != null) { + successes++; + logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " + + dv.getArchivalCopyLocation()); + } else { + failures++; + logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); + } + } catch (CommandException ex) { + logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); + } + logger.fine(successes + failures + " of " + total + " archive submissions complete"); + } + logger.info("Archiving 
complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); + } + }).start(); + return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. View log and/or check archive for results."); + } else { + logger.log(Level.SEVERE, "Could not find Archiver class: " + className); + return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); + } + } else { + return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); + } + } catch (WrappedResponse e1) { + return error(Status.UNAUTHORIZED, "api key required"); + } + } + @DELETE @Path("/clearMetricsCache") public Response clearMetricsCache() { From 8313404e6604daba3ee53d32d9b09e83ebaae9f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 15:26:19 -0500 Subject: [PATCH 02/47] add listonly and limit options, count commandEx as failure --- .../edu/harvard/iq/dataverse/api/Admin.java | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 81fe1ecd2a9..3c61d2e8919 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -46,6 +46,7 @@ import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -1723,9 +1724,16 @@ public void run() { } + /** + * Iteratively archives all unarchived dataset versions + * @param + * listonly - don't archive, just list unarchived versions + * limit - max number to process + * @return + */ @GET @Path("/archiveAllUnarchivedDataVersions") - public Response 
archiveAllUnarchivedDatasetVersions() { + public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); @@ -1736,6 +1744,16 @@ public Response archiveAllUnarchivedDatasetVersions() { session.setUser(au); List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { + if (listonly) { + logger.info("Unarchived versions found: "); + int current = 0; + for (DatasetVersion dv : dsl) { + if (limit != null && current > limit) { + break; + } + logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + } + } String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); @@ -1746,6 +1764,9 @@ public void run() { int successes = 0; int failures = 0; for (DatasetVersion dv : dsl) { + if (limit != null && (successes + failures) > limit) { + break; + } try { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); @@ -1759,6 +1780,7 @@ public void run() { logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); } } catch (CommandException ex) { + failures++; logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); } logger.fine(successes + failures + " of " + total + " archive submissions complete"); From 70d923ae08b80d6248acc062ec836ed5812fa645 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 15:36:50 -0500 Subject: [PATCH 03/47] send list in response for listonly --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 3c61d2e8919..4fd3f43b127 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1745,14 +1745,17 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool List dsl = datasetversionService.getUnarchivedDatasetVersions(); if (dsl != null) { if (listonly) { + JsonArrayBuilder jab = Json.createArrayBuilder(); logger.info("Unarchived versions found: "); int current = 0; for (DatasetVersion dv : dsl) { if (limit != null && current > limit) { break; } + jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); } + return ok(jab); } String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); From 96d3723307c26668e5687f4ba61fb80d0d207a16 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 15:51:02 -0500 Subject: [PATCH 04/47] fix query --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index ea6a05a2c3c..344f8af3b87 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1173,10 +1173,10 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) */ public List getUnarchivedDatasetVersions(){ - String queryString = "select * from datasetversion where releasetime is not null and 
archivalcopylocation is null;"; + String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releasetime IS NOT NULL and o.archivalcopylocation IS NULL"; try{ - TypedQuery query = em.createQuery(queryString, DatasetVersion.class); + TypedQuery query = em.createQuery(queryString, DatasetVersion.class); List dsl = query.getResultList(); return dsl; From cb9f374e6452cffa5069ef941a0a5f65a8248ca7 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 16:00:54 -0500 Subject: [PATCH 05/47] case sensitive in query --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 344f8af3b87..3f46a25c91e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1173,7 +1173,7 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) */ public List getUnarchivedDatasetVersions(){ - String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releasetime IS NOT NULL and o.archivalcopylocation IS NULL"; + String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"; try{ TypedQuery query = em.createQuery(queryString, DatasetVersion.class); From 76e23960219f7cdf0cde5bede1cf8fda55fddd9e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 16:24:13 -0500 Subject: [PATCH 06/47] param to only archive latest version --- .../edu/harvard/iq/dataverse/api/Admin.java | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 4fd3f43b127..e06289dfac8 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1729,11 +1729,12 @@ public void run() { * @param * listonly - don't archive, just list unarchived versions * limit - max number to process + * latestonly - only archive the latest versions * @return */ @GET @Path("/archiveAllUnarchivedDataVersions") - public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit) { + public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { try { AuthenticatedUser au = findAuthenticatedUserOrDie(); @@ -1752,8 +1753,11 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool if (limit != null && current > limit) { break; } - jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); - logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { + jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); + current++; + } } return ok(jab); } @@ -1770,21 +1774,23 @@ public void run() { if (limit != null && (successes + failures) > limit) { break; } - try { - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); - - dv = commandEngine.submit(cmd); - if (dv.getArchivalCopyLocation() != null) { - successes++; - logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); - } else { + if (!latestonly || 
dv.equals(dv.getDataset().getLatestVersionForCopy())) { + try { + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + + dv = commandEngine.submit(cmd); + if (dv.getArchivalCopyLocation() != null) { + successes++; + logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " + + dv.getArchivalCopyLocation()); + } else { + failures++; + logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); + } + } catch (CommandException ex) { failures++; - logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); + logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); } - } catch (CommandException ex) { - failures++; - logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); } logger.fine(successes + failures + " of " + total + " archive submissions complete"); } From 2e8d990ad4b75719c2d8e6b35a0f3d104822f3c3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 21 Dec 2020 16:41:58 -0500 Subject: [PATCH 07/47] off by one in limit --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index e06289dfac8..9f819ff13a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1750,7 +1750,7 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool logger.info("Unarchived versions found: "); int current = 0; for (DatasetVersion dv : dsl) { - if (limit != null && current > limit) 
{ + if (limit != null && current >= limit) { break; } if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { @@ -1771,7 +1771,7 @@ public void run() { int successes = 0; int failures = 0; for (DatasetVersion dv : dsl) { - if (limit != null && (successes + failures) > limit) { + if (limit != null && (successes + failures) >= limit) { break; } if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { From b7968333b5950f44bbf086ebc1d020ee4ca4535f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 23 Dec 2020 11:52:43 -0500 Subject: [PATCH 08/47] documentation --- doc/sphinx-guides/source/installation/config.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 4a877eabff7..5b9433d7c31 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -866,9 +866,9 @@ For example: ``cp /usr/local/payara5/glassfish/domains/domain1/files/googlecloudkey.json`` -.. _Archiving API Call: +.. _Archiving API Calls: -API Call +API Calls ++++++++ Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the API call to manually submit a DatasetVersion for processing: @@ -881,6 +881,18 @@ where: ``{version}`` is the friendly version number, e.g. "1.2". +A batch API call is also available that will attempt to archive any currently unarchived dataset versions: + +``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDataVersions`` + +The call supports three optional query parameters that can be used in combination: + +``listonly={true/false}`` default is false. Using true retrieves the list of unarchived versions but does not attempt to archive any + +``latestonly={true/false}`` default is false. 
Using true only lists/processes the most recently published version of a given dataset (instead of all published versions) + +``limit={n}`` default is no limit/process all unarchived versions (subject to other parameters). Defines a maximum number of versions to attempt to archive in response to one invocation of the API call. + The submitDataVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. (The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse API call can be used to submit a Bag for other versions of the same Dataset. (The space is reused, so that archival copies of different Dataset versions correspond to different snapshots of the same DuraCloud space.). 
From 006a4baff870ebd1c11c86caaacaf96511fadd0c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jan 2021 12:28:55 -0500 Subject: [PATCH 09/47] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5b9433d7c31..84ec0699d62 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -869,7 +869,7 @@ For example: .. _Archiving API Calls: API Calls -++++++++ ++++++++++ Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the API call to manually submit a DatasetVersion for processing: From bba8ba0a13703410a9196713c6920150291d4643 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jan 2021 12:29:20 -0500 Subject: [PATCH 10/47] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 84ec0699d62..a997f0e353f 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -887,7 +887,7 @@ A batch API call is also available that will attempt to archive any currently un The call supports three optional query parameters that can be used in combination: -``listonly={true/false}`` default is false. Using true retrieves the list of unarchived versions but does not attempt to archive any +``listonly={true/false}`` default is false. Using true retrieves the list of unarchived versions but does not attempt to archive any. ``latestonly={true/false}`` default is false. 
Using true only lists/processes the most recently published version of a given dataset (instead of all published versions) From 011c97a4b73775cf152e0cf06127d8da9e8d2780 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Jan 2021 12:29:46 -0500 Subject: [PATCH 11/47] Update doc/sphinx-guides/source/installation/config.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a997f0e353f..67ee66af763 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -889,7 +889,7 @@ The call supports three optional query parameters that can be used in combinatio ``listonly={true/false}`` default is false. Using true retrieves the list of unarchived versions but does not attempt to archive any. -``latestonly={true/false}`` default is false. Using true only lists/processes the most recently published version of a given dataset (instead of all published versions) +``latestonly={true/false}`` default is false. Using true only lists/processes the most recently published version of a given dataset (instead of all published versions). ``limit={n}`` default is no limit/process all unarchived versions (subject to other parameters). Defines a maximum number of versions to attempt to archive in response to one invocation of the API call. 
From 1a1c28ccb7a6c0427f349cd8569c516bca43bf68 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 8 Jan 2021 13:10:22 -0500 Subject: [PATCH 12/47] updates per review --- .../dataverse/DatasetVersionServiceBean.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 3f46a25c91e..33cc236b902 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -986,8 +986,8 @@ public List> getBasicDatasetVersionInfo(Dataset dataset) } // end getBasicDatasetVersionInfo - - public HashMap getFileMetadataHistory(DataFile df){ + //Not used? + public HashMap getFileMetadataHistory(DataFile df){ if (df == null){ throw new NullPointerException("DataFile 'df' cannot be null"); @@ -1175,18 +1175,18 @@ public List getUnarchivedDatasetVersions(){ String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"; - try{ + try { TypedQuery query = em.createQuery(queryString, DatasetVersion.class); List dsl = query.getResultList(); return dsl; - + } catch (javax.persistence.NoResultException e) { logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}", queryString); return null; - } catch (EJBException e) { - logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); - return null; - } + } catch (EJBException e) { + logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); + return null; + } } // end getUnarchivedDatasetVersions - + } // end class From 4549f0c224ecde4d099218ecaef430bd598634fb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 23 Feb 2022 09:52:20 -0500 Subject: [PATCH 13/47] TDL Bundle text --- src/main/java/propertyFiles/Bundle.properties | 14 +++++++------- 1 file changed, 7 insertions(+), 
7 deletions(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8abca8ff3fd..35d813586ce 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -145,7 +145,7 @@ contact.header=Contact {0} contact.dataverse.header=Email Dataverse Contact contact.dataset.header=Email Dataset Contact contact.to=To -contact.support=Support +contact.support=TDL Dataverse Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. @@ -317,9 +317,9 @@ login.System=Login System login.forgot.text=Forgot your password? login.builtin=Dataverse Account login.institution=Institutional Account -login.institution.blurb=Log in or sign up with your institutional account — more information about account creation. +login.institution.blurb=Log in or sign up with your institutional account — learn more. If you are not affiliated with a TDR member institution (see dropdown menu), please use the Google Login option. login.institution.support.blurbwithLink=Leaving your institution? Please contact {0} for assistance. -login.builtin.credential.usernameOrEmail=Username/Email +login.builtin.credential.usernameOrEmail=Admin ID login.builtin.credential.password=Password login.builtin.invalidUsernameEmailOrPassword=The username, email address, or password you entered is invalid. Need assistance accessing your account? login.signup.blurb=Sign up for a Dataverse account. @@ -335,12 +335,12 @@ login.button.orcid=Create or Connect your ORCID # authentication providers auth.providers.title=Other options auth.providers.tip=You can convert a Dataverse account to use one of the options above. More information about account creation. 
-auth.providers.title.builtin=Username/Email +auth.providers.title.builtin=Admin ID auth.providers.title.shib=Your Institution auth.providers.title.orcid=ORCID -auth.providers.title.google=Google +auth.providers.title.google=Google (No TDR affiliation) auth.providers.title.github=GitHub -auth.providers.blurb=Log in or sign up with your {0} account — more information about account creation. Having trouble? Please contact {3} for assistance. +auth.providers.blurb=Log in or sign up with your Google account — learn more. If you are not affiliated with a TDR member institution, please use the Google Login option. Having trouble? Please contact {3} for assistance. auth.providers.persistentUserIdName.orcid=ORCID iD auth.providers.persistentUserIdName.github=ID auth.providers.persistentUserIdTooltip.orcid=ORCID provides a persistent digital identifier that distinguishes you from other researchers. @@ -383,7 +383,7 @@ shib.welcomeExistingUserMessageDefaultInstitution=your institution shib.dataverseUsername=Dataverse Username shib.currentDataversePassword=Current Dataverse Password shib.accountInformation=Account Information -shib.offerToCreateNewAccount=This information is provided by your institution and will be used to create your Dataverse account. +shib.offerToCreateNewAccount=Contact your TDR liaison to get help and training. Published content cannot be easily deleted. shib.passwordRejected=Validation Error - Your account can only be converted if you provide the correct password for your existing account. If your existing account has been deactivated by an administrator, you cannot convert your account. 
# oauth2/firstLogin.xhtml From 56ff7bc1081b639ebac054c2f723f7dcec103285 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 24 Feb 2022 14:39:07 -0500 Subject: [PATCH 14/47] fix thread use of requestscoped service --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index cd1574074e4..07d7455c20e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1784,7 +1784,7 @@ public void run() { * @return */ @GET - @Path("/archiveAllUnarchivedDataVersions") + @Path("/archiveAllUnarchivedDatasetVersions") public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { try { @@ -1814,7 +1814,7 @@ public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") bool } String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); - + final DataverseRequest request = dvRequestService.getDataverseRequest(); if (cmd != null) { new Thread(new Runnable() { public void run() { @@ -1827,7 +1827,7 @@ public void run() { } if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { try { - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dv); + AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); dv = commandEngine.submit(cmd); if (dv.getArchivalCopyLocation() != null) { From 46f8554f951c1449cc6794a5250e11c8e5b868b3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 24 Feb 2022 14:39:38 -0500 
Subject: [PATCH 15/47] update doc to match api call name --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index d4e0c0d6baa..0e77590125c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -241,7 +241,7 @@ As for the "Remote only" authentication mode, it means that: - The "builtin" authentication provider has been disabled (:ref:`api-toggle-auth-provider`). Note that disabling the "builtin" authentication provider means that the API endpoint for converting an account from a remote auth provider will not work. Converting directly from one remote authentication provider to another (i.e. from GitHub to Google) is not supported. Conversion from remote is always to "builtin". Then the user initiates a conversion from "builtin" to remote. Note that longer term, the plan is to permit multiple login options to the same Dataverse installation account per https://github.com/IQSS/dataverse/issues/3487 (so all this talk of conversion will be moot) but for now users can only use a single login option, as explained in the :doc:`/user/account` section of the User Guide. In short, "remote only" might work for you if you only plan to use a single remote authentication provider such that no conversion between remote authentication providers will be necessary. File Storage: Using a Local Filesystem and/or Swift and/or object stores ---------------------------------------------------------------------------- +------------------------------------------------------------------------ By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara5/glassfish/domains/domain1/files``. 
This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. @@ -954,7 +954,7 @@ where: A batch API call is also available that will attempt to archive any currently unarchived dataset versions: -``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDataVersions`` +``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions`` The call supports three optional query parameters that can be used in combination: From 33b85f4eb5e32b3774df05e9b32b6f91b23be2e0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 24 Feb 2022 14:40:21 -0500 Subject: [PATCH 16/47] adjust to use a space per dataverse (alias) checks to see if space exists --- .../impl/DuraCloudSubmitToArchiveCommand.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 468e99f24c1..17e69d7f356 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -61,8 +61,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Credential credential = new Credential(System.getProperty("duracloud.username"), System.getProperty("duracloud.password")); storeManager.login(credential); - - String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + String spaceName=dataset.getOwner().getAlias(); + String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); ContentStore store; @@ -75,7 +75,9 @@ public 
WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ store = storeManager.getPrimaryContentStore(); // Create space to copy archival files to - store.createSpace(spaceName); + if(!store.spaceExists(spaceName)) { + store.createSpace(spaceName); + } DataCitation dc = new DataCitation(dv); Map metadata = dc.getDataCiteMetadata(); String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( @@ -105,7 +107,7 @@ public void run() { Thread.sleep(10); i++; } - String checksum = store.addContent(spaceName, "datacite.xml", digestInputStream, -1l, null, null, + String checksum = store.addContent(spaceName,baseFileName + "_datacite.xml", digestInputStream, -1l, null, null, null); logger.fine("Content: datacite.xml added with checksum: " + checksum); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); @@ -116,7 +118,7 @@ public void run() { } // Store BagIt file - String fileName = spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + String fileName = baseFileName + "v" + dv.getFriendlyVersionNumber() + ".zip"; // Add BagIt ZIP file // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the @@ -194,7 +196,7 @@ public void run() { if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) { mesg = mesg + ": Prior Version archiving not yet complete?"; } - return new Failure("Unable to create DuraCloud space with name: " + spaceName, mesg); + return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); } catch (NoSuchAlgorithmException e) { logger.severe("MD5 MessageDigest not available!"); } From e205f4b48c64607243812298026c4cfe65c454a0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 24 Feb 2022 17:34:50 -0500 Subject: [PATCH 17/47] custom version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 235d1ec0317..c7cf357dd83 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ --> edu.harvard.iq 
dataverse - 5.9 + 5.9-dev-tdl war dataverse From 30877d783c445543d9341a472c9e1b8f48b440cf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 13:03:31 -0400 Subject: [PATCH 18/47] Use try with resources to close connections for non-200 status --- .../iq/dataverse/util/bagit/BagGenerator.java | 73 ++++++++++--------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 958e61f33e6..431fef4bf18 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -989,46 +989,51 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uri) { + InputStreamSupplier getInputStreamSupplier(final String uriString) { return new InputStreamSupplier() { public InputStream get() { - int tries = 0; - while (tries < 5) { - try { - logger.fine("Get # " + tries + " for " + uri); - HttpGet getMap = createNewGetRequest(new URI(uri), null); - logger.finest("Retrieving " + tries + ": " + uri); - CloseableHttpResponse response; - //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. 
- response = client.execute(getMap); - if (response.getStatusLine().getStatusCode() == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uri); + try { + URI uri = new URI(uriString); + + int tries = 0; + while (tries < 5) { + + logger.fine("Get # " + tries + " for " + uriString); + HttpGet getMap = createNewGetRequest(uri, null); + logger.finest("Retrieving " + tries + ": " + uriString); + try (CloseableHttpResponse response = client.execute(getMap)) { + // Note - if we ever need to pass an HttpClientContext, we need a new one per + // thread. 
+ + if (response.getStatusLine().getStatusCode() == 200) { + logger.finest("Retrieved: " + uri); + return response.getEntity().getContent(); + } + logger.fine("Status: " + response.getStatusLine().getStatusCode()); + tries++; + + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString, + e); + if (tries == 5) { + logger.severe("Final attempt failed for " + uriString); + } + e.printStackTrace(); } - e.printStackTrace(); - } catch (URISyntaxException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); } + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } - logger.severe("Could not read: " + uri); + logger.severe("Could not read: " + uriString); return null; } }; From 4f354c9b7bac80d6932908dead0ccc44c3bdbae4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 13:32:36 -0400 Subject: [PATCH 19/47] munge to insure valid spaceName --- .../impl/DuraCloudSubmitToArchiveCommand.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 17e69d7f356..132309739f8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -61,7 +61,18 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Credential credential = new Credential(System.getProperty("duracloud.username"), System.getProperty("duracloud.password")); 
storeManager.login(credential); - String spaceName=dataset.getOwner().getAlias(); + /* + * Aliases can contain upper case characters which are not allowed in space + * names. Similarly, aliases can contain '_' which isn't allowed in a space + * name. The line below replaces any upper case chars with lowercase and + * replaces any '_' with '.' As written the replaceAll will also change any + * chars not valid in a spaceName to '.' which would avoid code breaking if the + * alias constraints change. That said, this line may map more than one alias to + * the same spaceName, e.g. "test" and "Test" aliases both map to the "test" + * space name. This does not break anything but does potentially put bags from + * more than one collection in the same space. + */ + String spaceName=dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", "."); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); From eebee9629e98ac595ce1cb5b551e9ce808b0b8ca Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 13:50:18 -0400 Subject: [PATCH 20/47] add STANDARD cookie spec trying to avoid InvalidHeader warnings from S3 calls - interpreting from github.com/aws/aws-sdk-java-v2/issues/2421 --- .../java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 431fef4bf18..006e7d9b0e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,6 +47,7 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; 
import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -89,7 +90,7 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).setCookieSpec(CookieSpecs.STANDARD).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; From 3c90bbeebbdab42a9f03bbac45e9ce725cc5f6e8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 14:51:30 -0400 Subject: [PATCH 21/47] modify zero-file behaviour to include empty manifest --- .../java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 006e7d9b0e4..6f8dfd689bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -278,7 +278,8 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); + logger.warning("No Hash values (no files?) 
sending empty manifest to nominally comply with BagIT specification requirement"); + createFileFromString("manifest-md5.txt", ""); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); From 446063e7948be26b8ab8fd227c1dc5dac710a377 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 14:55:36 -0400 Subject: [PATCH 22/47] set non-null copy location in failure cases to avoid retries --- .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 132309739f8..586cf771672 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -54,6 +54,9 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String host = requestedSettings.get(DURACLOUD_HOST); if (host != null) { Dataset dataset = dv.getDataset(); + //ToDo - change after HDC 3A changes to status reporting + //This will make the archivalCopyLocation non-null after a failure which should stop retries + dv.setArchivalCopyLocation("Attempted"); if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { // Use Duracloud client classes to login From 5b2ef347873177f72db78cb207ffd0718a14f4f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 29 Mar 2022 20:31:07 -0400 Subject: [PATCH 23/47] Refactor/fix popup logic --- .../harvard/iq/dataverse/util/FileUtil.java | 76 ++++++++----------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java 
b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 62d576193f6..6f6fa895d36 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1471,73 +1471,61 @@ public static String getCiteDataFileFilename(String fileTitle, FileCitationExten * elaborate on the text "This file cannot be downloaded publicly." */ public static boolean isDownloadPopupRequired(DatasetVersion datasetVersion) { - // Each of these conditions is sufficient reason to have to - // present the user with the popup: - if (datasetVersion == null) { - logger.fine("Download popup required because datasetVersion is null."); - return false; - } - //0. if version is draft then Popup "not required" - if (!datasetVersion.isReleased()) { - logger.fine("Download popup required because datasetVersion has not been released."); - return false; - } - // 1. License and Terms of Use: - if (datasetVersion.getTermsOfUseAndAccess() != null) { - License license = datasetVersion.getTermsOfUseAndAccess().getLicense(); - if ((license == null && StringUtils.isNotBlank(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse())) - || (license != null && !license.isDefault())) { - logger.fine("Download popup required because of license or terms of use."); - return true; - } - - // 2. Terms of Access: - if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { - logger.fine("Download popup required because of terms of access."); - return true; - } + logger.fine("Checking if download popup is required."); + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if (answer != null) { + return answer; } - // 3. 
Guest Book: if (datasetVersion.getDataset() != null && datasetVersion.getDataset().getGuestbook() != null && datasetVersion.getDataset().getGuestbook().isEnabled() && datasetVersion.getDataset().getGuestbook().getDataverse() != null) { logger.fine("Download popup required because of guestbook."); return true; } - logger.fine("Download popup is not required."); return false; } - public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion){ - // Each of these conditions is sufficient reason to have to - // present the user with the popup: + public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion) { + + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if (answer != null) { + return answer; + } + logger.fine("Request access popup is not required."); + return false; + } + + private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { + Boolean answer = null; + // Each of these conditions is sufficient reason to have to + // present the user with the popup: if (datasetVersion == null) { - logger.fine("Download popup required because datasetVersion is null."); - return false; + logger.fine("Popup required because datasetVersion is null."); + answer = false; } - //0. if version is draft then Popup "not required" + // 0. if version is draft then Popup "not required" if (!datasetVersion.isReleased()) { - logger.fine("Download popup required because datasetVersion has not been released."); - return false; + logger.fine("Popup required because datasetVersion has not been released."); + answer = false; } // 1. 
License and Terms of Use: if (datasetVersion.getTermsOfUseAndAccess() != null) { - if (!datasetVersion.getTermsOfUseAndAccess().getLicense().isDefault() + License license = datasetVersion.getTermsOfUseAndAccess().getLicense(); + if ((license != null && !license.isDefault()) && !(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse() == null - || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { - logger.fine("Download popup required because of license or terms of use."); - return true; + || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { + logger.fine("{opup required because of license or terms of use."); + answer = true; } // 2. Terms of Access: if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { - logger.fine("Download popup required because of terms of access."); - return true; + logger.fine("Popup required because of terms of access."); + answer = true; } - } + return answer; - logger.fine("Download popup is not required."); - return false; + } } /** From 917d2d9dd285d8e47dbbe859155efe3cad2f6b4a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 29 Mar 2022 20:47:12 -0400 Subject: [PATCH 24/47] fix return - not clear why Eclipse didn't flag this --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 6f6fa895d36..b5ba114410d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1523,9 +1523,8 @@ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { logger.fine("Popup required because of terms of access."); answer = true; } - return answer; - } + return answer; } /** From 04e4eade3e72b9bde91a66140184eaa49216cb13 Mon 
Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 30 Mar 2022 09:22:02 -0400 Subject: [PATCH 25/47] Use download popup's license test code, cleanup --- .../harvard/iq/dataverse/util/FileUtil.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index b5ba114410d..4b451d92a75 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1496,35 +1496,34 @@ public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion } private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { - Boolean answer = null; + // Each of these conditions is sufficient reason to have to // present the user with the popup: if (datasetVersion == null) { logger.fine("Popup required because datasetVersion is null."); - answer = false; + return false; } // 0. if version is draft then Popup "not required" if (!datasetVersion.isReleased()) { logger.fine("Popup required because datasetVersion has not been released."); - answer = false; + return false; } // 1. License and Terms of Use: if (datasetVersion.getTermsOfUseAndAccess() != null) { License license = datasetVersion.getTermsOfUseAndAccess().getLicense(); - if ((license != null && !license.isDefault()) - && !(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse() == null - || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { - logger.fine("{opup required because of license or terms of use."); - answer = true; + if ((license == null && StringUtils.isNotBlank(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse())) + || (license != null && !license.isDefault())) { + logger.fine("Download popup required because of license or terms of use."); + return true; } // 2. 
Terms of Access: if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { logger.fine("Popup required because of terms of access."); - answer = true; + return true; } } - return answer; + return null; } /** From 9a3913dfd2bc6f96ced98b53fdcd5cdbaf8c0720 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 30 Mar 2022 13:02:18 -0400 Subject: [PATCH 26/47] logging/comment updates per review --- .../java/edu/harvard/iq/dataverse/util/FileUtil.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 4b451d92a75..8d3d63da99d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1495,17 +1495,22 @@ public static boolean isRequestAccessPopupRequired(DatasetVersion datasetVersion return false; } + /* Code shared by isDownloadPopupRequired and isRequestAccessPopupRequired. + * + * Returns Boolean to allow null = no decision. This allows the isDownloadPopupRequired method to then add another check w.r.t. guestbooks before returning its value. + * + */ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { // Each of these conditions is sufficient reason to have to // present the user with the popup: if (datasetVersion == null) { - logger.fine("Popup required because datasetVersion is null."); + logger.fine("Popup not required because datasetVersion is null."); return false; } // 0. if version is draft then Popup "not required" if (!datasetVersion.isReleased()) { - logger.fine("Popup required because datasetVersion has not been released."); + logger.fine("Popup not required because datasetVersion has not been released."); return false; } // 1. 
License and Terms of Use: @@ -1513,7 +1518,7 @@ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { License license = datasetVersion.getTermsOfUseAndAccess().getLicense(); if ((license == null && StringUtils.isNotBlank(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse())) || (license != null && !license.isDefault())) { - logger.fine("Download popup required because of license or terms of use."); + logger.fine("Popup required because of license or terms of use."); return true; } @@ -1523,6 +1528,7 @@ private static Boolean popupDueToStateOrTerms(DatasetVersion datasetVersion) { return true; } } + //No decision based on the criteria above return null; } From 8e4962107ab888e6364fc15d4b66a326643c1379 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 18:32:35 -0400 Subject: [PATCH 27/47] Try to avoid GC causing connection close --- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index ea19d29b41e..6bf43266ffc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -87,6 +87,7 @@ public class S3AccessIO extends StorageIO { private static HashMap driverClientMap = new HashMap(); private static HashMap driverTMMap = new HashMap(); + private S3Object s3Object=null; public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { super(dvObject, req, driverId); @@ -276,7 +277,8 @@ public void open(DataAccessOption... 
options) throws IOException { public InputStream getInputStream() throws IOException { if(super.getInputStream()==null) { try { - setInputStream(s3.getObject(new GetObjectRequest(bucketName, key)).getObjectContent()); + s3Object=s3.getObject(new GetObjectRequest(bucketName, key)); + setInputStream(s3Object.getObjectContent()); } catch (SdkClientException sce) { throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); } @@ -291,6 +293,15 @@ public InputStream getInputStream() throws IOException { return super.getInputStream(); } + /*We're keeping a reference to the s3Object from which the stream comes to avoid it being garbage collected and triggering a + * 'Premature end of Content-Length delimited message body...' error (when GC'd the s3Object closes the connection the stream is using) + * So - here we get rid of our reference to the s3Object before calling the parent class closeInputStream + */ + public void closeInputStream() { + s3Object=null; + super.closeInputStream(); + } + @Override public Channel getChannel() throws IOException { if(super.getChannel()==null) { From 7fc6ba434ae37db04a4d53f60ec99d2daa0c1e73 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 31 Mar 2022 18:34:05 -0400 Subject: [PATCH 28/47] Clearer logging --- .../iq/dataverse/util/bagit/BagGenerator.java | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 6f8dfd689bd..1380f2565dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -59,7 +59,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.ssl.SSLContextBuilder; - +import org.apache.http.util.EntityUtils; import 
org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -1007,14 +1007,20 @@ public InputStream get() { try (CloseableHttpResponse response = client.execute(getMap)) { // Note - if we ever need to pass an HttpClientContext, we need a new one per // thread. - - if (response.getStatusLine().getStatusCode() == 200) { + int statusCode= response.getStatusLine().getStatusCode(); + if (statusCode == 200) { logger.finest("Retrieved: " + uri); return response.getEntity().getContent(); } - logger.fine("Status: " + response.getStatusLine().getStatusCode()); - tries++; - + logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + " : " + statusCode); + if(statusCode < 500) { + logger.fine("Will not retry for 40x errors"); + tries +=5; + } else { + tries++; + } + //Shouldn't be needed - leaving until the Premature end of Content-Legnth delimited message body errors are resolved + //EntityUtils.consumeQuietly(response.getEntity()); } catch (ClientProtocolException e) { tries += 5; // TODO Auto-generated catch block From 26bebc8c686339414284f80047ea23341627e4fb Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Apr 2022 13:26:20 -0400 Subject: [PATCH 29/47] avoid spacename with 'final . 
followed by a number' --- .../impl/DuraCloudSubmitToArchiveCommand.java | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 586cf771672..7bdfd0c573d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -68,14 +68,18 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t * Aliases can contain upper case characters which are not allowed in space * names. Similarly, aliases can contain '_' which isn't allowed in a space * name. The line below replaces any upper case chars with lowercase and - * replaces any '_' with '.' As written the replaceAll will also change any - * chars not valid in a spaceName to '.' which would avoid code breaking if the - * alias constraints change. That said, this line may map more than one alias to - * the same spaceName, e.g. "test" and "Test" aliases both map to the "test" - * space name. This does not break anything but does potentially put bags from - * more than one collection in the same space. + * replaces any '_' with '.-' . The '-' after the dot assures we don't break the + * rule that + * "The last period in a aspace may not immediately be followed by a number". + * (Although we could check, it seems better to just add '.-' all the time.As + * written the replaceAll will also change any chars not valid in a spaceName to + * '.' which would avoid code breaking if the alias constraints change. That + * said, this line may map more than one alias to the same spaceName, e.g. + * "test" and "Test" aliases both map to the "test" space name. 
This does not + * break anything but does potentially put bags from more than one collection in + * the same space. */ - String spaceName=dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", "."); + String spaceName=dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".-"); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); From 45d6e29055dcba90721eb61733dad03a0adbe20b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Apr 2022 17:55:07 -0400 Subject: [PATCH 30/47] Update to use join, try to provide rollback --- .../impl/DuraCloudSubmitToArchiveCommand.java | 104 +++++++++++------- 1 file changed, 63 insertions(+), 41 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 7bdfd0c573d..3ee0fe251a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -31,6 +31,7 @@ import org.duracloud.client.ContentStoreManagerImpl; import org.duracloud.common.model.Credential; import org.duracloud.error.ContentStoreException; +import org.primefaces.component.log.Log; @RequiredPermissions(Permission.PublishDataset) public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -41,21 +42,26 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final String DURACLOUD_PORT = ":DuraCloudPort"; private static final String DURACLOUD_HOST = ":DuraCloudHost"; private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; + boolean success = false; public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); 
} @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { - String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; - String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? requestedSettings.get(DURACLOUD_CONTEXT) : DEFAULT_CONTEXT; + String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) + : DEFAULT_PORT; + String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? requestedSettings.get(DURACLOUD_CONTEXT) + : DEFAULT_CONTEXT; String host = requestedSettings.get(DURACLOUD_HOST); if (host != null) { Dataset dataset = dv.getDataset(); - //ToDo - change after HDC 3A changes to status reporting - //This will make the archivalCopyLocation non-null after a failure which should stop retries + // ToDo - change after HDC 3A changes to status reporting + // This will make the archivalCopyLocation non-null after a failure which should + // stop retries dv.setArchivalCopyLocation("Attempted"); if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { @@ -79,7 +85,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t * break anything but does potentially put bags from more than one collection in * the same space. 
*/ - String spaceName=dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".-"); + String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".-"); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); @@ -93,7 +99,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ store = storeManager.getPrimaryContentStore(); // Create space to copy archival files to - if(!store.spaceExists(spaceName)) { + if (!store.spaceExists(spaceName)) { store.createSpace(spaceName); } DataCitation dc = new DataCitation(dv); @@ -102,80 +108,99 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + try (PipedInputStream dataciteIn = new PipedInputStream(); + DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { // Add datacite.xml file - new Thread(new Runnable() { + Thread dcThread = new Thread(new Runnable() { public void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); dataciteOut.close(); + success=true; } catch (Exception e) { logger.severe("Error creating datacite.xml: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); - throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); } } - }).start(); - //Have seen Pipe Closed errors for other archivers when used as a workflow without this delay loop - int i=0; - while(digestInputStream.available()<=0 && i<100) { + }); + dcThread.start(); + // Have seen Pipe Closed errors for other archivers when used 
as a workflow + // without this delay loop + int i = 0; + while (digestInputStream.available() <= 0 && i < 100) { Thread.sleep(10); i++; } - String checksum = store.addContent(spaceName,baseFileName + "_datacite.xml", digestInputStream, -1l, null, null, - null); + String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream, + -1l, null, null, null); logger.fine("Content: datacite.xml added with checksum: " + checksum); + dcThread.join(); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + baseFileName); + logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); + try { + store.deleteContent(spaceName, baseFileName + "_datacite.xml"); + } catch (ContentStoreException cse) { + logger.warning(cse.getMessage()); + } return new Failure("Error in transferring DataCite.xml file to DuraCloud", "DuraCloud Submission Failure: incomplete metadata transfer"); } // Store BagIt file + success = false; String fileName = baseFileName + "v" + dv.getFriendlyVersionNumber() + ".zip"; // Add BagIt ZIP file // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the // transfer + messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - new Thread(new Runnable() { + try (PipedInputStream in = new PipedInputStream(); + DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + Thread bagThread = new Thread(new Runnable() { public void run() { - try (PipedOutputStream out = new PipedOutputStream(in)){ + try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag BagGenerator bagger = new 
BagGenerator(new OREMap(dv, false), dataciteXml); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); + success = true; } catch (Exception e) { logger.severe("Error creating bag: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); - throw new RuntimeException("Error creating bag: " + e.getMessage()); } } - }).start(); - i=0; - while(digestInputStream.available()<=0 && i<100) { + }); + bagThread.start(); + i = 0; + while (digestInputStream.available() <= 0 && i < 100) { Thread.sleep(10); i++; } - checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, - null); - logger.fine("Content: " + fileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - if (!checksum.equals(localchecksum)) { - logger.severe(checksum + " not equal to " + localchecksum); + checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null); + bagThread.join(); + if (success) { + logger.fine("Content: " + fileName + " added with checksum: " + checksum); + localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); + } + if (!success || !checksum.equals(localchecksum)) { + logger.severe("Failure on " + fileName); + logger.severe(success ? 
checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); + try { + store.deleteContent(spaceName, fileName); + store.deleteContent(spaceName, baseFileName + "_datacite.xml"); + } catch (ContentStoreException cse) { + logger.warning(cse.getMessage()); + } return new Failure("Error in transferring Zip file to DuraCloud", "DuraCloud Submission Failure: incomplete archive transfer"); } - } catch (RuntimeException rte) { - logger.severe(rte.getMessage()); - return new Failure("Error in generating Bag", - "DuraCloud Submission Failure: archive file not created"); } logger.fine("DuraCloud Submission step: Content Transferred"); @@ -199,10 +224,6 @@ public void run() { e.printStackTrace(); return new Failure("Error in transferring file to DuraCloud", "DuraCloud Submission Failure: archive file not transferred"); - } catch (RuntimeException rte) { - logger.severe(rte.getMessage()); - return new Failure("Error in generating datacite.xml file", - "DuraCloud Submission Failure: metadata file not created"); } catch (InterruptedException e) { logger.warning(e.getLocalizedMessage()); e.printStackTrace(); @@ -219,7 +240,8 @@ public void run() { logger.severe("MD5 MessageDigest not available!"); } } else { - logger.warning("DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); + logger.warning( + "DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); return new Failure("Dataset locked"); } return WorkflowStepResult.OK; @@ -227,5 +249,5 @@ public void run() { return new Failure("DuraCloud Submission not configured - no \":DuraCloudHost\"."); } } - + } From acb61d288a781d40ab73f25b95d6eddcafa9c561 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Apr 2022 15:52:29 -0400 Subject: [PATCH 31/47] Add thread control and avoid .- in spaceName which is also prohibited --- .../impl/DuraCloudSubmitToArchiveCommand.java | 15 +++++++++++++-- 1 file 
changed, 13 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 3ee0fe251a0..15d20672bff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -31,7 +31,6 @@ import org.duracloud.client.ContentStoreManagerImpl; import org.duracloud.common.model.Credential; import org.duracloud.error.ContentStoreException; -import org.primefaces.component.log.Log; @RequiredPermissions(Permission.PublishDataset) public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveCommand implements Command { @@ -42,6 +41,8 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final String DURACLOUD_PORT = ":DuraCloudPort"; private static final String DURACLOUD_HOST = ":DuraCloudHost"; private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; + private static final int DEFAULT_THREADS = 2; + boolean success = false; public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { @@ -57,6 +58,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? 
requestedSettings.get(DURACLOUD_CONTEXT) : DEFAULT_CONTEXT; String host = requestedSettings.get(DURACLOUD_HOST); + int bagThreads = DEFAULT_THREADS; + if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { + try { + bagThreads=Integer.valueOf(requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS)); + } catch (NumberFormatException nfe) { + logger.warning("Can't parse the value of setting " + BagGenerator.BAG_GENERATOR_THREADS + " as an integer - using default:" + DEFAULT_THREADS); + } + } + if (host != null) { Dataset dataset = dv.getDataset(); // ToDo - change after HDC 3A changes to status reporting @@ -85,7 +95,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t * break anything but does potentially put bags from more than one collection in * the same space. */ - String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".-"); + String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') .replace('.', '-').toLowerCase(); @@ -167,6 +177,7 @@ public void run() { try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setNumConnections(bagThreads); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); success = true; From ab8325b298d6fb74f1db3ff404a098aed9517dd6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Apr 2022 15:54:40 -0400 Subject: [PATCH 32/47] add method to change thread pool size --- .../edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 1380f2565dc..052e4ccf29b 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -116,6 +116,7 @@ public class BagGenerator { private boolean usetemp = false; private int numConnections = 8; + public static final String BAG_GENERATOR_THREADS = ":BagGeneratorThreads"; private OREMap oremap; @@ -1093,4 +1094,8 @@ public void setAuthenticationKey(String tokenString) { apiKey = tokenString; } + public void setNumConnections(int numConnections) { + this.numConnections = numConnections; + } + } \ No newline at end of file From 5cbfd4a98e88a4196c9f021cf8927c6dacb66baf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Apr 2022 15:56:25 -0400 Subject: [PATCH 33/47] avoid local var --- .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 15d20672bff..838238f4e9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -44,7 +44,7 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final int DEFAULT_THREADS = 2; boolean success = false; - + int bagThreads = DEFAULT_THREADS; public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @@ -58,7 +58,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? 
requestedSettings.get(DURACLOUD_CONTEXT) : DEFAULT_CONTEXT; String host = requestedSettings.get(DURACLOUD_HOST); - int bagThreads = DEFAULT_THREADS; + if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { try { bagThreads=Integer.valueOf(requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS)); From 8f9e7ebc5a420d17f3308d9cd7e9884f9919500b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 14:11:22 -0400 Subject: [PATCH 34/47] don't close response in 200 case --- .../iq/dataverse/util/bagit/BagGenerator.java | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 052e4ccf29b..94b4cc1a6c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.util.bagit; +import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; @@ -1005,23 +1006,34 @@ public InputStream get() { logger.fine("Get # " + tries + " for " + uriString); HttpGet getMap = createNewGetRequest(uri, null); logger.finest("Retrieving " + tries + ": " + uriString); - try (CloseableHttpResponse response = client.execute(getMap)) { + CloseableHttpResponse response = null; + try { + response = client.execute(getMap); // Note - if we ever need to pass an HttpClientContext, we need a new one per // thread. 
- int statusCode= response.getStatusLine().getStatusCode(); + int statusCode = response.getStatusLine().getStatusCode(); if (statusCode == 200) { logger.finest("Retrieved: " + uri); return response.getEntity().getContent(); } - logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + " : " + statusCode); - if(statusCode < 500) { + logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString + + " : " + statusCode); + if (statusCode < 500) { logger.fine("Will not retry for 40x errors"); - tries +=5; + tries += 5; } else { tries++; } - //Shouldn't be needed - leaving until the Premature end of Content-Legnth delimited message body errors are resolved - //EntityUtils.consumeQuietly(response.getEntity()); + // Error handling + if (response != null) { + try { + EntityUtils.consumeQuietly(response.getEntity()); + response.close(); + } catch (IOException io) { + logger.warning( + "Exception closing response after status: " + statusCode + " on " + uri); + } + } } catch (ClientProtocolException e) { tries += 5; // TODO Auto-generated catch block @@ -1037,7 +1049,9 @@ public InputStream get() { } e.printStackTrace(); } + } + } catch (URISyntaxException e) { // TODO Auto-generated catch block e.printStackTrace(); From 14cec22f917e9503254888e89694213790bb7bf0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 14:11:34 -0400 Subject: [PATCH 35/47] add version to datacite file --- .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 838238f4e9e..79c42853706 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -97,7 +97,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase(); + .replace('.', '-').toLowerCase() + "v" + dv.getFriendlyVersionNumber(); ContentStore store; try { @@ -163,7 +163,7 @@ public void run() { // Store BagIt file success = false; - String fileName = baseFileName + "v" + dv.getFriendlyVersionNumber() + ".zip"; + String fileName = baseFileName + ".zip"; // Add BagIt ZIP file // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the From 345c97ae756a5eed6349c61a728b04d3b5b0a0d9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 14:31:09 -0400 Subject: [PATCH 36/47] add _ before version --- .../engine/command/impl/DuraCloudSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 79c42853706..b3b303d7407 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -97,7 +97,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase() + "v" + dv.getFriendlyVersionNumber(); + .replace('.', '-').toLowerCase() + "_v" + 
dv.getFriendlyVersionNumber(); ContentStore store; try { From 1be42f5b49e409b0acf37a135f4fc0307c220dc4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Apr 2022 14:31:26 -0400 Subject: [PATCH 37/47] count success/fail correctly --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 07d7455c20e..719b4aeb1ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1830,7 +1830,7 @@ public void run() { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); dv = commandEngine.submit(cmd); - if (dv.getArchivalCopyLocation() != null) { + if (!dv.getArchivalCopyLocation().equals("Attempted")) { successes++; logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " + dv.getArchivalCopyLocation()); From 4c87e1e00b68c695b8aaab92efdd7d106c7b81fe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:03:10 -0400 Subject: [PATCH 38/47] removing TDL specific changes/obsolete changes --- modules/dataverse-parent/pom.xml | 2 +- .../java/edu/harvard/iq/dataverse/api/Admin.java | 8 ++++---- .../iq/dataverse/dataaccess/S3AccessIO.java | 13 +------------ .../impl/DuraCloudSubmitToArchiveCommand.java | 4 +--- src/main/java/propertyFiles/Bundle.properties | 14 +++++++------- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 9d6584424bd..ab5a915e7e9 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -129,7 +129,7 @@ - 5.10.1-tdl + 5.10.1 11 UTF-8 diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 719b4aeb1ba..7b05d2e9993 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -45,7 +45,6 @@ import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; import javax.ws.rs.DELETE; -import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -98,7 +97,6 @@ import java.io.IOException; import java.io.OutputStream; -import edu.harvard.iq.dataverse.util.json.JsonPrinter; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1830,7 +1828,9 @@ public void run() { AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); dv = commandEngine.submit(cmd); - if (!dv.getArchivalCopyLocation().equals("Attempted")) { + + //ToDo - Change when status PR is merged - a PENDING or SUCCESS status is a success + if (dv.getArchivalCopyLocation()!=null) { successes++; logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " + dv.getArchivalCopyLocation()); @@ -1845,7 +1845,7 @@ public void run() { } logger.fine(successes + failures + " of " + total + " archive submissions complete"); } - logger.info("Archiving complete: " + successes + " Successes, " + failures + " Failures. See prior log messages for details."); + logger.info("Archiving complete: " + successes + " Successfully started, " + failures + " Failures. See prior log messages for details."); } }).start(); return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. 
View log and/or check archive for results."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6bf43266ffc..ea19d29b41e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -87,7 +87,6 @@ public class S3AccessIO extends StorageIO { private static HashMap driverClientMap = new HashMap(); private static HashMap driverTMMap = new HashMap(); - private S3Object s3Object=null; public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { super(dvObject, req, driverId); @@ -277,8 +276,7 @@ public void open(DataAccessOption... options) throws IOException { public InputStream getInputStream() throws IOException { if(super.getInputStream()==null) { try { - s3Object=s3.getObject(new GetObjectRequest(bucketName, key)); - setInputStream(s3Object.getObjectContent()); + setInputStream(s3.getObject(new GetObjectRequest(bucketName, key)).getObjectContent()); } catch (SdkClientException sce) { throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); } @@ -293,15 +291,6 @@ public InputStream getInputStream() throws IOException { return super.getInputStream(); } - /*We're keeping a reference to the s3Object from which the stream comes to avoid it being garbage collected and triggering a - * 'Premature end of Content-Length delimited message body...' 
error (when GC'd the s3Object closes the connection the stream is using) - * So - here we get rid of our reference to the s3Object before calling the parent class closeInputStream - */ - public void closeInputStream() { - s3Object=null; - super.closeInputStream(); - } - @Override public Channel getChannel() throws IOException { if(super.getChannel()==null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index b3b303d7407..324cf268b7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -70,9 +70,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t if (host != null) { Dataset dataset = dv.getDataset(); // ToDo - change after HDC 3A changes to status reporting - // This will make the archivalCopyLocation non-null after a failure which should - // stop retries - dv.setArchivalCopyLocation("Attempted"); + dv.setArchivalCopyLocation(null); if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { // Use Duracloud client classes to login diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index fb788d42664..9895cffe0e7 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -145,7 +145,7 @@ contact.header=Contact {0} contact.dataverse.header=Email Dataverse Contact contact.dataset.header=Email Dataset Contact contact.to=To -contact.support=TDL Dataverse Support +contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. 
@@ -317,9 +317,9 @@ login.System=Login System login.forgot.text=Forgot your password? login.builtin=Dataverse Account login.institution=Institutional Account -login.institution.blurb=Log in or sign up with your institutional account — learn more. If you are not affiliated with a TDR member institution (see dropdown menu), please use the Google Login option. +login.institution.blurb=Log in or sign up with your institutional account — more information about account creation. login.institution.support.blurbwithLink=Leaving your institution? Please contact {0} for assistance. -login.builtin.credential.usernameOrEmail=Admin ID +login.builtin.credential.usernameOrEmail=Username/Email login.builtin.credential.password=Password login.builtin.invalidUsernameEmailOrPassword=The username, email address, or password you entered is invalid. Need assistance accessing your account? login.signup.blurb=Sign up for a Dataverse account. @@ -335,12 +335,12 @@ login.button.orcid=Create or Connect your ORCID # authentication providers auth.providers.title=Other options auth.providers.tip=You can convert a Dataverse account to use one of the options above. More information about account creation. -auth.providers.title.builtin=Admin ID +auth.providers.title.builtin=Username/Email auth.providers.title.shib=Your Institution auth.providers.title.orcid=ORCID -auth.providers.title.google=Google (No TDR affiliation) +auth.providers.title.google=Google auth.providers.title.github=GitHub -auth.providers.blurb=Log in or sign up with your Google account — learn more. If you are not affiliated with a TDR member institution, please use the Google Login option. Having trouble? Please contact {3} for assistance. +auth.providers.blurb=Log in or sign up with your {0} account — more information about account creation. Having trouble? Please contact {3} for assistance. 
auth.providers.persistentUserIdName.orcid=ORCID iD auth.providers.persistentUserIdName.github=ID auth.providers.persistentUserIdTooltip.orcid=ORCID provides a persistent digital identifier that distinguishes you from other researchers. @@ -383,7 +383,7 @@ shib.welcomeExistingUserMessageDefaultInstitution=your institution shib.dataverseUsername=Dataverse Username shib.currentDataversePassword=Current Dataverse Password shib.accountInformation=Account Information -shib.offerToCreateNewAccount=Contact your TDR liaison to get help and training. Published content cannot be easily deleted. +shib.offerToCreateNewAccount=This information is provided by your institution and will be used to create your Dataverse account. shib.passwordRejected=Validation Error - Your account can only be converted if you provide the correct password for your existing account. If your existing account has been deactivated by an administrator, you cannot convert your account. # oauth2/firstLogin.xhtml From 3526d66f01d022552c55e357670becc3796d1255 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:04:11 -0400 Subject: [PATCH 39/47] change batch call to POST --- doc/sphinx-guides/source/installation/config.rst | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 47246cce78f..6a81d7d1f75 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1012,7 +1012,7 @@ where: A batch API call is also available that will attempt to archive any currently unarchived dataset versions: -``curl -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions`` +``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions`` The call supports three optional query parameters that can be 
used in combination: diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 7b05d2e9993..599f6652133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1781,7 +1781,7 @@ public void run() { * lastestonly - only archive the latest versions * @return */ - @GET + @POST @Path("/archiveAllUnarchivedDatasetVersions") public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, @QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { From e4470bf23d7b48e492eb9e61ea6bf43548a5a6a9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:04:39 -0400 Subject: [PATCH 40/47] remove unused import --- .../java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 94b4cc1a6c7..08a46f523c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.util.bagit; -import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; From 819447b7d438da447cf0d9f58708bc500f0df74d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:09:40 -0400 Subject: [PATCH 41/47] remove broader changes --- .../source/installation/config.rst | 18 +-- .../dataverse/DatasetVersionServiceBean.java | 26 +--- .../edu/harvard/iq/dataverse/api/Admin.java | 90 +----------- .../impl/DuraCloudSubmitToArchiveCommand.java | 139 ++++++------------ 4 files changed, 49 insertions(+), 224 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst 
b/doc/sphinx-guides/source/installation/config.rst index 6a81d7d1f75..77feed18491 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -995,10 +995,10 @@ For example: ``cp /usr/local/payara5/glassfish/domains/domain1/files/googlecloudkey.json`` -.. _Archiving API Calls: +.. _Archiving API Call: -API Calls -+++++++++ +API Call +++++++++ Once this configuration is complete, you, as a user with the *PublishDataset* permission, should be able to use the API call to manually submit a DatasetVersion for processing: @@ -1010,18 +1010,6 @@ where: ``{version}`` is the friendly version number, e.g. "1.2". -A batch API call is also available that will attempt to archive any currently unarchived dataset versions: - -``curl -X POST -H "X-Dataverse-key: " http://localhost:8080/api/admin/archiveAllUnarchivedDatasetVersions`` - -The call supports three optional query parameters that can be used in combination: - -``listonly={true/false}`` default is false. Using true retrieves the list of unarchived versions but does not attempt to archive any. - -``latestonly={true/false}`` default is false. Using true only lists/processes the most recently published version of a given dataset (instead of all published versions). - -``limit={n}`` default is no limit/process all unarchived versions (subject to other parameters). Defines a maximum number of versions to attempt to archive in response to one invocation of the API call. - The submitDataVersionToArchive API (and the workflow discussed below) attempt to archive the dataset version via an archive specific method. For Chronopolis, a DuraCloud space named for the dataset (it's DOI with ':' and '.' replaced with '-') is created and two files are uploaded to it: a version-specific datacite.xml metadata file and a BagIt bag containing the data and an OAI-ORE map file. 
(The datacite.xml file, stored outside the Bag as well as inside is intended to aid in discovery while the ORE map file is 'complete', containing all user-entered metadata and is intended as an archival record.) In the Chronopolis case, since the transfer from the DuraCloud front-end to archival storage in Chronopolis can take significant time, it is currently up to the admin/curator to submit a 'snap-shot' of the space within DuraCloud and to monitor its successful transfer. Once transfer is complete the space should be deleted, at which point the Dataverse Software API call can be used to submit a Bag for other versions of the same Dataset. (The space is reused, so that archival copies of different Dataset versions correspond to different snapshots of the same DuraCloud space.). diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 150cd656aed..580d95b4b1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1008,7 +1008,7 @@ public List> getBasicDatasetVersionInfo(Dataset dataset) } // end getBasicDatasetVersionInfo - //Not used? 
+ public HashMap getFileMetadataHistory(DataFile df){ if (df == null){ @@ -1187,28 +1187,4 @@ private DatasetVersion getPreviousVersionWithUnf(DatasetVersion datasetVersion) return null; } - /** - * Execute a query to return DatasetVersion - * - * @param queryString - * @return - */ - public List getUnarchivedDatasetVersions(){ - - String queryString = "SELECT OBJECT(o) FROM DatasetVersion AS o WHERE o.releaseTime IS NOT NULL and o.archivalCopyLocation IS NULL"; - - try { - TypedQuery query = em.createQuery(queryString, DatasetVersion.class); - List dsl = query.getResultList(); - return dsl; - - } catch (javax.persistence.NoResultException e) { - logger.log(Level.FINE, "No unarchived DatasetVersions found: {0}", queryString); - return null; - } catch (EJBException e) { - logger.log(Level.WARNING, "EJBException exception: {0}", e.getMessage()); - return null; - } - } // end getUnarchivedDatasetVersions - } // end class diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 599f6652133..4085b504578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -97,6 +97,7 @@ import java.io.IOException; import java.io.OutputStream; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.rolesToJson; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -1772,95 +1773,6 @@ public void run() { } } - - /** - * Iteratively archives all unarchived dataset versions - * @param - * listonly - don't archive, just list unarchived versions - * limit - max number to process - * lastestonly - only archive the latest versions - * @return - */ - @POST - @Path("/archiveAllUnarchivedDatasetVersions") - public Response archiveAllUnarchivedDatasetVersions(@QueryParam("listonly") boolean listonly, 
@QueryParam("limit") Integer limit, @QueryParam("latestonly") boolean latestonly) { - - try { - AuthenticatedUser au = findAuthenticatedUserOrDie(); - // Note - the user is being set in the session so it becomes part of the - // DataverseRequest and is sent to the back-end command where it is used to get - // the API Token which is then used to retrieve files (e.g. via S3 direct - // downloads) to create the Bag - session.setUser(au); - List dsl = datasetversionService.getUnarchivedDatasetVersions(); - if (dsl != null) { - if (listonly) { - JsonArrayBuilder jab = Json.createArrayBuilder(); - logger.info("Unarchived versions found: "); - int current = 0; - for (DatasetVersion dv : dsl) { - if (limit != null && current >= limit) { - break; - } - if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { - jab.add(dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); - logger.info(" " + dv.getDataset().getGlobalId().toString() + ", v" + dv.getFriendlyVersionNumber()); - current++; - } - } - return ok(jab); - } - String className = settingsService.getValueForKey(SettingsServiceBean.Key.ArchiverClassName); - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, dvRequestService.getDataverseRequest(), dsl.get(0)); - final DataverseRequest request = dvRequestService.getDataverseRequest(); - if (cmd != null) { - new Thread(new Runnable() { - public void run() { - int total = dsl.size(); - int successes = 0; - int failures = 0; - for (DatasetVersion dv : dsl) { - if (limit != null && (successes + failures) >= limit) { - break; - } - if (!latestonly || dv.equals(dv.getDataset().getLatestVersionForCopy())) { - try { - AbstractSubmitToArchiveCommand cmd = ArchiverUtil.createSubmitToArchiveCommand(className, request, dv); - - dv = commandEngine.submit(cmd); - - //ToDo - Change when status PR is merged - a PENDING or SUCCESS status is a success - if (dv.getArchivalCopyLocation()!=null) { - 
successes++; - logger.info("DatasetVersion id=" + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber() + " submitted to Archive at: " - + dv.getArchivalCopyLocation()); - } else { - failures++; - logger.severe("Error submitting version due to conflict/error at Archive for " + dv.getDataset().getGlobalId().toString() + " v" + dv.getFriendlyVersionNumber()); - } - } catch (CommandException ex) { - failures++; - logger.log(Level.SEVERE, "Unexpected Exception calling submit archive command", ex); - } - } - logger.fine(successes + failures + " of " + total + " archive submissions complete"); - } - logger.info("Archiving complete: " + successes + " Successfully started, " + failures + " Failures. See prior log messages for details."); - } - }).start(); - return ok("Archiving all unarchived published dataset versions using " + cmd.getClass().getCanonicalName() + ". Processing can take significant time for large datasets/ large numbers of dataset versions. View log and/or check archive for results."); - } else { - logger.log(Level.SEVERE, "Could not find Archiver class: " + className); - return error(Status.INTERNAL_SERVER_ERROR, "Could not find Archiver class: " + className); - } - } else { - return error(Status.BAD_REQUEST, "No unarchived published dataset versions found"); - } - } catch (WrappedResponse e1) { - return error(Status.UNAUTHORIZED, "api key required"); - } - } - @DELETE @Path("/clearMetricsCache") public Response clearMetricsCache() { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 324cf268b7d..468e99f24c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -41,36 +41,19 @@ public class 
DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final String DURACLOUD_PORT = ":DuraCloudPort"; private static final String DURACLOUD_HOST = ":DuraCloudHost"; private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; - private static final int DEFAULT_THREADS = 2; - - boolean success = false; - int bagThreads = DEFAULT_THREADS; + public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @Override - public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, - Map requestedSettings) { + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { - String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) - : DEFAULT_PORT; - String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? requestedSettings.get(DURACLOUD_CONTEXT) - : DEFAULT_CONTEXT; + String port = requestedSettings.get(DURACLOUD_PORT) != null ? requestedSettings.get(DURACLOUD_PORT) : DEFAULT_PORT; + String dpnContext = requestedSettings.get(DURACLOUD_CONTEXT) != null ? 
requestedSettings.get(DURACLOUD_CONTEXT) : DEFAULT_CONTEXT; String host = requestedSettings.get(DURACLOUD_HOST); - - if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { - try { - bagThreads=Integer.valueOf(requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS)); - } catch (NumberFormatException nfe) { - logger.warning("Can't parse the value of setting " + BagGenerator.BAG_GENERATOR_THREADS + " as an integer - using default:" + DEFAULT_THREADS); - } - } - if (host != null) { Dataset dataset = dv.getDataset(); - // ToDo - change after HDC 3A changes to status reporting - dv.setArchivalCopyLocation(null); if (dataset.getLockFor(Reason.finalizePublication) == null && dataset.getLockFor(Reason.FileValidationFailed) == null) { // Use Duracloud client classes to login @@ -78,24 +61,9 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Credential credential = new Credential(System.getProperty("duracloud.username"), System.getProperty("duracloud.password")); storeManager.login(credential); - /* - * Aliases can contain upper case characters which are not allowed in space - * names. Similarly, aliases can contain '_' which isn't allowed in a space - * name. The line below replaces any upper case chars with lowercase and - * replaces any '_' with '.-' . The '-' after the dot assures we don't break the - * rule that - * "The last period in a aspace may not immediately be followed by a number". - * (Although we could check, it seems better to just add '.-' all the time.As - * written the replaceAll will also change any chars not valid in a spaceName to - * '.' which would avoid code breaking if the alias constraints change. That - * said, this line may map more than one alias to the same spaceName, e.g. - * "test" and "Test" aliases both map to the "test" space name. This does not - * break anything but does potentially put bags from more than one collection in - * the same space. 
- */ - String spaceName = dataset.getOwner().getAlias().toLowerCase().replaceAll("[^a-z0-9-]", ".dcsafe"); - String baseFileName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') - .replace('.', '-').toLowerCase() + "_v" + dv.getFriendlyVersionNumber(); + + String spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); ContentStore store; try { @@ -107,109 +75,87 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t */ store = storeManager.getPrimaryContentStore(); // Create space to copy archival files to - if (!store.spaceExists(spaceName)) { - store.createSpace(spaceName); - } + store.createSpace(spaceName); DataCitation dc = new DataCitation(dv); Map metadata = dc.getDataCiteMetadata(); String dataciteXml = DOIDataCiteRegisterService.getMetadataFromDvObject( dv.getDataset().getGlobalId().asString(), metadata, dv.getDataset()); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream dataciteIn = new PipedInputStream(); - DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { + try (PipedInputStream dataciteIn = new PipedInputStream(); DigestInputStream digestInputStream = new DigestInputStream(dataciteIn, messageDigest)) { // Add datacite.xml file - Thread dcThread = new Thread(new Runnable() { + new Thread(new Runnable() { public void run() { try (PipedOutputStream dataciteOut = new PipedOutputStream(dataciteIn)) { dataciteOut.write(dataciteXml.getBytes(Charset.forName("utf-8"))); dataciteOut.close(); - success=true; } catch (Exception e) { logger.severe("Error creating datacite.xml: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); + throw new RuntimeException("Error creating datacite.xml: " + e.getMessage()); } } - }); - dcThread.start(); - // Have seen Pipe Closed errors for other archivers when used as a workflow - // without this delay loop - int i = 0; 
- while (digestInputStream.available() <= 0 && i < 100) { + }).start(); + //Have seen Pipe Closed errors for other archivers when used as a workflow without this delay loop + int i=0; + while(digestInputStream.available()<=0 && i<100) { Thread.sleep(10); i++; } - String checksum = store.addContent(spaceName, baseFileName + "_datacite.xml", digestInputStream, - -1l, null, null, null); + String checksum = store.addContent(spaceName, "datacite.xml", digestInputStream, -1l, null, null, + null); logger.fine("Content: datacite.xml added with checksum: " + checksum); - dcThread.join(); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + baseFileName); - logger.severe(success ? checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); - try { - store.deleteContent(spaceName, baseFileName + "_datacite.xml"); - } catch (ContentStoreException cse) { - logger.warning(cse.getMessage()); - } + if (!checksum.equals(localchecksum)) { + logger.severe(checksum + " not equal to " + localchecksum); return new Failure("Error in transferring DataCite.xml file to DuraCloud", "DuraCloud Submission Failure: incomplete metadata transfer"); } // Store BagIt file - success = false; - String fileName = baseFileName + ".zip"; + String fileName = spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; // Add BagIt ZIP file // Although DuraCloud uses SHA-256 internally, it's API uses MD5 to verify the // transfer - messageDigest = MessageDigest.getInstance("MD5"); - try (PipedInputStream in = new PipedInputStream(); - DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { - Thread bagThread = new Thread(new Runnable() { + try (PipedInputStream in = new PipedInputStream(); DigestInputStream digestInputStream2 = new DigestInputStream(in, messageDigest)) { + new Thread(new Runnable() { public void run() { - try 
(PipedOutputStream out = new PipedOutputStream(in)) { + try (PipedOutputStream out = new PipedOutputStream(in)){ // Generate bag BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); - bagger.setNumConnections(bagThreads); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); - success = true; } catch (Exception e) { logger.severe("Error creating bag: " + e.getMessage()); // TODO Auto-generated catch block e.printStackTrace(); + throw new RuntimeException("Error creating bag: " + e.getMessage()); } } - }); - bagThread.start(); - i = 0; - while (digestInputStream.available() <= 0 && i < 100) { + }).start(); + i=0; + while(digestInputStream.available()<=0 && i<100) { Thread.sleep(10); i++; } - checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, null); - bagThread.join(); - if (success) { - logger.fine("Content: " + fileName + " added with checksum: " + checksum); - localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); - } - if (!success || !checksum.equals(localchecksum)) { - logger.severe("Failure on " + fileName); - logger.severe(success ? 
checksum + " not equal to " + localchecksum : "failed to transfer to DuraCloud"); - try { - store.deleteContent(spaceName, fileName); - store.deleteContent(spaceName, baseFileName + "_datacite.xml"); - } catch (ContentStoreException cse) { - logger.warning(cse.getMessage()); - } + checksum = store.addContent(spaceName, fileName, digestInputStream2, -1l, null, null, + null); + logger.fine("Content: " + fileName + " added with checksum: " + checksum); + localchecksum = Hex.encodeHexString(digestInputStream2.getMessageDigest().digest()); + if (!checksum.equals(localchecksum)) { + logger.severe(checksum + " not equal to " + localchecksum); return new Failure("Error in transferring Zip file to DuraCloud", "DuraCloud Submission Failure: incomplete archive transfer"); } + } catch (RuntimeException rte) { + logger.severe(rte.getMessage()); + return new Failure("Error in generating Bag", + "DuraCloud Submission Failure: archive file not created"); } logger.fine("DuraCloud Submission step: Content Transferred"); @@ -233,6 +179,10 @@ public void run() { e.printStackTrace(); return new Failure("Error in transferring file to DuraCloud", "DuraCloud Submission Failure: archive file not transferred"); + } catch (RuntimeException rte) { + logger.severe(rte.getMessage()); + return new Failure("Error in generating datacite.xml file", + "DuraCloud Submission Failure: metadata file not created"); } catch (InterruptedException e) { logger.warning(e.getLocalizedMessage()); e.printStackTrace(); @@ -244,13 +194,12 @@ public void run() { if (!(1 == dv.getVersion()) || !(0 == dv.getMinorVersionNumber())) { mesg = mesg + ": Prior Version archiving not yet complete?"; } - return new Failure("Unable to create DuraCloud space with name: " + baseFileName, mesg); + return new Failure("Unable to create DuraCloud space with name: " + spaceName, mesg); } catch (NoSuchAlgorithmException e) { logger.severe("MD5 MessageDigest not available!"); } } else { - logger.warning( - "DuraCloud Submision Workflow 
aborted: Dataset locked for finalizePublication, or because file validation failed"); + logger.warning("DuraCloud Submision Workflow aborted: Dataset locked for finalizePublication, or because file validation failed"); return new Failure("Dataset locked"); } return WorkflowStepResult.OK; @@ -258,5 +207,5 @@ public void run() { return new Failure("DuraCloud Submission not configured - no \":DuraCloudHost\"."); } } - + } From 48dd0e909d00661e81c2aadd5583a21f315bf105 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 9 Apr 2022 18:24:36 -0400 Subject: [PATCH 42/47] remove all but thread code, add Google/Local archivers --- .../impl/AbstractSubmitToArchiveCommand.java | 14 +++ .../impl/DuraCloudSubmitToArchiveCommand.java | 2 + .../GoogleCloudSubmitToArchiveCommand.java | 1 + .../impl/LocalSubmitToArchiveCommand.java | 1 + .../iq/dataverse/util/bagit/BagGenerator.java | 101 +++++++----------- 5 files changed, 56 insertions(+), 63 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index 77ea680598f..4fa0961d134 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; import java.util.Date; @@ -24,6 +25,7 @@ public abstract class AbstractSubmitToArchiveCommand extends AbstractCommand requestedSettings = new HashMap(); private static final Logger logger = Logger.getLogger(AbstractSubmitToArchiveCommand.class.getName()); 
+ private static final int DEFAULT_THREADS = 2; public AbstractSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version.getDataset()); @@ -67,6 +69,18 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { */ abstract public WorkflowStepResult performArchiveSubmission(DatasetVersion version, ApiToken token, Map requestedSetttings); + protected int getNumberOfBagGeneratorThreads() { + if (requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS) != null) { + try { + return Integer.valueOf(requestedSettings.get(BagGenerator.BAG_GENERATOR_THREADS)); + } catch (NumberFormatException nfe) { + logger.warning("Can't parse the value of setting " + BagGenerator.BAG_GENERATOR_THREADS + + " as an integer - using default:" + DEFAULT_THREADS); + } + } + return DEFAULT_THREADS; + } + @Override public String describe() { return super.describe() + "DatasetVersion: [" + version.getId() + " (v" diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java index 468e99f24c1..f30183663e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DuraCloudSubmitToArchiveCommand.java @@ -42,6 +42,7 @@ public class DuraCloudSubmitToArchiveCommand extends AbstractSubmitToArchiveComm private static final String DURACLOUD_HOST = ":DuraCloudHost"; private static final String DURACLOUD_CONTEXT = ":DuraCloudContext"; + public DuraCloudSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { super(aRequest, version); } @@ -128,6 +129,7 @@ public void run() { try (PipedOutputStream out = new PipedOutputStream(in)){ // Generate bag BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + 
bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); } catch (Exception e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java index cb729a9807a..af4c960c2d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GoogleCloudSubmitToArchiveCommand.java @@ -121,6 +121,7 @@ public void run() { try (PipedOutputStream out = new PipedOutputStream(in)) { // Generate bag BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); bagger.generateBag(out); } catch (Exception e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java index 64e635c7d3d..b336d9a77f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LocalSubmitToArchiveCommand.java @@ -58,6 +58,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t new File(localPath + "/" + spaceName + "-datacite.v" + dv.getFriendlyVersionNumber() + ".xml"), dataciteXml, StandardCharsets.UTF_8); BagGenerator bagger = new BagGenerator(new OREMap(dv, false), dataciteXml); + bagger.setNumConnections(getNumberOfBagGeneratorThreads()); bagger.setAuthenticationKey(token.getTokenString()); zipName = localPath + "/" + spaceName + "v" + dv.getFriendlyVersionNumber() + ".zip"; bagger.generateBag(new FileOutputStream(zipName + ".partial")); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index 08a46f523c2..56676e3d00a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -47,7 +47,6 @@ import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.text.WordUtils; import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; @@ -59,7 +58,7 @@ import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.util.EntityUtils; + import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; @@ -90,7 +89,7 @@ public class BagGenerator { private int timeout = 60; private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).setCookieSpec(CookieSpecs.STANDARD).build(); + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000).build(); protected CloseableHttpClient client; private PoolingHttpClientConnectionManager cm = null; @@ -279,8 +278,7 @@ public boolean generateBag(OutputStream outputStream) throws Exception { } createFileFromString(manifestName, sha1StringBuffer.toString()); } else { - logger.warning("No Hash values (no files?) 
sending empty manifest to nominally comply with BagIT specification requirement"); - createFileFromString("manifest-md5.txt", ""); + logger.warning("No Hash values sent - Bag File does not meet BagIT specification requirement"); } // bagit.txt - Required by spec createFileFromString("bagit.txt", "BagIt-Version: 1.0\r\nTag-File-Character-Encoding: UTF-8"); @@ -992,70 +990,46 @@ private HttpGet createNewGetRequest(URI url, String returnType) { return request; } - InputStreamSupplier getInputStreamSupplier(final String uriString) { + InputStreamSupplier getInputStreamSupplier(final String uri) { return new InputStreamSupplier() { public InputStream get() { - try { - URI uri = new URI(uriString); - - int tries = 0; - while (tries < 5) { - - logger.fine("Get # " + tries + " for " + uriString); - HttpGet getMap = createNewGetRequest(uri, null); - logger.finest("Retrieving " + tries + ": " + uriString); - CloseableHttpResponse response = null; - try { - response = client.execute(getMap); - // Note - if we ever need to pass an HttpClientContext, we need a new one per - // thread. 
- int statusCode = response.getStatusLine().getStatusCode(); - if (statusCode == 200) { - logger.finest("Retrieved: " + uri); - return response.getEntity().getContent(); - } - logger.warning("Attempt: " + tries + " - Unexpected Status when retrieving " + uriString - + " : " + statusCode); - if (statusCode < 500) { - logger.fine("Will not retry for 40x errors"); - tries += 5; - } else { - tries++; - } - // Error handling - if (response != null) { - try { - EntityUtils.consumeQuietly(response.getEntity()); - response.close(); - } catch (IOException io) { - logger.warning( - "Exception closing response after status: " + statusCode + " on " + uri); - } - } - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; - logger.log(Level.WARNING, "Attempt# " + tries + " : Unable to retrieve file: " + uriString, - e); - if (tries == 5) { - logger.severe("Final attempt failed for " + uriString); - } - e.printStackTrace(); + int tries = 0; + while (tries < 5) { + try { + logger.fine("Get # " + tries + " for " + uri); + HttpGet getMap = createNewGetRequest(new URI(uri), null); + logger.finest("Retrieving " + tries + ": " + uri); + CloseableHttpResponse response; + //Note - if we ever need to pass an HttpClientContext, we need a new one per thread. 
+ response = client.execute(getMap); + if (response.getStatusLine().getStatusCode() == 200) { + logger.finest("Retrieved: " + uri); + return response.getEntity().getContent(); } - + logger.fine("Status: " + response.getStatusLine().getStatusCode()); + tries++; + + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + logger.log(Level.WARNING,"Attempt# " + tries + " : Unable to retrieve file: " + uri, e); + if (tries == 5) { + logger.severe("Final attempt failed for " + uri); + } + e.printStackTrace(); + } catch (URISyntaxException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); } - - } catch (URISyntaxException e) { - // TODO Auto-generated catch block - e.printStackTrace(); } - logger.severe("Could not read: " + uriString); + logger.severe("Could not read: " + uri); return null; } }; @@ -1109,6 +1083,7 @@ public void setAuthenticationKey(String tokenString) { public void setNumConnections(int numConnections) { this.numConnections = numConnections; + logger.fine("BagGenerator will use " + numConnections + " threads"); } } \ No newline at end of file From 3dfb0c316227708856f755ab3e5603134e5d6a36 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Apr 2022 19:23:42 -0400 Subject: [PATCH 43/47] document setting/function --- .../source/developers/workflows.rst | 5 +++-- doc/sphinx-guides/source/installation/config.rst | 16 +++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst index c982edc08bb..c3ad039271f 100644 --- a/doc/sphinx-guides/source/developers/workflows.rst +++ b/doc/sphinx-guides/source/developers/workflows.rst @@ -180,7 +180,7 @@ archiver A step that sends an archival copy of a Dataset Version to a configured archiver, e.g. 
the DuraCloud interface of Chronopolis. See the `DuraCloud/Chronopolis Integration documentation `_ for further detail. -Note - the example step includes two settings required for any archiver and three (DuraCloud*) that are specific to DuraCloud. +Note - the example step includes two settings required for any archiver, three (DuraCloud*) that are specific to DuraCloud, and the optional BagGeneratorThreads setting that controls parallelism when creating the Bag. .. code:: json @@ -196,7 +196,8 @@ Note - the example step includes two settings required for any archiver and thre ":ArchiverSettings": "string", ":DuraCloudHost":"string", ":DuraCloudPort":"string", - ":DuraCloudContext":"string" + ":DuraCloudContext":"string", + ":BagGeneratorThreads":"string" } } diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7ed9fe1327d..2f098c25e36 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -932,7 +932,7 @@ The minimal configuration to support an archiver integration involves adding a m \:ArchiverSettings - the archiver class can access required settings including existing Dataverse installation settings and dynamically defined ones specific to the class. This setting is a comma-separated list of those settings. For example\: -``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DuraCloudHost, :DuraCloudPort, :DuraCloudContext"`` +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DuraCloudHost, :DuraCloudPort, :DuraCloudContext, :BagGeneratorThreads"`` The DPN archiver defines three custom settings, one of which is required (the others have defaults): @@ -942,6 +942,12 @@ The DPN archiver defines three custom settings, one of which is required (the ot :DuraCloudPort and :DuraCloudContext are also defined if you are not using the defaults ("443" and "duracloud" respectively).
(Note\: these settings are only in effect if they are listed in the \:ArchiverSettings. Otherwise, they will not be passed to the DuraCloud Archiver class.) +It also can use one setting that is common to all Archivers: :BagGeneratorThreads + +``curl http://localhost:8080/api/admin/settings/:BagGeneratorThreads -X PUT -d '8'`` + +By default, the Bag generator zips two datafiles at a time when creating the Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. + Archivers may require JVM options as well. For the Chronopolis archiver, the username and password associated with your organization's Chronopolis/DuraCloud account should be configured in Payara: ``./asadmin create-jvm-options '-Dduracloud.username=YOUR_USERNAME_HERE'`` @@ -963,9 +969,9 @@ ArchiverClassName - the fully qualified class to be used for archiving. For exam \:ArchiverSettings - the archiver class can access required settings including existing Dataverse installation settings and dynamically defined ones specific to the class. This setting is a comma-separated list of those settings. For example\: -``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":BagItLocalPath"`` +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":BagItLocalPath, :BagGeneratorThreads"`` -:BagItLocalPath is the file path that you've set in :ArchiverSettings. +:BagItLocalPath is the file path that you've set in :ArchiverSettings. See the DuraCloud archiver section for a description of :BagGeneratorThreads. ..
_Google Cloud Configuration: @@ -976,9 +982,9 @@ The Google Cloud Archiver can send Dataverse Project Bags to a bucket in Google' ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.GoogleCloudSubmitToArchiveCommand"`` -``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":GoogleCloudBucket, :GoogleCloudProject"`` +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":GoogleCloudBucket, :GoogleCloudProject, :BagGeneratorThreads"`` -The Google Cloud Archiver defines two custom settings, both are required. The credentials for your account, in the form of a json key file, must also be obtained and stored locally (see below): +The Google Cloud Archiver defines two custom settings, both are required. It can also use the :BagGeneratorThreads setting as described in the DuraCloud Archiver section above. The credentials for your account, in the form of a json key file, must also be obtained and stored locally (see below): In order to use the Google Cloud Archiver, you must have a Google account. 
You will need to create a project and bucket within that account and provide those values in the settings: From cd7602b5274ac8d23eff669fbf86ca4eb45a7c8d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 13 Apr 2022 18:23:34 -0400 Subject: [PATCH 44/47] documentation update per review --- doc/sphinx-guides/source/admin/integrations.rst | 12 +++++++----- .../source/developers/workflows.rst | 4 ++-- .../source/installation/config.rst | 17 ++++++++++++----- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 5ee6372d56d..e748ec13057 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -12,7 +12,7 @@ Getting Data In A variety of integrations are oriented toward making it easier for your researchers to deposit data into your Dataverse installation. GitHub -+++++++ +++++++ Dataverse integration with GitHub is implemented via a Dataverse Uploader GitHub Action. It is a reusable, composite workflow for uploading a git repository or subdirectory into a dataset on a target Dataverse installation. The action is customizable, allowing users to choose to replace a dataset, add to the dataset, publish it or leave it as a draft version on Dataverse. The action provides some metadata to the dataset, such as the origin GitHub repository, and it preserves the directory tree structure. @@ -157,12 +157,14 @@ Archivematica Sponsored by the `Ontario Council of University Libraries (OCUL) `_, this technical integration enables users of Archivematica to select datasets from connected Dataverse installations and process them for long-term access and digital preservation. For more information and list of known issues, please refer to Artefactual's `release notes `_, `integration documentation `_, and the `project wiki `_. -DuraCloud/Chronopolis -+++++++++++++++++++++ +.. 
_rda-bagit-archiving: + +RDA BagIt (BagPack) Archiving ++++++++++++++++++++++++++++++ -A Dataverse installation can be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_ +A Dataverse installation can be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage`_. -For details on how to configure this integration, look for "DuraCloud/Chronopolis" in the :doc:`/installation/config` section of the Installation Guide. +For details on how to configure this integration, see :ref:`:BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. Future Integrations ------------------- diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst index c3ad039271f..5efdb3a5370 100644 --- a/doc/sphinx-guides/source/developers/workflows.rst +++ b/doc/sphinx-guides/source/developers/workflows.rst @@ -178,7 +178,7 @@ Available variables are: archiver ++++++++ -A step that sends an archival copy of a Dataset Version to a configured archiver, e.g. the DuraCloud interface of Chronopolis. See the `DuraCloud/Chronopolis Integration documentation `_ for further detail. +A step that sends an archival copy of a Dataset Version to a configured archiver, e.g. the DuraCloud interface of Chronopolis. See the :ref:`rda-bagit-archiving` for further detail. Note - the example step includes two settings required for any archiver, three (DuraCloud*) that are specific to DuraCloud, and the optional BagGeneratorThreads setting that controls parallelism when creating the Bag. 
@@ -197,7 +197,7 @@ Note - the example step includes two settings required for any archiver, three ( ":DuraCloudHost":"string", ":DuraCloudPort":"string", ":DuraCloudContext":"string", - ":BagGeneratorThreads" + ":BagGeneratorThreads":"string" } } diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 2f098c25e36..f890f5312ff 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -944,7 +944,7 @@ The DPN archiver defines three custom settings, one of which is required (the ot It also can use one setting that is common to all Archivers: :BagGeneratorThreads -``curl http://localhost:8080/api/admin/settings/:BagGenerator -X PUT -d '8'`` +``curl http://localhost:8080/api/admin/settings/:BagGeneratorThreads -X PUT -d '8'`` By default, the Bag generator zips two datafiles at a time when creating the Bag. This setting can be used to lower that to 1, i.e. to decrease system load, or to increase it, e.g. to 4 or 8, to speed processing of many small files. @@ -969,9 +969,9 @@ ArchiverClassName - the fully qualified class to be used for archiving. For exam \:ArchiverSettings - the archiver class can access required settings including existing Dataverse installation settings and dynamically defined ones specific to the class. This setting is a comma-separated list of those settings. For example\: -``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":BagItLocalPath, , :BagGeneratorThreads"`` +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":BagItLocalPath, :BagGeneratorThreads"`` -:BagItLocalPath is the file path that you've set in :ArchiverSettings. See the DuraCloud archiver section for a description of :BagGeneratorThreads. +:BagItLocalPath is the file path that you've set in :ArchiverSettings. See the DuraCloud Configuration section for a description of :BagGeneratorThreads. .. 
_Google Cloud Configuration: @@ -984,7 +984,7 @@ The Google Cloud Archiver can send Dataverse Project Bags to a bucket in Google' ``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":GoogleCloudBucket, :GoogleCloudProject, :BagGeneratorThreads"`` -The Google Cloud Archiver defines two custom settings, both are required. It can also use the :BagGeneratorThreads setting as described in the DuraCloud Archiver section above. The credentials for your account, in the form of a json key file, must also be obtained and stored locally (see below): +The Google Cloud Archiver defines two custom settings, both are required. It can also use the :BagGeneratorThreads setting as described in the DuraCloud Configuration section above. The credentials for your account, in the form of a json key file, must also be obtained and stored locally (see below): In order to use the Google Cloud Archiver, you must have a Google account. You will need to create a project and bucket within that account and provide those values in the settings: @@ -2406,6 +2406,13 @@ For example, the LocalSubmitToArchiveCommand only uses the :BagItLocalPath setti ``curl -X PUT -d ':BagItLocalPath' http://localhost:8080/api/admin/settings/:ArchiverSettings`` +:BagGeneratorThreads +++++++++++++++++++++ + +An archiver setting shared by several implementations (e.g. DuraCloud, Google, and Local) that can make Bag generation use fewer or more threads when zipping datafiles than the default of 2. + +``curl http://localhost:8080/api/admin/settings/:BagGeneratorThreads -X PUT -d '8'`` + :DuraCloudHost ++++++++++++++ :DuraCloudPort @@ -2421,7 +2428,7 @@ These three settings define the host, port, and context used by the DuraCloudSub This is the local file system path to be used with the LocalSubmitToArchiveCommand class. It is recommended to use an absolute path. See the :ref:`Local Path Configuration` section above.
:GoogleCloudBucket -++++++++++++++++++ +++++++++++++++++++ :GoogleCloudProject +++++++++++++++++++ From 36aa64ccfcc7e33cf861011860ebc83d13f25dfc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 14 Apr 2022 10:14:05 -0400 Subject: [PATCH 45/47] add required space --- doc/sphinx-guides/source/admin/integrations.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index e748ec13057..d958b2d77d6 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -162,7 +162,7 @@ Sponsored by the `Ontario Council of University Libraries (OCUL) `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage`_. +A Dataverse installation can be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage `_. For details on how to configure this integration, see :ref:`:BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. From 109a4a17e1116374cc11eb0a829dcdb42b0fe9db Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 14 Apr 2022 10:45:59 -0400 Subject: [PATCH 46/47] Update doc/sphinx-guides/source/developers/workflows.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/developers/workflows.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/workflows.rst b/doc/sphinx-guides/source/developers/workflows.rst index 5efdb3a5370..df63bf239fe 100644 --- a/doc/sphinx-guides/source/developers/workflows.rst +++ b/doc/sphinx-guides/source/developers/workflows.rst @@ -178,7 +178,7 @@ Available variables are: archiver ++++++++ -A step that sends an archival copy of a Dataset Version to a configured archiver, e.g. 
the DuraCloud interface of Chronopolis. See the :ref:`rda-bagit-archiving` for further detail. +A step that sends an archival copy of a Dataset Version to a configured archiver, e.g. the DuraCloud interface of Chronopolis. See :ref:`rda-bagit-archiving` for further detail. Note - the example step includes two settings required for any archiver, three (DuraCloud*) that are specific to DuraCloud, and the optional BagGeneratorThreads setting that controls parallelism when creating the Bag. From f6cea7c436ad4654f200c04faa0bbd3dab66987a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 14 Apr 2022 10:57:51 -0400 Subject: [PATCH 47/47] typo - remove : --- doc/sphinx-guides/source/admin/integrations.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index d958b2d77d6..8d3f53981e0 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -164,7 +164,7 @@ RDA BagIt (BagPack) Archiving A Dataverse installation can be configured to submit a copy of published Datasets, packaged as `Research Data Alliance conformant `_ zipped `BagIt `_ bags to the `Chronopolis `_ via `DuraCloud `_, to a local file system, or to `Google Cloud Storage `_. -For details on how to configure this integration, see :ref:`:BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. +For details on how to configure this integration, see :ref:`BagIt Export` in the :doc:`/installation/config` section of the Installation Guide. Future Integrations -------------------