From c0c2bf1b7033aa0b3fe705f079f7f10f882c4508 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Jan 2026 13:33:57 -0500 Subject: [PATCH 1/4] add olderThan param, drop global transaction, catch throwable and log --- .../source/admin/metadataexport.rst | 5 ++- .../iq/dataverse/DatasetServiceBean.java | 38 +++++++++++++++++-- .../harvard/iq/dataverse/api/Metadata.java | 18 ++++++--- 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 97baf3e0c8e..ab10bb974f1 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -22,12 +22,15 @@ In addition to the automated exports, a Dataverse installation admin can start a ``curl http://localhost:8080/api/admin/metadata/reExportAll`` +``curl http://localhost:8080/api/admin/metadata/reExportAll?olderThan=YYYY-MM-DD`` + ``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps`` ``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000`` The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. -The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. +The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. +With the optional olderThan query parameter (a date in YYYY-MM-DD format), the second will *force* re-export of all published, local datasets that were last exported before the olderThan date. The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index a58dad4f4c7..7c481541b2f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -698,6 +698,13 @@ public void reExportAllAsync() { exportAllDatasets(true); } + // reExportAll with a date *forces* a reexport on all published datasets that were not exported or were exported before the date; + @Asynchronous + public void reExportAllAsync(Date reExportDate) { + exportAllDatasets(true, reExportDate); + + } + public void reExportAll() { exportAllDatasets(true); } @@ -715,7 +722,12 @@ public void exportAll() { exportAllDatasets(false); } - public void exportAllDatasets(boolean forceReExport) { + private void exportAllDatasets(boolean b) { + exportAllDatasets(b, null); + } + + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + private void exportAllDatasets(boolean forceReExport, Date reExportDate) { Integer countAll = 0; Integer countSuccess = 0; Integer countError = 0; @@ -757,9 +769,17 @@ public void exportAllDatasets(boolean forceReExport) { // can't trust dataset.getPublicationDate(), no. Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! 
:) - if (forceReExport || (publicationDate != null - && (dataset.getLastExportTime() == null - || dataset.getLastExportTime().before(publicationDate)))) { + /** + * Three cases: force is true and no date given - reexport every dataset force + * is true and reExport date given - reexport datasets last exported before that + * date force is false, reExportDate ignored - reexport datasets last exported + * before they were last published + */ + if ((forceReExport && reExportDate == null) + || (forceReExport && dataset.getLastExportTime().before(reExportDate)) + || (forceReExport == false + && (publicationDate != null && (dataset.getLastExportTime() == null + || dataset.getLastExportTime().before(publicationDate))))) { countAll++; try { recordService.exportAllFormatsInNewTransaction(dataset); @@ -768,6 +788,15 @@ public void exportAllDatasets(boolean forceReExport) { } catch (Exception ex) { exportLogger.log(Level.INFO, "Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + ex.getMessage(), ex); countError++; + } catch (Throwable t) { + exportLogger.log(Level.SEVERE, "Fatal error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalId().asString() + "; " + t.getClass().getName() + ": " + t.getMessage(), t); + exportLogger.info("Datasets processed before fatal error: " + countAll.toString()); + exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); + exportLogger.info("Datasets failures: " + countError.toString()); + if (fileHandlerSuceeded) { + fileHandler.close(); + } + throw t; } } } @@ -1140,4 +1169,5 @@ public void saveStorageQuota(Dataset target, Long allocation) { } em.flush(); } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index bd937878286..8d63d25cfd3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -8,11 +8,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import java.util.Date; import java.util.logging.Logger; import jakarta.ejb.EJB; -import jakarta.json.Json; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.*; import jakarta.ws.rs.core.Response; @@ -57,8 +55,18 @@ public Response exportAll() { @GET @Path("/reExportAll") @Produces("application/json") - public Response reExportAll() { - datasetService.reExportAllAsync(); + public Response reExportAll(@QueryParam(value = "olderThan") String olderThan) { + Date reExportDate = null; + if (olderThan != null && !olderThan.isEmpty()) { + try { + java.text.SimpleDateFormat dateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd"); + dateFormat.setLenient(false); + reExportDate = dateFormat.parse(olderThan); + } catch (java.text.ParseException e) { + return error(Response.Status.BAD_REQUEST, "Invalid date format for olderThan parameter. Expected format: YYYY-MM-DD"); + } + } + datasetService.reExportAllAsync(reExportDate); return this.accepted(); } From 54f3e2cc1a77199958348ee8cc35324a6456d08e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Jan 2026 13:35:30 -0500 Subject: [PATCH 2/4] drop always true fileHandlerSuceeded --- .../iq/dataverse/DatasetServiceBean.java | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 7c481541b2f..7aec9ea8ca0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -735,22 +735,14 @@ private void exportAllDatasets(boolean forceReExport, Date reExportDate) { Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." 
+ "ExportAll" + logTimestamp); String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "export_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded; try { fileHandler = new FileHandler(logFileName); exportLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); return; } - if (fileHandlerSuceeded) { - exportLogger.addHandler(fileHandler); - } else { - exportLogger = logger; - } - exportLogger.info("Starting an export all job"); for (Long datasetId : findAllLocalDatasetIds()) { @@ -793,9 +785,7 @@ private void exportAllDatasets(boolean forceReExport, Date reExportDate) { exportLogger.info("Datasets processed before fatal error: " + countAll.toString()); exportLogger.info("Datasets exported successfully: " + countSuccess.toString()); exportLogger.info("Datasets failures: " + countError.toString()); - if (fileHandlerSuceeded) { - fileHandler.close(); - } + fileHandler.close(); throw t; } } @@ -807,10 +797,7 @@ private void exportAllDatasets(boolean forceReExport, Date reExportDate) { exportLogger.info("Datasets failures: " + countError.toString()); exportLogger.info("Finished export-all job."); - if (fileHandlerSuceeded) { - fileHandler.close(); - } - + fileHandler.close(); } @Asynchronous From 514cb2bc8433e1b9c5a86f640e9ca2c7797b7906 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 14 Jan 2026 13:37:57 -0500 Subject: [PATCH 3/4] handle null lastExportDate with reExportDate --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 7aec9ea8ca0..cbca48f6988 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -768,7 +768,7 @@ private void exportAllDatasets(boolean forceReExport, Date reExportDate) { * before they were last published */ if ((forceReExport && reExportDate == null) - || (forceReExport && dataset.getLastExportTime().before(reExportDate)) + || (forceReExport && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(reExportDate))) || (forceReExport == false && (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate))))) { From 4162baeaa1af76c7c724c08ff3a882fc174aaaf7 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 17 Feb 2026 10:51:28 -0500 Subject: [PATCH 4/4] restore use of handler per review --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index cbca48f6988..87964cd58fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -738,11 +738,11 @@ private void exportAllDatasets(boolean forceReExport, Date reExportDate) { try { fileHandler = new FileHandler(logFileName); exportLogger.setUseParentHandlers(false); + exportLogger.addHandler(fileHandler); } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); return; } - exportLogger.info("Starting an export all job"); for (Long datasetId : findAllLocalDatasetIds()) {