From a2adafcb77996766f201d7adb7647f374230e7f9 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 25 Apr 2023 11:43:53 +0200 Subject: [PATCH 01/59] async indexing after update command --- .../impl/UpdateDatasetVersionCommand.java | 22 ++----------- .../iq/dataverse/search/IndexServiceBean.java | 32 +++++++++++++++++-- src/main/resources/META-INF/persistence.xml | 1 + 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 33f64f23076..0ca290fd062 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -12,17 +11,13 @@ import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; -import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; import javax.validation.ConstraintViolationException; -import org.apache.solr.client.solrj.SolrServerException; - /** * * @author skraffmiller @@ -270,21 +265,8 @@ public Dataset execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { - - boolean retVal = true; - Dataset dataset = (Dataset) r; - - try { - Future indexString = 
ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post update dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - - return retVal; - + ctxt.index().asyncIndexDataset((Dataset) r, true); + return true; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index dbe79f147bd..16bb01e1de3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -366,9 +366,37 @@ public Future indexDatasetObjectInNewTransaction(Dataset dataset) throws return ret; } + private Map nextToIndex = new HashMap<>(); + private Set indexingNow = new HashSet<>(); + + synchronized private Dataset getNextToIndex(Long id, Dataset d) { + if (d == null) { + Dataset next = nextToIndex.remove(id); + if (next == null) { + indexingNow.remove(id); + } + return next; + } + if (indexingNow.contains(id)) { + nextToIndex.put(id, d); + return null; + } + indexingNow.add(id); + return d; + } + @Asynchronous - public Future asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - return indexDataset(dataset, doNormalSolrDocCleanUp); + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) { + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); + while (next != null) { + try { + indexDataset(next, doNormalSolrDocCleanUpe); + } catch (SolrServerException | IOException e) { + logger.warning("unable to index datasat " + id + ": " + e); + } + next = getNextToIndex(id, null); + } } @Asynchronous diff 
--git a/src/main/resources/META-INF/persistence.xml b/src/main/resources/META-INF/persistence.xml index 45552f36939..fd8519b7370 100644 --- a/src/main/resources/META-INF/persistence.xml +++ b/src/main/resources/META-INF/persistence.xml @@ -15,6 +15,7 @@ + From a6ff591974a822d04b5317f90e386e393e9434fc Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 25 Apr 2023 17:37:32 +0200 Subject: [PATCH 02/59] added comments to the code --- .../impl/UpdateDatasetVersionCommand.java | 6 ++++++ .../iq/dataverse/search/IndexServiceBean.java | 20 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 0ca290fd062..309bfcbf031 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -265,6 +265,12 @@ public Dataset execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { + // Async indexing requires the eclipselink.persistence-context.flush-mode persistence parameter to be set to "commit", + // otherwise, it will cause table locking when trying to update the dataset when it is being indexed + // Async indexing significantly improves performance when updating datasets with thousands of files + // Indexing will be started immediately, unless an index is already busy for the given data + // (it will be scheduled then for later indexing of the newest version). + // See the documentation of asyncIndexDataset method for more details. 
ctxt.index().asyncIndexDataset((Dataset) r, true); return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 16bb01e1de3..0dd62cbbc2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -366,21 +366,31 @@ public Future indexDatasetObjectInNewTransaction(Dataset dataset) throws return ret; } + // The following two variables are only used in the synchronized getNextToIndex method and do not need to be synchronized themselves + + // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset + // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) private Map nextToIndex = new HashMap<>(); + // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now private Set indexingNow = new HashSet<>(); + // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished + // Pass non-null Dataset to schedule it for indexing synchronized private Dataset getNextToIndex(Long id, Dataset d) { - if (d == null) { + if (d == null) { // -> indexing of the dataset with id has finished Dataset next = nextToIndex.remove(id); - if (next == null) { + if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing + // the job can be stopped now indexingNow.remove(id); } return next; } - if (indexingNow.contains(id)) { + // index job is requested for a non-null dataset + if (indexingNow.contains(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null nextToIndex.put(id, d); return null; } + // otherwise, start a new job 
indexingNow.add(id); return d; } @@ -388,14 +398,14 @@ synchronized private Dataset getNextToIndex(Long id, Dataset d) { @Asynchronous public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) { Long id = dataset.getId(); - Dataset next = getNextToIndex(id, dataset); + Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) while (next != null) { try { indexDataset(next, doNormalSolrDocCleanUpe); } catch (SolrServerException | IOException e) { logger.warning("unable to index datasat " + id + ": " + e); } - next = getNextToIndex(id, null); + next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped } } From 956488b7cad6ee5c2973e0e4d815f3438f1a4502 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 26 Apr 2023 17:28:19 +0200 Subject: [PATCH 03/59] fixed creating dataverse timing issue --- src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 7185887ecc3..9438236197f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -280,6 +280,7 @@ public R submit(Command aCommand) throws CommandException { } } //This runs the onSuccess Methods for all commands in the stack when the outermost command completes + ctxt.em().flush(); // flush before accessing the object in completeCommand (e.g., indexing) this.completeCommand(aCommand, r, getContext().getCommandsCalled()); return r; From 31cd2922c34cad1d366dbc018fe5c23f1cb1f9a4 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 26 Apr 2023 19:35:06 +0200 Subject: [PATCH 04/59] flush in 
dataverse engine caused problems, it is now called specifiically before atempting indexing of the newly created dataverse --- src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java | 1 - .../iq/dataverse/engine/command/impl/CreateDataverseCommand.java | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index 9438236197f..7185887ecc3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -280,7 +280,6 @@ public R submit(Command aCommand) throws CommandException { } } //This runs the onSuccess Methods for all commands in the stack when the outermost command completes - ctxt.em().flush(); // flush before accessing the object in completeCommand (e.g., indexing) this.completeCommand(aCommand, r, getContext().getCommandsCalled()); return r; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java index cece4230eed..ff07905616e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java @@ -170,6 +170,7 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { + ctxt.em().flush(); // in the case when the root dataverse is created, IndexServiceBean need to see the root dataverse at initialization return ctxt.dataverses().index((Dataverse) r); } From 2ab7820bd58c9982f25e635423d8b62477473d90 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 26 Apr 2023 19:42:54 +0200 Subject: [PATCH 05/59] nullpointer fix in unit test --- .../dataverse/engine/command/impl/CreateDataverseCommand.java | 
4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java index ff07905616e..cc5a57fa9a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java @@ -170,7 +170,9 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { - ctxt.em().flush(); // in the case when the root dataverse is created, IndexServiceBean need to see the root dataverse at initialization + if (ctxt.em() != null) { + ctxt.em().flush(); // in the case when the root dataverse is created, IndexServiceBean need to see the root dataverse at initialization + } return ctxt.dataverses().index((Dataverse) r); } From 94e58c1a123d6a9b34c0419a0fcbd2836048af73 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 26 Apr 2023 23:00:32 +0200 Subject: [PATCH 06/59] fixed the problem with locking -> reverted flush-mode setting --- .../impl/UpdateDatasetVersionCommand.java | 2 -- .../iq/dataverse/search/IndexServiceBean.java | 26 ++++++++++++++++--- src/main/resources/META-INF/persistence.xml | 1 - 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 309bfcbf031..cdc9f275895 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -265,8 +265,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext 
ctxt, Object r) { - // Async indexing requires the eclipselink.persistence-context.flush-mode persistence parameter to be set to "commit", - // otherwise, it will cause table locking when trying to update the dataset when it is being indexed // Async indexing significantly improves performance when updating datasets with thousands of files // Indexing will be started immediately, unless an index is already busy for the given data // (it will be scheduled then for later indexing of the newest version). diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 0dd62cbbc2b..c31a3110f5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -426,8 +426,14 @@ public Future indexDvObject(DvObject objectIn) throws SolrServerExcepti } return null; } - + public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + Future result = doIndexDataset(dataset, doNormalSolrDocCleanUp); + updateLastIndexedTime(dataset.getId()); + return result; + } + + private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { logger.fine("indexing dataset " + dataset.getId()); /** * @todo should we use solrDocIdentifierDataset or @@ -1443,17 +1449,29 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d throw new IOException(ex); } } + return docs.getMessage(); + } + + @Asynchronous + private void updateLastIndexedTime(Long id) { + // indexing is often in a transaction with update statements + // if we flush on query (flush-mode auto), we want to prevent locking + // -> update the dataset asynchronously in a new transaction + updateLastIndexedTimeInNewTransaction(id); + } + + @TransactionAttribute(REQUIRES_NEW) + private void 
updateLastIndexedTimeInNewTransaction(Long id) { /// Dataset updatedDataset = /// (Dataset)dvObjectService.updateContentIndexTime(dataset); /// updatedDataset = null; // instead of making a call to dvObjectService, let's try and // modify the index time stamp using the local EntityManager: - DvObject dvObjectToModify = em.find(DvObject.class, docs.getDatasetId()); + DvObject dvObjectToModify = em.find(DvObject.class, id); dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime())); dvObjectToModify = em.merge(dvObjectToModify); + em.flush(); dvObjectToModify = null; - - return docs.getMessage(); } /** diff --git a/src/main/resources/META-INF/persistence.xml b/src/main/resources/META-INF/persistence.xml index fd8519b7370..45552f36939 100644 --- a/src/main/resources/META-INF/persistence.xml +++ b/src/main/resources/META-INF/persistence.xml @@ -15,7 +15,6 @@ - From 33e56d1d2b8ad0e3b70a313eb3672c6de50818ae Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 27 Apr 2023 12:06:25 +0200 Subject: [PATCH 07/59] async ingest of uploaded file --- .../datasetutility/AddReplaceFileHelper.java | 28 ++++++++++++++----- .../dataverse/ingest/IngestServiceBean.java | 2 ++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index bf831d39965..f41550d8abd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -47,6 +47,8 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + +import javax.ejb.Asynchronous; import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; @@ -1922,26 +1924,38 @@ private boolean step_100_startIngestJobs(){ } // Should only be one file in the list + Long id = null; + if 
(finalFileList.size() == 1) { + id = finalFileList.get(0).getId(); + } setNewlyAddedFiles(finalFileList); // clear old file list // finalFileList.clear(); - // TODO: Need to run ingwest async...... - //if (true){ - //return true; - //} - - if (!multifile) { + if (!multifile && id != null) { msg("pre ingest start"); // start the ingest! - ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); + asyncIngestOneFile(id, dvRequest.getAuthenticatedUser()); msg("post ingest start"); } return true; } + @Asynchronous + private void asyncIngestOneFile(Long id, AuthenticatedUser user) { + // prevent unresponsive servers with high cpu load by ingesting one at a time + ingestOneFileAtATime(id, user); + } + + synchronized private void ingestOneFileAtATime(Long id, AuthenticatedUser user) { + // query by id -> when flush-mode is auto, flush is on query, we make sure that the roles assignment added at create is flushed + DataFile dataFile = fileService.find(id); + if (dataFile.isIngestScheduled()) { + ingestService.startIngestJobs(Arrays.asList(dataFile), user); + } + } private void msg(String m){ logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index fd850ac1b9d..0d9f8ce375f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -467,6 +467,8 @@ public void startIngestJobsForDataset(Dataset dataset, AuthenticatedUser user) { // todo: investigate why when calling save with the file object // gotten from the loop, the roles assignment added at create is removed // (switching to refinding via id resolves that) + // possible explanation: when flush-mode is auto, flush is on query, + // we make sure that the roles assignment added at create is flushed dataFile = fileService.find(dataFile.getId()); scheduledFiles.add(dataFile); } From 
8c23bf2030152373b8c357207456b91b45be710e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 27 Apr 2023 15:02:34 +0200 Subject: [PATCH 08/59] async datset indexing is by default now --- .../dataverse/DatasetVersionServiceBean.java | 8 +-- .../harvard/iq/dataverse/api/Datasets.java | 27 +-------- .../edu/harvard/iq/dataverse/api/EditDDI.java | 6 +- .../edu/harvard/iq/dataverse/api/Index.java | 19 +----- .../impl/AbstractCreateDatasetCommand.java | 8 +-- .../impl/CreateDatasetVersionCommand.java | 2 +- .../DeaccessionDatasetVersionCommand.java | 10 +--- .../DeleteDatasetLinkingDataverseCommand.java | 10 +--- .../impl/DeleteDatasetVersionCommand.java | 8 +-- .../FinalizeDatasetPublicationCommand.java | 9 +-- .../command/impl/LinkDatasetCommand.java | 9 +-- .../command/impl/MoveDatasetCommand.java | 11 +--- .../command/impl/MoveDataverseCommand.java | 9 +-- .../command/impl/RedetectFileTypeCommand.java | 8 +-- .../command/impl/RegisterDvObjectCommand.java | 11 +--- .../impl/ReturnDatasetToAuthorCommand.java | 9 +-- .../impl/SetCurationStatusCommand.java | 9 +-- .../impl/SetDatasetCitationDateCommand.java | 10 +--- .../impl/SubmitDatasetForReviewCommand.java | 10 +--- .../command/impl/UpdateDataverseCommand.java | 10 +--- .../search/IndexBatchServiceBean.java | 24 ++------ .../iq/dataverse/search/IndexServiceBean.java | 58 +++++++++---------- .../command/impl/MoveDatasetCommandTest.java | 3 +- .../impl/MoveDataverseCommandTest.java | 6 +- .../ReturnDatasetToAuthorCommandTest.java | 3 +- .../SubmitDatasetForReviewCommandTest.java | 3 +- 26 files changed, 71 insertions(+), 229 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..9f272ec6877 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1118,13 +1118,7 @@ public 
JsonObjectBuilder fixMissingUnf(String datasetVersionId, boolean forceRec // reindexing the dataset, to make sure the new UNF is in SOLR: boolean doNormalSolrDocCleanUp = true; - try { - Future indexingResult = indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post UNF update indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + datasetVersion.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, datasetVersion.getDataset()); - } + indexService.asyncIndexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); return info; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d40bc153141..0a71cec0eda 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2764,14 +2764,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id } // kick of dataset reindexing, in case the locks removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("locks removed"); } return ok("dataset not locked"); @@ -2784,14 +2777,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id dataset = findDatasetOrDie(id); // ... 
and kick of dataset reindexing, in case the lock removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("lock type " + lock.getReason() + " removed"); } return ok("no lock type " + lockType + " on the dataset"); @@ -2823,14 +2809,7 @@ public Response lockDataset(@Context ContainerRequestContext crc, @PathParam("id // refresh the dataset: dataset = findDatasetOrDie(id); // ... and kick of dataset reindexing: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post add lock indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("dataset locked with lock type " + lockType); } catch (WrappedResponse wr) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java index 513f27c9e3d..3960fe4e996 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java @@ -247,11 +247,7 @@ private boolean createNewDraftVersion(ArrayList neededToUpdate } boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException ex) { - logger.log(Level.SEVERE, "Couldn''t index dataset: " + ex.getMessage()); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index 728d86addcb..1361de8fbf7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -243,12 +243,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L Dataset dataset = datasetService.find(id); if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - // - return error(Status.BAD_REQUEST, writeFailureToLog(e.getLocalizedMessage(), dataset)); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return ok("starting reindex of dataset " + id); } else { @@ -266,11 
+261,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L * @todo How can we display the result to the user? */ boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), datasetThatOwnsTheFile); - } + indexService.asyncIndexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); return ok("started reindexing " + type + "/" + id); } else { @@ -318,11 +309,7 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), dataset); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); JsonObjectBuilder data = Json.createObjectBuilder(); data.add("message", "Reindexed dataset " + persistentId); data.add("id", dataset.getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 8f477a66424..658daa16fea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -134,13 +134,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // TODO: this needs to be moved in to an onSuccess method; not adding to this PR as its out of scope // TODO: switch to asynchronous version when JPA sync works // ctxt.index().asyncIndexDataset(theDataset.getId(), true); - try{ - ctxt.index().indexDataset(theDataset, true); - } catch (IOException | SolrServerException e) 
{ - String failureLogText = "Post create dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + theDataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, theDataset); - } + ctxt.index().asyncIndexDataset(theDataset, true); return theDataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 72439d4ba4a..adfad2de436 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -60,7 +60,7 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { prepareDatasetAndVersion(); // TODO make async - // ctxt.index().indexDataset(dataset); + // ctxt.index().asyncIndexDataset(dataset); return ctxt.datasets().storeVersion(newVersion); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java index 391e798e285..bf4beb3ec01 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java @@ -106,15 +106,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { DatasetVersion version = (DatasetVersion) r; Dataset dataset = version.getDataset(); - try { - ctxt.index().indexDataset(dataset, true); - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java index aada2663bf6..f21a2782609 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java @@ -66,15 +66,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { Dataset dataset = (Dataset) r; if (index) { - try { - ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete linked dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - + ctxt.index().asyncIndexDataset(dataset, true); } return retVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java index 3f63c3c6d27..a67d7008ef8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java @@ -96,13 +96,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } } boolean doNormalSolrDocCleanUp = true; - try { - ctxt.index().indexDataset(doomed, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete version indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + doomed.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, doomed); - } + ctxt.index().asyncIndexDataset(doomed, doNormalSolrDocCleanUp); return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index cb46b36eb53..f0649266840 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -238,14 +238,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { } catch (Exception e) { logger.warning("Failure to send dataset published messages for : " + dataset.getId() + " : " + e.getMessage()); } - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. 
You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); //re-indexing dataverses that have additional subjects if (!dataversesToIndex.isEmpty()){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java index da70529e76d..aef749d7e26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java @@ -69,14 +69,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; DatasetLinkingDataverse dld = (DatasetLinkingDataverse) r; - try { - ctxt.index().indexDataset(dld.getDataset(), true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post link dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dld.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dld.getDataset()); - retVal = false; - } + ctxt.index().asyncIndexDataset(dld.getDataset(), true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java index 55d02362e88..94bcfa2f5b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java @@ -141,15 +141,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException { moved.setOwner(destination); ctxt.em().merge(moved); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(moved, doNormalSolrDocCleanUp); - - } catch (Exception e) { // RuntimeException e ) { - logger.log(Level.WARNING, "Exception while indexing:" + e.getMessage()); //, e); - throw new CommandException(BundleUtil.getStringFromBundle("dashboard.card.datamove.dataset.command.error.indexingProblem"), this); - - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(moved, doNormalSolrDocCleanUp); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java index dcae4e039e6..ea38f5a7af7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java @@ -302,14 +302,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { if (moved.getDatasetLinkingDataverses() != null && 
!moved.getDatasetLinkingDataverses().isEmpty()) { for (DatasetLinkingDataverse dld : moved.getDatasetLinkingDataverses()) { Dataset linkedDS = ctxt.datasets().find(dld.getDataset().getId()); - try { - ctxt.index().indexDataset(linkedDS, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post move dataverse dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + linkedDS.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, linkedDS); - - } + ctxt.index().asyncIndexDataset(linkedDS, true); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index 286b107a5fd..6e0e7ed92d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -83,12 +83,8 @@ public DataFile execute(CommandContext ctxt) throws CommandException { throw new CommandException("Exception while attempting to save the new file type: " + EjbUtil.ejbExceptionToString(ex), this); } Dataset dataset = fileToRedetect.getOwner(); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (Exception ex) { - logger.info("Exception while reindexing files during file type redetection: " + ex.getLocalizedMessage()); - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); try { ExportService instance = ExportService.getInstance(); instance.exportAllFormats(dataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 299d1a925f4..6da3bf0ad84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -137,15 +137,8 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { //Only continue if you can successfully migrate the handle boolean doNormalSolrDocCleanUp = true; Dataset dataset = (Dataset) target; - try { - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post migrate handle dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - - } + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); + ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index ba0348f57d6..caf37ad4de1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -72,14 +72,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post return to author indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java index 72f0ef335fb..557f9dff622 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java @@ -101,14 +101,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java index fe14d56562d..2cae9e51896 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java @@ -49,14 +49,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, false); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post set dataset citation date indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, false); + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index 130030798ab..77a4bf5b8ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -75,14 +75,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); + return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index 57a3394ff77..218b0ea89d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -113,13 +113,9 @@ public boolean onSuccess(CommandContext ctxt, Object r) { // first kick of async index of datasets // TODO: is this actually needed? Is there a better way to handle - try { - Dataverse result = (Dataverse) r; - List datasets = ctxt.datasets().findByOwnerId(result.getId()); - ctxt.index().asyncIndexDatasetList(datasets, true); - } catch (IOException | SolrServerException e) { - // these datasets are being indexed asynchrounously, so not sure how to handle errors here - } + Dataverse result = (Dataverse) r; + List datasets = ctxt.datasets().findByOwnerId(result.getId()); + ctxt.index().asyncIndexDatasetList(datasets, true); return ctxt.dataverses().index((Dataverse) r); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java index 34c145fa6e8..932f58d875d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java @@ -205,15 +205,9 @@ public Future indexAllOrSubset(long numPartitions, long partitionId, boo int datasetFailureCount = 0; List datasetIds = 
datasetService.findAllOrSubsetOrderByFilesOwned(skipIndexed); for (Long id : datasetIds) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); - Future result = indexService.indexDatasetInNewTransaction(id); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); + indexService.indexDatasetInNewTransaction(id); } logger.info("done iterating through all datasets"); @@ -269,15 +263,9 @@ public void indexDataverseRecursively(Dataverse dataverse) { // index the Dataset children for (Long childId : datasetChildren) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); - indexService.indexDatasetInNewTransaction(childId); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); + indexService.indexDatasetInNewTransaction(childId); } long end = System.currentTimeMillis(); if (datasetFailureCount + dataverseFailureCount > 0){ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index c31a3110f5b..4fad491cd01 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -348,35 +348,24 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) } @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetInNewTransaction(Long datasetId) throws SolrServerException, IOException{ //Dataset dataset) { + public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { boolean doNormalSolrDocCleanUp = false; Dataset dataset = em.find(Dataset.class, datasetId); - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); + asyncIndexDataset(dataset, doNormalSolrDocCleanUp); dataset = null; - return ret; - } - - @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetObjectInNewTransaction(Dataset dataset) throws SolrServerException, IOException{ //Dataset dataset) { - boolean doNormalSolrDocCleanUp = false; - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); - dataset = null; - return ret; } // The following two variables are only used in the synchronized getNextToIndex method and do not need to be synchronized themselves // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) - private Map nextToIndex = new HashMap<>(); + private static Map nextToIndex = new HashMap<>(); // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now - private Set indexingNow = new HashSet<>(); + private static Set indexingNow = new HashSet<>(); // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished // Pass 
non-null Dataset to schedule it for indexing - synchronized private Dataset getNextToIndex(Long id, Dataset d) { + synchronized private static Dataset getNextToIndex(Long id, Dataset d) { if (d == null) { // -> indexing of the dataset with id has finished Dataset next = nextToIndex.remove(id); if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing @@ -401,33 +390,31 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) while (next != null) { try { + logger.warning("indexing dataset " + id); indexDataset(next, doNormalSolrDocCleanUpe); + logger.warning("done indexing dataset " + id); } catch (SolrServerException | IOException e) { - logger.warning("unable to index datasat " + id + ": " + e); + logger.warning("unable to index dataset " + id + ": " + e); } next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped } } - @Asynchronous - public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { - indexDataset(dataset, true); + asyncIndexDataset(dataset, true); } } - public Future indexDvObject(DvObject objectIn) throws SolrServerException, IOException { - + public void indexDvObject(DvObject objectIn) throws SolrServerException, IOException { if (objectIn.isInstanceofDataset() ){ - return (indexDataset((Dataset)objectIn, true)); - } - if (objectIn.isInstanceofDataverse() ){ - return (indexDataverse((Dataverse)objectIn)); + asyncIndexDataset((Dataset)objectIn, true); + } else if (objectIn.isInstanceofDataverse() ){ + 
indexDataverse((Dataverse)objectIn); } - return null; } - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + private Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { Future result = doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); return result; @@ -452,6 +439,7 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl List versions = dataset.getVersions(); List solrIdsOfFilesToDelete = new ArrayList<>(); for (DatasetVersion datasetVersion : versions) { + logger.warning("indexing version " + datasetVersion.getId()); Long versionDatabaseId = datasetVersion.getId(); String versionTitle = datasetVersion.getTitle(); String semanticVersion = datasetVersion.getSemanticVersion(); @@ -464,6 +452,7 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); List fileMetadatas = datasetVersion.getFileMetadatas(); List fileInfo = new ArrayList<>(); + logger.warning("iterating..."); for (FileMetadata fileMetadata : fileMetadatas) { String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); /** @@ -480,6 +469,7 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); } + logger.warning("adding more ids..."); try { /** * Preemptively delete *all* Solr documents for files associated @@ -513,10 +503,12 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); } debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + logger.warning("cleanup..."); if (doNormalSolrDocCleanUp) { IndexResponse 
resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } + logger.warning("init..."); DatasetVersion latestVersion = dataset.getLatestVersion(); String latestVersionStateString = latestVersion.getVersionState().name(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); @@ -771,6 +763,8 @@ private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { } private IndexResponse indexDatasetPermissions(Dataset dataset) { + + logger.warning("indexing permissions"); boolean disabledForDebugging = false; if (disabledForDebugging) { /** @@ -781,14 +775,19 @@ private IndexResponse indexDatasetPermissions(Dataset dataset) { return new IndexResponse("permissions indexing disabled for debugging"); } IndexResponse indexResponse = solrIndexService.indexPermissionsOnSelfAndChildren(dataset); + logger.warning("done indexing permissions"); return indexResponse; } private String addOrUpdateDataset(IndexableDataset indexableDataset) throws SolrServerException, IOException { - return addOrUpdateDataset(indexableDataset, null); + logger.warning("addOrUpdateDataset"); + String result = addOrUpdateDataset(indexableDataset, null); + logger.warning("addOrUpdateDataset done"); + return result; } public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { + logger.warning("toSolrDocs"); IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -1433,6 +1432,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set indexDataset(Dataset dataset, boolean 
doNormalSolrDocCleanUp){ - return null; + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp){ } }; } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommandTest.java index 13b60f875d5..7a27625f7b8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommandTest.java @@ -221,8 +221,7 @@ public Future indexDataverse(Dataverse dataverse){ } @Override - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp){ - return null; + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp){ } @Override public Future indexDataverseInNewTransaction(Dataverse dataverse){ @@ -230,8 +229,7 @@ public Future indexDataverseInNewTransaction(Dataverse dataverse){ } @Override - public Future indexDatasetInNewTransaction(Long id){ - return null; + public void indexDatasetInNewTransaction(Long id){ } }; diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java index 8b5556b70c9..a57beb172a6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java @@ -61,8 +61,7 @@ public AuthenticatedUser getAuthenticatedUser(String id) { public IndexServiceBean index() { return new IndexServiceBean() { @Override - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - return null; + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { } }; } diff --git 
a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommandTest.java index 1098b10a041..e882560c601 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommandTest.java @@ -60,8 +60,7 @@ public AuthenticatedUser getAuthenticatedUser(String id) { public IndexServiceBean index() { return new IndexServiceBean() { @Override - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - return null; + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { } }; } From 5006bcea3c7e3a61a6246dad8c593080a4e328ac Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 27 Apr 2023 15:10:56 +0200 Subject: [PATCH 09/59] removed previously added logger lines for debugging --- .../iq/dataverse/search/IndexServiceBean.java | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 4fad491cd01..18c38e04dc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -390,9 +390,7 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) while (next != null) { try { - logger.warning("indexing dataset " + id); indexDataset(next, doNormalSolrDocCleanUpe); - logger.warning("done indexing dataset " + id); } catch (SolrServerException | IOException e) { logger.warning("unable to index dataset " 
+ id + ": " + e); } @@ -439,7 +437,6 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl List versions = dataset.getVersions(); List solrIdsOfFilesToDelete = new ArrayList<>(); for (DatasetVersion datasetVersion : versions) { - logger.warning("indexing version " + datasetVersion.getId()); Long versionDatabaseId = datasetVersion.getId(); String versionTitle = datasetVersion.getTitle(); String semanticVersion = datasetVersion.getSemanticVersion(); @@ -452,7 +449,6 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); List fileMetadatas = datasetVersion.getFileMetadatas(); List fileInfo = new ArrayList<>(); - logger.warning("iterating..."); for (FileMetadata fileMetadata : fileMetadatas) { String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); /** @@ -469,7 +465,6 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); } - logger.warning("adding more ids..."); try { /** * Preemptively delete *all* Solr documents for files associated @@ -503,12 +498,10 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); } debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); - logger.warning("cleanup..."); if (doNormalSolrDocCleanUp) { IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } - logger.warning("init..."); DatasetVersion latestVersion = dataset.getLatestVersion(); String latestVersionStateString = 
latestVersion.getVersionState().name(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); @@ -763,8 +756,6 @@ private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { } private IndexResponse indexDatasetPermissions(Dataset dataset) { - - logger.warning("indexing permissions"); boolean disabledForDebugging = false; if (disabledForDebugging) { /** @@ -775,19 +766,15 @@ private IndexResponse indexDatasetPermissions(Dataset dataset) { return new IndexResponse("permissions indexing disabled for debugging"); } IndexResponse indexResponse = solrIndexService.indexPermissionsOnSelfAndChildren(dataset); - logger.warning("done indexing permissions"); return indexResponse; } private String addOrUpdateDataset(IndexableDataset indexableDataset) throws SolrServerException, IOException { - logger.warning("addOrUpdateDataset"); String result = addOrUpdateDataset(indexableDataset, null); - logger.warning("addOrUpdateDataset done"); return result; } - public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { - logger.warning("toSolrDocs"); + public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -1432,7 +1419,6 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Thu, 27 Apr 2023 15:30:53 +0200 Subject: [PATCH 10/59] simplified ingest one file method --- .../iq/dataverse/datasetutility/AddReplaceFileHelper.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java 
b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index f41550d8abd..02cd19c37af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1945,12 +1945,6 @@ private boolean step_100_startIngestJobs(){ @Asynchronous private void asyncIngestOneFile(Long id, AuthenticatedUser user) { - // prevent unresponsive servers with high cpu load by ingesting one at a time - ingestOneFileAtATime(id, user); - } - - synchronized private void ingestOneFileAtATime(Long id, AuthenticatedUser user) { - // query by id -> when flush-mode is auto, flush is on query, we make sure that the roles assignment added at create is flushed DataFile dataFile = fileService.find(id); if (dataFile.isIngestScheduled()) { ingestService.startIngestJobs(Arrays.asList(dataFile), user); From e12ac79bd64a827821574a53a975ebb4f39e6a64 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 27 Apr 2023 18:56:32 +0200 Subject: [PATCH 11/59] reverted file ingesting change --- .../datasetutility/AddReplaceFileHelper.java | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 02cd19c37af..9a82415b7f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -1924,32 +1924,20 @@ private boolean step_100_startIngestJobs(){ } // Should only be one file in the list - Long id = null; - if (finalFileList.size() == 1) { - id = finalFileList.get(0).getId(); - } setNewlyAddedFiles(finalFileList); // clear old file list // finalFileList.clear(); - if (!multifile && id != null) { + if (!multifile) { msg("pre ingest start"); // start the ingest! 
- asyncIngestOneFile(id, dvRequest.getAuthenticatedUser()); + ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser()); msg("post ingest start"); } return true; } - - @Asynchronous - private void asyncIngestOneFile(Long id, AuthenticatedUser user) { - DataFile dataFile = fileService.find(id); - if (dataFile.isIngestScheduled()) { - ingestService.startIngestJobs(Arrays.asList(dataFile), user); - } - } private void msg(String m){ logger.fine(m); From baebc68973a00e6a49cd66376c6ac39371d8f974 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 27 Apr 2023 19:43:43 +0200 Subject: [PATCH 12/59] added sleep for search to failing integration tests --- src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java | 1 + src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index ed4d255ab74..5dd91951d1a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1106,6 +1106,7 @@ public void testAccessFacet() { long fileId = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + UtilIT.sleepForSearch("id:datafile_" + fileId + "_draft", apiToken, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response searchShouldFindNothingBecauseUnpublished = UtilIT.search("id:datafile_" + fileId + "_draft", apiToken); searchShouldFindNothingBecauseUnpublished.prettyPrint(); searchShouldFindNothingBecauseUnpublished.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index bc3f9471107..18647195409 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -72,6 +72,7 @@ public void testSearchPermisions() throws 
InterruptedException { Integer datasetId1 = UtilIT.getDatasetIdFromResponse(createDataset1Response); + UtilIT.sleepForSearch("id:dataset_" + datasetId1 + "_draft", apiToken1, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response shouldBeVisibleToUser1 = UtilIT.search("id:dataset_" + datasetId1 + "_draft", apiToken1); shouldBeVisibleToUser1.prettyPrint(); shouldBeVisibleToUser1.then().assertThat() @@ -1220,6 +1221,7 @@ public void testGeospatialSearch() { String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); // Plymouth rock (41.9580775,-70.6621063) is within 50 km of Cambridge. Hit. + UtilIT.sleepForSearch("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50"); search1.prettyPrint(); search1.then().assertThat() From 1d8b526516c2be2d24670f05679fe36b76c96c3d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 28 Apr 2023 12:01:35 +0200 Subject: [PATCH 13/59] integration tests fixes --- .../edu/harvard/iq/dataverse/api/FilesIT.java | 1 - .../harvard/iq/dataverse/api/SearchIT.java | 4 +- .../edu/harvard/iq/dataverse/api/UtilIT.java | 38 ++++++++++++++----- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 5dd91951d1a..ed4d255ab74 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -1106,7 +1106,6 @@ public void testAccessFacet() { long fileId = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); - UtilIT.sleepForSearch("id:datafile_" + fileId + "_draft", apiToken, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response 
searchShouldFindNothingBecauseUnpublished = UtilIT.search("id:datafile_" + fileId + "_draft", apiToken); searchShouldFindNothingBecauseUnpublished.prettyPrint(); searchShouldFindNothingBecauseUnpublished.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 18647195409..0bc1564b881 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -72,7 +72,6 @@ public void testSearchPermisions() throws InterruptedException { Integer datasetId1 = UtilIT.getDatasetIdFromResponse(createDataset1Response); - UtilIT.sleepForSearch("id:dataset_" + datasetId1 + "_draft", apiToken1, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response shouldBeVisibleToUser1 = UtilIT.search("id:dataset_" + datasetId1 + "_draft", apiToken1); shouldBeVisibleToUser1.prettyPrint(); shouldBeVisibleToUser1.then().assertThat() @@ -748,6 +747,7 @@ public void testIdentifier() { System.out.println("identifier: " + identifier); String searchPart = identifier.replace("FK2/", ""); + UtilIT.sleepForReindex(identifier, apiToken, 5); Response searchUnpublished = UtilIT.search(searchPart, apiToken); searchUnpublished.prettyPrint(); searchUnpublished.then().assertThat() @@ -763,6 +763,7 @@ public void testIdentifier() { .statusCode(OK.getStatusCode()); searchPart = identifier.replace("FK2/", ""); + UtilIT.sleepForReindex(identifier, apiToken, 5); Response searchTargeted = UtilIT.search("dsPersistentId:" + searchPart, apiToken); searchTargeted.prettyPrint(); searchTargeted.then().assertThat() @@ -1221,7 +1222,6 @@ public void testGeospatialSearch() { String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); // Plymouth rock (41.9580775,-70.6621063) is within 50 km of Cambridge. Hit. 
- UtilIT.sleepForSearch("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50", UtilIT.MAXIMUM_INGEST_LOCK_DURATION); Response search1 = UtilIT.search("id:dataset_" + datasetId + "_draft", apiToken, "&show_entity_ids=true&geo_point=41.9580775,-70.6621063&geo_radius=50"); search1.prettyPrint(); search1.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 642480cf11c..209bc61e7fa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1898,6 +1898,7 @@ static Response exportDataverse(String identifier, String apiToken) { } static Response search(String query, String apiToken, String parameterString) { + sleepForDatasetIndex(query, apiToken); RequestSpecification requestSpecification = given(); if (apiToken != null) { requestSpecification = given() @@ -1906,11 +1907,24 @@ static Response search(String query, String apiToken, String parameterString) { return requestSpecification.get("/api/search?q=" + query + parameterString); } + private static void sleepForDatasetIndex(String query, String apiToken) { + if (query.contains("id:dataset") || query.contains("id:datafile")) { + String[] splitted = query.split("_"); + if (splitted.length >= 2) { + boolean ok = UtilIT.sleepForReindex(String.valueOf(splitted[1]), apiToken, 5); + if (!ok) { + logger.info("Still indexing after 5 seconds"); + } + } + } + } + static Response search(String query, String apiToken) { return search(query, apiToken, ""); } static Response searchAndShowFacets(String query, String apiToken) { + sleepForDatasetIndex(query, apiToken); RequestSpecification requestSpecification = given(); if (apiToken != null) { requestSpecification = given() @@ -2485,11 +2499,15 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur int repeats = 
durationInSeconds * (1000 / sleepStep); boolean stale=true; do { - timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); - System.out.println(timestampResponse.body().asString()); - String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); - System.out.println(hasStaleIndex); - stale = Boolean.parseBoolean(hasStaleIndex); + try { + timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); + System.out.println(timestampResponse.body().asString()); + String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); + System.out.println(hasStaleIndex); + stale = Boolean.parseBoolean(hasStaleIndex); + } catch (Exception e) { + stale = false; + } try { Thread.sleep(sleepStep); @@ -2635,10 +2653,12 @@ static Response getDatasetTimestamps(String idOrPersistentId, String apiToken) { queryParams = "?persistentId=" + idOrPersistentId; } - Response response = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("api/datasets/" + idInPath + "/timestamps" + queryParams); - return response; + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("api/datasets/" + idInPath + "/timestamps" + queryParams); } static Response exportOaiSet(String setName) { From bc95cf36772b53ba83da8334f09e5fcba304ba13 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 28 Apr 2023 16:59:21 +0200 Subject: [PATCH 14/59] integration test fix --- src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java index dfe23c7d80a..ddb08e82165 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java @@ -301,6 +301,7 @@ public void 
testMoveLinkedDataset() { .statusCode(OK.getStatusCode()) .body("feed.entry[0].id", CoreMatchers.endsWith(datasetPid)); + UtilIT.sleepForReindex(datasetPid, superuserApiToken, 5); Response getLinksAfter = UtilIT.getDatasetLinks(datasetPid, superuserApiToken); getLinksAfter.prettyPrint(); getLinksAfter.then().assertThat() From b4a506b62820c145d3962a49566171a77fb08981 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 2 May 2023 15:38:08 +0200 Subject: [PATCH 15/59] improvements after review --- .../iq/dataverse/search/IndexServiceBean.java | 21 +++++++------------ .../edu/harvard/iq/dataverse/api/UtilIT.java | 14 +++++-------- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 18c38e04dc2..3bdfdb5b52b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -392,7 +393,9 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) try { indexDataset(next, doNormalSolrDocCleanUpe); } catch (SolrServerException | IOException e) { - logger.warning("unable to index dataset " + id + ": " + e); + String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); } next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped } @@ -412,13 +415,12 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } } - private Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - Future result = doIndexDataset(dataset, doNormalSolrDocCleanUp); + private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); - return result; } - private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { logger.fine("indexing dataset " + dataset.getId()); /** * @todo should we use solrDocIdentifierDataset or @@ -577,7 +579,6 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true); @@ -624,11 +625,9 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else 
{ String result = "No-op. Unexpected condition reached: No released version and latest version is neither draft nor deaccessioned"; logger.fine(result); - return new AsyncResult<>(result); } } else if (atLeastOnePublishedVersion == true) { results.append("Published versions found. ") @@ -681,7 +680,6 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { IndexableDataset indexableDraftVersion = new IndexableDataset(latestVersion); @@ -735,16 +733,13 @@ private Future doIndexDataset(Dataset dataset, boolean doNormalSolrDocCl String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else { String result = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft"; logger.fine(result); - return new AsyncResult<>(result); } } else { String result = "No-op. 
Unexpected condition reached: Has a version been published or not?"; logger.fine(result); - return new AsyncResult<>(result); } } @@ -1456,8 +1451,6 @@ private void updateLastIndexedTimeInNewTransaction(Long id) { DvObject dvObjectToModify = em.find(DvObject.class, id); dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime())); dvObjectToModify = em.merge(dvObjectToModify); - em.flush(); - dvObjectToModify = null; } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 209bc61e7fa..4c50dfe670e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2499,15 +2499,11 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur int repeats = durationInSeconds * (1000 / sleepStep); boolean stale=true; do { - try { - timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); - System.out.println(timestampResponse.body().asString()); - String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); - System.out.println(hasStaleIndex); - stale = Boolean.parseBoolean(hasStaleIndex); - } catch (Exception e) { - stale = false; - } + timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); + System.out.println(timestampResponse.body().asString()); + String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); + System.out.println(hasStaleIndex); + stale = Boolean.parseBoolean(hasStaleIndex); try { Thread.sleep(sleepStep); From de944bf9f69a1b0f86a660b6036b8da8601c0254 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 2 May 2023 16:09:28 +0200 Subject: [PATCH 16/59] ingest does not block updating dataset anymore --- .../iq/dataverse/PermissionServiceBean.java | 15 +++++++++++++++ .../command/impl/UpdateDatasetVersionCommand.java | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff 
--git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 8f7f53de1a2..5d72bd225d2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -750,6 +750,21 @@ else if (dataset.isLockedFor(DatasetLock.Reason.InReview)) { } } } + + public void checkUpdateDatasetVersionLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { + boolean locked = false; + if (dataset.isLocked()) { + for (final DatasetLock lock: dataset.getLocks()) { + if (lock.getReason() != DatasetLock.Reason.Ingest) { + locked = true; + break; + } + } + } + if (locked) { + checkEditDatasetLock(dataset, dataverseRequest, command); + } + } public void checkPublishDatasetLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { if (dataset.isLocked()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index cdc9f275895..12a5d8611f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -96,7 +96,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - ctxt.permissions().checkEditDatasetLock(theDataset, getRequest(), this); + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); Dataset savedDataset = null; try { From a119fd7d2506cd73245814608347d22f4c71c6e1 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 2 May 2023 16:16:51 +0200 Subject: [PATCH 17/59] reverted anneeded flush --- 
.../dataverse/engine/command/impl/CreateDataverseCommand.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java index cc5a57fa9a4..cece4230eed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java @@ -170,9 +170,6 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { - if (ctxt.em() != null) { - ctxt.em().flush(); // in the case when the root dataverse is created, IndexServiceBean need to see the root dataverse at initialization - } return ctxt.dataverses().index((Dataverse) r); } From 01b83e0b166ef574e7efba07e7ade1798d8f7a98 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Tue, 2 May 2023 19:08:09 +0200 Subject: [PATCH 18/59] ingest lock removal by lock id --- .../iq/dataverse/DatasetServiceBean.java | 23 +++++++++++ .../iq/dataverse/ingest/IngestMessage.java | 9 +++++ .../dataverse/ingest/IngestMessageBean.java | 38 +++++++------------ .../dataverse/ingest/IngestServiceBean.java | 3 +- 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index bf36fb469bd..4939454d0c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -448,6 +448,29 @@ public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { } } + /** + * Removes {@link DatasetLock} for the dataset whose id is passed and lock ID + * is {@code lockId}. + * @param dataset the dataset whose locks (for {@code aReason}) will be removed. 
+ * @param aReason The reason of the locks that will be removed. + */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void removeDatasetLock(Dataset dataset, Long lockId) { + if ( dataset != null ) { + new HashSet<>(dataset.getLocks()).stream() + .filter( l -> l.getId() == lockId ) + .forEach( lock -> { + lock = em.merge(lock); + dataset.removeLock(lock); + + AuthenticatedUser user = lock.getUser(); + user.getDatasetLocks().remove(lock); + + em.remove(lock); + }); + } + } + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void updateDatasetLock(DatasetLock datasetLock) { em.merge(datasetLock); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java index e9923012fad..cf00bc7a4ad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java @@ -59,6 +59,7 @@ public IngestMessage(int messageLevel, Long authenticatedUserId) { private String datasetVersionNumber; private List datafile_ids; private Long authenticatedUserId; + private Long lockId; public String getVersionNote() { return versionNote; @@ -123,4 +124,12 @@ public void addFileId(Long file_id) { public Long getAuthenticatedUserId() { return authenticatedUserId; } + + public void setLockId(Long lockId) { + this.lockId = lockId; + } + + public Long getLockId() { + return lockId; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index b029c0c97c5..e79ccd0740f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -23,7 +23,6 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import 
edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; import java.time.Instant; @@ -69,7 +68,6 @@ public IngestMessageBean() { public void onMessage(Message message) { IngestMessage ingestMessage = null; - Long datafile_id = null; AuthenticatedUser authenticatedUser = null; try { @@ -78,8 +76,8 @@ public void onMessage(Message message) { authenticatedUser = authenticationServiceBean.findByID(ingestMessage.getAuthenticatedUserId()); - Iterator iter = ingestMessage.getFileIds().iterator(); - datafile_id = null; + Iterator iter = ingestMessage.getFileIds().iterator(); + Long datafile_id = null; boolean ingestWithErrors = false; @@ -87,7 +85,7 @@ public void onMessage(Message message) { sbIngestedFiles.append("
    "); while (iter.hasNext()) { - datafile_id = (Long) iter.next(); + datafile_id = iter.next(); logger.fine("Start ingest job;"); try { @@ -139,11 +137,10 @@ public void onMessage(Message message) { logger.info("trying to save datafile and the failed ingest report, id=" + datafile_id); datafile = datafileService.save(datafile); - Dataset dataset = datafile.getOwner(); - if (dataset != null && dataset.getId() != null) { + if (ingestMessage.getDatasetId() != null) { //logger.info("attempting to remove dataset lock for dataset " + dataset.getId()); //datasetService.removeDatasetLock(dataset.getId()); - ingestService.sendFailNotification(dataset.getId()); + ingestService.sendFailNotification(ingestMessage.getDatasetId()); } } } @@ -153,21 +150,6 @@ public void onMessage(Message message) { sbIngestedFiles.append("
"); Long objectId = null; - - // Remove the dataset lock: - // (note that the assumption here is that all of the datafiles - // packed into this IngestMessage belong to the same dataset) - if (datafile_id != null) { - DataFile datafile = datafileService.find(datafile_id); - if (datafile != null) { - Dataset dataset = datafile.getOwner(); - objectId = dataset.getId(); - if (dataset != null && dataset.getId() != null) { - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); - } - } - } - userNotificationService.sendNotification( authenticatedUser, Timestamp.from(Instant.now()), @@ -182,9 +164,15 @@ public void onMessage(Message message) { ex.printStackTrace(); // error in getting object from message; can't send e-mail } finally { - // when we're done, go ahead and remove the lock (not yet) + // when we're done, go ahead and remove the lock try { - //datasetService.removeDatasetLock( ingestMessage.getDatasetId() ); + // Remove the dataset lock: + // (note that the assumption here is that all of the datafiles + // packed into this IngestMessage belong to the same dataset) + Dataset dataset = datasetService.find(ingestMessage.getDatasetId()); + if (dataset != null && dataset.getId() != null) { + datasetService.removeDatasetLock(dataset, ingestMessage.getLockId()); + } } catch (Exception ex) { ex.printStackTrace(); // application was unable to remove the datasetLock } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 0d9f8ce375f..238ae2b1681 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -518,7 +518,7 @@ public String startIngestJobs(List dataFiles, AuthenticatedUser user) if (count > 0) { String info = "Ingest of " + count + " tabular data file(s) is in progress."; logger.info(info); - 
datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), + DatasetLock lock = datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), DatasetLock.Reason.Ingest, (user != null) ? user.getId() : null, info); @@ -540,6 +540,7 @@ public int compare(DataFile d1, DataFile d2) { for (int i = 0; i < count; i++) { ingestMessage.addFileId(scheduledFilesArray[i].getId()); } + ingestMessage.setLockId(lock.getId()); QueueConnection conn = null; QueueSession session = null; From 60a604538295952ad917729c61d58e021f262763 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 3 May 2023 10:51:52 +0200 Subject: [PATCH 19/59] improved ingest locking with making the ingest bean clustered singleton with retaking the lock if needed --- pom.xml | 5 ++++ .../iq/dataverse/DatasetServiceBean.java | 23 ------------------- .../iq/dataverse/ingest/IngestMessage.java | 10 ++++---- .../dataverse/ingest/IngestMessageBean.java | 15 +++++++++++- .../dataverse/ingest/IngestServiceBean.java | 4 ++-- 5 files changed, 26 insertions(+), 31 deletions(-) diff --git a/pom.xml b/pom.xml index 5f514819947..ca5e978f2c6 100644 --- a/pom.xml +++ b/pom.xml @@ -182,6 +182,11 @@ provided + + fish.payara.api + payara-api + provided + com.sun.mail jakarta.mail diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 4939454d0c0..bf36fb469bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -448,29 +448,6 @@ public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { } } - /** - * Removes {@link DatasetLock} for the dataset whose id is passed and lock ID - * is {@code lockId}. - * @param dataset the dataset whose locks (for {@code aReason}) will be removed. - * @param aReason The reason of the locks that will be removed. 
- */ - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void removeDatasetLock(Dataset dataset, Long lockId) { - if ( dataset != null ) { - new HashSet<>(dataset.getLocks()).stream() - .filter( l -> l.getId() == lockId ) - .forEach( lock -> { - lock = em.merge(lock); - dataset.removeLock(lock); - - AuthenticatedUser user = lock.getUser(); - user.getDatasetLocks().remove(lock); - - em.remove(lock); - }); - } - } - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void updateDatasetLock(DatasetLock datasetLock) { em.merge(datasetLock); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java index cf00bc7a4ad..df25c300248 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java @@ -59,7 +59,7 @@ public IngestMessage(int messageLevel, Long authenticatedUserId) { private String datasetVersionNumber; private List datafile_ids; private Long authenticatedUserId; - private Long lockId; + private String info; public String getVersionNote() { return versionNote; @@ -125,11 +125,11 @@ public Long getAuthenticatedUserId() { return authenticatedUserId; } - public void setLockId(Long lockId) { - this.lockId = lockId; + public void setInfo(String info) { + this.info = info; } - public Long getLockId() { - return lockId; + public String getInfo() { + return info; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index e79ccd0740f..36c78429bc1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -30,7 +30,10 @@ import java.util.logging.Logger; import javax.ejb.ActivationConfigProperty; import javax.ejb.EJB; +import javax.ejb.Lock; +import 
javax.ejb.LockType; import javax.ejb.MessageDriven; +import javax.ejb.Singleton; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.jms.JMSException; @@ -38,6 +41,7 @@ import javax.jms.MessageListener; import javax.jms.ObjectMessage; +import fish.payara.cluster.Clustered; /** * * This is an experimental, JMS-based implementation of asynchronous @@ -52,6 +56,8 @@ @ActivationConfigProperty(propertyName = "destinationType", propertyValue = "javax.jms.Queue") } ) +@Singleton +@Clustered @Lock(LockType.READ) public class IngestMessageBean implements MessageListener { private static final Logger logger = Logger.getLogger(IngestMessageBean.class.getCanonicalName()); @EJB DatasetServiceBean datasetService; @@ -65,6 +71,7 @@ public IngestMessageBean() { } @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + @Lock(LockType.WRITE) public void onMessage(Message message) { IngestMessage ingestMessage = null; @@ -74,6 +81,12 @@ public void onMessage(Message message) { ObjectMessage om = (ObjectMessage) message; ingestMessage = (IngestMessage) om.getObject(); + // if the lock was removed while an ingest was queued, ratake the lock + datasetService.addDatasetLock(ingestMessage.getDatasetId(), + DatasetLock.Reason.Ingest, + ingestMessage.getAuthenticatedUserId(), + ingestMessage.getInfo()); + authenticatedUser = authenticationServiceBean.findByID(ingestMessage.getAuthenticatedUserId()); Iterator iter = ingestMessage.getFileIds().iterator(); @@ -171,7 +184,7 @@ public void onMessage(Message message) { // packed into this IngestMessage belong to the same dataset) Dataset dataset = datasetService.find(ingestMessage.getDatasetId()); if (dataset != null && dataset.getId() != null) { - datasetService.removeDatasetLock(dataset, ingestMessage.getLockId()); + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); } } catch (Exception ex) { ex.printStackTrace(); // application was unable to remove the datasetLock diff 
--git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 238ae2b1681..2a69e63c23b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -518,7 +518,7 @@ public String startIngestJobs(List dataFiles, AuthenticatedUser user) if (count > 0) { String info = "Ingest of " + count + " tabular data file(s) is in progress."; logger.info(info); - DatasetLock lock = datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), + datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), DatasetLock.Reason.Ingest, (user != null) ? user.getId() : null, info); @@ -540,7 +540,7 @@ public int compare(DataFile d1, DataFile d2) { for (int i = 0; i < count; i++) { ingestMessage.addFileId(scheduledFilesArray[i].getId()); } - ingestMessage.setLockId(lock.getId()); + ingestMessage.setInfo(info); QueueConnection conn = null; QueueSession session = null; From 441b14131a85cf6f03798c157380db723ec7e766 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 3 May 2023 11:02:23 +0200 Subject: [PATCH 20/59] next to index and indexing now static variable made concurrent and final --- .../iq/dataverse/search/IndexServiceBean.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 3bdfdb5b52b..2401c1d9ce8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -56,6 +56,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.function.Function; import java.util.logging.Logger; @@ 
-360,28 +361,28 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) - private static Map nextToIndex = new HashMap<>(); + private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now - private static Set indexingNow = new HashSet<>(); + private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished // Pass non-null Dataset to schedule it for indexing synchronized private static Dataset getNextToIndex(Long id, Dataset d) { if (d == null) { // -> indexing of the dataset with id has finished - Dataset next = nextToIndex.remove(id); + Dataset next = NEXT_TO_INDEX.remove(id); if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing // the job can be stopped now - indexingNow.remove(id); + INDEXING_NOW.remove(id); } return next; } // index job is requested for a non-null dataset - if (indexingNow.contains(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null - nextToIndex.put(id, d); + if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null + NEXT_TO_INDEX.put(id, d); return null; } // otherwise, start a new job - indexingNow.add(id); + INDEXING_NOW.put(id, true); return d; } From 69e2375601a64e78754ee0308dccd6aee70bbb8e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 3 May 2023 16:18:20 +0200 Subject: [PATCH 21/59] clean up of the 
ingest message and added missing id to the message --- .../edu/harvard/iq/dataverse/api/Files.java | 2 +- .../iq/dataverse/ingest/IngestMessage.java | 58 +------------------ .../dataverse/ingest/IngestServiceBean.java | 9 +-- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 182e37a193e..4b620d494d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -704,7 +704,7 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") dataFile = fileService.save(dataFile); // queue the data ingest job for asynchronous execution: - String status = ingestService.startIngestJobs(new ArrayList<>(Arrays.asList(dataFile)), u); + String status = ingestService.startIngestJobs(dataset.getId(), new ArrayList<>(Arrays.asList(dataFile)), u); if (!StringUtil.isEmpty(status)) { // This most likely indicates some sort of a problem (for example, diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java index df25c300248..b1c93e52ebd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessage.java @@ -19,7 +19,6 @@ */ package edu.harvard.iq.dataverse.ingest; -import edu.harvard.iq.dataverse.DataFile; import java.io.Serializable; import java.util.List; import java.util.ArrayList; @@ -32,51 +31,22 @@ * @author Leonid Andreev */ public class IngestMessage implements Serializable { - public static final int INGEST_MESAGE_LEVEL_ERROR = 1; - public static final int INGEST_MESAGE_LEVEL_INFO = 2; - /** Creates a new instance of IngestMessage */ - public IngestMessage() { - this(INGEST_MESAGE_LEVEL_INFO); - } - public IngestMessage(int messageLevel) { - this.messageLevel = messageLevel; + public 
IngestMessage() { datafile_ids = new ArrayList(); } - public IngestMessage(int messageLevel, Long authenticatedUserId) { - this.messageLevel = messageLevel; + public IngestMessage(Long authenticatedUserId) { this.authenticatedUserId = authenticatedUserId; datafile_ids = new ArrayList(); } - - private int messageLevel = INGEST_MESAGE_LEVEL_INFO; private Long datasetId; - private Long datasetVersionId; - private String versionNote; - private String datasetVersionNumber; private List datafile_ids; private Long authenticatedUserId; private String info; - public String getVersionNote() { - return versionNote; - } - - public void setVersionNote(String versionNote) { - this.versionNote = versionNote; - } - - public int getMessageLevel() { - return messageLevel; - } - - public void setMessageLevel(int messageLevel) { - this.messageLevel = messageLevel; - } - public Long getDatasetId() { return datasetId; } @@ -84,30 +54,6 @@ public Long getDatasetId() { public void setDatasetId(Long datasetId) { this.datasetId = datasetId; } - - public Long getDatasetVersionId() { - return datasetVersionId; - } - - public void setDatasetVersionId(Long datasetVersionId) { - this.datasetVersionId = datasetVersionId; - } - - public boolean sendInfoMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_INFO; - } - - public boolean sendErrorMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_ERROR; - } - - public String getDatasetVersionNumber() { - return datasetVersionNumber; - } - - public void setDatasetVersionNumber(String datasetVersionNumber) { - this.datasetVersionNumber = datasetVersionNumber; - } public List getFileIds() { return datafile_ids; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 2a69e63c23b..5f73a761a11 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java 
@@ -474,10 +474,10 @@ public void startIngestJobsForDataset(Dataset dataset, AuthenticatedUser user) { } } - startIngestJobs(scheduledFiles, user); + startIngestJobs(dataset.getId(), scheduledFiles, user); } - public String startIngestJobs(List dataFiles, AuthenticatedUser user) { + public String startIngestJobs(Long datasetId, List dataFiles, AuthenticatedUser user) { IngestMessage ingestMessage = null; StringBuilder sb = new StringBuilder(); @@ -518,7 +518,7 @@ public String startIngestJobs(List dataFiles, AuthenticatedUser user) if (count > 0) { String info = "Ingest of " + count + " tabular data file(s) is in progress."; logger.info(info); - datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), + datasetService.addDatasetLock(datasetId, DatasetLock.Reason.Ingest, (user != null) ? user.getId() : null, info); @@ -536,10 +536,11 @@ public int compare(DataFile d1, DataFile d2) { } }); - ingestMessage = new IngestMessage(IngestMessage.INGEST_MESAGE_LEVEL_INFO, user.getId()); + ingestMessage = new IngestMessage(user.getId()); for (int i = 0; i < count; i++) { ingestMessage.addFileId(scheduledFilesArray[i].getId()); } + ingestMessage.setDatasetId(datasetId); ingestMessage.setInfo(info); QueueConnection conn = null; From ae35f474ddc79a572b31fa1fd421a5f9dbbfc42d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 12:00:15 +0200 Subject: [PATCH 22/59] reverted singleton and lock annotations in ingest message bean --- .../harvard/iq/dataverse/ingest/IngestMessageBean.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index 36c78429bc1..24664eb9e91 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -30,18 +30,13 @@ import java.util.logging.Logger; import 
javax.ejb.ActivationConfigProperty; import javax.ejb.EJB; -import javax.ejb.Lock; -import javax.ejb.LockType; import javax.ejb.MessageDriven; -import javax.ejb.Singleton; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.jms.JMSException; import javax.jms.Message; import javax.jms.MessageListener; import javax.jms.ObjectMessage; - -import fish.payara.cluster.Clustered; /** * * This is an experimental, JMS-based implementation of asynchronous @@ -56,8 +51,7 @@ @ActivationConfigProperty(propertyName = "destinationType", propertyValue = "javax.jms.Queue") } ) -@Singleton -@Clustered @Lock(LockType.READ) + public class IngestMessageBean implements MessageListener { private static final Logger logger = Logger.getLogger(IngestMessageBean.class.getCanonicalName()); @EJB DatasetServiceBean datasetService; @@ -71,7 +65,6 @@ public IngestMessageBean() { } @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - @Lock(LockType.WRITE) public void onMessage(Message message) { IngestMessage ingestMessage = null; From 84116dd30351a4fc4f9bed2e882dafcfc016ac25 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 12:08:38 +0200 Subject: [PATCH 23/59] async indexing in a blocking thread pool executor --- .../search/AsyncIndexServiceBean.java | 162 ++++++++++++++++++ .../iq/dataverse/search/IndexServiceBean.java | 50 +----- 2 files changed, 168 insertions(+), 44 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java new file mode 100644 index 00000000000..b8f2663f824 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java @@ -0,0 +1,162 @@ +// Author: Eryk Kulikowski @ KU Leuven (2023). 
Apache 2.0 License + +package edu.harvard.iq.dataverse.search; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.RejectedExecutionHandler; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +import javax.annotation.PostConstruct; +import javax.ejb.EJB; +import javax.ejb.Singleton; +import javax.inject.Inject; +import javax.inject.Named; + +import org.apache.solr.client.solrj.SolrServerException; +import org.eclipse.microprofile.config.inject.ConfigProperty; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import fish.payara.cluster.Clustered; +import fish.payara.cluster.DistributedLockType; + +@Named +@Singleton +@Clustered(lock = DistributedLockType.LOCK_NONE) +public class AsyncIndexServiceBean { + + @EJB + IndexServiceBean indexService; + + @Inject + @ConfigProperty(name = "dataverse.search.index.maxpoolsize", defaultValue = "4") + private Integer maximumPoolSize; + + private ThreadPoolExecutor blockingExecutor; + + @PostConstruct + protected void setup() { + blockingExecutor = getBlockingThreadPoolExecutor(); + } + + private static final Logger logger = Logger.getLogger(AsyncIndexServiceBean.class.getCanonicalName()); + + // The following two variables are only used in the synchronized getNextToIndex + // method and do not need to be synchronized themselves. We use concurrent + // variants just in case... 
+ + // nextToIndex contains datasets mapped by dataset id that were added for future + // indexing while the indexing was already ongoing for a given dataset + // (if there already was a dataset scheduled for indexing, it is overwritten and + // only the most recently requested version is kept in the map) + private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); + // indexingNow is a set of dataset ids of datasets being indexed asynchronously + // right now + private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); // it is used as set + + // When you pass null as Dataset parameter to this method, it indicates that the + // indexing of the dataset with "id" has finished + // Pass non-null Dataset to schedule it for indexing + synchronized private static Dataset getNextToIndex(Long id, Dataset d) { + if (d == null) { // -> indexing of the dataset with id has finished + Dataset next = NEXT_TO_INDEX.remove(id); + if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing + // the job can be stopped now + INDEXING_NOW.remove(id); + } + return next; + } + // index job is requested for a non-null dataset + if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started + NEXT_TO_INDEX.put(id, d); + // by the current thread -> return null + return null; + } + // otherwise, start a new job + INDEXING_NOW.put(id, true); + return d; + } + + /** + * Indexes a dataset asynchronously on a blocking thread pool executor. The + * executor has the maximum pool size as configured in microprofile setting + * "dataverse.search.index.maxpoolsize" with a default value of 4. When all + * threads are in use, this method will block until a thread becomes available. + * Otherwise, the indexing is executed immediately in the background on an + * available (or new) thread. + * + * Note that the commands implement a synchronized skipping mechanism. 
When an + * indexing job is already running for a given dataset in the background, the + * new command will not index that dataset, but will delegate the execution to + * the already running job. The running job will pick up the requested indexing + * once that it is finished with the ongoing indexing. If another indexing is + * requested before the ongoing indexing is finished, only the indexing that is + * requested most recently will be picked up for the next indexing. + * + * In other words: we can have at most one indexing ongoing for the given + * dataset, and at most one (most recent) request for reindexing of the same + * dataset. All requests that come between the most recent one and the ongoing + * one are skipped for the optimization reasons. For a more in depth discussion, + * see the pull request: https://github.com/IQSS/dataverse/pull/9558 + * + * @param dataset The dataset to be indexed. + * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. + */ + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + Runnable command = getIndexingRunnable(dataset, doNormalSolrDocCleanUp); + blockingExecutor.execute(command); + } + + private ThreadPoolExecutor getBlockingThreadPoolExecutor() { + BlockingQueue queue = new ArrayBlockingQueue<>(maximumPoolSize); + ThreadPoolExecutor blockingExecutor = new ThreadPoolExecutor(1, maximumPoolSize, 2, TimeUnit.MINUTES, queue); + blockingExecutor.setRejectedExecutionHandler(new RejectedExecutionHandler() { + @Override + public void rejectedExecution(Runnable runnable, ThreadPoolExecutor executor) { + try { + executor.getQueue().put(runnable); + if (executor.isShutdown()) { + throw new RejectedExecutionException("Indexing blocking executor is shutdown"); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RejectedExecutionException("Indexing blocking executor is interrupted", e); + } + } + }); + return blockingExecutor; + } + + private Runnable 
getIndexingRunnable(final Dataset dataset, final boolean doNormalSolrDocCleanUp) { + return new Runnable() { + @Override + public void run() { + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); + // if there is an ongoing index job for this dataset, next is null (ongoing + // index job will reindex the newest version after current indexing finishes) + while (next != null) { + logger.fine("indexing dataset " + id); + try { + indexService.indexDataset(next, doNormalSolrDocCleanUp); + } catch (SolrServerException | IOException e) { + String failureLogText = "Indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + + next.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); + } + next = getNextToIndex(id, null); + // if during the indexing no new job was requested, next is null and loop can be + // stopped + } + } + }; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2401c1d9ce8..3b228b4f35f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -144,6 +144,9 @@ public class IndexServiceBean { @EJB DatasetFieldServiceBean datasetFieldService; + @EJB + AsyncIndexServiceBean asyncIndexServiceBean; + public static final String solrDocIdentifierDataverse = "dataverse_"; public static final String solrDocIdentifierFile = "datafile_"; public static final String solrDocIdentifierDataset = "dataset_"; @@ -356,52 +359,11 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { asyncIndexDataset(dataset, doNormalSolrDocCleanUp); dataset = null; } - - // The following two variables are only used in the synchronized getNextToIndex method and do not need to be 
synchronized themselves - // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset - // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) - private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); - // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now - private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); - - // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished - // Pass non-null Dataset to schedule it for indexing - synchronized private static Dataset getNextToIndex(Long id, Dataset d) { - if (d == null) { // -> indexing of the dataset with id has finished - Dataset next = NEXT_TO_INDEX.remove(id); - if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing - // the job can be stopped now - INDEXING_NOW.remove(id); - } - return next; - } - // index job is requested for a non-null dataset - if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null - NEXT_TO_INDEX.put(id, d); - return null; - } - // otherwise, start a new job - INDEXING_NOW.put(id, true); - return d; + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + asyncIndexServiceBean.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); } - @Asynchronous - public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUpe) { - Long id = dataset.getId(); - Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) - while (next != null) { - try { - indexDataset(next, doNormalSolrDocCleanUpe); - } catch 
(SolrServerException | IOException e) { - String failureLogText = "Indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - } - next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped - } - } - public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { asyncIndexDataset(dataset, true); @@ -416,7 +378,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } } - private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + public void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } From 5c9831ba39814835488f50eae1b48c15d4097c4f Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 12:57:50 +0200 Subject: [PATCH 24/59] removed the bolocking thread pool executor from the async ingest bean --- .../search/AsyncIndexServiceBean.java | 93 ++++--------------- 1 file changed, 20 insertions(+), 73 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java index b8f2663f824..5a785625f9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java @@ -4,23 +4,15 @@ import java.io.IOException; import java.util.Map; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; import 
java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.RejectedExecutionHandler; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; import java.util.logging.Logger; -import javax.annotation.PostConstruct; +import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.Singleton; -import javax.inject.Inject; import javax.inject.Named; import org.apache.solr.client.solrj.SolrServerException; -import org.eclipse.microprofile.config.inject.ConfigProperty; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -35,17 +27,6 @@ public class AsyncIndexServiceBean { @EJB IndexServiceBean indexService; - @Inject - @ConfigProperty(name = "dataverse.search.index.maxpoolsize", defaultValue = "4") - private Integer maximumPoolSize; - - private ThreadPoolExecutor blockingExecutor; - - @PostConstruct - protected void setup() { - blockingExecutor = getBlockingThreadPoolExecutor(); - } - private static final Logger logger = Logger.getLogger(AsyncIndexServiceBean.class.getCanonicalName()); // The following two variables are only used in the synchronized getNextToIndex @@ -85,12 +66,7 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { } /** - * Indexes a dataset asynchronously on a blocking thread pool executor. The - * executor has the maximum pool size as configured in microprofile setting - * "dataverse.search.index.maxpoolsize" with a default value of 4. When all - * threads are in use, this method will block until a thread becomes available. - * Otherwise, the indexing is executed immediately in the background on an - * available (or new) thread. + * Indexes a dataset asynchronously. * * Note that the commands implement a synchronized skipping mechanism. 
When an * indexing job is already running for a given dataset in the background, the @@ -109,54 +85,25 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { * @param dataset The dataset to be indexed. * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. */ + @Asynchronous public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - Runnable command = getIndexingRunnable(dataset, doNormalSolrDocCleanUp); - blockingExecutor.execute(command); - } - - private ThreadPoolExecutor getBlockingThreadPoolExecutor() { - BlockingQueue queue = new ArrayBlockingQueue<>(maximumPoolSize); - ThreadPoolExecutor blockingExecutor = new ThreadPoolExecutor(1, maximumPoolSize, 2, TimeUnit.MINUTES, queue); - blockingExecutor.setRejectedExecutionHandler(new RejectedExecutionHandler() { - @Override - public void rejectedExecution(Runnable runnable, ThreadPoolExecutor executor) { - try { - executor.getQueue().put(runnable); - if (executor.isShutdown()) { - throw new RejectedExecutionException("Indexing blocking executor is shutdown"); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RejectedExecutionException("Indexing blocking executor is interrupted", e); - } + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); + // if there is an ongoing index job for this dataset, next is null (ongoing + // index job will reindex the newest version after current indexing finishes) + while (next != null) { + logger.fine("indexing dataset " + id); + try { + indexService.indexDataset(next, doNormalSolrDocCleanUp); + } catch (SolrServerException | IOException e) { + String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + + next.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); } - }); - return blockingExecutor; - } - - private Runnable getIndexingRunnable(final Dataset dataset, final boolean doNormalSolrDocCleanUp) { - return new Runnable() { - @Override - public void run() { - Long id = dataset.getId(); - Dataset next = getNextToIndex(id, dataset); - // if there is an ongoing index job for this dataset, next is null (ongoing - // index job will reindex the newest version after current indexing finishes) - while (next != null) { - logger.fine("indexing dataset " + id); - try { - indexService.indexDataset(next, doNormalSolrDocCleanUp); - } catch (SolrServerException | IOException e) { - String failureLogText = "Indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" - + next.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); - } - next = getNextToIndex(id, null); - // if during the indexing no new job was requested, next is null and loop can be - // stopped - } - } - }; + next = getNextToIndex(id, null); + // if during the indexing no new job was requested, next is null and loop can be + // stopped + } } } From 4737988d20f4f8281eb78fa14088b3d6f056eed1 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 14:20:06 +0200 Subject: [PATCH 25/59] back to the blocking thread pool executor --- .../search/AsyncIndexServiceBean.java | 93 +++++++++++++++---- 1 file changed, 73 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java index 5a785625f9a..4e9d0342596 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java @@ -4,15 +4,23 @@ import java.io.IOException; import java.util.Map; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.RejectedExecutionHandler; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.logging.Logger; -import javax.ejb.Asynchronous; +import javax.annotation.PostConstruct; import javax.ejb.EJB; import javax.ejb.Singleton; +import javax.inject.Inject; import javax.inject.Named; import org.apache.solr.client.solrj.SolrServerException; +import org.eclipse.microprofile.config.inject.ConfigProperty; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -27,6 +35,17 @@ public class AsyncIndexServiceBean { @EJB IndexServiceBean indexService; + @Inject + @ConfigProperty(name = "dataverse.search.index.maxpoolsize", defaultValue = "10") + private Integer maximumPoolSize; + + private ThreadPoolExecutor blockingExecutor; + + @PostConstruct + protected void setup() { + blockingExecutor = getBlockingThreadPoolExecutor(); + } + private static final Logger logger = Logger.getLogger(AsyncIndexServiceBean.class.getCanonicalName()); // The following two variables are only used in the synchronized getNextToIndex @@ -66,7 +85,12 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { } /** - * Indexes a dataset asynchronously. + * Indexes a dataset asynchronously on a blocking thread pool executor. The + * executor has the maximum pool size as configured in microprofile setting + * "dataverse.search.index.maxpoolsize" with a default value of 10. When all + * threads are in use, this method will block until a thread becomes available. 
+ * Otherwise, the indexing is executed immediately in the background on an + * available (or new) thread. * * Note that the commands implement a synchronized skipping mechanism. When an * indexing job is already running for a given dataset in the background, the @@ -85,25 +109,54 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { * @param dataset The dataset to be indexed. * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. */ - @Asynchronous public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - Long id = dataset.getId(); - Dataset next = getNextToIndex(id, dataset); - // if there is an ongoing index job for this dataset, next is null (ongoing - // index job will reindex the newest version after current indexing finishes) - while (next != null) { - logger.fine("indexing dataset " + id); - try { - indexService.indexDataset(next, doNormalSolrDocCleanUp); - } catch (SolrServerException | IOException e) { - String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" - + next.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); + Runnable command = getIndexingRunnable(dataset, doNormalSolrDocCleanUp); + blockingExecutor.execute(command); + } + + private ThreadPoolExecutor getBlockingThreadPoolExecutor() { + BlockingQueue queue = new ArrayBlockingQueue<>(maximumPoolSize); + ThreadPoolExecutor blockingExecutor = new ThreadPoolExecutor(1, maximumPoolSize, 2, TimeUnit.MINUTES, queue); + blockingExecutor.setRejectedExecutionHandler(new RejectedExecutionHandler() { + @Override + public void rejectedExecution(Runnable runnable, ThreadPoolExecutor executor) { + try { + executor.getQueue().put(runnable); + if (executor.isShutdown()) { + throw new RejectedExecutionException("Indexing blocking executor is shutdown"); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RejectedExecutionException("Indexing blocking executor is interrupted", e); + } } - next = getNextToIndex(id, null); - // if during the indexing no new job was requested, next is null and loop can be - // stopped - } + }); + return blockingExecutor; + } + + private Runnable getIndexingRunnable(final Dataset dataset, final boolean doNormalSolrDocCleanUp) { + return new Runnable() { + @Override + public void run() { + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); + // if there is an ongoing index job for this dataset, next is null (ongoing + // index job will reindex the newest version after current indexing finishes) + while (next != null) { + logger.fine("indexing dataset " + id); + try { + indexService.indexDataset(next, doNormalSolrDocCleanUp); + } catch (SolrServerException | IOException e) { + String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + + next.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); + } + next = getNextToIndex(id, null); + // if during the indexing no new job was requested, next is null and loop can be + // stopped + } + } + }; } } From e920e0f019f704ca45eadbd63fe8231b8811bd90 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 15:12:37 +0200 Subject: [PATCH 26/59] async indes service bean is now serializable --- .../search/AsyncIndexServiceBean.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java index 4e9d0342596..a45124581e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java @@ -3,6 +3,7 @@ package edu.harvard.iq.dataverse.search; import java.io.IOException; +import java.io.Serializable; import java.util.Map; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; @@ -30,7 +31,7 @@ @Named @Singleton @Clustered(lock = DistributedLockType.LOCK_NONE) -public class AsyncIndexServiceBean { +public class AsyncIndexServiceBean implements Serializable { @EJB IndexServiceBean indexService; @@ -44,6 +45,8 @@ public class AsyncIndexServiceBean { @PostConstruct protected void setup() { blockingExecutor = getBlockingThreadPoolExecutor(); + nextToIndex = new ConcurrentHashMap<>(); + indexingNow = new ConcurrentHashMap<>(); } private static final Logger logger = Logger.getLogger(AsyncIndexServiceBean.class.getCanonicalName()); @@ -56,31 +59,31 @@ protected void setup() { // indexing while the indexing was already ongoing for a given dataset // (if there 
already was a dataset scheduled for indexing, it is overwritten and // only the most recently requested version is kept in the map) - private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); + private Map nextToIndex; // indexingNow is a set of dataset ids of datasets being indexed asynchronously // right now - private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); // it is used as set + private Map indexingNow; // it is used as set // When you pass null as Dataset parameter to this method, it indicates that the // indexing of the dataset with "id" has finished // Pass non-null Dataset to schedule it for indexing - synchronized private static Dataset getNextToIndex(Long id, Dataset d) { + synchronized private Dataset getNextToIndex(Long id, Dataset d) { if (d == null) { // -> indexing of the dataset with id has finished - Dataset next = NEXT_TO_INDEX.remove(id); + Dataset next = nextToIndex.remove(id); if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing // the job can be stopped now - INDEXING_NOW.remove(id); + indexingNow.remove(id); } return next; } // index job is requested for a non-null dataset - if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started - NEXT_TO_INDEX.put(id, d); + if (indexingNow.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started + nextToIndex.put(id, d); // by the current thread -> return null return null; } // otherwise, start a new job - INDEXING_NOW.put(id, true); + indexingNow.put(id, true); return d; } From 9aa254f3a3ec34b2a27d0fdaf186ad8d81152f56 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 16:38:06 +0200 Subject: [PATCH 27/59] reverted to previous solution --- .../search/AsyncIndexServiceBean.java | 165 ------------------ .../iq/dataverse/search/IndexServiceBean.java | 68 +++++++- 2 files changed, 63 insertions(+), 170 deletions(-) delete mode 100644 
src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java deleted file mode 100644 index a45124581e6..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/search/AsyncIndexServiceBean.java +++ /dev/null @@ -1,165 +0,0 @@ -// Author: Eryk Kulikowski @ KU Leuven (2023). Apache 2.0 License - -package edu.harvard.iq.dataverse.search; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Map; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.RejectedExecutionHandler; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.logging.Logger; - -import javax.annotation.PostConstruct; -import javax.ejb.EJB; -import javax.ejb.Singleton; -import javax.inject.Inject; -import javax.inject.Named; - -import org.apache.solr.client.solrj.SolrServerException; -import org.eclipse.microprofile.config.inject.ConfigProperty; - -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; -import fish.payara.cluster.Clustered; -import fish.payara.cluster.DistributedLockType; - -@Named -@Singleton -@Clustered(lock = DistributedLockType.LOCK_NONE) -public class AsyncIndexServiceBean implements Serializable { - - @EJB - IndexServiceBean indexService; - - @Inject - @ConfigProperty(name = "dataverse.search.index.maxpoolsize", defaultValue = "10") - private Integer maximumPoolSize; - - private ThreadPoolExecutor blockingExecutor; - - @PostConstruct - protected void setup() { - blockingExecutor = getBlockingThreadPoolExecutor(); - nextToIndex = new ConcurrentHashMap<>(); - indexingNow = new ConcurrentHashMap<>(); - } - - private static 
final Logger logger = Logger.getLogger(AsyncIndexServiceBean.class.getCanonicalName()); - - // The following two variables are only used in the synchronized getNextToIndex - // method and do not need to be synchronized themselves. We use concurrent - // variants just in case... - - // nextToIndex contains datasets mapped by dataset id that were added for future - // indexing while the indexing was already ongoing for a given dataset - // (if there already was a dataset scheduled for indexing, it is overwritten and - // only the most recently requested version is kept in the map) - private Map nextToIndex; - // indexingNow is a set of dataset ids of datasets being indexed asynchronously - // right now - private Map indexingNow; // it is used as set - - // When you pass null as Dataset parameter to this method, it indicates that the - // indexing of the dataset with "id" has finished - // Pass non-null Dataset to schedule it for indexing - synchronized private Dataset getNextToIndex(Long id, Dataset d) { - if (d == null) { // -> indexing of the dataset with id has finished - Dataset next = nextToIndex.remove(id); - if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing - // the job can be stopped now - indexingNow.remove(id); - } - return next; - } - // index job is requested for a non-null dataset - if (indexingNow.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started - nextToIndex.put(id, d); - // by the current thread -> return null - return null; - } - // otherwise, start a new job - indexingNow.put(id, true); - return d; - } - - /** - * Indexes a dataset asynchronously on a blocking thread pool executor. The - * executor has the maximum pool size as configured in microprofile setting - * "dataverse.search.index.maxpoolsize" with a default value of 10. When all - * threads are in use, this method will block until a thread becomes available. 
- * Otherwise, the indexing is executed immediately in the background on an - * available (or new) thread. - * - * Note that the commands implement a synchronized skipping mechanism. When an - * indexing job is already running for a given dataset in the background, the - * new command will not index that dataset, but will delegate the execution to - * the already running job. The running job will pick up the requested indexing - * once that it is finished with the ongoing indexing. If another indexing is - * requested before the ongoing indexing is finished, only the indexing that is - * requested most recently will be picked up for the next indexing. - * - * In other words: we can have at most one indexing ongoing for the given - * dataset, and at most one (most recent) request for reindexing of the same - * dataset. All requests that come between the most recent one and the ongoing - * one are skipped for the optimization reasons. For a more in depth discussion, - * see the pull request: https://github.com/IQSS/dataverse/pull/9558 - * - * @param dataset The dataset to be indexed. - * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. 
- */ - public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - Runnable command = getIndexingRunnable(dataset, doNormalSolrDocCleanUp); - blockingExecutor.execute(command); - } - - private ThreadPoolExecutor getBlockingThreadPoolExecutor() { - BlockingQueue queue = new ArrayBlockingQueue<>(maximumPoolSize); - ThreadPoolExecutor blockingExecutor = new ThreadPoolExecutor(1, maximumPoolSize, 2, TimeUnit.MINUTES, queue); - blockingExecutor.setRejectedExecutionHandler(new RejectedExecutionHandler() { - @Override - public void rejectedExecution(Runnable runnable, ThreadPoolExecutor executor) { - try { - executor.getQueue().put(runnable); - if (executor.isShutdown()) { - throw new RejectedExecutionException("Indexing blocking executor is shutdown"); - } - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RejectedExecutionException("Indexing blocking executor is interrupted", e); - } - } - }); - return blockingExecutor; - } - - private Runnable getIndexingRunnable(final Dataset dataset, final boolean doNormalSolrDocCleanUp) { - return new Runnable() { - @Override - public void run() { - Long id = dataset.getId(); - Dataset next = getNextToIndex(id, dataset); - // if there is an ongoing index job for this dataset, next is null (ongoing - // index job will reindex the newest version after current indexing finishes) - while (next != null) { - logger.fine("indexing dataset " + id); - try { - indexService.indexDataset(next, doNormalSolrDocCleanUp); - } catch (SolrServerException | IOException e) { - String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" - + next.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, next); - } - next = getNextToIndex(id, null); - // if during the indexing no new job was requested, next is null and loop can be - // stopped - } - } - }; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 3b228b4f35f..5fb7dca79f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -144,9 +144,6 @@ public class IndexServiceBean { @EJB DatasetFieldServiceBean datasetFieldService; - @EJB - AsyncIndexServiceBean asyncIndexServiceBean; - public static final String solrDocIdentifierDataverse = "dataverse_"; public static final String solrDocIdentifierFile = "datafile_"; public static final String solrDocIdentifierDataset = "dataset_"; @@ -360,8 +357,69 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { dataset = null; } + // The following two variables are only used in the synchronized getNextToIndex method and do not need to be synchronized themselves + + // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset + // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) + private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); + // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now + private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); + + // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset 
with "id" has finished + // Pass non-null Dataset to schedule it for indexing + synchronized private static Dataset getNextToIndex(Long id, Dataset d) { + if (d == null) { // -> indexing of the dataset with id has finished + Dataset next = NEXT_TO_INDEX.remove(id); + if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing + // the job can be stopped now + INDEXING_NOW.remove(id); + } + return next; + } + // index job is requested for a non-null dataset + if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null + NEXT_TO_INDEX.put(id, d); + return null; + } + // otherwise, start a new job + INDEXING_NOW.put(id, true); + return d; + } + + /** + * Indexes a dataset asynchronously. + * + * Note that this method implement a synchronized skipping mechanism. When an + * indexing job is already running for a given dataset in the background, the + * new call will not index that dataset, but will delegate the execution to + * the already running job. The running job will pick up the requested indexing + * once that it is finished with the ongoing indexing. If another indexing is + * requested before the ongoing indexing is finished, only the indexing that is + * requested most recently will be picked up for the next indexing. + * + * In other words: we can have at most one indexing ongoing for the given + * dataset, and at most one (most recent) request for reindexing of the same + * dataset. All requests that come between the most recent one and the ongoing + * one are skipped for the optimization reasons. For a more in depth discussion, + * see the pull request: https://github.com/IQSS/dataverse/pull/9558 + * + * @param dataset The dataset to be indexed. + * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. 
+ */ + @Asynchronous public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { - asyncIndexServiceBean.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) + while (next != null) { + try { + indexDataset(next, doNormalSolrDocCleanUp); + } catch (SolrServerException | IOException e) { + String failureLogText = "Indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); + } + next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped + } } public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { @@ -378,7 +436,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } } - public void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } From 49db759788478339cd8b3ed616409a2a47c9f9ce Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 17:54:26 +0200 Subject: [PATCH 28/59] integration test fix for hasStaleIndex illegal argument --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 
4c50dfe670e..5fba15e5872 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1911,9 +1911,13 @@ private static void sleepForDatasetIndex(String query, String apiToken) { if (query.contains("id:dataset") || query.contains("id:datafile")) { String[] splitted = query.split("_"); if (splitted.length >= 2) { - boolean ok = UtilIT.sleepForReindex(String.valueOf(splitted[1]), apiToken, 5); - if (!ok) { - logger.info("Still indexing after 5 seconds"); + try { + boolean ok = UtilIT.sleepForReindex(String.valueOf(splitted[1]), apiToken, 5); + if (!ok) { + logger.info("Still indexing after 5 seconds"); + } + } catch (IllegalArgumentException e) { + // search was not for dataset or datafile, illegal argument exception for "hasStaleIndex" property } } } From 334d085e0d30ce212c693eca695eb084e8a6da04 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 18:23:06 +0200 Subject: [PATCH 29/59] test was still failing, it looks like it was a different exception, not illegal argument exception? 
--- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 5fba15e5872..ab8ff1b0bb3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1916,8 +1916,8 @@ private static void sleepForDatasetIndex(String query, String apiToken) { if (!ok) { logger.info("Still indexing after 5 seconds"); } - } catch (IllegalArgumentException e) { - // search was not for dataset or datafile, illegal argument exception for "hasStaleIndex" property + } catch (Exception e) { + // search was not for dataset or datafile, illegal argument cannot get property "hasStaleIndex" } } } From cc03c7552dfd95c27d9b96174e13212964d065fe Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 19:10:55 +0200 Subject: [PATCH 30/59] integration test fix --- .../edu/harvard/iq/dataverse/api/UtilIT.java | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ab8ff1b0bb3..fff2e080eaa 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1911,13 +1911,9 @@ private static void sleepForDatasetIndex(String query, String apiToken) { if (query.contains("id:dataset") || query.contains("id:datafile")) { String[] splitted = query.split("_"); if (splitted.length >= 2) { - try { - boolean ok = UtilIT.sleepForReindex(String.valueOf(splitted[1]), apiToken, 5); - if (!ok) { - logger.info("Still indexing after 5 seconds"); - } - } catch (Exception e) { - // search was not for dataset or datafile, illegal argument cannot get property "hasStaleIndex" + boolean ok = UtilIT.sleepForReindex(String.valueOf(splitted[1]), apiToken, 
5); + if (!ok) { + logger.info("Still indexing after 5 seconds"); } } } @@ -2505,10 +2501,15 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur do { timestampResponse = UtilIT.getDatasetTimestamps(idOrPersistentId, apiToken); System.out.println(timestampResponse.body().asString()); - String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); - System.out.println(hasStaleIndex); - stale = Boolean.parseBoolean(hasStaleIndex); - + try { + String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); + System.out.println(hasStaleIndex); + stale = Boolean.parseBoolean(hasStaleIndex); + } catch (Exception ex) { + Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); + // no stale index property found... + stale = false; + } try { Thread.sleep(sleepStep); i++; From e69ec745d2e69923a9171e1b8f704ae92efd21c8 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 4 May 2023 19:11:11 +0200 Subject: [PATCH 31/59] integration test fix --- src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 0bc1564b881..99e9409e3d8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -747,7 +747,6 @@ public void testIdentifier() { System.out.println("identifier: " + identifier); String searchPart = identifier.replace("FK2/", ""); - UtilIT.sleepForReindex(identifier, apiToken, 5); Response searchUnpublished = UtilIT.search(searchPart, apiToken); searchUnpublished.prettyPrint(); searchUnpublished.then().assertThat() From e806abd8b788c617ba8ee362f18ddc97a2b43751 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 5 May 2023 11:23:54 +0200 Subject: [PATCH 32/59] illegal argument exception --- 
src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index fff2e080eaa..10944a1bdc1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2505,9 +2505,8 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur String hasStaleIndex = timestampResponse.body().jsonPath().getString("data.hasStaleIndex"); System.out.println(hasStaleIndex); stale = Boolean.parseBoolean(hasStaleIndex); - } catch (Exception ex) { - Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); - // no stale index property found... + } catch (IllegalArgumentException ex) { + Logger.getLogger(UtilIT.class.getName()).log(Level.INFO, "no stale index property found", ex); stale = false; } try { From 5ccbe272074354af2df1b16ad9fd19791f1a3081 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 5 May 2023 11:45:16 +0200 Subject: [PATCH 33/59] why??? fix for 4 years old code suddenly giving null pointer exceptions in integration tests... --- .../edu/harvard/iq/dataverse/ingest/IngestMessageBean.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index 24664eb9e91..6e83a6584df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -155,12 +155,11 @@ public void onMessage(Message message) { sbIngestedFiles.append(""); - Long objectId = null; userNotificationService.sendNotification( authenticatedUser, Timestamp.from(Instant.now()), !ingestWithErrors ? 
UserNotification.Type.INGESTCOMPLETED : UserNotification.Type.INGESTCOMPLETEDWITHERRORS, - objectId, + ingestMessage.getDatasetId(), sbIngestedFiles.toString(), true ); From 6ca073e232b25b7deb902dd03625a916b9c6596a Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 5 May 2023 13:06:06 +0200 Subject: [PATCH 34/59] added a release note --- doc/release-notes/9558-async-indexing.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/9558-async-indexing.md diff --git a/doc/release-notes/9558-async-indexing.md b/doc/release-notes/9558-async-indexing.md new file mode 100644 index 00000000000..a44eac1ff75 --- /dev/null +++ b/doc/release-notes/9558-async-indexing.md @@ -0,0 +1,3 @@ +Performance improvements, especially for large datasets containing thousands of files. +Uploading files one by one to the dataset is much faster now, allowing uploading thousands of files in an acceptable timeframe. Not only uploading a file, but all edit operations on datasets containing many files, got faster. +Performance tweaks include indexing of the datasets in the background and optimizations in the amount of the indexing operations needed. Furthermore, updates to the dataset no longer wait for ingesting to finish. Ingesting was already running in the background, but it took a lock, preventing updating the dataset and degrading performance for datasets containing many files. 
\ No newline at end of file From 64743bf7ae09d0911ca09d1c7eff99a7cf147331 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 11 May 2023 11:17:34 +0200 Subject: [PATCH 35/59] dataset page performance improvements --- .../edu/harvard/iq/dataverse/DatasetPage.java | 94 +++++++++---------- .../edu/harvard/iq/dataverse/DvObject.java | 4 + .../iq/dataverse/DvObjectServiceBean.java | 31 ++++++ 3 files changed, 81 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 9294620d790..aae7d11b90f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -81,6 +82,8 @@ import java.util.Set; import java.util.Collection; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.ejb.EJB; import javax.ejb.EJBException; import javax.faces.application.FacesMessage; @@ -233,6 +236,8 @@ public enum DisplayMode { ExternalToolServiceBean externalToolService; @EJB SolrClientService solrClientService; + @EJB + DvObjectServiceBean dvObjectService; @Inject DataverseRequestServiceBean dvRequestService; @Inject @@ -678,48 +683,43 @@ public void showAll(){ } private List selectFileMetadatasForDisplay() { - Set searchResultsIdSet = null; - - if (isIndexedVersion()) { + final Set searchResultsIdSet; + if (StringUtil.isEmpty(fileLabelSearchTerm) && StringUtil.isEmpty(fileTypeFacet) && StringUtil.isEmpty(fileAccessFacet) && 
StringUtil.isEmpty(fileTagsFacet)) { + // But, if no search terms were specified, we return the full + // list of the files in the version: + // Since the search results should include the full set of fmds if all the + // terms/facets are empty, setting them to null should just be + // an optimization for the loop below + searchResultsIdSet = null; + } else if (isIndexedVersion()) { // We run the search even if no search term and/or facets are // specified - to generate the facet labels list: searchResultsIdSet = getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); - // But, if no search terms were specified, we return the full - // list of the files in the version: - if (StringUtil.isEmpty(fileLabelSearchTerm) - && StringUtil.isEmpty(fileTypeFacet) - && StringUtil.isEmpty(fileAccessFacet) - && StringUtil.isEmpty(fileTagsFacet)) { - // Since the search results should include the full set of fmds if all the - // terms/facets are empty, setting them to null should just be - // an optimization for the loop below - searchResultsIdSet = null; - } - } else { + } else if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { // No, this is not an indexed version. // If the search term was specified, we'll run a search in the db; // if not - return the full list of files in the version. // (no facets without solr!) 
- if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { - searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); - } + searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); + } else { + searchResultsIdSet = null; } - List retList = new ArrayList<>(); - - for (FileMetadata fileMetadata : workingVersion.getFileMetadatas()) { - if (searchResultsIdSet == null || searchResultsIdSet.contains(fileMetadata.getDataFile().getId())) { - retList.add(fileMetadata); - } + final List md = workingVersion.getFileMetadatas(); + final List retList; + if (searchResultsIdSet == null) { + retList = new ArrayList<>(md); + } else { + retList = md.stream().filter(x -> searchResultsIdSet.contains(x.getDataFile().getId())).collect(Collectors.toList()); } sortFileMetadatas(retList); return retList; } - private void sortFileMetadatas(List fileList) { + private void sortFileMetadatas(final List fileList) { - DataFileComparator dfc = new DataFileComparator(); - Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); + final DataFileComparator dfc = new DataFileComparator(); + final Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); Collections.sort(fileList, comp); } @@ -1843,6 +1843,17 @@ public boolean webloaderUploadSupported() { return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); } + private void setIdByPersistentId() { + GlobalId gid = PidUtil.parseAsGlobalID(persistentId); + Long id = dvObjectService.findIdByGlobalId(gid, DvObject.DType.Dataset); + if (id == null) { + id = dvObjectService.findIdByAltGlobalId(gid, DvObject.DType.Dataset); + } + if (id != null) { + this.setId(id); + } + } + private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes @@ -1866,21 +1877,9 @@ private String 
init(boolean initFull) { // Set the workingVersion and Dataset // --------------------------------------- if (persistentId != null) { - logger.fine("initializing DatasetPage with persistent ID " + persistentId); - // Set Working Version and Dataset by PersistentID - dataset = datasetService.findByGlobalId(persistentId); - if (dataset == null) { - logger.warning("No such dataset: "+persistentId); - return permissionsWrapper.notFound(); - } - logger.fine("retrieved dataset, id="+dataset.getId()); - - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - - } else if (this.getId() != null) { + setIdByPersistentId(); + } + if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version dataset = datasetService.find(this.getId()); if (dataset == null) { @@ -2835,15 +2834,14 @@ public String refresh() { DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; if (persistentId != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - dataset = datasetService.findByGlobalId(persistentId); - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - } else if (versionId != null) { - retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - } else if (dataset.getId() != null) { + setIdByPersistentId(); + } + if (dataset.getId() != null) { //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); dataset = 
datasetService.find(dataset.getId()); retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); + } else if (versionId != null) { + retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); } if (retrieveDatasetVersionResponse == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..e3013b8cf51 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -30,9 +30,13 @@ query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByGlobalId", + query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByAlternativeGlobalId", query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByAlternativeGlobalId", + query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"), diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index e22e2f188fd..3430528aea3 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -116,6 +116,16 @@ public DvObject findByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { return runFindByGlobalId(query, globalId, dtype); } + public Long findIdByGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + + public Long findIdByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByAlternativeGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { DvObject foundDvObject = null; try { @@ -136,6 +146,27 @@ private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dty } return foundDvObject; } + + private Long runFindIdByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { + Long foundDvObject = null; + try { + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", dtype.getDType()); + foundDvObject = (Long) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + gid.asString()); + // DO nothing, just return null. 
+ return null; + } catch (Exception ex) { + logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); + return null; + } + return foundDvObject; + } public DvObject findByGlobalId(GlobalId globalId) { return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") From 857e66749e9db76f5f185b7a636821f14b3c6a01 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 11 May 2023 18:45:17 +0200 Subject: [PATCH 36/59] faster and not locking find query for dataset with many files --- .../edu/harvard/iq/dataverse/Dataset.java | 2 ++ .../edu/harvard/iq/dataverse/DatasetPage.java | 12 +++++----- .../iq/dataverse/DatasetServiceBean.java | 23 +++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 683b6687c8b..da88a388806 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -43,6 +43,8 @@ * @author skraffmiller */ @NamedQueries({ + @NamedQuery(name = "Dataset.findById", + query = "SELECT o FROM Dataset o LEFT JOIN FETCH o.files WHERE o.id=:id"), @NamedQuery(name = "Dataset.findIdStale", query = "SELECT d.id FROM Dataset d WHERE d.indexTime is NULL OR d.indexTime < d.modificationTime"), @NamedQuery(name = "Dataset.findIdStalePermission", diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index aae7d11b90f..160c605bb7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1881,7 +1881,7 @@ private String init(boolean initFull) { } if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version - dataset = datasetService.find(this.getId()); + dataset = datasetService.findDeep(this.getId()); if (dataset == null) { logger.warning("No such dataset: 
"+dataset); return permissionsWrapper.notFound(); @@ -1980,8 +1980,8 @@ private String init(boolean initFull) { } else { // an attempt to retreive both the filemetadatas and datafiles early on, so that // we don't have to do so later (possibly, many more times than necessary): - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); + //AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; + //datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } // This will default to all the files in the version, if the search term // parameter hasn't been specified yet: @@ -2838,7 +2838,7 @@ public String refresh() { } if (dataset.getId() != null) { //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); - dataset = datasetService.find(dataset.getId()); + dataset = datasetService.findDeep(dataset.getId()); retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); } else if (versionId != null) { retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); @@ -2867,8 +2867,8 @@ public String refresh() { } if (readOnly) { - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); + //AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? 
(AuthenticatedUser) session.getUser() : null; + //datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } fileMetadatasSearch = selectFileMetadatasForDisplay(); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index bf36fb469bd..418ee8d0051 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -38,6 +38,7 @@ import javax.ejb.TransactionAttributeType; import javax.inject.Named; import javax.persistence.EntityManager; +import javax.persistence.LockModeType; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; @@ -105,6 +106,28 @@ public Dataset find(Object pk) { return em.find(Dataset.class, pk); } + public Dataset findDeep(Object pk) { + return (Dataset) em.createNamedQuery("Dataset.findById") + .setParameter("id", pk) + .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.files.dataTables") + .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") + .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") + .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") + .setHint("eclipselink.left-join-fetch", "o.files.embargo") + .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") + .setHint("eclipselink.left-join-fetch", "o.files.owner") + .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.files.creator") + .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") + 
.setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .setLockMode(LockModeType.NONE) + .getSingleResult(); + } + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } From e3751bd32bded7aa1ea6b387979259d1fc25f342 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 11 May 2023 19:08:00 +0200 Subject: [PATCH 37/59] attempt at fixing the integration test --- .../java/edu/harvard/iq/dataverse/api/SearchIT.java | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 99e9409e3d8..f3def90a005 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -747,6 +747,7 @@ public void testIdentifier() { System.out.println("identifier: " + identifier); String searchPart = identifier.replace("FK2/", ""); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchUnpublished = UtilIT.search(searchPart, apiToken); searchUnpublished.prettyPrint(); searchUnpublished.then().assertThat() @@ -762,7 +763,7 @@ public void testIdentifier() { .statusCode(OK.getStatusCode()); searchPart = identifier.replace("FK2/", ""); - UtilIT.sleepForReindex(identifier, apiToken, 5); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchTargeted = UtilIT.search("dsPersistentId:" + searchPart, apiToken); searchTargeted.prettyPrint(); searchTargeted.then().assertThat() @@ -1007,25 +1008,29 @@ public void testSubtreePermissions() { // TODO: investigate if this is a bug that nothing was found. 
.body("data.total_count", CoreMatchers.equalTo(0)); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchUnpublishedRootSubtreeForDataset = UtilIT.search(identifier.replace("FK2/", ""), apiToken, "&subtree=root"); searchUnpublishedRootSubtreeForDataset.prettyPrint(); searchUnpublishedRootSubtreeForDataset.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(1)); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchUnpublishedRootSubtreeForDatasetNoAPI = UtilIT.search(identifier.replace("FK2/", ""), null, "&subtree=root"); searchUnpublishedRootSubtreeForDatasetNoAPI.prettyPrint(); searchUnpublishedRootSubtreeForDatasetNoAPI.then().assertThat() .statusCode(OK.getStatusCode()) // TODO: investigate if this is a bug that nothing was found. .body("data.total_count", CoreMatchers.equalTo(0)); - + + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchUnpublishedNoSubtreeForDataset = UtilIT.search(identifier.replace("FK2/", ""), apiToken, ""); searchUnpublishedNoSubtreeForDataset.prettyPrint(); searchUnpublishedNoSubtreeForDataset.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(1)); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchUnpublishedNoSubtreeForDatasetNoAPI = UtilIT.search(identifier.replace("FK2/", ""), null, ""); searchUnpublishedNoSubtreeForDatasetNoAPI.prettyPrint(); searchUnpublishedNoSubtreeForDatasetNoAPI.then().assertThat() @@ -1075,12 +1080,14 @@ public void testSubtreePermissions() { .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(2)); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchPublishedRootSubtreeForDataset = UtilIT.search(identifier.replace("FK2/", ""), apiToken, "&subtree=root"); searchPublishedRootSubtreeForDataset.prettyPrint(); 
searchPublishedRootSubtreeForDataset.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(1)); - + + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response searchPublishedRootSubtreeForDatasetNoAPI = UtilIT.search(identifier.replace("FK2/", ""), null, "&subtree=root"); searchPublishedRootSubtreeForDatasetNoAPI.prettyPrint(); searchPublishedRootSubtreeForDatasetNoAPI.then().assertThat() From aeac121cd6740002c06488aada95d536bd74c790 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 12 May 2023 11:19:43 +0200 Subject: [PATCH 38/59] more readable checkUpdateDatasetVersionLock implementation --- .../iq/dataverse/PermissionServiceBean.java | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 5d72bd225d2..8c0a0bf90b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -751,17 +751,10 @@ else if (dataset.isLockedFor(DatasetLock.Reason.InReview)) { } } - public void checkUpdateDatasetVersionLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { - boolean locked = false; - if (dataset.isLocked()) { - for (final DatasetLock lock: dataset.getLocks()) { - if (lock.getReason() != DatasetLock.Reason.Ingest) { - locked = true; - break; - } - } - } - if (locked) { + public void checkUpdateDatasetVersionLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { + boolean hasAtLeastOneLockThatIsNotAnIngestLock = dataset.isLocked() && dataset.getLocks().stream() + .anyMatch(lock -> !DatasetLock.Reason.Ingest.equals(lock.getReason())); + if (hasAtLeastOneLockThatIsNotAnIngestLock) { checkEditDatasetLock(dataset, dataverseRequest, command); } } From 
4db0d948dd3977f5c78a79243fd0bd8e23095d72 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 17 May 2023 13:10:49 +0200 Subject: [PATCH 39/59] fix for locking of the dataset for reindexing when unexpected exception is not caught --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 5fb7dca79f7..4c8a0a24aef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -413,7 +413,7 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { while (next != null) { try { indexDataset(next, doNormalSolrDocCleanUp); - } catch (SolrServerException | IOException e) { + } catch (Exception e) { // catch all possible exceptions; otherwise when something unexpected happes the dataset wold remain locked and impossible to reindex String failureLogText = "Indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); failureLogText += "\r\n" + e.getLocalizedMessage(); LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); From 7319ae6d16bfb3fbb886285f58a9f28ba324e4c5 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 24 May 2023 12:59:30 +0200 Subject: [PATCH 40/59] mutch faster reindexing of datasets by reducing the number of needed queries --- .../iq/dataverse/DatasetServiceBean.java | 1 + .../iq/dataverse/search/IndexServiceBean.java | 39 ++++++++++++------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 30dd165e22f..f9dcaad1373 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -116,6 +116,7 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") .setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") .setHint("eclipselink.left-join-fetch", "o.files.embargo") .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 23e1f4a1a99..22471cf0105 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -243,7 +243,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) solrInputDocument.addField(SearchFields.SOURCE, 
HARVESTED); } else { (this means that all dataverses are "local" - should this be removed? */ solrInputDocument.addField(SearchFields.IS_HARVESTED, false); - solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName()); //rootDataverseName); + solrInputDocument.addField(SearchFields.METADATA_SOURCE, rootDataverse.getName()); //rootDataverseName); /*}*/ addDataverseReleaseDateToSolrDoc(solrInputDocument, dataverse); @@ -437,7 +437,8 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - doIndexDataset(dataset, doNormalSolrDocCleanUp); + Dataset deep = datasetService.findDeep(dataset.getId()); + doIndexDataset(deep, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } @@ -817,10 +818,15 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) + Dataset dataset = null; + if (object.isInstanceofDataset()) { + dataset = datasetService.findDeep(object.getId()); + } + List paths = object.isInstanceofDataset() ? 
retrieveDVOPaths(dataset) : retrieveDVOPaths(dataverseService.find(object.getId())); sid.removeField(SearchFields.SUBTREE); @@ -1694,7 +1707,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { - for (DataFile df : datasetService.find(object.getId()).getFiles()) { + for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) { From e4d29b4f873d7fe8ef16188196ebdb40bda099a1 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 14:51:09 +0200 Subject: [PATCH 41/59] added comment --- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index d5a30ed84e3..f9c839a0fff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -43,6 +43,8 @@ * @author skraffmiller */ @NamedQueries({ + // Dataset.findById should only be used if you're going to iterate over files (otherwise, lazy loading in DatasetService.find() is better). + // If you are going to iterate over files, preferably call the DatasetService.findDeep() method i.s.o. using this query directly. 
@NamedQuery(name = "Dataset.findById", query = "SELECT o FROM Dataset o LEFT JOIN FETCH o.files WHERE o.id=:id"), @NamedQuery(name = "Dataset.findIdStale", From 628704c4ae70e7dbec26672852c5a34bbb8c11dd Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 15:35:35 +0200 Subject: [PATCH 42/59] added comment --- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index f9dcaad1373..1afbffac6cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -106,9 +106,19 @@ public Dataset find(Object pk) { return em.find(Dataset.class, pk); } + /** + * Retrieve a dataset with the deep underlying structure in one query execution. + * This is a more optimal choice when accessing files of a dataset. + * In a contrast, the find() method does not pre-fetch the file objects and results in point queries when accessing these objects. + * Since the files have a deep structure, many queries can be prevented by using the findDeep() method, especially for large datasets + * containing many files, and when iterating over all the files. + * When you are not going to access the file objects, the default find() method is better because of the lazy loading. 
+ * @return a dataset with pre-fetched file objects + */ public Dataset findDeep(Object pk) { return (Dataset) em.createNamedQuery("Dataset.findById") .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") .setHint("eclipselink.left-join-fetch", "o.files.dataTables") @@ -125,7 +135,7 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.creator") .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") - .setLockMode(LockModeType.NONE) + .setLockMode(LockModeType.NONE) // Explicit default ostrich locking (default for em.find() and named query executions) .getSingleResult(); } From 7771e9b27ef2369867ee3e1faf5068f80d0c06cb Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 15:36:06 +0200 Subject: [PATCH 43/59] remove explicit lock mode setting --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 1afbffac6cb..1794f112d8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -135,7 +135,6 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.creator") .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") - .setLockMode(LockModeType.NONE) // Explicit default ostrich locking (default for em.find() and named query executions) .getSingleResult(); } From 53fcfe513086ed82a305f16ca6bd60bb0dd0bb2b 
Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 15:57:12 +0200 Subject: [PATCH 44/59] experimental query method clean up --- .../iq/dataverse/DataFileServiceBean.java | 366 ------------------ .../edu/harvard/iq/dataverse/DatasetPage.java | 10 - 2 files changed, 376 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..ab4f61902c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.AccessRequest; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -11,19 +9,15 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -36,9 +30,7 @@ import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; -import org.apache.commons.lang3.RandomStringUtils; /** * @@ -561,364 +553,6 @@ public DataFile findCheapAndEasy(Long id) { return dataFile; } - /* - * 
This is an experimental method for populating the versions of - * the datafile with the filemetadatas, optimized for making as few db - * queries as possible. - * It should only be used to retrieve filemetadata for the DatasetPage! - * It is not guaranteed to adequately perform anywhere else. - */ - - public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion version, AuthenticatedUser au) { - List dataFiles = new ArrayList<>(); - List dataTables = new ArrayList<>(); - //List retList = new ArrayList<>(); - - // TODO: - // replace these maps with simple lists and run binary search on them. -- 4.2.1 - - Map userMap = new HashMap<>(); - Map filesMap = new HashMap<>(); - Map datatableMap = new HashMap<>(); - Map categoryMap = new HashMap<>(); - Map> fileTagMap = new HashMap<>(); - List accessRequestFileIds = new ArrayList(); - - List fileTagLabels = DataFileTag.listTags(); - - - int i = 0; - //Cache responses - Map embargoMap = new HashMap(); - - List dataTableResults = em.createNativeQuery("SELECT t0.ID, t0.DATAFILE_ID, t0.UNF, t0.CASEQUANTITY, t0.VARQUANTITY, t0.ORIGINALFILEFORMAT, t0.ORIGINALFILESIZE, t0.ORIGINALFILENAME FROM dataTable t0, dataFile t1, dvObject t2 WHERE ((t0.DATAFILE_ID = t1.ID) AND (t1.ID = t2.ID) AND (t2.OWNER_ID = " + owner.getId() + ")) ORDER BY t0.ID").getResultList(); - - for (Object[] result : dataTableResults) { - DataTable dataTable = new DataTable(); - long fileId = ((Number) result[1]).longValue(); - - dataTable.setId(((Number) result[1]).longValue()); - - dataTable.setUnf((String)result[2]); - - dataTable.setCaseQuantity((Long)result[3]); - - dataTable.setVarQuantity((Long)result[4]); - - dataTable.setOriginalFileFormat((String)result[5]); - - dataTable.setOriginalFileSize((Long)result[6]); - - dataTable.setOriginalFileName((String)result[7]); - - dataTables.add(dataTable); - datatableMap.put(fileId, i++); - - } - - logger.fine("Retrieved "+dataTables.size()+" DataTable objects."); - - List dataTagsResults = 
em.createNativeQuery("SELECT t0.DATAFILE_ID, t0.TYPE FROM DataFileTag t0, dvObject t1 WHERE (t1.ID = t0.DATAFILE_ID) AND (t1.OWNER_ID="+ owner.getId() + ")").getResultList(); - for (Object[] result : dataTagsResults) { - Long datafile_id = (Long) result[0]; - Integer tagtype_id = (Integer) result[1]; - if (fileTagMap.get(datafile_id) == null) { - fileTagMap.put(datafile_id, new HashSet<>()); - } - fileTagMap.get(datafile_id).add(tagtype_id); - } - logger.fine("Retrieved "+dataTagsResults.size()+" data tags."); - dataTagsResults = null; - - //Only need to check for access requests if there is an authenticated user - if (au != null) { - List accessRequests = em.createNativeQuery("SELECT t0.ID FROM DVOBJECT t0, FILEACCESSREQUESTS t1 WHERE t1.datafile_id = t0.id and t0.OWNER_ID = " + owner.getId() + " and t1.AUTHENTICATED_USER_ID = " + au.getId() + " ORDER BY t0.ID").getResultList(); - for (Object result : accessRequests) { - accessRequestFileIds.add(Long.valueOf((Integer)result)); - } - logger.fine("Retrieved " + accessRequests.size() + " access requests."); - accessRequests = null; - } - - i = 0; - - List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.PROTOCOL, t0.AUTHORITY, t0.IDENTIFIER, t1.EMBARGO_ID FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile'))) ORDER BY t0.ID").getResultList(); - - for (Object[] result : fileResults) { - Integer file_id = (Integer) result[0]; - - DataFile dataFile = new DataFile(); - dataFile.setMergeable(false); - - dataFile.setId(file_id.longValue()); - - Timestamp createDate = (Timestamp) result[1]; - Timestamp indexTime = (Timestamp) result[2]; 
- Timestamp modificationTime = (Timestamp) result[3]; - Timestamp permissionIndexTime = (Timestamp) result[4]; - Timestamp permissionModificationTime = (Timestamp) result[5]; - Timestamp publicationDate = (Timestamp) result[6]; - - dataFile.setCreateDate(createDate); - dataFile.setIndexTime(indexTime); - dataFile.setModificationTime(modificationTime); - dataFile.setPermissionIndexTime(permissionIndexTime); - dataFile.setPermissionModificationTime(permissionModificationTime); - dataFile.setPublicationDate(publicationDate); - - Long creatorId = (Long) result[7]; - if (creatorId != null) { - AuthenticatedUser creator = userMap.get(creatorId); - if (creator == null) { - creator = userService.find(creatorId); - if (creator != null) { - userMap.put(creatorId, creator); - } - } - if (creator != null) { - dataFile.setCreator(creator); - } - } - - dataFile.setOwner(owner); - - Long releaseUserId = (Long) result[8]; - if (releaseUserId != null) { - AuthenticatedUser releaseUser = userMap.get(releaseUserId); - if (releaseUser == null) { - releaseUser = userService.find(releaseUserId); - if (releaseUser != null) { - userMap.put(releaseUserId, releaseUser); - } - } - if (releaseUser != null) { - dataFile.setReleaseUser(releaseUser); - } - } - - String contentType = (String) result[9]; - - if (contentType != null) { - dataFile.setContentType(contentType); - } - - String storageIdentifier = (String) result[10]; - - if (storageIdentifier != null) { - dataFile.setStorageIdentifier(storageIdentifier); - } - - Long fileSize = (Long) result[11]; - - if (fileSize != null) { - dataFile.setFilesize(fileSize); - } - - if (result[12] != null) { - String ingestStatusString = (String) result[12]; - dataFile.setIngestStatus(ingestStatusString.charAt(0)); - } - - String md5 = (String) result[13]; - - if (md5 != null) { - dataFile.setChecksumValue(md5); - } - - Boolean restricted = (Boolean) result[14]; - if (restricted != null) { - dataFile.setRestricted(restricted); - } - - String 
checksumType = (String) result[15]; - if (checksumType != null) { - try { - // In the database we store "SHA1" rather than "SHA-1". - DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); - dataFile.setChecksumType(typeFromStringInDatabase); - } catch (IllegalArgumentException ex) { - logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); - } - } - - Long previousDataFileId = (Long) result[16]; - if (previousDataFileId != null) { - dataFile.setPreviousDataFileId(previousDataFileId); - } - - Long rootDataFileId = (Long) result[17]; - if (rootDataFileId != null) { - dataFile.setRootDataFileId(rootDataFileId); - } - - String protocol = (String) result[18]; - if (protocol != null) { - dataFile.setProtocol(protocol); - } - - String authority = (String) result[19]; - if (authority != null) { - dataFile.setAuthority(authority); - } - - String identifier = (String) result[20]; - if (identifier != null) { - dataFile.setIdentifier(identifier); - } - - Long embargo_id = (Long) result[21]; - if (embargo_id != null) { - if (embargoMap.containsKey(embargo_id)) { - dataFile.setEmbargo(embargoMap.get(embargo_id)); - } else { - Embargo e = embargoService.findByEmbargoId(embargo_id); - dataFile.setEmbargo(e); - embargoMap.put(embargo_id, e); - } - } - - // TODO: - // - if ingest status is "bad", look up the ingest report; - // - is it a dedicated thumbnail for the dataset? (do we ever need that info?? - not on the dataset page, I don't think...) - - // Is this a tabular file? 
- - if (datatableMap.get(dataFile.getId()) != null) { - dataTables.get(datatableMap.get(dataFile.getId())).setDataFile(dataFile); - dataFile.setDataTable(dataTables.get(datatableMap.get(dataFile.getId()))); - - } - - if (fileTagMap.get(dataFile.getId()) != null) { - for (Integer tag_id : fileTagMap.get(dataFile.getId())) { - DataFileTag tag = new DataFileTag(); - tag.setTypeByLabel(fileTagLabels.get(tag_id)); - tag.setDataFile(dataFile); - dataFile.addTag(tag); - } - } - - if (dataFile.isRestricted() && accessRequestFileIds.contains(dataFile.getId())) { - dataFile.addFileAccessRequester(au); - } - - dataFiles.add(dataFile); - filesMap.put(dataFile.getId(), i++); - } - - logger.fine("Retrieved and cached "+i+" datafiles."); - - i = 0; - for (DataFileCategory fileCategory : owner.getCategories()) { - //logger.fine("category: id="+fileCategory.getId()); - categoryMap.put(fileCategory.getId(), i++); - } - - logger.fine("Retrieved "+i+" file categories attached to the dataset."); - - version.setFileMetadatas(retrieveFileMetadataForVersion(owner, version, dataFiles, filesMap, categoryMap)); - logger.fine("Retrieved " + version.getFileMetadatas().size() + " filemetadatas for the version " + version.getId()); - owner.setFiles(dataFiles); - } - - private List retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List dataFiles, Map filesMap, Map categoryMap) { - List retList = new ArrayList<>(); - Map> categoryMetaMap = new HashMap<>(); - - List categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList(); - int i = 0; - for (Object[] result : categoryResults) { - Long category_id = (Long) result[0]; - Long filemeta_id = (Long) result[1]; - if (categoryMetaMap.get(filemeta_id) == null) { - categoryMetaMap.put(filemeta_id, new HashSet<>()); - } - 
categoryMetaMap.get(filemeta_id).add(category_id); - i++; - } - logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version "+version.getId()); - - List metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList(); - - for (Object[] result : metadataResults) { - Integer filemeta_id = (Integer) result[0]; - - if (filemeta_id == null) { - continue; - } - - Long file_id = (Long) result[1]; - if (file_id == null) { - continue; - } - - Integer file_list_id = filesMap.get(file_id); - if (file_list_id == null) { - continue; - } - FileMetadata fileMetadata = new FileMetadata(); - fileMetadata.setId(filemeta_id.longValue()); - fileMetadata.setCategories(new LinkedList<>()); - - if (categoryMetaMap.get(fileMetadata.getId()) != null) { - for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) { - if (categoryMap.get(cat_id) != null) { - fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id))); - } - } - } - - fileMetadata.setDatasetVersion(version); - - // Link the FileMetadata object to the DataFile: - fileMetadata.setDataFile(dataFiles.get(file_list_id)); - // ... 
and the DataFile back to the FileMetadata: - fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata); - - String description = (String) result[2]; - - if (description != null) { - fileMetadata.setDescription(description); - } - - String label = (String) result[3]; - - if (label != null) { - fileMetadata.setLabel(label); - } - - Boolean restricted = (Boolean) result[4]; - if (restricted != null) { - fileMetadata.setRestricted(restricted); - } - - String dirLabel = (String) result[5]; - if (dirLabel != null){ - fileMetadata.setDirectoryLabel(dirLabel); - } - - String provFreeForm = (String) result[6]; - if (provFreeForm != null){ - fileMetadata.setProvFreeForm(provFreeForm); - } - - retList.add(fileMetadata); - } - - logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method)."); - - - /* - We no longer perform this sort here, just to keep this filemetadata - list as identical as possible to when it's produced by the "traditional" - EJB method. When it's necessary to have the filemetadatas sorted by - FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted() - method should be called. 
- - Collections.sort(retList, FileMetadata.compareByLabel); */ - - return retList; - } public List findIngestsInProgress() { if ( em.isOpen() ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 1e4c56c6241..d4fd586b6bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -1977,11 +1977,6 @@ private String init(boolean initFull) { // init the list of FileMetadatas if (workingVersion.isDraft() && canUpdateDataset()) { readOnly = false; - } else { - // an attempt to retreive both the filemetadatas and datafiles early on, so that - // we don't have to do so later (possibly, many more times than necessary): - //AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - //datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } // This will default to all the files in the version, if the search term // parameter hasn't been specified yet: @@ -2880,11 +2875,6 @@ public String refresh() { this.dataset = this.workingVersion.getDataset(); } - if (readOnly) { - //AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? 
(AuthenticatedUser) session.getUser() : null; - //datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); - } - fileMetadatasSearch = selectFileMetadatasForDisplay(); displayCitation = dataset.getCitation(true, workingVersion); From cf6f317d8b829ce98d8da988ed13774af53e3921 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 15:58:34 +0200 Subject: [PATCH 45/59] removed comments --- .../engine/command/impl/AbstractCreateDatasetCommand.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 300c9b6c1b1..eb171160376 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -139,9 +139,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Use for code that requires database ids postDBFlush(theDataset, ctxt); - // TODO: this needs to be moved in to an onSuccess method; not adding to this PR as its out of scope - // TODO: switch to asynchronous version when JPA sync works - // ctxt.index().asyncIndexDataset(theDataset.getId(), true); ctxt.index().asyncIndexDataset(theDataset, true); return theDataset; From 25d804e8ec316007661a1ba4e0f0c338d1f2a2c0 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 16:19:01 +0200 Subject: [PATCH 46/59] async indexing in the new dataset version command --- .../engine/command/impl/CreateDatasetVersionCommand.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 3493b145be2..538500cfe0a 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -65,10 +65,9 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //good wrapped response if the TOA/Request Access not in compliance prepareDatasetAndVersion(); - // TODO make async - // ctxt.index().asyncIndexDataset(dataset); - return ctxt.datasets().storeVersion(newVersion); - + DatasetVersion version = ctxt.datasets().storeVersion(newVersion); + ctxt.index().asyncIndexDataset(dataset, true); + return version; } /** From 9f9ad74e2e32d1575ad8f54d1aa37d05cffbacc2 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 16:36:14 +0200 Subject: [PATCH 47/59] extended comment --- .../edu/harvard/iq/dataverse/ingest/IngestMessageBean.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index 6e83a6584df..8712fbf3ffc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -75,6 +75,11 @@ public void onMessage(Message message) { ingestMessage = (IngestMessage) om.getObject(); // if the lock was removed while an ingest was queued, ratake the lock + // The "if" is the first thing that addDatasetLock method does. + // It has some complexity and would result in the code duplication if repeated here. + // If that check would be removed from the addDatasetLock method in the future without + // updating the code using this method, ingest code would still not break because + // we remove "all" ingest locks at the end (right now, there can be at most one ingest lock). 
datasetService.addDatasetLock(ingestMessage.getDatasetId(), DatasetLock.Reason.Ingest, ingestMessage.getAuthenticatedUserId(), From 433df840141339652370ea362796e7afe73c0d6b Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 26 May 2023 16:56:28 +0200 Subject: [PATCH 48/59] fixed nullpointer in unit test --- .../engine/command/impl/CreateDatasetVersionCommand.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 538500cfe0a..1d83f522f29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -66,7 +66,9 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { prepareDatasetAndVersion(); DatasetVersion version = ctxt.datasets().storeVersion(newVersion); - ctxt.index().asyncIndexDataset(dataset, true); + if (ctxt.index() != null) { + ctxt.index().asyncIndexDataset(dataset, true); + } return version; } From 6d1acb076d1a9cfe58ac6189a35de7fc3d58dae5 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 31 May 2023 15:42:56 +0200 Subject: [PATCH 49/59] added sleep for reindex in datasets IT --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6988fc333a3..a07fca411d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2803,6 +2803,7 @@ public void testCuratePublishedDatasetVersionCommand() throws IOException { UtilIT.publishDatasetViaNativeApi(datasetId, "updatecurrent", 
apiToken).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.sleepForReindex(datasetId, apiToken, 5); Response getDatasetJsonAfterUpdate = UtilIT.nativeGet(datasetId, apiToken); getDatasetJsonAfterUpdate.prettyPrint(); getDatasetJsonAfterUpdate.then().assertThat() From 9e08e2a18c6a8defa101f1efeffa57af824fe451 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 31 May 2023 15:53:50 +0200 Subject: [PATCH 50/59] fixed expected string given int --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index a07fca411d4..127074bd6c1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2803,7 +2803,7 @@ public void testCuratePublishedDatasetVersionCommand() throws IOException { UtilIT.publishDatasetViaNativeApi(datasetId, "updatecurrent", apiToken).then().assertThat().statusCode(OK.getStatusCode()); - UtilIT.sleepForReindex(datasetId, apiToken, 5); + UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response getDatasetJsonAfterUpdate = UtilIT.nativeGet(datasetId, apiToken); getDatasetJsonAfterUpdate.prettyPrint(); getDatasetJsonAfterUpdate.then().assertThat() From 846ad06780d2113270f0eccd670cf27161694165 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Wed, 31 May 2023 16:12:08 +0200 Subject: [PATCH 51/59] fixed compile error --- .../java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index e76d9c96fb1..77ec6701bc6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java 
@@ -23,6 +23,7 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; import java.time.Instant; From 1f13674465c76e97a0bc72e607786d9cbadbe66e Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 09:51:19 +0200 Subject: [PATCH 52/59] restored lock mode type NONE for the findDeep --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 305afd2ed30..51741b0e686 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -135,6 +135,7 @@ public Dataset findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.creator") .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .setLockMode(LockModeType.NONE) .getSingleResult(); } From 311d115c0144f3e394f6525c7be6ebddd49bc2d2 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 13:16:18 +0200 Subject: [PATCH 53/59] reverted last two commits as they do not fix the integration tests --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 1 - src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 51741b0e686..305afd2ed30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -135,7 +135,6 @@ public Dataset
findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.files.creator") .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") - .setLockMode(LockModeType.NONE) .getSingleResult(); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 127074bd6c1..6988fc333a3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2803,7 +2803,6 @@ public void testCuratePublishedDatasetVersionCommand() throws IOException { UtilIT.publishDatasetViaNativeApi(datasetId, "updatecurrent", apiToken).then().assertThat().statusCode(OK.getStatusCode()); - UtilIT.sleepForReindex(String.valueOf(datasetId), apiToken, 5); Response getDatasetJsonAfterUpdate = UtilIT.nativeGet(datasetId, apiToken); getDatasetJsonAfterUpdate.prettyPrint(); getDatasetJsonAfterUpdate.then().assertThat() From 3ef99d6512b45d0d0d196b4c3d5c7a468b99d18d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 13:18:13 +0200 Subject: [PATCH 54/59] reverted back to TODO in CreateDatasetVersionCommand --- .../engine/command/impl/CreateDatasetVersionCommand.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 1d83f522f29..3493b145be2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -65,11 +65,10 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //good wrapped response if the TOA/Request Access not in compliance prepareDatasetAndVersion(); - 
DatasetVersion version = ctxt.datasets().storeVersion(newVersion); - if (ctxt.index() != null) { - ctxt.index().asyncIndexDataset(dataset, true); - } - return version; + // TODO make async + // ctxt.index().asyncIndexDataset(dataset); + return ctxt.datasets().storeVersion(newVersion); + } /** From 0869f0f70ad8d27f20a7222dc27866ed3e568f90 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 14:19:10 +0200 Subject: [PATCH 55/59] restored TODO fix in CreateDatasetVersionCommand --- .../engine/command/impl/CreateDatasetVersionCommand.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 3493b145be2..1d83f522f29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -65,10 +65,11 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //good wrapped response if the TOA/Request Access not in compliance prepareDatasetAndVersion(); - // TODO make async - // ctxt.index().asyncIndexDataset(dataset); - return ctxt.datasets().storeVersion(newVersion); - + DatasetVersion version = ctxt.datasets().storeVersion(newVersion); + if (ctxt.index() != null) { + ctxt.index().asyncIndexDataset(dataset, true); + } + return version; } /** From 7082d3c0345da28cb80990a40816c5cb281cd696 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 15:17:13 +0200 Subject: [PATCH 56/59] reverted find deep when dataset was already given --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 22471cf0105..91e2d21d9f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -437,8 +437,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - Dataset deep = datasetService.findDeep(dataset.getId()); - doIndexDataset(deep, doNormalSolrDocCleanUp); + doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } From 54a0346bd74eeeff005ccfd3a7869215faee3481 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 15:37:34 +0200 Subject: [PATCH 57/59] restored find deep for faster publishing --- .../java/edu/harvard/iq/dataverse/search/IndexServiceBean.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 91e2d21d9f2..22471cf0105 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -437,7 +437,8 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - doIndexDataset(dataset, doNormalSolrDocCleanUp); + Dataset deep = datasetService.findDeep(dataset.getId()); + doIndexDataset(deep, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } From 5adf6a4acf574eab8fd393a18c8bf6d0a390cf71 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 1 Jun 2023 16:18:03 +0200 Subject: [PATCH 58/59] removed both find deep calls from index bean ---
.../harvard/iq/dataverse/search/IndexServiceBean.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 22471cf0105..baa13eba368 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -437,8 +437,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - Dataset deep = datasetService.findDeep(dataset.getId()); - doIndexDataset(deep, doNormalSolrDocCleanUp); + doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } @@ -1695,11 +1694,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.addField(fieldName, doc.getFieldValue(fieldName)); } - Dataset dataset = null; - if (object.isInstanceofDataset()) { - dataset = datasetService.findDeep(object.getId()); - } - List paths = object.isInstanceofDataset() ? retrieveDVOPaths(dataset) + List paths = object.isInstanceofDataset() ? 
retrieveDVOPaths(datasetService.find(object.getId())) : retrieveDVOPaths(dataverseService.find(object.getId())); sid.removeField(SearchFields.SUBTREE); @@ -1707,7 +1702,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { - for (DataFile df : dataset.getFiles()) { + for (DataFile df : datasetService.find(object.getId()).getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) { From 4be0b5d10f48627484f2466bdf40b6cce4fbb74d Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Fri, 9 Jun 2023 18:11:25 +0200 Subject: [PATCH 59/59] restored findDeep in index service bean but only where find was used --- .../harvard/iq/dataverse/search/IndexServiceBean.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index baa13eba368..0b8f93e47a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -352,7 +352,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) @TransactionAttribute(REQUIRES_NEW) public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { boolean doNormalSolrDocCleanUp = false; - Dataset dataset = em.find(Dataset.class, datasetId); + Dataset dataset = datasetService.findDeep(datasetId); asyncIndexDataset(dataset, doNormalSolrDocCleanUp); dataset = null; } @@ -1694,7 +1694,11 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.addField(fieldName, 
doc.getFieldValue(fieldName)); } - List paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) + Dataset dataset = null; + if (object.isInstanceofDataset()) { + dataset = datasetService.findDeep(object.getId()); + } + List paths = object.isInstanceofDataset() ? retrieveDVOPaths(dataset) : retrieveDVOPaths(dataverseService.find(object.getId())); sid.removeField(SearchFields.SUBTREE); @@ -1702,7 +1706,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { - for (DataFile df : datasetService.find(object.getId()).getFiles()) { + for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) {