diff --git a/conf/solr/schema.xml b/conf/solr/schema.xml index 5517175d443..34f888acec4 100644 --- a/conf/solr/schema.xml +++ b/conf/solr/schema.xml @@ -242,6 +242,7 @@ + - 6.7 + 6.7.1 17 UTF-8 diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 303a6d8a5ac..f52163192f7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -165,12 +165,12 @@ private List findByOwnerId(Long ownerId, boolean onlyPublished) { } public List findIdsByOwnerId(Long ownerId) { - return findIdsByOwnerId(ownerId, false); + return findIdsByOwnerId(ownerId, /* onlyPublished */ false, /* includeHarvested: legacy callers got every id */ true); } - private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { + public List findIdsByOwnerId(Long ownerId, boolean onlyPublished, boolean includeHarvested) { List retList = new ArrayList<>(); - if (!onlyPublished) { + if (!onlyPublished && includeHarvested) { return em.createNamedQuery("Dataset.findIdByOwnerId") .setParameter("ownerId", ownerId) .getResultList(); @@ -178,8 +178,18 @@ private List findIdsByOwnerId(Long ownerId, boolean onlyPublished) { List results = em.createNamedQuery("Dataset.findByOwnerId") .setParameter("ownerId", ownerId).getResultList(); for (Dataset ds : results) { - if (ds.isReleased() && !ds.isDeaccessioned()) { - retList.add(ds.getId()); + // For harvested datasets, only add them if includeHarvested is true + if (ds.isHarvested()) { + if (includeHarvested) { + retList.add(ds.getId()); + } + // For non-harvested datasets, either + // - add them all (if onlyPublished is false) OR + // - only add them if they are released and not deaccessioned (if onlyPublished is true) + } else { + if (!onlyPublished || (ds.isReleased() && !ds.isDeaccessioned())) { + retList.add(ds.getId()); + } } } return retList; diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java 
b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index d3d99635475..98f52b705a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -56,7 +56,13 @@ @NamedQuery(name = "Dataverse.filterByAlias", query="SELECT dv FROM Dataverse dv WHERE LOWER(dv.alias) LIKE :alias order by dv.alias"), @NamedQuery(name = "Dataverse.filterByAliasNameAffiliation", query="SELECT dv FROM Dataverse dv WHERE (LOWER(dv.alias) LIKE :alias) OR (LOWER(dv.name) LIKE :name) OR (LOWER(dv.affiliation) LIKE :affiliation) order by dv.alias"), @NamedQuery(name = "Dataverse.filterByName", query="SELECT dv FROM Dataverse dv WHERE LOWER(dv.name) LIKE :name order by dv.alias"), - @NamedQuery(name = "Dataverse.countAll", query = "SELECT COUNT(dv) FROM Dataverse dv") + @NamedQuery(name = "Dataverse.countAll", query = "SELECT COUNT(dv) FROM Dataverse dv"), + @NamedQuery(name = "Dataverse.getDatasetCount", + query = "SELECT " + + "(SELECT COUNT(DISTINCT d) FROM Dataset d JOIN d.versions v WHERE d.owner.id IN :ids AND v.versionState = :datasetState) + " + + "(SELECT COUNT(DISTINCT l.dataset) FROM DatasetLinkingDataverse l JOIN l.dataset.versions v WHERE l.linkingDataverse.id IN :ids AND v.versionState = :datasetState) " + + // The WHERE statement is a hacky way of ensuring the count is returned in a single result row + "FROM Dataverse d WHERE d.id = (SELECT MIN(d2.id) FROM Dataverse d2)") }) @Entity @Table(indexes = {@Index(columnList="defaultcontributorrole_id") diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index f89e707cc03..0247ad5aa15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import 
edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; @@ -26,13 +27,8 @@ import java.io.File; import java.io.IOException; import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.logging.Logger; -import java.util.Properties; import edu.harvard.iq.dataverse.validation.JSONDataValidation; import jakarta.ejb.EJB; @@ -468,8 +464,26 @@ public List findDataversesThatLinkToThisDvId(long dataverseId) { return dataverseLinkingService.findLinkingDataverses(dataverseId); } + public List findDatasetsThisIdHasLinkedTo(long dataverseId, boolean onlyPublished) { + List linkedDatasets = datasetLinkingService.findLinkedDatasets(dataverseId); + + if (!onlyPublished) { + return linkedDatasets; + } + + List retList = new ArrayList(); + + for (Dataset ds : linkedDatasets) { + if (ds.isReleased() && !ds.isDeaccessioned()) { + retList.add(ds); + } + } + + return retList; + } + public List findDatasetsThisIdHasLinkedTo(long dataverseId) { - return datasetLinkingService.findLinkedDatasets(dataverseId); + return this.findDatasetsThisIdHasLinkedTo(dataverseId, false); } public List findDataversesThatLinkToThisDatasetId(long datasetId) { @@ -754,21 +768,25 @@ public List findAllDataverseDataverseChildren(Long dvId) { // function to recursively find ids of all children of a dataverse that are // of type dataset - public List findAllDataverseDatasetChildren(Long dvId) { + public List findAllDataverseDatasetChildren(Long dvId, boolean onlyPublished, boolean includeHarvested) { // get list of Dataverse children List dataverseChildren = findIdsByOwnerId(dvId); // get list of Dataset children - List datasetChildren = 
datasetService.findIdsByOwnerId(dvId); + List datasetChildren = datasetService.findIdsByOwnerId(dvId, onlyPublished, includeHarvested); if (dataverseChildren == null) { return datasetChildren; } else { for (Long childDvId : dataverseChildren) { - datasetChildren.addAll(findAllDataverseDatasetChildren(childDvId)); + datasetChildren.addAll(findAllDataverseDatasetChildren(childDvId, onlyPublished, includeHarvested)); } return datasetChildren; } } + + public List findAllDataverseDatasetChildren(Long dvId) { + return findAllDataverseDatasetChildren(dvId, /* onlyPublished */ false, /* includeHarvested: legacy callers got every child */ true); + } public String addRoleAssignmentsToChildren(Dataverse owner, ArrayList rolesToInherit, boolean inheritAllRoles) { @@ -1257,4 +1275,36 @@ public void disableStorageQuota(StorageQuota storageQuota) { public long getDataverseCount() { return em.createNamedQuery("Dataverse.countAll", Long.class).getSingleResult(); } + + /** + * Returns the total number of published datasets within a Dataverse collection. The number includes harvested and + * linked datasets. Datasets in subcollections are also counted. 
+ * @param dvId ID of a Dataverse collection + * @return the total number of published datasets within that Dataverse collection + */ + public long getDatasetCount(Long dvId) { + Set dvIds = new HashSet<>(); + Deque stack = new ArrayDeque<>(); + dvIds.add(dvId); + stack.push(dvId); + + // Collect IDs of all subdataverses + while (!stack.isEmpty()) { + Long currentId = stack.pop(); + List children = em.createQuery("SELECT d.id FROM Dataverse d WHERE d.owner.id = :parentId", Long.class) + .setParameter("parentId", currentId) + .getResultList(); + + for (Long childId : children) { + if (dvIds.add(childId)) { + stack.push(childId); + } + } + } + + return em.createNamedQuery("Dataverse.getDatasetCount", Long.class) + .setParameter("ids", dvIds) + .setParameter("datasetState", VersionState.RELEASED) + .getSingleResult(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java index 7f5672c0cd7..d44638be42c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetLinkingDataverse; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; @@ -15,6 +16,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import org.apache.solr.client.solrj.SolrServerException; @@ -42,12 +45,17 @@ public Dataset execute(CommandContext 
ctxt) throws CommandException { DatasetLinkingDataverse doomedAndMerged = ctxt.em().merge(doomed); ctxt.em().remove(doomedAndMerged); - try { - ctxt.index().indexDataverse(doomed.getLinkingDataverse()); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete linking dataverse indexing failed for Dataverse. "; - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, doomed.getLinkingDataverse()); + List toReindex = new ArrayList<>(); + toReindex.add(doomed.getLinkingDataverse()); + toReindex.addAll(doomed.getLinkingDataverse().getOwners()); + for (Dataverse dv : toReindex) { + try { + ctxt.index().indexDataverse(dv); + } catch (IOException | SolrServerException e) { + String failureLogText = "Post delete linking dataverse indexing failed for Dataverse. "; + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dv); + } } return merged; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java index be3e28029e4..14dafd8397c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java @@ -49,12 +49,13 @@ public class DestroyDatasetCommand extends AbstractVoidCommand { private List datasetAndFileSolrIdsToDelete; - private Dataverse toReIndex; + private List toReIndex; public DestroyDatasetCommand(Dataset doomed, DataverseRequest aRequest) { super(aRequest, doomed); this.doomed = doomed; datasetAndFileSolrIdsToDelete = new ArrayList<>(); + toReIndex = new ArrayList<>(); } @Override @@ -116,7 +117,12 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } } - toReIndex = managedDoomed.getOwner(); + toReIndex.add(managedDoomed.getOwner()); + 
toReIndex.addAll(managedDoomed.getOwner().getOwners()); + managedDoomed.getDatasetLinkingDataverses().forEach(dld -> { + toReIndex.add(dld.getLinkingDataverse()); + toReIndex.addAll(dld.getLinkingDataverse().getOwners()); + }); // add potential Solr IDs of datasets to list for deletion String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId(); @@ -145,13 +151,15 @@ public boolean onSuccess(CommandContext ctxt, Object r) { logger.log(Level.FINE, "Result of attempt to delete dataset and file IDs from the search index: {0}", resultOfSolrDeletionAttempt.getMessage()); // reindex - try { - ctxt.index().indexDataverse(toReIndex); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-destroy dataset indexing of the owning dataverse failed. You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + toReIndex.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, toReIndex); - retVal = false; + for (Dataverse dv : toReIndex) { + try { + ctxt.index().indexDataverse(dv); + } catch (IOException | SolrServerException e) { + String failureLogText = "Post-destroy dataset indexing of an owning or linking dataverse failed. 
You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dv); + retVal = false; + } } return retVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 4ba8d39a949..d4bc6b83613 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -33,8 +33,7 @@ import java.awt.datatransfer.StringSelection; import java.io.IOException; import java.sql.Timestamp; -import java.util.Date; -import java.util.List; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; @@ -42,7 +41,7 @@ import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; -import java.util.ArrayList; + import java.util.concurrent.Future; import org.apache.logging.log4j.util.Strings; @@ -67,7 +66,7 @@ public class FinalizeDatasetPublicationCommand extends AbstractPublishDatasetCom */ final boolean datasetExternallyReleased; - List dataversesToIndex = new ArrayList<>(); + Set dataversesToIndex = new HashSet<>(); public static final String FILE_VALIDATION_ERROR = "FILE VALIDATION ERROR"; @@ -209,6 +208,15 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } + // The owning dataverse plus all dataverses linking to this dataset must be re-indexed to update their + // datasetCount + dataversesToIndex.add(getDataset().getOwner()); + dataversesToIndex.addAll(getDataset().getOwner().getOwners()); + getDataset().getDatasetLinkingDataverses().forEach(dld -> { + 
dataversesToIndex.add(dld.getLinkingDataverse()); + dataversesToIndex.addAll(dld.getLinkingDataverse().getOwners()); + }); + List previouslyCalled = ctxt.getCommandsCalled(); PrivateUrl privateUrl = ctxt.engine().submit(new GetPrivateUrlCommand(getRequest(), theDataset)); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java index 1225c892ac7..41f6d75526f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java @@ -19,7 +19,10 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.Date; +import java.util.List; + import org.apache.solr.client.solrj.SolrServerException; /** @@ -68,6 +71,20 @@ public boolean onSuccess(CommandContext ctxt, Object r) { ctxt.index().asyncIndexDataset(dld.getDataset(), true); + List toReindex = new ArrayList<>(); + toReindex.add(dld.getLinkingDataverse()); + toReindex.addAll(dld.getLinkingDataverse().getOwners()); + for (Dataverse dv : toReindex) { + try { + ctxt.index().indexDataverse(dv); + } catch (IOException | SolrServerException e) { + String failureLogText = "Indexing of linking dataverse failed. 
You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dv); + retVal = false; // keep looping so the remaining ancestors are still reindexed + } + } + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 7ac58cb1860..cb28691a596 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -31,6 +31,7 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import org.apache.commons.lang3.mutable.MutableBoolean; +import org.apache.solr.client.solrj.SolrServerException; import org.xml.sax.SAXException; import io.gdcc.xoai.model.oaipmh.results.Record; @@ -191,6 +192,17 @@ public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId hdLogger.log(Level.INFO, String.format("Datasets created/updated: %s, datasets deleted: %s, datasets failed: %s", harvestedDatasetIds.size(), deletedIdentifiers.size(), failedIdentifiers.size())); + // Reindex dataverse to update datasetCount + List toReindex = new ArrayList<>(); + toReindex.add(harvestingClientConfig.getDataverse()); + toReindex.addAll(harvestingClientConfig.getDataverse().getOwners()); + for (Dataverse dv : toReindex) { + try { + indexService.indexDataverse(dv); + } catch (IOException | SolrServerException e) { + hdLogger.log(Level.SEVERE, "Dataverse indexing failed. 
You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString(), e); + } + } } } catch (StopHarvestException she) { hdLogger.log(Level.INFO, "HARVEST INTERRUPTED BY EXTERNAL REQUEST"); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 2f76fed1a11..4b491a5e722 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -1,12 +1,10 @@ package edu.harvard.iq.dataverse.harvest.client; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataverseRequestServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean; + +import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -22,6 +20,7 @@ import jakarta.persistence.NoResultException; import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; +import org.apache.solr.client.solrj.SolrServerException; /** * @@ -154,6 +153,19 @@ public void deleteClient(Long clientId) { } em.remove(merged); + + // Reindex dataverse to update datasetCount + List toReindex = new ArrayList<>(); + toReindex.add(victim.getDataverse()); + toReindex.addAll(victim.getDataverse().getOwners()); + for (Dataverse dv : toReindex) { + try { + indexService.indexDataverse(dv); + } catch (IOException | SolrServerException e) { + logger.severe("Dataverse indexing failed. 
You can kickoff a re-index of this dataverse with: \r\n curl http://localhost:8080/api/admin/index/dataverses/" + dv.getId().toString()); + } + } + } catch (Exception e) { errorMessage = "Failed to delete cleint. Unknown exception: " + e.getMessage(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index ee264510251..4aa1b5abb20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -320,6 +320,11 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) } solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); + + if (dataverse.isReleased()) { + solrInputDocument.addField(SearchFields.DATASET_COUNT, dataverseService.getDatasetCount(dataverse.getId())); + } + docs.add(solrInputDocument); String status; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 67ada72da9a..a3328531821 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -297,5 +297,6 @@ more targeted results for just datasets. The format is YYYY (i.e. 
public static final String DATASET_LICENSE = "license"; public static final String FILE_COUNT = "fileCount"; + public static final String DATASET_COUNT = "datasetCount"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index e5e89e042a0..7f84f71152c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -82,6 +82,10 @@ public class SolrSearchResult { * Only Dataset can have a file count */ private Long fileCount; + /** + * Only Dataverses can have a dataset count + */ + private Long datasetCount; /** * Files and datasets might have a UNF. Dataverses don't. */ @@ -702,6 +706,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool nullSafeJsonBuilder.add("affiliation", dataverseAffiliation); nullSafeJsonBuilder.add("parentDataverseName", dataverseParentName); nullSafeJsonBuilder.add("parentDataverseIdentifier", dataverseParentAlias); + nullSafeJsonBuilder.add("datasetCount", this.datasetCount); } else if (this.entity.isInstanceofDataFile()) { // "published_at" field is only set when the version state is not draft. 
// On the contrary, this field also takes into account DataFiles in draft version, @@ -1402,4 +1407,12 @@ public Long getFileCount() { public void setFileCount(Long fileCount) { this.fileCount = fileCount; } + + public Long getDatasetCount() { + return datasetCount; + } + + public void setDatasetCount(Long datasetCount) { + this.datasetCount = datasetCount; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchServiceBean.java index 2a6bd335f43..aed7cbae3ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchServiceBean.java @@ -455,6 +455,7 @@ public SolrQueryResponse search( // Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); Long fileCount = (Long) solrDocument.getFieldValue(SearchFields.FILE_COUNT); + Long datasetCount = (Long) solrDocument.getFieldValue(SearchFields.DATASET_COUNT); List matchedFields = new ArrayList<>(); @@ -529,6 +530,7 @@ public SolrQueryResponse search( solrSearchResult.setDvTree(dvTree); solrSearchResult.setDatasetValid(datasetValid); solrSearchResult.setFileCount(fileCount); + solrSearchResult.setDatasetCount(datasetCount); if (Boolean.TRUE.equals((Boolean) solrDocument.getFieldValue(SearchFields.IS_HARVESTED))) { solrSearchResult.setHarvested(true); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 37e8b30a9f5..e1d9533fa7e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -31,6 +31,7 @@ import jakarta.json.JsonObjectBuilder; +import static io.restassured.RestAssured.given; import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; import java.nio.file.Path; @@ -41,6 +42,7 @@ import static 
org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertNotNull; public class SearchIT { @@ -1628,6 +1630,307 @@ public void testSearchWithInvalidDateField() { } + @Test + public void testDataverseDatasetCounts() throws InterruptedException { + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + Response makeSuperuser = UtilIT.makeSuperUser(username); + makeSuperuser.prettyPrint(); + makeSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverse.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverse after publishing + String searchEmptyDataverseQuery = "identifier:" + dataverseAlias + " AND publicationStatus:Published AND datasetCount:0"; + assertTrue(UtilIT.sleepForSearch(searchEmptyDataverseQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchEmptyDataverse = UtilIT.search(searchEmptyDataverseQuery, apiToken, "&type=dataverse"); + searchEmptyDataverse.prettyPrint(); + searchEmptyDataverse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(0)); + + // Check that published datasets are counted + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, 
apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + String datasetPid = JsonPath.from(createDatasetResponse.getBody().asString()).getString("data.persistentId"); + + Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken); + publishDataset.then().assertThat() + .statusCode(OK.getStatusCode()); + UtilIT.sleepForReindex(datasetPid, apiToken, 5); + + // Wait for reindex of dataverse after publishing + String searchDataverseWithDatasetQuery = "identifier:" + dataverseAlias + " AND datasetCount:1"; + assertTrue(UtilIT.sleepForSearch(searchDataverseWithDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchDataverseWithDataset = UtilIT.search(searchDataverseWithDatasetQuery, apiToken, "&type=dataverse"); + searchDataverseWithDataset.prettyPrint(); + searchDataverseWithDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(1)); + + Response createDataverseResponse2 = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse2.prettyPrint(); + String dataverseAlias2 = UtilIT.getAliasFromResponse(createDataverseResponse2); + + Response publishDataverse2 = UtilIT.publishDataverseViaNativeApi(dataverseAlias2, apiToken); + publishDataverse2.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverse after publishing + String searchEmptyDataverse2Query = "identifier:" + dataverseAlias2 + " AND publicationStatus:Published AND datasetCount:0"; + assertTrue(UtilIT.sleepForSearch(searchEmptyDataverse2Query, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchEmptyDataverse2 = UtilIT.search(searchEmptyDataverse2Query, apiToken, "&type=dataverse"); + searchEmptyDataverse2.prettyPrint(); + 
searchEmptyDataverse2.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias2)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(0)); + + // Check that linked datasets are counted + Response linkDataset = UtilIT.linkDataset(datasetPid, dataverseAlias2, apiToken); + linkDataset.prettyPrint(); + linkDataset.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverse after linking + String searchDataverseWithLinkedDatasetQuery = "identifier:" + dataverseAlias2 + " AND datasetCount:1"; + assertTrue(UtilIT.sleepForSearch(searchDataverseWithLinkedDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchDataverseWithLinkedDataset = UtilIT.search(searchDataverseWithLinkedDatasetQuery, apiToken, "&type=dataverse"); + searchDataverseWithLinkedDataset.prettyPrint(); + searchDataverseWithLinkedDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias2)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(1)); + + // Check that unlinked datasets are no longer counted + Response unlinkDataset = UtilIT.deleteDatasetLink(Long.valueOf(datasetId), dataverseAlias2, apiToken); + unlinkDataset.prettyPrint(); + unlinkDataset.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverse after unlinking + String searchDataverseWithUnlinkedDatasetQuery = "identifier:" + dataverseAlias2 + " AND datasetCount:0"; + assertTrue(UtilIT.sleepForSearch(searchDataverseWithUnlinkedDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchDataverseWithUnlinkedDataset = UtilIT.search(searchDataverseWithUnlinkedDatasetQuery, apiToken, "&type=dataverse"); + searchDataverseWithUnlinkedDataset.prettyPrint(); + 
searchDataverseWithUnlinkedDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias2)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(0)); + + // Re-link dataset + Response linkDatasetAgain = UtilIT.linkDataset(datasetPid, dataverseAlias2, apiToken); + linkDatasetAgain.prettyPrint(); + linkDatasetAgain.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverse after linking + assertTrue(UtilIT.sleepForSearch(searchDataverseWithLinkedDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + // Check that destroyed datasets are no longer counted + Response destroyDataset = UtilIT.destroyDataset(datasetPid, apiToken); + destroyDataset.prettyPrint(); + destroyDataset.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Wait for reindex of dataverses after destroying dataset + String searchDataverseWithDestroyedDatasetQuery = "identifier:" + dataverseAlias + " AND datasetCount:0"; + assertTrue(UtilIT.sleepForSearch(searchDataverseWithDestroyedDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + String searchDataverseWithDestroyedLinkedDatasetQuery = "identifier:" + dataverseAlias2 + " AND datasetCount:0"; + assertTrue(UtilIT.sleepForSearch(searchDataverseWithDestroyedLinkedDatasetQuery, null, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response searchDataverseWithDestroyedDataset = UtilIT.search(searchDataverseWithDestroyedDatasetQuery, apiToken, "&type=dataverse"); + searchDataverseWithDestroyedDataset.prettyPrint(); + searchDataverseWithDestroyedDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(0)); + + Response searchDataverseWithDestroyedLinkedDataset = 
UtilIT.search(searchDataverseWithDestroyedLinkedDatasetQuery, apiToken, "&type=dataverse"); + searchDataverseWithDestroyedLinkedDataset.prettyPrint(); + searchDataverseWithDestroyedLinkedDataset.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.total_count", CoreMatchers.equalTo(1)) + .body("data.items[0].identifier", CoreMatchers.equalTo(dataverseAlias2)) + .body("data.items[0].datasetCount", CoreMatchers.equalTo(0)); + + // Check that harvested datasets are counted + Response createDataverseResponse3 = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse3.prettyPrint(); + String dataverseAlias3 = UtilIT.getAliasFromResponse(createDataverseResponse3); + + Response publishDataverse3 = UtilIT.publishDataverseViaNativeApi(dataverseAlias3, apiToken); + publishDataverse3.then().assertThat() + .statusCode(OK.getStatusCode()); + + String nickName = "h" + UtilIT.getRandomString(6); + String sourceName = ""; + + String clientApiPath = String.format("/api/harvest/clients/%s", nickName); + String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + + "\"type\":\"oai\"," + + "\"sourceName\":\"%s\"," + + "\"harvestUrl\":\"%s\"," + + "\"archiveUrl\":\"%s\"," + + "\"set\":\"%s\"," + + "\"allowHarvestingMissingCVV\":%s," + + "\"metadataFormat\":\"%s\"}", + dataverseAlias3, sourceName, "https://demo.dataverse.org/oai", "https://demo.dataverse.org", + "controlTestSet2", true, "oai_dc"); + + Response createResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) + .body(clientJson) + .post(clientApiPath); + createResponse.prettyPrint(); + assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode()); + + String runHarvestApiPath = String.format("/api/harvest/clients/%s/run", nickName); + + Response runResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) + .post(runHarvestApiPath); + runResponse.prettyPrint(); + assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode()); + + int i = 0; + int maxWait=20; 
// a very conservative interval; this harvest has no business taking this long + int numberOfDatasetsHarvested = 0; + do { + // Give it an initial 2 sec. delay, to make sure the client state + // has been updated in the database, which can take some appreciable + // amount of time on a heavily-loaded server running a full suite of + // tests: + Thread.sleep(2000L); + // keep checking the status of the client with the GET api: + Response getClientResponse = given() + .get(clientApiPath); + + assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode()); + JsonPath responseJsonPath = getClientResponse.body().jsonPath(); + assertNotNull(responseJsonPath, "Invalid JSON in GET client response"); + assertEquals(ApiConstants.STATUS_OK, responseJsonPath.getString("status")); + + String clientStatus = responseJsonPath.getString("data.status"); + assertNotNull(clientStatus); + + if ("inProgress".equals(clientStatus) || "IN PROGRESS".equals(responseJsonPath.getString("data.lastResult"))) { + // we'll sleep for 2 more seconds + i++; + } else { + logger.info("getClientResponse.prettyPrint: " + + getClientResponse.prettyPrint()); + // Check the values in the response: + // a) Confirm that the harvest has completed: + assertEquals("inActive", clientStatus, "Unexpected client status: "+clientStatus); + + // b) Confirm that it has actually succeeded: + assertTrue(responseJsonPath.getString("data.lastResult").contains("Completed"), "Last harvest not reported a success (took "+i+" seconds)"); + String harvestTimeStamp = responseJsonPath.getString("data.lastHarvest"); + assertNotNull(harvestTimeStamp); + + // c) Confirm that at least one dataset has been harvested: + numberOfDatasetsHarvested = responseJsonPath.getInt("data.lastDatasetsHarvested"); + assertTrue(numberOfDatasetsHarvested > 0); + + // ok, it looks like the harvest has completed successfully. + break; + } + } while (i