Merged
160 changes: 114 additions & 46 deletions src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -27,6 +27,7 @@
import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
import edu.harvard.iq.dataverse.datavariable.VariableServiceBean;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.harvest.server.OaiSetException;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
@@ -36,11 +37,13 @@
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
@@ -161,10 +164,14 @@ public void close() {

@TransactionAttribute(REQUIRES_NEW)
public Future<String> indexDataverseInNewTransaction(Dataverse dataverse) throws SolrServerException, IOException{
return indexDataverse(dataverse);
return indexDataverse(dataverse, false);
}

public Future<String> indexDataverse(Dataverse dataverse) throws SolrServerException, IOException {
return indexDataverse(dataverse, true);
}

public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths) throws SolrServerException, IOException {
logger.fine("indexDataverse called on dataverse id " + dataverse.getId() + "(" + dataverse.getAlias() + ")");
if (dataverse.getId() == null) {
// TODO: Investigate the root cause of this "unable to index dataverse"
@@ -258,21 +265,30 @@ public Future<String> indexDataverse(Dataverse dataverse) throws SolrServerExcep
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}

//Get Linking Dataverses to see if I need to reindex my children
if (hasAnyLinkingDataverses(dataverse)) {
for (Dataset dv : datasetService.findPublishedByOwnerId(dataverse.getId())) {
//if this dataverse or any of its ancestors is linked and contains datasets then
// the datasets must be reindexed to get the new paths added
indexDataset(dv, true);
}
for (Dataverse dv : dataverseService.findPublishedByOwnerId(dataverse.getId())) {
//if this dataverse or any of its ancestors is linked and contains dataverses then
// the dataverses must be reindexed to get the new paths added
indexDataverse(dv);
//only do this if we're indexing an individual dataverse ie not full re-index
if (processPaths) {
//Get Linking Dataverses to see if I need to reindex my children
if (hasAnyLinkingDataverses(dataverse)) {
List<Dataverse> found = dataverseService.findByOwnerId(dataverse.getId());
if (!found.isEmpty()) {
for (Dataverse dv : found) {
//if this dataverse or any of its ancestors is linked and contains dataverses then
// the dataverses must be reindexed to get the new paths added
//We're sticking with the re-index here so that the dataverses datasets will also
//get their paths updated
indexDataverseInNewTransaction(dv);
}
}
List<Dataset> datasets = datasetService.findByOwnerId(dataverse.getId());
for (Dataset ds : datasets) {
//if this dataverse or any of its ancestors is linked and contains datasets then
// the datasets must get the new paths added
// changed from a full re-index for efficiency wrt issue 6665
updatePathForExistingSolrDocs(ds);
}
}
}

solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
docs.add(solrInputDocument);

@@ -302,7 +318,7 @@ public Future<String> indexDataverse(Dataverse dataverse) throws SolrServerExcep
return new AsyncResult<>(msg);

}

@TransactionAttribute(REQUIRES_NEW)
public Future<String> indexDatasetInNewTransaction(Long datasetId) throws SolrServerException, IOException{ //Dataset dataset) {
boolean doNormalSolrDocCleanUp = false;
@@ -694,29 +710,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
Dataset dataset = indexableDataset.getDatasetVersion().getDataset();
logger.fine("adding or updating Solr document for dataset id " + dataset.getId());
Collection<SolrInputDocument> docs = new ArrayList<>();
List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
List<String> dataverseSegments = new ArrayList<>();
try {
dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
} catch (Exception ex) {
logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
}
List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
// Add Paths for linking dataverses
for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
}

//Add paths for my linking dataverses
List<String> linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner());
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}

SolrInputDocument solrInputDocument = new SolrInputDocument();
String datasetSolrDocId = indexableDataset.getSolrDocId();
@@ -898,6 +891,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
}
}

List<String> dataversePaths = retrieveDatasetPaths(dataset);
solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths);
// solrInputDocument.addField(SearchFields.HOST_DATAVERSE,
// dataset.getOwner().getName());
@@ -929,10 +923,11 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
logger.fine(
"We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions.");
}

for (FileMetadata fileMetadata : fileMetadatas) {

boolean indexThisMetadata = true;
if (checkForDuplicateMetadata) {

logger.fine("Checking if this file metadata is a duplicate.");
for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) {
if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) {
@@ -945,7 +940,10 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
* whether full text indexing is on now.
*/
if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) {
if (fileMetadata.contentEquals(releasedFileMetadata) && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata)) {
if (fileMetadata.contentEquals(releasedFileMetadata)
/* SEK 3/12/2020 remove variable metadata indexing*/
// && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata)
) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
@@ -959,6 +957,8 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
}
}
if (indexThisMetadata) {


SolrInputDocument datafileSolrInputDocument = new SolrInputDocument();
Long fileEntityId = fileMetadata.getDataFile().getId();
datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId);
@@ -1177,7 +1177,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
}


/* SEK 3/12/2020 remove variable metadata indexing
List<VariableMetadata> vmList = variableService.findByDataVarIdAndFileMetaId(var.getId(), fileMetadata.getId());
VariableMetadata vm = null;
if (vmList != null && vmList.size() >0) {
@@ -1211,8 +1211,9 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
}

}

*/
}

// TABULAR DATA TAGS:
// (not to be confused with the file categories, indexed above!)
for (DataFileTag tag : fileMetadata.getDataFile().getTags()) {
@@ -1228,7 +1229,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
}
}
}

try {
solrClientService.getSolrClient().add(docs);
solrClientService.getSolrClient().commit();
@@ -1251,8 +1252,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
dvObjectToModify = em.merge(dvObjectToModify);
dvObjectToModify = null;

// return "indexed dataset " + dataset.getId() + " as " + solrDocId +
// "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString();
return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed;
}

@@ -1417,6 +1416,75 @@ public static String getIN_REVIEW_STRING() {
public static String getDEACCESSIONED_STRING() {
return DEACCESSIONED_STRING;
}



private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerException, IOException {
SolrQuery solrQuery = new SolrQuery();
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, object.getId().toString()));

QueryResponse res = solrClientService.getSolrClient().query(solrQuery);

if (!res.getResults().isEmpty()) {
SolrDocument doc = res.getResults().get(0);
SolrInputDocument sid = new SolrInputDocument();

for (String fieldName : doc.getFieldNames()) {
sid.addField(fieldName, doc.getFieldValue(fieldName));
}
Dataset ds = datasetService.find(object.getId());
sid.removeField(SearchFields.SUBTREE);
List<String> paths = retrieveDatasetPaths(ds);
sid.addField(SearchFields.SUBTREE, paths);
UpdateResponse addResponse = solrClientService.getSolrClient().add(sid);
UpdateResponse commitResponse = solrClientService.getSolrClient().commit();
for (DataFile df : ds.getFiles()) {
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString()));
res = solrClientService.getSolrClient().query(solrQuery);
if (!res.getResults().isEmpty()) {
doc = res.getResults().get(0);
sid = new SolrInputDocument();
for (String fieldName : doc.getFieldNames()) {
sid.addField(fieldName, doc.getFieldValue(fieldName));
}
sid.removeField(SearchFields.SUBTREE);
sid.addField(SearchFields.SUBTREE, paths);
addResponse = solrClientService.getSolrClient().add(sid);
commitResponse = solrClientService.getSolrClient().commit();
}
}
} else {
indexDataset((Dataset) object, true);
}
}


private List<String> retrieveDatasetPaths(Dataset dataset) {
List<String> dataversePathSegmentsAccumulator = new ArrayList<>();
List<String> dataverseSegments = new ArrayList<>();
try {
dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator);
} catch (Exception ex) {
logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex);
}
List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments);
// Add Paths for linking dataverses
for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) {
List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>();
List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator);
List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments);
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
}

//Add paths for my linking dataverses
List<String> linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner());
for (String dvPath : linkingDataversePaths) {
dataversePaths.add(dvPath);
}
return dataversePaths;
}

public String delete(Dataverse doomed) {
logger.fine("deleting Solr document for dataverse " + doomed.getId());
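Editor's note on the core change in this file (a sketch, not part of the diff): instead of fully re-indexing every child dataset when a dataverse's collection paths change, the new updatePathForExistingSolrDocs method looks up the dataset's existing Solr document, copies its stored fields, and swaps only the SUBTREE values before re-adding it. The sketch below isolates that copy-and-replace pattern with plain SolrJ; the Solr URL and the field-name constants are placeholders for what IndexServiceBean actually obtains from solrClientService and SearchFields.

```java
import java.io.IOException;
import java.util.List;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;

public class SubtreePathUpdateSketch {

    // Placeholder URL and field names; IndexServiceBean gets these from
    // SolrClientService and the SearchFields constants instead.
    private static final String SOLR_URL = "http://localhost:8983/solr/collection1";
    private static final String ENTITY_ID = "entityId";
    private static final String SUBTREE = "subtreePaths";

    public static void updateSubtreePaths(Long entityId, List<String> newPaths)
            throws SolrServerException, IOException {
        try (SolrClient client = new HttpSolrClient.Builder(SOLR_URL).build()) {
            SolrQuery query = new SolrQuery(ENTITY_ID + ":" + entityId);
            QueryResponse res = client.query(query);
            if (res.getResults().isEmpty()) {
                // Nothing indexed yet; the PR falls back to a full indexDataset() in this case.
                return;
            }
            SolrDocument existing = res.getResults().get(0);

            // Copy every stored field of the existing document into a fresh input document
            // (this mirrors the PR's approach and also carries over internal fields such as _version_).
            SolrInputDocument updated = new SolrInputDocument();
            for (String fieldName : existing.getFieldNames()) {
                updated.addField(fieldName, existing.getFieldValue(fieldName));
            }

            // Swap only the subtree paths and write the document back.
            updated.removeField(SUBTREE);
            updated.addField(SUBTREE, newPaths);
            client.add(updated);
            client.commit();
        }
    }
}
```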
6 changes: 6 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -1059,6 +1059,12 @@ static Response reindexDataset(String persistentId) {
.get("/api/admin/index/dataset?persistentId=" + persistentId);
return response;
}

static Response reindexDataverse(String dvId) {
Response response = given()
.get("/api/admin/index/dataverses/" + dvId);
return response;
}

static Response listAuthenticatedUsers(String apiToken) {
Response response = given()
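A usage sketch for the new helper (hypothetical, not part of this PR): an integration test could call reindexDataverse the same way existing tests call reindexDataset, and simply assert that the admin endpoint answers 200 OK. The test name, the "root" alias, and the OK constant (javax.ws.rs.core.Response.Status) are illustrative; imports would follow whatever UtilIT's existing callers already use.

```java
// Hypothetical test fragment, not part of this PR.
@Test
public void testReindexDataverse() {
    // "root" stands in for any dataverse id/alias the admin index API accepts.
    Response reindexResponse = UtilIT.reindexDataverse("root");
    reindexResponse.prettyPrint();
    reindexResponse.then().assertThat().statusCode(OK.getStatusCode());
}
```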