From 66633947a37bf0e7e5c69aaa94b578e69190ab18 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 27 Feb 2020 11:42:37 -0500 Subject: [PATCH 1/6] #6665 update ds solr docs directly --- .../iq/dataverse/search/IndexServiceBean.java | 99 +++++++++++++------ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 ++ 2 files changed, 74 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 5afd6cd5dd4..c4dd30d2216 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -27,6 +27,7 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.harvest.server.OaiSetException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; @@ -41,6 +42,7 @@ import java.util.Collection; import java.util.Date; import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; @@ -258,21 +260,23 @@ public Future indexDataverse(Dataverse dataverse) throws SolrServerExcep for (String dvPath : linkingDataversePaths) { dataversePaths.add(dvPath); } - //Get Linking Dataverses to see if I need to reindex my children - if (hasAnyLinkingDataverses(dataverse)) { - for (Dataset dv : datasetService.findPublishedByOwnerId(dataverse.getId())) { - //if this dataverse or any of its ancestors is linked and contains datasets then - // the datasets must be reindexed to get the new paths added - indexDataset(dv, true); - } + if (hasAnyLinkingDataverses(dataverse)) { for (Dataverse dv : dataverseService.findPublishedByOwnerId(dataverse.getId())) { //if this dataverse or any of its ancestors is linked and contains dataverses then // the dataverses must be reindexed to get the new paths added + //We're sticking with the re-index here so that the dataverses datasets will also + //get their paths updated indexDataverse(dv); } + + for (Dataset dv : datasetService.findPublishedByOwnerId(dataverse.getId())) { + //if this dataverse or any of its ancestors is linked and contains datasets then + // the datasets must get the new paths added + // changed from a full re-index for efficiency wrt issue 6665 + updatePathForExistingSolrDocs(dv); + } } - solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); docs.add(solrInputDocument); @@ -694,29 +698,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); Collection docs = new ArrayList<>(); - List dataversePathSegmentsAccumulator = new ArrayList<>(); - List dataverseSegments = new ArrayList<>(); - try { - dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator); - } catch (Exception ex) { - logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); - } - List dataversePaths = getDataversePathsFromSegments(dataverseSegments); - // Add Paths for linking dataverses - for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) { - List linkingDataversePathSegmentsAccumulator = new ArrayList<>(); - List linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); - List linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); - for (String dvPath : linkingDataversePaths) { - dataversePaths.add(dvPath); - } - } - - //Add paths for my linking dataverses - List linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner()); - for (String dvPath : linkingDataversePaths) { - dataversePaths.add(dvPath); - } SolrInputDocument solrInputDocument = new SolrInputDocument(); String datasetSolrDocId = indexableDataset.getSolrDocId(); @@ -898,6 +879,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } + List dataversePaths = retrieveDatasetPaths(dataset); solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, // dataset.getOwner().getName()); @@ -1417,6 +1399,61 @@ public static String getIN_REVIEW_STRING() { public static String getDEACCESSIONED_STRING() { return DEACCESSIONED_STRING; } + + + + private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerException, IOException { + SolrQuery solrQuery = new SolrQuery(); + + solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, object.getId().toString())); + + QueryResponse res = solrClientService.getSolrClient().query(solrQuery); + + if (res.getResults().isEmpty()) { + //nothing to update - + } else { + SolrDocument doc = res.getResults().get(0); + SolrInputDocument tmp = new SolrInputDocument(); + + for (String fieldName : doc.getFieldNames()) { + tmp.addField(fieldName, doc.getFieldValue(fieldName)); + } + Dataset ds = datasetService.find(object.getId()); + tmp.removeField(SearchFields.SUBTREE); + tmp.addField(SearchFields.SUBTREE, retrieveDatasetPaths(ds)); + UpdateResponse addResponse = solrClientService.getSolrClient().add(tmp); + UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); + } + + } + + + private List retrieveDatasetPaths(Dataset dataset) { + List dataversePathSegmentsAccumulator = new ArrayList<>(); + List dataverseSegments = new ArrayList<>(); + try { + dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator); + } catch (Exception ex) { + logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); + } + List dataversePaths = getDataversePathsFromSegments(dataverseSegments); + // Add Paths for linking dataverses + for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) { + List linkingDataversePathSegmentsAccumulator = new ArrayList<>(); + List linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); + List linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); + for (String dvPath : linkingDataversePaths) { + dataversePaths.add(dvPath); + } + } + + //Add paths for my linking dataverses + List linkingDataversePaths = findLinkingDataversePaths(dataset.getOwner()); + for (String dvPath : linkingDataversePaths) { + dataversePaths.add(dvPath); + } + return dataversePaths; + } public String delete(Dataverse doomed) { logger.fine("deleting Solr document for dataverse " + doomed.getId()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c609b400f7e..4507c18eb89 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1059,6 +1059,12 @@ static Response reindexDataset(String persistentId) { .get("/api/admin/index/dataset?persistentId=" + persistentId); return response; } + + static Response reindexDataverse(String dvId) { + Response response = given() + .get("/api/admin/index/dataverses/" + dvId); + return response; + } static Response listAuthenticatedUsers(String apiToken) { Response response = given() From a94b391b5010f45df8f783a9cd00002781b46994 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 2 Mar 2020 16:56:32 -0500 Subject: [PATCH 2/6] #6665 add paths to files on index dataverse --- .../iq/dataverse/search/IndexServiceBean.java | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index c4dd30d2216..b381efbda52 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1409,22 +1409,35 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc QueryResponse res = solrClientService.getSolrClient().query(solrQuery); - if (res.getResults().isEmpty()) { - //nothing to update - - } else { + if (!res.getResults().isEmpty()) { SolrDocument doc = res.getResults().get(0); - SolrInputDocument tmp = new SolrInputDocument(); + SolrInputDocument sid = new SolrInputDocument(); for (String fieldName : doc.getFieldNames()) { - tmp.addField(fieldName, doc.getFieldValue(fieldName)); + sid.addField(fieldName, doc.getFieldValue(fieldName)); } Dataset ds = datasetService.find(object.getId()); - tmp.removeField(SearchFields.SUBTREE); - tmp.addField(SearchFields.SUBTREE, retrieveDatasetPaths(ds)); - UpdateResponse addResponse = solrClientService.getSolrClient().add(tmp); + sid.removeField(SearchFields.SUBTREE); + List paths = retrieveDatasetPaths(ds); + sid.addField(SearchFields.SUBTREE, paths); + UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); + for (DataFile df : ds.getFiles()) { + solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); + res = solrClientService.getSolrClient().query(solrQuery); + if (!res.getResults().isEmpty()) { + doc = res.getResults().get(0); + sid = new SolrInputDocument(); + for (String fieldName : doc.getFieldNames()) { + sid.addField(fieldName, doc.getFieldValue(fieldName)); + } + sid.removeField(SearchFields.SUBTREE); + sid.addField(SearchFields.SUBTREE, paths); + addResponse = solrClientService.getSolrClient().add(sid); + commitResponse = solrClientService.getSolrClient().commit(); + } + } } - } From 1085a152a0cbac2b5a613ea3639f9c8cb6a4564b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 9 Mar 2020 09:57:03 -0400 Subject: [PATCH 3/6] #6665 remove path update from index all --- .../iq/dataverse/search/IndexServiceBean.java | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index b381efbda52..a636615d189 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -163,10 +163,14 @@ public void close() { @TransactionAttribute(REQUIRES_NEW) public Future indexDataverseInNewTransaction(Dataverse dataverse) throws SolrServerException, IOException{ - return indexDataverse(dataverse); + return indexDataverse(dataverse, false); } - + public Future indexDataverse(Dataverse dataverse) throws SolrServerException, IOException { + return indexDataverse(dataverse, true); + } + + public Future indexDataverse(Dataverse dataverse, boolean processPaths) throws SolrServerException, IOException { logger.fine("indexDataverse called on dataverse id " + dataverse.getId() + "(" + dataverse.getAlias() + ")"); if (dataverse.getId() == null) { // TODO: Investigate the root cause of this "unable to index dataverse" @@ -260,23 +264,30 @@ public Future indexDataverse(Dataverse dataverse) throws SolrServerExcep for (String dvPath : linkingDataversePaths) { dataversePaths.add(dvPath); } - //Get Linking Dataverses to see if I need to reindex my children - if (hasAnyLinkingDataverses(dataverse)) { - for (Dataverse dv : dataverseService.findPublishedByOwnerId(dataverse.getId())) { - //if this dataverse or any of its ancestors is linked and contains dataverses then - // the dataverses must be reindexed to get the new paths added - //We're sticking with the re-index here so that the dataverses datasets will also - //get their paths updated - indexDataverse(dv); - } - - for (Dataset dv : datasetService.findPublishedByOwnerId(dataverse.getId())) { - //if this dataverse or any of its ancestors is linked and contains datasets then - // the datasets must get the new paths added - // changed from a full re-index for efficiency wrt issue 6665 - updatePathForExistingSolrDocs(dv); + //only do this if we're indexing an individual dataverse ie not full re-index + if (processPaths) { + //Get Linking Dataverses to see if I need to reindex my children + if (hasAnyLinkingDataverses(dataverse)) { + List found = dataverseService.findByOwnerId(dataverse.getId()); + if (!found.isEmpty()) { + for (Dataverse dv : found) { + //if this dataverse or any of its ancestors is linked and contains dataverses then + // the dataverses must be reindexed to get the new paths added + //We're sticking with the re-index here so that the dataverses datasets will also + //get their paths updated + indexDataverseInNewTransaction(dv); + } + } + List datasets = datasetService.findByOwnerId(dataverse.getId()); + for (Dataset ds : datasets) { + //if this dataverse or any of its ancestors is linked and contains datasets then + // the datasets must get the new paths added + // changed from a full re-index for efficiency wrt issue 6665 + updatePathForExistingSolrDocs(ds); + } } } + solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); docs.add(solrInputDocument); @@ -306,7 +317,7 @@ public Future indexDataverse(Dataverse dataverse) throws SolrServerExcep return new AsyncResult<>(msg); } - + @TransactionAttribute(REQUIRES_NEW) public Future indexDatasetInNewTransaction(Long datasetId) throws SolrServerException, IOException{ //Dataset dataset) { boolean doNormalSolrDocCleanUp = false; @@ -1404,7 +1415,6 @@ public static String getDEACCESSIONED_STRING() { private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerException, IOException { SolrQuery solrQuery = new SolrQuery(); - solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, object.getId().toString())); QueryResponse res = solrClientService.getSolrClient().query(solrQuery); @@ -1437,6 +1447,8 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc commitResponse = solrClientService.getSolrClient().commit(); } } + } else { + indexDatasetInNewTransaction(object.getId()); } } From 4f321b88043e574c98f681fe45bfb9281b9dce47 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 10 Mar 2020 11:31:14 -0400 Subject: [PATCH 4/6] #6665 removing variable metadata process for benchmarking --- .../iq/dataverse/search/IndexServiceBean.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index a636615d189..8d68b3d2282 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -890,7 +890,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } - List dataversePaths = retrieveDatasetPaths(dataset); + List dataversePaths = retrieveDatasetPaths(dataset); solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, // dataset.getOwner().getName()); @@ -938,7 +938,9 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d * whether full text indexing is on now. */ if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) { - if (fileMetadata.contentEquals(releasedFileMetadata) && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata)) { + if (fileMetadata.contentEquals(releasedFileMetadata) + // && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata) + ) { indexThisMetadata = false; logger.fine("This file metadata hasn't changed since the released version; skipping indexing."); } else { @@ -1170,7 +1172,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName()); } - +/* List vmList = variableService.findByDataVarIdAndFileMetaId(var.getId(), fileMetadata.getId()); VariableMetadata vm = null; if (vmList != null && vmList.size() >0) { @@ -1204,8 +1206,9 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } - +*/ } + // TABULAR DATA TAGS: // (not to be confused with the file categories, indexed above!) for (DataFileTag tag : fileMetadata.getDataFile().getTags()) { @@ -1447,8 +1450,8 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc commitResponse = solrClientService.getSolrClient().commit(); } } - } else { - indexDatasetInNewTransaction(object.getId()); + } else { + indexDataset((Dataset) object, true); } } From 1809eb047afc07da8f5913a043cb700b2408b94c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 11 Mar 2020 15:16:26 -0400 Subject: [PATCH 5/6] #6665 add debug lines for benchmarking with var metadata processing --- .../iq/dataverse/search/IndexServiceBean.java | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 8d68b3d2282..e75381df155 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -37,6 +37,7 @@ import java.sql.Timestamp; import java.text.DateFormat; import java.text.SimpleDateFormat; +import java.time.Instant; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; @@ -705,6 +706,8 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) throws Sol } private String addOrUpdateDataset(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { + System.out.print("start index: " + Instant.now()); + String startTime = Instant.now().toString(); IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -922,10 +925,13 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d logger.fine( "We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions."); } - + System.out.print("before files: " + Instant.now()); for (FileMetadata fileMetadata : fileMetadatas) { + boolean indexThisMetadata = true; if (checkForDuplicateMetadata) { + System.out.print("check for duplticates"); + logger.fine("Checking if this file metadata is a duplicate."); for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) { if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) { @@ -939,7 +945,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d */ if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) { if (fileMetadata.contentEquals(releasedFileMetadata) - // && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata) + && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata) ) { indexThisMetadata = false; logger.fine("This file metadata hasn't changed since the released version; skipping indexing."); @@ -954,8 +960,11 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } if (indexThisMetadata) { + + SolrInputDocument datafileSolrInputDocument = new SolrInputDocument(); Long fileEntityId = fileMetadata.getDataFile().getId(); + System.out.print("Indexing file: " + fileEntityId + " " + Instant.now()); datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId); datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion); datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId); @@ -1172,7 +1181,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName()); } -/* + List vmList = variableService.findByDataVarIdAndFileMetaId(var.getId(), fileMetadata.getId()); VariableMetadata vm = null; if (vmList != null && vmList.size() >0) { @@ -1206,7 +1215,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } -*/ + } // TABULAR DATA TAGS: @@ -1224,7 +1233,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } } - +System.out.print("After files: " + Instant.now()); try { solrClientService.getSolrClient().add(docs); solrClientService.getSolrClient().commit(); @@ -1249,6 +1258,9 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d // return "indexed dataset " + dataset.getId() + " as " + solrDocId + // "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString(); + System.out.print("Started: " + startTime); + System.out.print("Finished: " + Instant.now()); + System.out.print("indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed.size()); return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed; } From 78565389b54b7ee68defbb7d9f3760f718379629 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 12 Mar 2020 09:28:19 -0400 Subject: [PATCH 6/6] #6665 remove variable metadata indexing and debug statements --- .../iq/dataverse/search/IndexServiceBean.java | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index e75381df155..b6b7df2887a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -706,8 +706,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) throws Sol } private String addOrUpdateDataset(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { - System.out.print("start index: " + Instant.now()); - String startTime = Instant.now().toString(); IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -925,12 +923,10 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d logger.fine( "We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions."); } - System.out.print("before files: " + Instant.now()); for (FileMetadata fileMetadata : fileMetadatas) { boolean indexThisMetadata = true; if (checkForDuplicateMetadata) { - System.out.print("check for duplticates"); logger.fine("Checking if this file metadata is a duplicate."); for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) { @@ -945,7 +941,8 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d */ if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) { if (fileMetadata.contentEquals(releasedFileMetadata) - && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata) + /* SEK 3/12/2020 remove variable metadata indexing*/ + // && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata) ) { indexThisMetadata = false; logger.fine("This file metadata hasn't changed since the released version; skipping indexing."); @@ -964,7 +961,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d SolrInputDocument datafileSolrInputDocument = new SolrInputDocument(); Long fileEntityId = fileMetadata.getDataFile().getId(); - System.out.print("Indexing file: " + fileEntityId + " " + Instant.now()); datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId); datafileSolrInputDocument.addField(SearchFields.DATAVERSE_VERSION_INDEXED_BY, dataverseVersion); datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId); @@ -1181,7 +1177,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName()); } - +/* SEK 3/12/2020 remove variable metadata indexing List vmList = variableService.findByDataVarIdAndFileMetaId(var.getId(), fileMetadata.getId()); VariableMetadata vm = null; if (vmList != null && vmList.size() >0) { @@ -1215,7 +1211,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } - +*/ } // TABULAR DATA TAGS: @@ -1233,7 +1229,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d } } } -System.out.print("After files: " + Instant.now()); + try { solrClientService.getSolrClient().add(docs); solrClientService.getSolrClient().commit(); @@ -1256,11 +1252,6 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set d dvObjectToModify = em.merge(dvObjectToModify); dvObjectToModify = null; - // return "indexed dataset " + dataset.getId() + " as " + solrDocId + - // "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString(); - System.out.print("Started: " + startTime); - System.out.print("Finished: " + Instant.now()); - System.out.print("indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed.size()); return "indexed dataset " + dsId + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed; }