From 21955de0ba7ca68c247a8ffc1e2db5e83b6f6a38 Mon Sep 17 00:00:00 2001 From: gdurand Date: Wed, 13 Jan 2021 18:01:32 -0500 Subject: [PATCH 1/8] performance improvements for saved search --- .../savedsearch/SavedSearchServiceBean.java | 68 +++++++++---------- 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java index 20cb1a8629a..8099305a69b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.search.SearchServiceBean; import edu.harvard.iq.dataverse.search.SolrQueryResponse; @@ -20,6 +21,7 @@ import edu.harvard.iq.dataverse.search.SortBy; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.List; import java.util.logging.Level; @@ -171,35 +173,40 @@ public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, S JsonArrayBuilder savedSearchArrayBuilder = Json.createArrayBuilder(); JsonArrayBuilder infoPerHit = Json.createArrayBuilder(); SolrQueryResponse queryResponse = findHits(savedSearch); + + List skipList = new ArrayList(); // a list for the definition point itself and already linked objects + skipList.add(savedSearch.getDefinitionPoint().getId()); - // get linked objects and add to a list TypedQuery typedQuery = em.createNamedQuery("DataverseLinkingDataverse.findIdsByLinkingDataverseId", Long.class) .setParameter("linkingDataverseId", savedSearch.getDefinitionPoint().getId()); - List alreadyLinkedObjectIds = typedQuery.getResultList(); + skipList.addAll(typedQuery.getResultList()); typedQuery = em.createNamedQuery("DatasetLinkingDataverse.findIdsByLinkingDataverseId", Long.class) .setParameter("linkingDataverseId", savedSearch.getDefinitionPoint().getId()); - alreadyLinkedObjectIds.addAll(typedQuery.getResultList()); - + skipList.addAll(typedQuery.getResultList()); + for (SolrSearchResult solrSearchResult : queryResponse.getSolrSearchResults()) { JsonObjectBuilder hitInfo = Json.createObjectBuilder(); hitInfo.add("name", solrSearchResult.getNameSort()); hitInfo.add("dvObjectId", solrSearchResult.getEntityId()); + + if (skipList.contains(solrSearchResult.getEntityId())) { + hitInfo.add(resultString, "Skipping because would link to itself or an already linked entity."); + infoPerHit.add(hitInfo); + continue; + } DvObject dvObjectThatDefinitionPointWillLinkTo = dvObjectService.findDvObject(solrSearchResult.getEntityId()); if (dvObjectThatDefinitionPointWillLinkTo == null) { hitInfo.add(resultString, "Could not find DvObject with id " + solrSearchResult.getEntityId()); infoPerHit.add(hitInfo); - break; - } + continue; + } + if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataverse()) { Dataverse dataverseToLinkTo = (Dataverse) dvObjectThatDefinitionPointWillLinkTo; - if (wouldResultInLinkingToItself(savedSearch.getDefinitionPoint(), dataverseToLinkTo)) { - hitInfo.add(resultString, "Skipping because dataverse id " + dataverseToLinkTo.getId() + " would link to itself."); - } else if (alreadyLinkedToTheDataverse(alreadyLinkedObjectIds, dataverseToLinkTo)) { - hitInfo.add(resultString, "Skipping because dataverse " + savedSearch.getDefinitionPoint().getId() + " already links to dataverse " + dataverseToLinkTo.getId() + "."); - } else if (dataverseToLinkToIsAlreadyPartOfTheSubtree(savedSearch.getDefinitionPoint(), dataverseToLinkTo)) { + if (dataverseToLinkToIsAlreadyPartOfTheSubtree(savedSearch.getDefinitionPoint(), dataverseToLinkTo)) { hitInfo.add(resultString, "Skipping because " + dataverseToLinkTo + " is already part of the subtree for " + savedSearch.getDefinitionPoint()); } else { DataverseLinkingDataverse link = commandEngine.submitInNewTransaction(new LinkDataverseCommand(dvReq, savedSearch.getDefinitionPoint(), dataverseToLinkTo)); @@ -207,29 +214,22 @@ public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, S } } else if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataset()) { Dataset datasetToLinkTo = (Dataset) dvObjectThatDefinitionPointWillLinkTo; - if (alreadyLinkedToTheDataset(alreadyLinkedObjectIds, datasetToLinkTo)) { - hitInfo.add(resultString, "Skipping because dataverse " + savedSearch.getDefinitionPoint() + " already links to dataset " + datasetToLinkTo + "."); - } else if (datasetToLinkToIsAlreadyPartOfTheSubtree(savedSearch.getDefinitionPoint(), datasetToLinkTo)) { + if (datasetToLinkToIsAlreadyPartOfTheSubtree(savedSearch.getDefinitionPoint(), datasetToLinkTo)) { // already there from normal search/browse hitInfo.add(resultString, "Skipping because dataset " + datasetToLinkTo.getId() + " is already part of the subtree for " + savedSearch.getDefinitionPoint().getAlias()); } else if (datasetAncestorAlreadyLinked(savedSearch.getDefinitionPoint(), datasetToLinkTo)) { hitInfo.add(resultString, "FIXME: implement this?"); - } else if (!datasetToLinkTo.isReleased()) { - hitInfo.add(resultString, "Skipping because dataset " + datasetToLinkTo.getId() + " is not released " ); } else { DatasetLinkingDataverse link = commandEngine.submitInNewTransaction(new LinkDatasetCommand(dvReq, savedSearch.getDefinitionPoint(), datasetToLinkTo)); - alreadyLinkedObjectIds.add(datasetToLinkTo.getId()); // because search results could produce two hits (published and draft) hitInfo.add(resultString, "Persisted DatasetLinkingDataverse id " + link.getId() + " link of " + link.getDataset() + " to " + link.getLinkingDataverse()); } - } else if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataFile()) { - hitInfo.add(resultString, "Skipping because the search matched a file. The matched file id was " + dvObjectThatDefinitionPointWillLinkTo.getId() + "."); } else { hitInfo.add(resultString, "Unexpected DvObject type."); } infoPerHit.add(hitInfo); } - + JsonObjectBuilder info = getInfo(savedSearch, infoPerHit); if (debugFlag) { info.add("debug", getDebugInfo(savedSearch)); @@ -237,8 +237,7 @@ public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, S savedSearchArrayBuilder.add(info); response.add("hits for saved search id " + savedSearch.getId(), savedSearchArrayBuilder); - Date end = new Date(); - logger.info("SAVED SEARCH (" + savedSearch.getId() + ") total time in ms: " + (end.getTime() - start.getTime())); + logger.info("SAVED SEARCH (" + savedSearch.getId() + ") total time in ms: " + (new Date().getTime() - start.getTime())); return response; } @@ -251,16 +250,23 @@ private SolrQueryResponse findHits(SavedSearch savedSearch) throws SearchExcepti int numResultsPerPage = Integer.MAX_VALUE; List dataverses = new ArrayList<>(); dataverses.add(savedSearch.getDefinitionPoint()); + + // since saved search can only link Dataverses and Datasets, we can limit our search + List searchFilterQueries = savedSearch.getFilterQueriesAsStrings(); + searchFilterQueries.add("dvObjectType:(dataverses OR datasets)"); + + // run the search as GuestUser to only link published objects SolrQueryResponse solrQueryResponse = searchService.search( - new DataverseRequest(savedSearch.getCreator(), getHttpServletRequest()), + new DataverseRequest(GuestUser.get(), getHttpServletRequest()), dataverses, savedSearch.getQuery(), - savedSearch.getFilterQueriesAsStrings(), + searchFilterQueries, sortBy.getField(), sortBy.getOrder(), paginationStart, dataRelatedToMe, - numResultsPerPage + numResultsPerPage, + false // do not retrieve entities ); return solrQueryResponse; } @@ -289,18 +295,6 @@ private JsonArrayBuilder getFilterQueries(SavedSearch savedSearch) { return filterQueriesArrayBuilder; } - private boolean alreadyLinkedToTheDataverse(List alreadyLinkedObjectIds, Dataverse dataverseToLinkTo) { - return alreadyLinkedObjectIds.contains(dataverseToLinkTo.getId()); - } - - private boolean alreadyLinkedToTheDataset(List alreadyLinkedObjectIds, Dataset linkToThisDataset) { - return alreadyLinkedObjectIds.contains(linkToThisDataset.getId()); - } - - private static boolean wouldResultInLinkingToItself(Dataverse savedSearchDefinitionPoint, Dataverse dataverseToLinkTo) { - return savedSearchDefinitionPoint.equals(dataverseToLinkTo); - } - private boolean datasetToLinkToIsAlreadyPartOfTheSubtree(Dataverse definitionPoint, Dataset datasetWeMayLinkTo) { Dataverse ancestor = datasetWeMayLinkTo.getOwner(); while (ancestor != null) { From 9f23e24be834f9fb7b08791c3319252c85c6e456 Mon Sep 17 00:00:00 2001 From: gdurand Date: Fri, 15 Jan 2021 12:41:09 -0500 Subject: [PATCH 2/8] removed adding a saved search when linking a dataverse (since handled by indexing now) --- .../harvard/iq/dataverse/DataversePage.java | 38 ++----------------- 1 file changed, 3 insertions(+), 35 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 165c1759b5e..34efc928ab9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -787,18 +787,9 @@ public String saveLinkedDataverse() { return ""; } - AuthenticatedUser savedSearchCreator = getAuthenticatedUser(); - if (savedSearchCreator == null) { - String msg = BundleUtil.getStringFromBundle("dataverse.link.user"); - logger.severe(msg); - JsfHelper.addErrorMessage(msg); - return returnRedirect(); - } - linkingDataverse = dataverseService.find(linkingDataverseId); LinkDataverseCommand cmd = new LinkDataverseCommand(dvRequestService.getDataverseRequest(), linkingDataverse, dataverse); - //LinkDvObjectCommand cmd = new LinkDvObjectCommand (session.getUser(), linkingDataverse, dataverse); try { commandEngine.submit(cmd); } catch (CommandException ex) { @@ -808,32 +799,9 @@ public String saveLinkedDataverse() { JsfHelper.addErrorMessage(msg); return returnRedirect(); } - - SavedSearch savedSearchOfChildren = createSavedSearchForChildren(savedSearchCreator); - - boolean createLinksAndIndexRightNow = false; - if (createLinksAndIndexRightNow) { - try { - // create links (does indexing) right now (might be expensive) - boolean debug = false; - DataverseRequest dataverseRequest = new DataverseRequest(savedSearchCreator, SavedSearchServiceBean.getHttpServletRequest()); - savedSearchService.makeLinksForSingleSavedSearch(dataverseRequest, savedSearchOfChildren, debug); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success", getSuccessMessageArguments())); - return returnRedirect(); - } catch (SearchException | CommandException ex) { - // error: solr is down, etc. can't link children right now - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataverse.linked.internalerror", getSuccessMessageArguments())); - String msg = dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + " but contents will not appear until an internal error has been fixed."; - logger.log(Level.SEVERE, "{0} {1}", new Object[]{msg, ex}); - //JsfHelper.addErrorMessage(msg); - return returnRedirect(); - } - } else { - // defer: please wait for the next timer/cron job - //JsfHelper.addSuccessMessage(dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + ". Please wait for its contents to appear."); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success.wait", getSuccessMessageArguments())); - return returnRedirect(); - } + + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success.wait", getSuccessMessageArguments())); + return returnRedirect(); } private List getSuccessMessageArguments() { From 864391d9dbad62b95acd607be21157bc1a765828 Mon Sep 17 00:00:00 2001 From: gdurand Date: Fri, 15 Jan 2021 14:06:01 -0500 Subject: [PATCH 3/8] minor tweaks --- .../savedsearch/SavedSearchServiceBean.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java index 8099305a69b..a495842e40d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/savedsearch/SavedSearchServiceBean.java @@ -242,7 +242,7 @@ public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, S } private SolrQueryResponse findHits(SavedSearch savedSearch) throws SearchException { - String sortField = SearchFields.RELEVANCE; + String sortField = SearchFields.TYPE; // first return dataverses, then datasets String sortOrder = SortBy.DESCENDING; SortBy sortBy = new SortBy(sortField, sortOrder); int paginationStart = 0; @@ -345,14 +345,12 @@ public static HttpServletRequest getHttpServletRequest() { * Saved Search was created. The default IP address in the * DataverseRequest constructor is used instead, which as of this * writing is 0.0.0.0 to mean "undefined". Is this a feature or a bug? - * What is the expected interplay between Saved Search and IP Groups? - * Users might be surprised to see certain DvObjects in the results of - * their query when creating the Saved Search and later find that those - * DvObjects, which are only visible due to an IP Groups membership, are - * not found by Saved Search when executed by cron, for example. As of - * this writing Saved Search is a superuser-only feature so perhaps IP - * Groups are irrelevant because all DvObjects are discoverable to - * superusers. + * This is not an issue for the search itself, since it is now run as the + * Guest User, but would present a problem if the user does not have + * permission to create links and could only create the saved search due to + * a granted permission from the IP Group. + * As of this writing Saved Search is a superuser-only feature; so IP + * Groups are irrelevant because all superusers can create links. */ return null; } From b8a4191abbe498ba56bc08769d4e7d867ac7dbbf Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Wed, 20 Jan 2021 18:32:42 -0500 Subject: [PATCH 4/8] release notes --- .../7398-saved-search-performance | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 doc/release-notes/7398-saved-search-performance diff --git a/doc/release-notes/7398-saved-search-performance b/doc/release-notes/7398-saved-search-performance new file mode 100644 index 00000000000..316fdc53ca0 --- /dev/null +++ b/doc/release-notes/7398-saved-search-performance @@ -0,0 +1,86 @@ +A previous version of dataverse changed the indexing logic so that when a user links a dataverse, its children are also indexed as linked. This means that the children to not need to be separately linked, and in this version we removed the logic that creates a saved search to create those links when a dataverse is linked. + +We recommend cleaning up the db to a) remove these saved searches and b) remove the links for the objects. We can do this via a few queries. + +Note: removing these saved searches and links should not affect what users will see as linked due to the aforementioned indexing change. Similarly, not removing these saved searches and links should not affect anything, but is a cleanup of unnecessary rows in the database. + +------------------------------------------ + +First clean up the saved searches: + +-- this query will show you the saved searches that will get deleted + +select ss.id, ss.definitionpoint_id, dld.dataverse_id, ssfq.filterquery +from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld +where ss.id = ssfq.savedsearch_id +and ss.definitionpoint_id = dld.linkingdataverse_id +and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer +and ss.query='*' +and ssfq.filterquery like 'subtreePaths%' +order by ss.definitionpoint_id; + + +-- these queries will delete them + +begin; + +create temporary table delete_ss on commit drop as ( +Select ss.id +from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld +where ss.id = ssfq.savedsearch_id +and ss.definitionpoint_id = dld.linkingdataverse_id +and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer +and ss.query='*' +and ssfq.filterquery like 'subtreePaths%' +); + +delete from savedsearchfilterquery where savedsearch_id in (select id from delete_ss); +delete from savedsearch where id in (select id from delete_ss); + +commit; + +------------------------------------------ + +Then clean up the linked datasets and dataverses: + +-- these queries will show you the linked objects that will get deleted + +select dld.dataset_id, dvo.owner_id, dld.linkingdataverse_id, +dvld.dataverse_id, dvld.linkingdataverse_id +from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataset_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +order by dld.linkingdataverse_id; + +select dld.dataverse_id, dvo.owner_id, dld.linkingdataverse_id, +dvld.dataverse_id, dvld.linkingdataverse_id +from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataverse_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +order by dld.linkingdataverse_id; + + +-- these queries will delete them + +begin; + +delete from datasetlinkingdataverse where id in ( +select dld.id +from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataset_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +); + +delete from dataverselinkingdataverse where id in ( +select dld.id +from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataverse_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +); + +commit; + From 29186f7be8b27ae7dc8ee758909f917b5f105c76 Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Thu, 21 Jan 2021 22:35:43 -0500 Subject: [PATCH 5/8] updating release notes for Saved Search --- ...performance => 7398-saved-search-performance.md} | 13 +++++++++++++ 1 file changed, 13 insertions(+) rename doc/release-notes/{7398-saved-search-performance => 7398-saved-search-performance.md} (87%) diff --git a/doc/release-notes/7398-saved-search-performance b/doc/release-notes/7398-saved-search-performance.md similarity index 87% rename from doc/release-notes/7398-saved-search-performance rename to doc/release-notes/7398-saved-search-performance.md index 316fdc53ca0..cdb0b7839d5 100644 --- a/doc/release-notes/7398-saved-search-performance +++ b/doc/release-notes/7398-saved-search-performance.md @@ -1,3 +1,13 @@ +## Release Highlights + +### Saved Search Performance Improvements + +A refactoring has greatly improved Saved Search performance in the application. If your installation has multiple, potentially long-running Saved Searches in place, this greatly improves the probability that those search jobs will complete without timing out. + +## Notes for Dataverse Installation Administrators + +### DB Cleanup for Saved Searches + A previous version of dataverse changed the indexing logic so that when a user links a dataverse, its children are also indexed as linked. This means that the children to not need to be separately linked, and in this version we removed the logic that creates a saved search to create those links when a dataverse is linked. We recommend cleaning up the db to a) remove these saved searches and b) remove the links for the objects. We can do this via a few queries. @@ -84,3 +94,6 @@ and dvo.owner_id = dvld.dataverse_id commit; +## Upgrade Instructions + +X\. Add as optional upgrade step. \ No newline at end of file From 9b47509662309440117bbda3eefea9d31214fe38 Mon Sep 17 00:00:00 2001 From: Danny Brooke Date: Thu, 21 Jan 2021 22:57:50 -0500 Subject: [PATCH 6/8] proposed reorganization of query docs and release notes --- .../7398-saved-search-performance.md | 91 +++---------------- scripts/issues/7398/delete_dld.txt | 21 +++++ scripts/issues/7398/delete_ss.txt | 18 ++++ scripts/issues/7398/dld_for_deletion.txt | 17 ++++ scripts/issues/7398/ss_for_deletion.txt | 10 ++ 5 files changed, 77 insertions(+), 80 deletions(-) create mode 100644 scripts/issues/7398/delete_dld.txt create mode 100644 scripts/issues/7398/delete_ss.txt create mode 100644 scripts/issues/7398/dld_for_deletion.txt create mode 100644 scripts/issues/7398/ss_for_deletion.txt diff --git a/doc/release-notes/7398-saved-search-performance.md b/doc/release-notes/7398-saved-search-performance.md index cdb0b7839d5..e37fb2923cd 100644 --- a/doc/release-notes/7398-saved-search-performance.md +++ b/doc/release-notes/7398-saved-search-performance.md @@ -10,90 +10,21 @@ A refactoring has greatly improved Saved Search performance in the application. A previous version of dataverse changed the indexing logic so that when a user links a dataverse, its children are also indexed as linked. This means that the children to not need to be separately linked, and in this version we removed the logic that creates a saved search to create those links when a dataverse is linked. -We recommend cleaning up the db to a) remove these saved searches and b) remove the links for the objects. We can do this via a few queries. +We recommend cleaning up the db to a) remove these saved searches and b) remove the links for the objects. We can do this via a few queries, which are available in the folder here: -Note: removing these saved searches and links should not affect what users will see as linked due to the aforementioned indexing change. Similarly, not removing these saved searches and links should not affect anything, but is a cleanup of unnecessary rows in the database. - ------------------------------------------- - -First clean up the saved searches: - --- this query will show you the saved searches that will get deleted - -select ss.id, ss.definitionpoint_id, dld.dataverse_id, ssfq.filterquery -from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld -where ss.id = ssfq.savedsearch_id -and ss.definitionpoint_id = dld.linkingdataverse_id -and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer -and ss.query='*' -and ssfq.filterquery like 'subtreePaths%' -order by ss.definitionpoint_id; - - --- these queries will delete them - -begin; - -create temporary table delete_ss on commit drop as ( -Select ss.id -from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld -where ss.id = ssfq.savedsearch_id -and ss.definitionpoint_id = dld.linkingdataverse_id -and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer -and ss.query='*' -and ssfq.filterquery like 'subtreePaths%' -); - -delete from savedsearchfilterquery where savedsearch_id in (select id from delete_ss); -delete from savedsearch where id in (select id from delete_ss); +https://github.com/IQSS/dataverse/raw/develop/scripts/issues/7398/ -commit; +There are four queries available, and they should be run in this order: ------------------------------------------- +- ss_for_deletion.txt to identify the Saved Searches to be deleted +- delete_ss.txt to delete the Saved Searches identified in the previous query +- dld_for_deletion.txt to identify the linked datasets and dataverses to be deleted +- delete_dld.txt to delete the linked datasets and dataverses identified in the previous query -Then clean up the linked datasets and dataverses: - --- these queries will show you the linked objects that will get deleted - -select dld.dataset_id, dvo.owner_id, dld.linkingdataverse_id, -dvld.dataverse_id, dvld.linkingdataverse_id -from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld -where dld.dataset_id = dvo.id -and dld.linkingdataverse_id = dvld.linkingdataverse_id -and dvo.owner_id = dvld.dataverse_id -order by dld.linkingdataverse_id; - -select dld.dataverse_id, dvo.owner_id, dld.linkingdataverse_id, -dvld.dataverse_id, dvld.linkingdataverse_id -from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld -where dld.dataverse_id = dvo.id -and dld.linkingdataverse_id = dvld.linkingdataverse_id -and dvo.owner_id = dvld.dataverse_id -order by dld.linkingdataverse_id; - - --- these queries will delete them - -begin; - -delete from datasetlinkingdataverse where id in ( -select dld.id -from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld -where dld.dataset_id = dvo.id -and dld.linkingdataverse_id = dvld.linkingdataverse_id -and dvo.owner_id = dvld.dataverse_id -); - -delete from dataverselinkingdataverse where id in ( -select dld.id -from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld -where dld.dataverse_id = dvo.id -and dld.linkingdataverse_id = dvld.linkingdataverse_id -and dvo.owner_id = dvld.dataverse_id -); +Note: removing these saved searches and links should not affect what users will see as linked due to the aforementioned indexing change. Similarly, not removing these saved searches and links should not affect anything, but is a cleanup of unnecessary rows in the database. -commit; +## Additional Upgrade Instructions -## Upgrade Instructions +X\. (Optional, but recommended) DB Cleanup -X\. Add as optional upgrade step. \ No newline at end of file +Perform the DB Cleanup for Saved Searches and Linked Objects, summarized in the "Notes for Dataverse Installation Administrators" section above. \ No newline at end of file diff --git a/scripts/issues/7398/delete_dld.txt b/scripts/issues/7398/delete_dld.txt new file mode 100644 index 00000000000..30edf42774b --- /dev/null +++ b/scripts/issues/7398/delete_dld.txt @@ -0,0 +1,21 @@ +-- these queries will delete linked objects identified using the query in dld_for_deletion + +begin; + +delete from datasetlinkingdataverse where id in ( +select dld.id +from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataset_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +); + +delete from dataverselinkingdataverse where id in ( +select dld.id +from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataverse_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +); + +commit; \ No newline at end of file diff --git a/scripts/issues/7398/delete_ss.txt b/scripts/issues/7398/delete_ss.txt new file mode 100644 index 00000000000..3cf053ce5af --- /dev/null +++ b/scripts/issues/7398/delete_ss.txt @@ -0,0 +1,18 @@ +-- these queries will delete the saved searches identified using the ss_for_deletion query + +begin; + +create temporary table delete_ss on commit drop as ( +Select ss.id +from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld +where ss.id = ssfq.savedsearch_id +and ss.definitionpoint_id = dld.linkingdataverse_id +and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer +and ss.query='*' +and ssfq.filterquery like 'subtreePaths%' +); + +delete from savedsearchfilterquery where savedsearch_id in (select id from delete_ss); +delete from savedsearch where id in (select id from delete_ss); + +commit; diff --git a/scripts/issues/7398/dld_for_deletion.txt b/scripts/issues/7398/dld_for_deletion.txt new file mode 100644 index 00000000000..9619de1acee --- /dev/null +++ b/scripts/issues/7398/dld_for_deletion.txt @@ -0,0 +1,17 @@ +-- this query will show you the linked objects that will get deleted + +select dld.dataset_id, dvo.owner_id, dld.linkingdataverse_id, +dvld.dataverse_id, dvld.linkingdataverse_id +from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataset_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +order by dld.linkingdataverse_id; + +select dld.dataverse_id, dvo.owner_id, dld.linkingdataverse_id, +dvld.dataverse_id, dvld.linkingdataverse_id +from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld +where dld.dataverse_id = dvo.id +and dld.linkingdataverse_id = dvld.linkingdataverse_id +and dvo.owner_id = dvld.dataverse_id +order by dld.linkingdataverse_id; diff --git a/scripts/issues/7398/ss_for_deletion.txt b/scripts/issues/7398/ss_for_deletion.txt new file mode 100644 index 00000000000..11ad8bbc49a --- /dev/null +++ b/scripts/issues/7398/ss_for_deletion.txt @@ -0,0 +1,10 @@ +-- this query will show you the saved searches that will get deleted + +select ss.id, ss.definitionpoint_id, dld.dataverse_id, ssfq.filterquery +from savedsearch ss, savedsearchfilterquery ssfq, dataverselinkingdataverse dld +where ss.id = ssfq.savedsearch_id +and ss.definitionpoint_id = dld.linkingdataverse_id +and dld.dataverse_id = rtrim(reverse(split_part(reverse(ssfq.filterquery),'/',1)),'"')::integer +and ss.query='*' +and ssfq.filterquery like 'subtreePaths%' +order by ss.definitionpoint_id; \ No newline at end of file From c181369765c314b565ede65e90c95b4ce65ba67d Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Fri, 22 Jan 2021 11:42:06 -0500 Subject: [PATCH 7/8] Update 7398-saved-search-performance.md --- doc/release-notes/7398-saved-search-performance.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/7398-saved-search-performance.md b/doc/release-notes/7398-saved-search-performance.md index e37fb2923cd..4986524ed4f 100644 --- a/doc/release-notes/7398-saved-search-performance.md +++ b/doc/release-notes/7398-saved-search-performance.md @@ -8,18 +8,18 @@ A refactoring has greatly improved Saved Search performance in the application. ### DB Cleanup for Saved Searches -A previous version of dataverse changed the indexing logic so that when a user links a dataverse, its children are also indexed as linked. This means that the children to not need to be separately linked, and in this version we removed the logic that creates a saved search to create those links when a dataverse is linked. +A previous version of dataverse changed the indexing logic so that when a user links a dataverse, its children are also indexed as linked. This means that the children do not need to be separately linked, and in this version we removed the logic that creates a saved search to create those links when a dataverse is linked. We recommend cleaning up the db to a) remove these saved searches and b) remove the links for the objects. We can do this via a few queries, which are available in the folder here: https://github.com/IQSS/dataverse/raw/develop/scripts/issues/7398/ -There are four queries available, and they should be run in this order: +There are four sets of queries available, and they should be run in this order: - ss_for_deletion.txt to identify the Saved Searches to be deleted - delete_ss.txt to delete the Saved Searches identified in the previous query - dld_for_deletion.txt to identify the linked datasets and dataverses to be deleted -- delete_dld.txt to delete the linked datasets and dataverses identified in the previous query +- delete_dld.txt to delete the linked datasets and dataverses identified in the previous queries Note: removing these saved searches and links should not affect what users will see as linked due to the aforementioned indexing change. Similarly, not removing these saved searches and links should not affect anything, but is a cleanup of unnecessary rows in the database. @@ -27,4 +27,4 @@ Note: removing these saved searches and links should not affect what users will X\. (Optional, but recommended) DB Cleanup -Perform the DB Cleanup for Saved Searches and Linked Objects, summarized in the "Notes for Dataverse Installation Administrators" section above. \ No newline at end of file +Perform the DB Cleanup for Saved Searches and Linked Objects, summarized in the "Notes for Dataverse Installation Administrators" section above. From 30144f2fc5e14d385642b34bdbfb1701debf6250 Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Fri, 22 Jan 2021 11:46:34 -0500 Subject: [PATCH 8/8] Update dld_for_deletion.txt --- scripts/issues/7398/dld_for_deletion.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/issues/7398/dld_for_deletion.txt b/scripts/issues/7398/dld_for_deletion.txt index 9619de1acee..51aa9ce1458 100644 --- a/scripts/issues/7398/dld_for_deletion.txt +++ b/scripts/issues/7398/dld_for_deletion.txt @@ -1,5 +1,6 @@ --- this query will show you the linked objects that will get deleted +-- these queries will show you the linked objects that will get deleted +-- datasets select dld.dataset_id, dvo.owner_id, dld.linkingdataverse_id, dvld.dataverse_id, dvld.linkingdataverse_id from datasetlinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld @@ -8,6 +9,7 @@ and dld.linkingdataverse_id = dvld.linkingdataverse_id and dvo.owner_id = dvld.dataverse_id order by dld.linkingdataverse_id; +-- dataverses select dld.dataverse_id, dvo.owner_id, dld.linkingdataverse_id, dvld.dataverse_id, dvld.linkingdataverse_id from dataverselinkingdataverse dld, dvobject dvo, dataverselinkingdataverse dvld