From 46c1788a99081ef4c8377f16ab4f759cc684bd4d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 17 Dec 2025 15:41:13 -0500 Subject: [PATCH 1/3] A quick pr speeding up Manage Guestbooks page. --- doc/release-notes/6.9-release-notes.md | 1 + .../edu/harvard/iq/dataverse/GuestbookResponse.java | 3 ++- .../iq/dataverse/GuestbookResponseServiceBean.java | 10 +++++++--- src/main/resources/db/migration/V6.8.0.4.sql | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 src/main/resources/db/migration/V6.8.0.4.sql diff --git a/doc/release-notes/6.9-release-notes.md b/doc/release-notes/6.9-release-notes.md index 0211afc9acc..c49e4697096 100644 --- a/doc/release-notes/6.9-release-notes.md +++ b/doc/release-notes/6.9-release-notes.md @@ -95,6 +95,7 @@ See [the guides](https://guides.dataverse.org/en/6.9/developers/workflows.html#c - In prior versions of Dataverse, publishing a dataset via the superuser-only update-current-version option would not set the current curation status (if enabled/used) to none/empty and, in v6.7, would not maintain the curation status history. These issues are now resolved and the update-current-version option works the same as normal publication of a new version with regard to curation status. See #11783 and #11784. - This release fixes problems with guestbook questions being displayed at download when files are selected from the dataset files table when guestbook-at-request is enabled and not displaying when they should when access is requested from the file page. See #11800, #11808, and #11835. - The optional Croissant exporter has been updated to 0.1.6 to prevent variable names, variable descriptions, and variable types from being exposed for restricted files. See https://github.com/gdcc/exporter-croissant/pull/20 and #11752. +- The Manage Guestbooks page was optimized to load much faster for collections with large numbers of downloads recorded. 
## API Updates diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index a6da7de68c7..6a7ec13a782 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -32,7 +32,8 @@ @Index(columnList = "datafile_id"), @Index(columnList = "datasetversion_id"), @Index(columnList = "authenticateduser_id"), - @Index(columnList = "dataset_id") + @Index(columnList = "dataset_id"), + @Index(columnList = "dataset_id, guestbook_id", name="INDEX_GUESTBOOKRESPONSE_dataset_id_guestbook_id") }) @NamedQueries( diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index a49845ce834..26c82fa05fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -488,15 +488,19 @@ public Long findCount30Days(Long dataverseId) { return (Long) query.getSingleResult(); } - public Long findCountAll() { + /*public Long findCountAll() { return findCountAll(null); - } + }*/ public Long findCountAll(Long dataverseId) { String queryString; if (dataverseId != null) { - queryString = "select count(o.id) from GuestbookResponse o, DvObject v where o.dataset_id = v.id and v.owner_id = " + dataverseId + " "; + queryString = "select count(o.id) from GuestbookResponse o, DvObject v, Dataset d where o.dataset_id = v.id and v.id = d.id and v.owner_id = " + dataverseId + " "; } else { + // I hope this is never called with null dataverseId!! - counting + // all rows in this table can be insanely expensive. That's why we + // use a stored function to "estimate" its size, for the total + // number of downloads on the homepage. (L.A.) 
queryString = "select count(o.id) from GuestbookResponse o "; } diff --git a/src/main/resources/db/migration/V6.8.0.4.sql b/src/main/resources/db/migration/V6.8.0.4.sql new file mode 100644 index 00000000000..26a9d7628c7 --- /dev/null +++ b/src/main/resources/db/migration/V6.8.0.4.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS INDEX_GUESTBOOKRESPONSE_dataset_id_guestbook_id ON GUESTBOOKRESPONSE (dataset_id, guestbook_id); From d71ba6c9abf9e872c1f6134c87d14c3deae01ad1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 17 Dec 2025 15:52:05 -0500 Subject: [PATCH 2/3] Removed the code that allowed to run "select count(o) from guestbookresponse", on the entire table. --- .../GuestbookResponseServiceBean.java | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 26c82fa05fe..dcd62c9c0ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -488,21 +488,25 @@ public Long findCount30Days(Long dataverseId) { return (Long) query.getSingleResult(); } - /*public Long findCountAll() { - return findCountAll(null); - }*/ - public Long findCountAll(Long dataverseId) { - String queryString; - if (dataverseId != null) { - queryString = "select count(o.id) from GuestbookResponse o, DvObject v, Dataset d where o.dataset_id = v.id and v.id = d.id and v.owner_id = " + dataverseId + " "; - } else { - // I hope this is never called with null dataverseId!! - counting - // all rows in this table can be insanely expensive. That's why we - // use a stored function to "estimate" its size, for the total - // number of downloads on the homepage. (L.A.) 
- queryString = "select count(o.id) from GuestbookResponse o "; + + if (dataverseId == null) { + return null; } + + // Note that this method used to support NULL dataverseId, + // in which case it counted ALL the guestbookresponse rows + // for the entire instance: + // queryString = "select count(o.id) from GuestbookResponse o "; + // I removed this code (it was not being used, thankfully) since + // the query can be insanely expensive on a large production table. + // That's why we use a stored procedure to "estimate" its size, in + // the dedicated getTotalDownloadCount() method further below, for + // example, when we need to show the total number of downloads on + // the homepage. (L.A.) + + String queryString = "select count(o.id) from GuestbookResponse o, DvObject v, Dataset d where o.dataset_id = v.id and v.id = d.id and v.owner_id = " + dataverseId + " "; + Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); From e0ffbd5cbd4a07cb5ceaf8977f44212e036e313a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 17 Dec 2025 16:15:46 -0500 Subject: [PATCH 3/3] Added another useful index on guestbookresponse table (that we've been using at HDV since April) --- src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java | 3 ++- src/main/resources/db/migration/V6.8.0.4.sql | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 6a7ec13a782..a6ac270b45c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -33,7 +33,8 @@ @Index(columnList = "datasetversion_id"), @Index(columnList = "authenticateduser_id"), @Index(columnList = "dataset_id"), - @Index(columnList = "dataset_id, guestbook_id", name="INDEX_GUESTBOOKRESPONSE_dataset_id_guestbook_id") + @Index(columnList = "dataset_id, guestbook_id", 
name="INDEX_GUESTBOOKRESPONSE_dataset_id_guestbook_id"), + @Index(columnList = "dataset_id, eventtype", name="INDEX_GUESTBOOKRESPONSE_dataset_id_eventtype") }) @NamedQueries( diff --git a/src/main/resources/db/migration/V6.8.0.4.sql b/src/main/resources/db/migration/V6.8.0.4.sql index 26a9d7628c7..6d2f3484b81 100644 --- a/src/main/resources/db/migration/V6.8.0.4.sql +++ b/src/main/resources/db/migration/V6.8.0.4.sql @@ -1 +1,2 @@ CREATE INDEX IF NOT EXISTS INDEX_GUESTBOOKRESPONSE_dataset_id_guestbook_id ON GUESTBOOKRESPONSE (dataset_id, guestbook_id); +CREATE INDEX IF NOT EXISTS INDEX_GUESTBOOKRESPONSE_dataset_id_eventtype ON GUESTBOOKRESPONSE (dataset_id, eventtype);