From f3f4545cc4c4c6a6c324b64ae76cdff6eb12b4b2 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 22 Apr 2021 17:01:48 -0400 Subject: [PATCH 1/5] preliminary/draft performance fix for the download-all api. it works for /api/access/dataset; but needs to be cleaned up/refactored, and we may want to take a closer look at how we actually writing the guestbookresponses more closely. #7812 --- .../dataverse/DatasetVersionServiceBean.java | 33 ++++++++++++---- .../GuestbookResponseServiceBean.java | 15 ++++++-- .../edu/harvard/iq/dataverse/api/Access.java | 38 +++++++++++++++---- 3 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index e4eb6aac88e..3c1ae3abf38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -648,13 +648,8 @@ public RetrieveDatasetVersionResponse selectRequestedVersion(List 0L)) { setLimit = DataFileZipper.DEFAULT_ZIPFILE_LIMIT; @@ -739,7 +760,7 @@ private Response downloadDatafiles(String rawFileIds, boolean gbrecs, String api if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, gbrecs, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, version, fileIds, apiToken, apiTokenUser, uriInfo, headers, gbrecs, true); } catch (WebApplicationException wae) { throw wae; } @@ -1865,7 +1886,9 @@ private User findAPITokenUser(String apiToken) { return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean gbrecs, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, DatasetVersion version, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean gbrecs, boolean orig) throws WebApplicationException { + logger.info("inside handleCustomZipDownload"); + String zipServiceKey = null; Timestamp timestamp = null; @@ -1886,6 +1909,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, } catch (NumberFormatException nfe) { fileId = null; } + logger.info("handling file "+fileId); if (fileId != null) { DataFile file = dataFileService.find(fileId); if (file != null) { @@ -1893,7 +1917,7 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, if (isAccessAuthorized(file, apiToken)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); if (gbrecs != true && file.isReleased()) { - GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), version, file, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); mdcLogService.logEntry(entry); From ad341af7ca83a22bbf6385895bb96e7c1e05ec1e Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 22 Apr 2021 18:23:04 -0400 Subject: [PATCH 2/5] Getting rid of GetDatasetCommand in the download api (since it appears to be redundant - ?). #7812 --- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 97f77a1172e..16577758439 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -632,7 +632,8 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); - final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + //final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + final Dataset retrieved = findDatasetOrDie(datasetIdOrPersistentId); //final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); if (!(user instanceof GuestUser)) { // If it is a guest user, let's not even bother looking up if a draft exists. From c92a65898079c0c107ef228e0728a13789e44def Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 29 Apr 2021 15:46:26 -0400 Subject: [PATCH 3/5] final (?) cleanup for #7812. this further simplifies the solution, implements it for the local version of download-all (local, in-application zipping, w/ no external zipper configured), plus fixes the unrelated guestbook response issue. --- .../GuestbookResponseServiceBean.java | 12 ----- .../edu/harvard/iq/dataverse/api/Access.java | 46 +++++++++++-------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index a582f162ee2..809417e3f9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -812,10 +812,6 @@ public GuestbookResponse initDefaultGuestbookResponse(Dataset dataset, DataFile } public GuestbookResponse initAPIGuestbookResponse(Dataset dataset, DataFile dataFile, DataverseSession session, User user) { - return initAPIGuestbookResponse(dataset, null, dataFile, session, user); - } - - public GuestbookResponse initAPIGuestbookResponse(Dataset dataset, DatasetVersion version, DataFile dataFile, DataverseSession session, User user) { GuestbookResponse guestbookResponse = new GuestbookResponse(); Guestbook datasetGuestbook = dataset.getGuestbook(); @@ -825,14 +821,6 @@ public GuestbookResponse initAPIGuestbookResponse(Dataset dataset, DatasetVersio guestbookResponse.setGuestbook(datasetGuestbook); } - // This may not be doing what we want: - if (version == null) { - version = dataset.getLatestVersion(); - } - - if(version != null && version.isDraft()){ - guestbookResponse.setWriteResponse(false); - } if (dataFile != null){ guestbookResponse.setDataFile(dataFile); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 16577758439..fcde224e62f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -16,7 +16,6 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersionServiceBean; import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean.RetrieveDatasetVersionResponse; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseRequestServiceBean; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; @@ -24,7 +23,6 @@ import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DataverseTheme; import edu.harvard.iq.dataverse.FileDownloadServiceBean; -import edu.harvard.iq.dataverse.Guestbook; import edu.harvard.iq.dataverse.GuestbookResponse; import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; import edu.harvard.iq.dataverse.PermissionServiceBean; @@ -69,7 +67,6 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.json.JsonPrinter; import java.util.logging.Logger; import javax.ejb.EJB; @@ -121,8 +118,6 @@ import javax.ws.rs.RedirectionException; import javax.ws.rs.core.MediaType; import static javax.ws.rs.core.Response.Status.FORBIDDEN; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; /* @@ -632,15 +627,16 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); - //final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); final Dataset retrieved = findDatasetOrDie(datasetIdOrPersistentId); - //final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); if (!(user instanceof GuestUser)) { // If it is a guest user, let's not even bother looking up if a draft exists. final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); - if (permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { + if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { String fileIds = getFileIdsAsCommaSeparated(draft.getFileMetadatas()); - return downloadDatafiles(fileIds, draft, gbrecs, apiTokenParam, uriInfo, headers, response); + // We don't want downloads from Draft versions to be counted, + // so we are setting the gbrecs (aka "do not write guestbook response") + // variable accordingly: + return downloadDatafiles(fileIds, true, apiTokenParam, uriInfo, headers, response); } } @@ -648,8 +644,15 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist final DatasetVersion latest = versionService.getLatestReleasedVersionFast(retrieved.getId()); + // Make sure to throw a clean "not found" if we have failed to locate an + // accessible version: + + if (latest == null) { + throw new NotFoundException(); + } + String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); - return downloadDatafiles(fileIds, latest, gbrecs, apiTokenParam, uriInfo, headers, response); + return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -685,9 +688,16 @@ public Command handleLatestPublished() { } })); if (dsv == null) { + // Shouldn't this be a 404/not found here? return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); } String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); + // We don't want downloads from Draft versions to be counted, + // so we are setting the gbrecs (aka "do not write guestbook response") + // variable accordingly: + if (dsv.isDraft()) { + gbrecs = true; + } return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } catch (WrappedResponse wr) { return wr.getResponse(); @@ -713,11 +723,7 @@ public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbr return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } - private Response downloadDatafiles(String rawFileIds, boolean gbrecs, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException { - return downloadDatafiles(rawFileIds, null, gbrecs, apiTokenParam, uriInfo, headers, response); - } - - private Response downloadDatafiles(String rawFileIds, DatasetVersion version, boolean gbrecs, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { + private Response downloadDatafiles(String rawFileIds, boolean donotwriteGBResponse, String apiTokenParam, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response) throws WebApplicationException /* throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ { long setLimit = systemConfig.getZipDownloadLimit(); if (!(setLimit > 0L)) { setLimit = DataFileZipper.DEFAULT_ZIPFILE_LIMIT; @@ -761,7 +767,7 @@ private Response downloadDatafiles(String rawFileIds, DatasetVersion version, bo if (useCustomZipService) { URI redirect_uri = null; try { - redirect_uri = handleCustomZipDownload(customZipServiceUrl, version, fileIds, apiToken, apiTokenUser, uriInfo, headers, gbrecs, true); + redirect_uri = handleCustomZipDownload(customZipServiceUrl, fileIds, apiToken, apiTokenUser, uriInfo, headers, donotwriteGBResponse, true); } catch (WebApplicationException wae) { throw wae; } @@ -805,7 +811,7 @@ public void write(OutputStream os) throws IOException, logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); //downloadInstance.addDataFile(file); - if (gbrecs != true && file.isReleased()){ + if (donotwriteGBResponse != true && file.isReleased()){ GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); @@ -1887,7 +1893,7 @@ private User findAPITokenUser(String apiToken) { return apiTokenUser; } - private URI handleCustomZipDownload(String customZipServiceUrl, DatasetVersion version, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean gbrecs, boolean orig) throws WebApplicationException { + private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { logger.info("inside handleCustomZipDownload"); String zipServiceKey = null; @@ -1917,8 +1923,8 @@ private URI handleCustomZipDownload(String customZipServiceUrl, DatasetVersion v validFileCount++; if (isAccessAuthorized(file, apiToken)) { logger.fine("adding datafile (id=" + file.getId() + ") to the download list of the ZippedDownloadInstance."); - if (gbrecs != true && file.isReleased()) { - GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), version, file, session, apiTokenUser); + if (donotwriteGBResponse != true && file.isReleased()) { + GuestbookResponse gbr = guestbookResponseService.initAPIGuestbookResponse(file.getOwner(), file, session, apiTokenUser); guestbookResponseService.save(gbr); MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, file); mdcLogService.logEntry(entry); From 1795593aef236b52d62c9551c5b4bf23bdb91be0 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 30 Apr 2021 18:05:01 -0400 Subject: [PATCH 4/5] Changed the return code on a non-existing dataset request back to what it was (for legacy reasons), removed the .info log messages, rewrote a comment per code review. (#7812) --- .../edu/harvard/iq/dataverse/api/Access.java | 24 ++++++++++++------- src/main/java/propertyFiles/Bundle.properties | 1 + .../iq/dataverse/api/DownloadFilesIT.java | 5 +++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 7b403440c9d..791b0335741 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -629,15 +629,15 @@ public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") bool @GET @Produces({"application/zip"}) public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { - logger.info("inside download-all api"); try { User user = findUserOrDie(); DataverseRequest req = createDataverseRequest(user); final Dataset retrieved = findDatasetOrDie(datasetIdOrPersistentId); if (!(user instanceof GuestUser)) { - // If it is a guest user, let's not even bother looking up if a draft exists. + // The reason we are only looking up a draft version for a NON-guest user + // is that we know that guest is never authorized to final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); - if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { + if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { String fileIds = getFileIdsAsCommaSeparated(draft.getFileMetadatas()); // We don't want downloads from Draft versions to be counted, // so we are setting the gbrecs (aka "do not write guestbook response") @@ -650,11 +650,16 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist final DatasetVersion latest = versionService.getLatestReleasedVersionFast(retrieved.getId()); - // Make sure to throw a clean "not found" if we have failed to locate an - // accessible version: + // Make sure to throw a clean error code if we have failed to locate an + // accessible version: + // (A "Not Found" would be more appropriate here, I believe, than a "Bad Request". + // But we've been using the latter for a while, and it's a popular API... + // and this return code is expected by our tests - so I'm choosing it to keep + // -- L.A.) if (latest == null) { - throw new NotFoundException(); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.dataset.not.found")); + //throw new NotFoundException(); } String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); @@ -694,7 +699,10 @@ public Command handleLatestPublished() { } })); if (dsv == null) { - // Shouldn't this be a 404/not found here? + // (A "Not Found" would be more appropriate here, I believe, than a "Bad Request". + // But we've been using the latter for a while, and it's a popular API... + // and this return code is expected by our tests - so I'm choosing it to keep + // -- L.A.) return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); } String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); @@ -1903,7 +1911,6 @@ private User findAPITokenUser(String apiToken) { } private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, String apiToken, User apiTokenUser, UriInfo uriInfo, HttpHeaders headers, boolean donotwriteGBResponse, boolean orig) throws WebApplicationException { - logger.info("inside handleCustomZipDownload"); String zipServiceKey = null; Timestamp timestamp = null; @@ -1925,7 +1932,6 @@ private URI handleCustomZipDownload(String customZipServiceUrl, String fileIds, } catch (NumberFormatException nfe) { fileId = null; } - logger.info("handling file "+fileId); if (fileId != null) { DataFile file = dataFileService.find(fileId); if (file != null) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index d22f6f6537f..ad556123b98 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2395,6 +2395,7 @@ access.api.requestList.noRequestsFound=There are no access requests for this fil access.api.exception.metadata.not.available.for.nontabular.file=This type of metadata is only available for tabular files. access.api.exception.metadata.restricted.no.permission=You do not have permission to download this file. access.api.exception.version.not.found=Could not find requested dataset version. +access.api.exception.dataset.not.found=Could not find requested dataset. #permission permission.AddDataverse.label=AddDataverse diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java index 96bd5be6ca5..7d5adf95507 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -20,6 +20,7 @@ import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import static org.hamcrest.CoreMatchers.equalTo; import org.junit.Assert; import static org.junit.Assert.assertTrue; @@ -92,10 +93,12 @@ public void downloadAllFilesByVersion() throws IOException { Assert.assertEquals(expectedFiles1, filenamesFound1); // A guest user can't download unpublished files. + // (a guest user cannot even see that the draft version actually exists; + // so they are going to get a "BAD REQUEST", not "UNAUTHORIZED":) Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, null); downloadFiles2.prettyPrint(); downloadFiles2.then().assertThat() - .statusCode(UNAUTHORIZED.getStatusCode()) + .statusCode(BAD_REQUEST.getStatusCode()) .body("status", equalTo("ERROR")); UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) From 44f4b3dcdfc1211cd44501ef7d55bd427f448bc9 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 30 Apr 2021 19:27:03 -0400 Subject: [PATCH 5/5] fixing a typo-like issue with a comment in the api class (#7812) --- src/main/java/edu/harvard/iq/dataverse/api/Access.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 791b0335741..b69daa02e45 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -635,7 +635,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist final Dataset retrieved = findDatasetOrDie(datasetIdOrPersistentId); if (!(user instanceof GuestUser)) { // The reason we are only looking up a draft version for a NON-guest user - // is that we know that guest is never authorized to + // is that we know that guest never has the Permission.ViewUnpublishedDataset. final DatasetVersion draft = versionService.getDatasetVersionById(retrieved.getId(), DatasetVersion.VersionState.DRAFT.toString()); if (draft != null && permissionService.requestOn(req, retrieved).has(Permission.ViewUnpublishedDataset)) { String fileIds = getFileIdsAsCommaSeparated(draft.getFileMetadatas());