From dda1893a019efe5359f76eef355b1d7aa1a425f1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 6 Dec 2021 15:23:26 -0500 Subject: [PATCH 1/3] allow users to override content type for aux files #8241 --- doc/release-notes/8241-mime.md | 1 + .../source/developers/aux-file-support.rst | 5 +++-- .../dataverse/AuxiliaryFileServiceBean.java | 12 ++++++++--- .../edu/harvard/iq/dataverse/api/Access.java | 9 ++++++++- .../dataverse/api/DownloadInstanceWriter.java | 1 + .../iq/dataverse/api/AuxiliaryFilesIT.java | 20 +++++++++---------- 6 files changed, 31 insertions(+), 17 deletions(-) create mode 100644 doc/release-notes/8241-mime.md diff --git a/doc/release-notes/8241-mime.md b/doc/release-notes/8241-mime.md new file mode 100644 index 00000000000..9fcd90a38f5 --- /dev/null +++ b/doc/release-notes/8241-mime.md @@ -0,0 +1 @@ +Some auxiliary were being saved with the wrong content type (MIME type) but now the user can supply the content type on upload, overriding the type that would otherwise be assigned. diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index c92435796e1..666685ff987 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -10,14 +10,15 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export FILENAME='auxfile.txt' + export FILENAME='auxfile.json' + export FILETYPE='application/json' export FILE_ID='12345' export FORMAT_TAG='dpJson' export FORMAT_VERSION='v1' export TYPE='DP' export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' -F "type=$TYPE" "$SERVER_URL/api/access/datafile/$FILE_ID/auxiliary/$FORMAT_TAG/$FORMAT_VERSION" + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME;type=$FILETYPE" -F 'origin=myApp' -F 'isPublic=true' -F "type=$TYPE" "$SERVER_URL/api/access/datafile/$FILE_ID/auxiliary/$FORMAT_TAG/$FORMAT_VERSION" You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file. diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index fe06b6074e7..9eaf1faf456 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -25,6 +25,7 @@ import javax.ws.rs.ClientErrorException; import javax.ws.rs.InternalServerErrorException; import javax.ws.rs.ServerErrorException; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import org.apache.tika.Tika; @@ -67,10 +68,11 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param origin - name of the tool/system that created the file * @param isPublic boolean - is this file available to any user? * @param type how to group the files such as "DP" for "Differentially + * @param mediaType user supplied content type (MIME type) * Private Statistics". * @return success boolean - returns whether the save was successful */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -97,8 +99,12 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile storageIO.saveInputStreamAsAux(di, auxExtension); auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest())); - Tika tika = new Tika(); - auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); + if (mediaType != null) { + auxFile.setContentType(mediaType.toString()); + } else { + Tika tika = new Tika(); + auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); + } auxFile.setFormatTag(formatTag); auxFile.setFormatVersion(formatVersion); auxFile.setOrigin(origin); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 49880b0b498..e6cb0052bd1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -125,6 +125,7 @@ import javax.ws.rs.core.MediaType; import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataParam; /* @@ -1261,6 +1262,7 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, @FormDataParam("origin") String origin, @FormDataParam("isPublic") boolean isPublic, @FormDataParam("type") String type, + @FormDataParam("file") final FormDataBodyPart formDataBodyPart, @FormDataParam("file") InputStream fileInputStream ) { @@ -1280,7 +1282,12 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, return error(FORBIDDEN, "User not authorized to edit the dataset."); } - AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type); + MediaType mediaType = null; + if (formDataBodyPart != null) { + mediaType = formDataBodyPart.getMediaType(); + } + + AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); if (saved!=null) { return ok(json(saved)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 80b3f988953..2e978cab1ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -577,6 +577,7 @@ private boolean isAuxiliaryObjectCached(StorageIO storageIO, String auxiliaryTag } } + // TODO: Return ".md" for "text/markdown" as well as other extensions in MimeTypeDetectionByFileExtension.properties private String getFileExtension(AuxiliaryFile auxFile) { String fileExtension = ""; if (auxFile == null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java index a64df2c48db..056f1d77044 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java @@ -82,8 +82,7 @@ public void testUploadAuxFiles() throws IOException { uploadAuxFileJson.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo("DP")) - // FIXME: application/json would be better - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("application/json")); Response uploadAuxFileJsonAgain = UtilIT.uploadAuxFile(fileId, pathToAuxFileJson.toString(), formatTagJson, formatVersionJson, mimeTypeJson, true, dpType, apiToken); uploadAuxFileJsonAgain.prettyPrint(); uploadAuxFileJsonAgain.then().assertThat() @@ -101,8 +100,7 @@ public void testUploadAuxFiles() throws IOException { uploadAuxFileXml.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo("DP")) - // FIXME: application/xml would be better - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("application/xml")); // PDF aux file Path pathToAuxFilePdf = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.pdf"); @@ -158,13 +156,13 @@ public void testUploadAuxFiles() throws IOException { java.nio.file.Files.write(pathToAuxFileMd, contentOfMd.getBytes()); String formatTagMd = "README"; String formatVersionMd = "0.1"; - String mimeTypeMd = "application/xml"; + String mimeTypeMd = "text/markdown"; Response uploadAuxFileMd = UtilIT.uploadAuxFile(fileId, pathToAuxFileMd.toString(), formatTagMd, formatVersionMd, mimeTypeMd, true, null, apiToken); uploadAuxFileMd.prettyPrint(); uploadAuxFileMd.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo(null)) - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("text/markdown")); // Unknown type Path pathToAuxFileTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "nbt.txt"); @@ -229,14 +227,12 @@ public void testUploadAuxFiles() throws IOException { // Download JSON aux file. Response downloadAuxFileJson = UtilIT.downloadAuxFile(fileId, formatTagJson, formatVersionJson, apiToken); downloadAuxFileJson.then().assertThat().statusCode(OK.getStatusCode()); - // FIXME: This should be ".json" instead of ".txt" - Assert.assertEquals("attachment; filename=\"data.tab.dpJson_0.1.txt\"", downloadAuxFileJson.header("Content-disposition")); + Assert.assertEquals("attachment; filename=\"data.tab.dpJson_0.1.json\"", downloadAuxFileJson.header("Content-disposition")); // Download XML aux file. Response downloadAuxFileXml = UtilIT.downloadAuxFile(fileId, formatTagXml, formatVersionXml, apiToken); downloadAuxFileXml.then().assertThat().statusCode(OK.getStatusCode()); - // FIXME: This should be ".xml" instead of ".txt" - Assert.assertEquals("attachment; filename=\"data.tab.dpXml_0.1.txt\"", downloadAuxFileXml.header("Content-disposition")); + Assert.assertEquals("attachment; filename=\"data.tab.dpXml_0.1.xml\"", downloadAuxFileXml.header("Content-disposition")); // Download PDF aux file. Response downloadAuxFilePdf = UtilIT.downloadAuxFile(fileId, formatTagPdf, formatVersionPdf, apiToken); @@ -246,7 +242,9 @@ public void testUploadAuxFiles() throws IOException { // Download Markdown aux file. Response downloadAuxFileMd = UtilIT.downloadAuxFile(fileId, formatTagMd, formatVersionMd, apiToken); downloadAuxFileMd.then().assertThat().statusCode(OK.getStatusCode()); - Assert.assertEquals("attachment; filename=\"data.tab.README_0.1.txt\"", downloadAuxFileMd.header("Content-disposition")); + // No file extenstion here because Tika's getDefaultMimeTypes doesn't include "text/markdown". + // Note: browsers seem to add ".bin" ("myfile.bin") rather than no extension ("myfile"). + Assert.assertEquals("attachment; filename=\"data.tab.README_0.1\"", downloadAuxFileMd.header("Content-disposition")); Response createUserNoPrivs = UtilIT.createRandomUser(); createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); From 787dbed359a54ea83f2fd084caa08d8732c3720b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 7 Dec 2021 10:22:13 -0500 Subject: [PATCH 2/3] assert behavior when user doesn't include `;type=foo/bar` #8241 --- .../dataverse/AuxiliaryFileServiceBean.java | 2 ++ .../iq/dataverse/api/AuxiliaryFilesIT.java | 23 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 5 ++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 9eaf1faf456..cc0e17efb5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -99,6 +99,8 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile storageIO.saveInputStreamAsAux(di, auxExtension); auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest())); + // The null check prevents an NPE but we expect mediaType to be non-null + // and to default to "application/octet-stream". if (mediaType != null) { auxFile.setContentType(mediaType.toString()); } else { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java index 056f1d77044..2372b98fd94 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java @@ -224,6 +224,21 @@ public void testUploadAuxFiles() throws IOException { .body("data.type", equalTo("someType")) .body("data.contentType", equalTo("text/plain")); + // file with no MIME type + Path pathToAuxFileNoMimeType1 = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file1.md"); + String contentOfNoMimeType1 = "This file has no MIME type (content type)."; + java.nio.file.Files.write(pathToAuxFileNoMimeType1, contentOfNoMimeType1.getBytes()); + String formatTagNoMimeType1 = "noMimeType1"; + String formatVersionNoMimeType1 = "0.1"; + String mimeTypeNoMimeType1 = null; + String typeNoMimeType1 = "someType"; + Response uploadAuxFileNoMimeType1 = UtilIT.uploadAuxFile(fileId, pathToAuxFileNoMimeType1.toString(), formatTagNoMimeType1, formatVersionNoMimeType1, mimeTypeNoMimeType1, true, typeNoMimeType1, null, apiToken); + uploadAuxFileNoMimeType1.prettyPrint(); + uploadAuxFileNoMimeType1.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.type", equalTo("someType")) + .body("data.contentType", equalTo("application/octet-stream")); + // Download JSON aux file. Response downloadAuxFileJson = UtilIT.downloadAuxFile(fileId, formatTagJson, formatVersionJson, apiToken); downloadAuxFileJson.then().assertThat().statusCode(OK.getStatusCode()); @@ -246,6 +261,12 @@ public void testUploadAuxFiles() throws IOException { // Note: browsers seem to add ".bin" ("myfile.bin") rather than no extension ("myfile"). Assert.assertEquals("attachment; filename=\"data.tab.README_0.1\"", downloadAuxFileMd.header("Content-disposition")); + // Download Markdown aux file with no MIME type given + Response downloadAuxFileNoMime1 = UtilIT.downloadAuxFile(fileId, formatTagNoMimeType1, formatVersionNoMimeType1, apiToken); + downloadAuxFileNoMime1.then().assertThat().statusCode(OK.getStatusCode()); + // We didn't specify a MIME type and the formDataBodyPart.getMediaType object defaulted to "application/octet-stream" which becomes ".bin". + Assert.assertEquals("attachment; filename=\"data.tab.dpNoMimeType1_0.1.bin\"", downloadAuxFileNoMime1.header("Content-disposition")); + Response createUserNoPrivs = UtilIT.createRandomUser(); createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); String apiTokenNoPrivs = UtilIT.getApiTokenFromResponse(createUserNoPrivs); @@ -287,7 +308,7 @@ public void testUploadAuxFiles() throws IOException { listAllAuxFiles.prettyPrint(); listAllAuxFiles.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.size()", equalTo(8)); + .body("data.size()", equalTo(9)); Response deleteAuxFileOrigin1 = UtilIT.deleteAuxFile(fileId, formatTagOrigin1, formatVersionOrigin1, apiToken); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9e900a945be..aaf60b2cb35 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -658,6 +658,11 @@ static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, .header(API_TOKEN_HTTP_HEADER, apiToken) .multiPart("file", new File(pathToFile), mimeType) .multiPart("isPublic", isPublic); + if (mimeType != null) { + requestSpecification.multiPart("file", new File(pathToFile), mimeType); + } else { + requestSpecification.multiPart("file", new File(pathToFile)); + } if (type != null) { requestSpecification.multiPart("type", type); } From 985b552a4b5661a898cc6dd6c795cf5c1ad159d1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 7 Dec 2021 10:43:04 -0500 Subject: [PATCH 3/3] allow Tika to detect content type when not supplied #8241 "application/octet-stream" is the default when the user doesn't supply a content type. So if it's this, send it through Tika. Yes, a user can supply "application/octet-stream" and this will also be sent through Tika. --- .../edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java | 2 +- .../java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index cc0e17efb5c..76c91382868 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -101,7 +101,7 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile // The null check prevents an NPE but we expect mediaType to be non-null // and to default to "application/octet-stream". - if (mediaType != null) { + if (mediaType != null && (!mediaType.toString().equals("application/octet-stream"))) { auxFile.setContentType(mediaType.toString()); } else { Tika tika = new Tika(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java index 2372b98fd94..0e404f6ba97 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java @@ -237,7 +237,8 @@ public void testUploadAuxFiles() throws IOException { uploadAuxFileNoMimeType1.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo("someType")) - .body("data.contentType", equalTo("application/octet-stream")); + // "text/plain" was detected by Tika + .body("data.contentType", equalTo("text/plain")); // Download JSON aux file. Response downloadAuxFileJson = UtilIT.downloadAuxFile(fileId, formatTagJson, formatVersionJson, apiToken); @@ -264,8 +265,7 @@ public void testUploadAuxFiles() throws IOException { // Download Markdown aux file with no MIME type given Response downloadAuxFileNoMime1 = UtilIT.downloadAuxFile(fileId, formatTagNoMimeType1, formatVersionNoMimeType1, apiToken); downloadAuxFileNoMime1.then().assertThat().statusCode(OK.getStatusCode()); - // We didn't specify a MIME type and the formDataBodyPart.getMediaType object defaulted to "application/octet-stream" which becomes ".bin". - Assert.assertEquals("attachment; filename=\"data.tab.dpNoMimeType1_0.1.bin\"", downloadAuxFileNoMime1.header("Content-disposition")); + Assert.assertEquals("attachment; filename=\"data.tab.noMimeType1_0.1.txt\"", downloadAuxFileNoMime1.header("Content-disposition")); Response createUserNoPrivs = UtilIT.createRandomUser(); createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode());