diff --git a/doc/release-notes/8241-mime.md b/doc/release-notes/8241-mime.md new file mode 100644 index 00000000000..9fcd90a38f5 --- /dev/null +++ b/doc/release-notes/8241-mime.md @@ -0,0 +1 @@ +Some auxiliary were being saved with the wrong content type (MIME type) but now the user can supply the content type on upload, overriding the type that would otherwise be assigned. diff --git a/doc/sphinx-guides/source/developers/aux-file-support.rst b/doc/sphinx-guides/source/developers/aux-file-support.rst index c92435796e1..666685ff987 100644 --- a/doc/sphinx-guides/source/developers/aux-file-support.rst +++ b/doc/sphinx-guides/source/developers/aux-file-support.rst @@ -10,14 +10,15 @@ To add an auxiliary file, specify the primary key of the datafile (FILE_ID), and .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export FILENAME='auxfile.txt' + export FILENAME='auxfile.json' + export FILETYPE='application/json' export FILE_ID='12345' export FORMAT_TAG='dpJson' export FORMAT_VERSION='v1' export TYPE='DP' export SERVER_URL=https://demo.dataverse.org - curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'origin=myApp' -F 'isPublic=true' -F "type=$TYPE" "$SERVER_URL/api/access/datafile/$FILE_ID/auxiliary/$FORMAT_TAG/$FORMAT_VERSION" + curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME;type=$FILETYPE" -F 'origin=myApp' -F 'isPublic=true' -F "type=$TYPE" "$SERVER_URL/api/access/datafile/$FILE_ID/auxiliary/$FORMAT_TAG/$FORMAT_VERSION" You should expect a 200 ("OK") response and JSON with information about your newly uploaded auxiliary file. diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index fe06b6074e7..76c91382868 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -25,6 +25,7 @@ import javax.ws.rs.ClientErrorException; import javax.ws.rs.InternalServerErrorException; import javax.ws.rs.ServerErrorException; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import org.apache.tika.Tika; @@ -67,10 +68,11 @@ public AuxiliaryFile save(AuxiliaryFile auxiliaryFile) { * @param origin - name of the tool/system that created the file * @param isPublic boolean - is this file available to any user? * @param type how to group the files such as "DP" for "Differentially + * @param mediaType user supplied content type (MIME type) * Private Statistics". * @return success boolean - returns whether the save was successful */ - public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type) { + public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile dataFile, String formatTag, String formatVersion, String origin, boolean isPublic, String type, MediaType mediaType) { StorageIO storageIO = null; AuxiliaryFile auxFile = new AuxiliaryFile(); @@ -97,8 +99,14 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile storageIO.saveInputStreamAsAux(di, auxExtension); auxFile.setChecksum(FileUtil.checksumDigestToString(di.getMessageDigest().digest())); - Tika tika = new Tika(); - auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); + // The null check prevents an NPE but we expect mediaType to be non-null + // and to default to "application/octet-stream". + if (mediaType != null && (!mediaType.toString().equals("application/octet-stream"))) { + auxFile.setContentType(mediaType.toString()); + } else { + Tika tika = new Tika(); + auxFile.setContentType(tika.detect(storageIO.getAuxFileAsInputStream(auxExtension))); + } auxFile.setFormatTag(formatTag); auxFile.setFormatVersion(formatVersion); auxFile.setOrigin(origin); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 49880b0b498..e6cb0052bd1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -125,6 +125,7 @@ import javax.ws.rs.core.MediaType; import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataParam; /* @@ -1261,6 +1262,7 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, @FormDataParam("origin") String origin, @FormDataParam("isPublic") boolean isPublic, @FormDataParam("type") String type, + @FormDataParam("file") final FormDataBodyPart formDataBodyPart, @FormDataParam("file") InputStream fileInputStream ) { @@ -1280,7 +1282,12 @@ public Response saveAuxiliaryFileWithVersion(@PathParam("fileId") Long fileId, return error(FORBIDDEN, "User not authorized to edit the dataset."); } - AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type); + MediaType mediaType = null; + if (formDataBodyPart != null) { + mediaType = formDataBodyPart.getMediaType(); + } + + AuxiliaryFile saved = auxiliaryFileService.processAuxiliaryFile(fileInputStream, dataFile, formatTag, formatVersion, origin, isPublic, type, mediaType); if (saved!=null) { return ok(json(saved)); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 80b3f988953..2e978cab1ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -577,6 +577,7 @@ private boolean isAuxiliaryObjectCached(StorageIO storageIO, String auxiliaryTag } } + // TODO: Return ".md" for "text/markdown" as well as other extensions in MimeTypeDetectionByFileExtension.properties private String getFileExtension(AuxiliaryFile auxFile) { String fileExtension = ""; if (auxFile == null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java index a64df2c48db..0e404f6ba97 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AuxiliaryFilesIT.java @@ -82,8 +82,7 @@ public void testUploadAuxFiles() throws IOException { uploadAuxFileJson.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo("DP")) - // FIXME: application/json would be better - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("application/json")); Response uploadAuxFileJsonAgain = UtilIT.uploadAuxFile(fileId, pathToAuxFileJson.toString(), formatTagJson, formatVersionJson, mimeTypeJson, true, dpType, apiToken); uploadAuxFileJsonAgain.prettyPrint(); uploadAuxFileJsonAgain.then().assertThat() @@ -101,8 +100,7 @@ public void testUploadAuxFiles() throws IOException { uploadAuxFileXml.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo("DP")) - // FIXME: application/xml would be better - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("application/xml")); // PDF aux file Path pathToAuxFilePdf = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.pdf"); @@ -158,13 +156,13 @@ public void testUploadAuxFiles() throws IOException { java.nio.file.Files.write(pathToAuxFileMd, contentOfMd.getBytes()); String formatTagMd = "README"; String formatVersionMd = "0.1"; - String mimeTypeMd = "application/xml"; + String mimeTypeMd = "text/markdown"; Response uploadAuxFileMd = UtilIT.uploadAuxFile(fileId, pathToAuxFileMd.toString(), formatTagMd, formatVersionMd, mimeTypeMd, true, null, apiToken); uploadAuxFileMd.prettyPrint(); uploadAuxFileMd.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.type", equalTo(null)) - .body("data.contentType", equalTo("text/plain")); + .body("data.contentType", equalTo("text/markdown")); // Unknown type Path pathToAuxFileTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "nbt.txt"); @@ -226,17 +224,31 @@ public void testUploadAuxFiles() throws IOException { .body("data.type", equalTo("someType")) .body("data.contentType", equalTo("text/plain")); + // file with no MIME type + Path pathToAuxFileNoMimeType1 = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file1.md"); + String contentOfNoMimeType1 = "This file has no MIME type (content type)."; + java.nio.file.Files.write(pathToAuxFileNoMimeType1, contentOfNoMimeType1.getBytes()); + String formatTagNoMimeType1 = "noMimeType1"; + String formatVersionNoMimeType1 = "0.1"; + String mimeTypeNoMimeType1 = null; + String typeNoMimeType1 = "someType"; + Response uploadAuxFileNoMimeType1 = UtilIT.uploadAuxFile(fileId, pathToAuxFileNoMimeType1.toString(), formatTagNoMimeType1, formatVersionNoMimeType1, mimeTypeNoMimeType1, true, typeNoMimeType1, null, apiToken); + uploadAuxFileNoMimeType1.prettyPrint(); + uploadAuxFileNoMimeType1.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.type", equalTo("someType")) + // "text/plain" was detected by Tika + .body("data.contentType", equalTo("text/plain")); + // Download JSON aux file. Response downloadAuxFileJson = UtilIT.downloadAuxFile(fileId, formatTagJson, formatVersionJson, apiToken); downloadAuxFileJson.then().assertThat().statusCode(OK.getStatusCode()); - // FIXME: This should be ".json" instead of ".txt" - Assert.assertEquals("attachment; filename=\"data.tab.dpJson_0.1.txt\"", downloadAuxFileJson.header("Content-disposition")); + Assert.assertEquals("attachment; filename=\"data.tab.dpJson_0.1.json\"", downloadAuxFileJson.header("Content-disposition")); // Download XML aux file. Response downloadAuxFileXml = UtilIT.downloadAuxFile(fileId, formatTagXml, formatVersionXml, apiToken); downloadAuxFileXml.then().assertThat().statusCode(OK.getStatusCode()); - // FIXME: This should be ".xml" instead of ".txt" - Assert.assertEquals("attachment; filename=\"data.tab.dpXml_0.1.txt\"", downloadAuxFileXml.header("Content-disposition")); + Assert.assertEquals("attachment; filename=\"data.tab.dpXml_0.1.xml\"", downloadAuxFileXml.header("Content-disposition")); // Download PDF aux file. Response downloadAuxFilePdf = UtilIT.downloadAuxFile(fileId, formatTagPdf, formatVersionPdf, apiToken); @@ -246,7 +258,14 @@ public void testUploadAuxFiles() throws IOException { // Download Markdown aux file. Response downloadAuxFileMd = UtilIT.downloadAuxFile(fileId, formatTagMd, formatVersionMd, apiToken); downloadAuxFileMd.then().assertThat().statusCode(OK.getStatusCode()); - Assert.assertEquals("attachment; filename=\"data.tab.README_0.1.txt\"", downloadAuxFileMd.header("Content-disposition")); + // No file extenstion here because Tika's getDefaultMimeTypes doesn't include "text/markdown". + // Note: browsers seem to add ".bin" ("myfile.bin") rather than no extension ("myfile"). + Assert.assertEquals("attachment; filename=\"data.tab.README_0.1\"", downloadAuxFileMd.header("Content-disposition")); + + // Download Markdown aux file with no MIME type given + Response downloadAuxFileNoMime1 = UtilIT.downloadAuxFile(fileId, formatTagNoMimeType1, formatVersionNoMimeType1, apiToken); + downloadAuxFileNoMime1.then().assertThat().statusCode(OK.getStatusCode()); + Assert.assertEquals("attachment; filename=\"data.tab.noMimeType1_0.1.txt\"", downloadAuxFileNoMime1.header("Content-disposition")); Response createUserNoPrivs = UtilIT.createRandomUser(); createUserNoPrivs.then().assertThat().statusCode(OK.getStatusCode()); @@ -289,7 +308,7 @@ public void testUploadAuxFiles() throws IOException { listAllAuxFiles.prettyPrint(); listAllAuxFiles.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.size()", equalTo(8)); + .body("data.size()", equalTo(9)); Response deleteAuxFileOrigin1 = UtilIT.deleteAuxFile(fileId, formatTagOrigin1, formatVersionOrigin1, apiToken); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9e900a945be..aaf60b2cb35 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -658,6 +658,11 @@ static Response uploadAuxFile(Long fileId, String pathToFile, String formatTag, .header(API_TOKEN_HTTP_HEADER, apiToken) .multiPart("file", new File(pathToFile), mimeType) .multiPart("isPublic", isPublic); + if (mimeType != null) { + requestSpecification.multiPart("file", new File(pathToFile), mimeType); + } else { + requestSpecification.multiPart("file", new File(pathToFile)); + } if (type != null) { requestSpecification.multiPart("type", type); }