diff --git a/doc/release-notes/6937-range.md b/doc/release-notes/6937-range.md new file mode 100644 index 00000000000..94a12ae704c --- /dev/null +++ b/doc/release-notes/6937-range.md @@ -0,0 +1,10 @@ +### Support for HTTP "Range" Header for Partial File Downloads + +Dataverse now supports the HTTP "Range" header, which allows users to download parts of a file. Here are some examples: + +- `bytes=0-9` gets the first 10 bytes. +- `bytes=10-19` gets 10 bytes from the middle. +- `bytes=-10` gets the last 10 bytes. +- `bytes=9-` gets all bytes except the first 9. + +Only a single range is supported. For more information, see the [Data Access API](https://guides.dataverse.org/en/5.9/api/dataaccess.html) section of the API Guide. diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index 55f7f021887..c22b1d8c442 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -131,6 +131,41 @@ true Generates a thumbnail image by rescaling to the default thumbnai ``N`` Rescales the image to ``N`` pixels wide. ``imageThumb=true`` and ``imageThumb=64`` are equivalent. ============== =========== +Headers: +~~~~~~~~ + +============== =========== +Header Description +============== =========== +Range Download a specified byte range. Examples: + + - ``bytes=0-9`` gets the first 10 bytes. + - ``bytes=10-19`` gets 10 bytes from the middle. + - ``bytes=-10`` gets the last 10 bytes. + - ``bytes=9-`` gets all bytes except the first 9. + + Only a single range is supported. The "If-Range" header is not supported. For more on the "Range" header, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests +============== =========== + +Examples +~~~~~~~~ + +A curl example of using the ``Range`` header to download the first 10 bytes of a file using its file id (database id): + +.. 
code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export FILE_ID=42 + export RANGE=0-9 + + curl -H "Range:bytes=$RANGE" $SERVER_URL/api/access/datafile/$FILE_ID + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "Range:bytes=0-9" https://demo.dataverse.org/api/access/datafile/42 + Multiple File ("bundle") download --------------------------------- diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index 2621a5e0b09..80b3f988953 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -36,13 +36,16 @@ import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import javax.inject.Inject; +import javax.ws.rs.ClientErrorException; import javax.ws.rs.NotFoundException; import javax.ws.rs.RedirectionException; import javax.ws.rs.ServiceUnavailableException; +import javax.ws.rs.core.HttpHeaders; import org.apache.tika.mime.MimeType; import org.apache.tika.mime.MimeTypeException; import org.apache.tika.mime.MimeTypes; @@ -401,52 +404,140 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\""); long contentSize; - boolean useChunkedTransfer = false; - //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) { + + // User may have requested a rangeHeader of bytes. + // Ranges are only supported when the size of the content + // stream is known (i.e., it's not a dynamically generated + // stream. 
+ List ranges = new ArrayList<>(); + String rangeHeader = null; + HttpHeaders headers = di.getRequestHttpHeaders(); + if (headers != null) { + rangeHeader = headers.getHeaderString("Range"); + } + long offset = 0; + long leftToRead = -1L; + // Moving the "left to read" var. here; - since we may need + // to start counting our rangeHeader bytes outside the main .write() + // loop, if it's a tabular file with a header. + if ((contentSize = getContentSize(storageIO)) > 0) { - logger.fine("Content size (retrieved from the AccessObject): " + contentSize); - httpHeaders.add("Content-Length", contentSize); + try { + ranges = getRanges(rangeHeader, contentSize); + } catch (Exception ex) { + logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage()); + throw new ClientErrorException("Error due to Range header: " + ex.getLocalizedMessage(), Response.Status.REQUESTED_RANGE_NOT_SATISFIABLE); + } + + if (ranges.isEmpty()) { + logger.fine("Content size (retrieved from the AccessObject): " + contentSize); + httpHeaders.add("Content-Length", contentSize); + } else { + // For now we only support a single rangeHeader. + long rangeContentSize = ranges.get(0).getLength(); + logger.fine("Content size (Range header in use): " + rangeContentSize); + httpHeaders.add("Content-Length", rangeContentSize); + + offset = ranges.get(0).getStart(); + leftToRead = rangeContentSize; + } } else { - //httpHeaders.add("Transfer-encoding", "chunked"); - //useChunkedTransfer = true; + // Content size unknown, must be a dynamically + // generated stream, such as a subsetting request. + // We do NOT want to support rangeHeader requests on such streams: + if (rangeHeader != null) { + throw new NotFoundException("Range headers are not supported on dynamically-generated content, such as tabular subsetting."); + } + } // (the httpHeaders map must be modified *before* writing any // data in the output stream!) 
int bufsize; byte[] bffr = new byte[4 * 8192]; - byte[] chunkClose = "\r\n".getBytes(); - // before writing out any bytes from the input stream, flush + // Before writing out any bytes from the input stream, write // any extra content, such as the variable header for the // subsettable files: if (storageIO.getVarHeader() != null) { + logger.fine("storageIO.getVarHeader().getBytes().length: " + storageIO.getVarHeader().getBytes().length); if (storageIO.getVarHeader().getBytes().length > 0) { - if (useChunkedTransfer) { - String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length); - outstream.write(chunkSizeLine.getBytes()); - } - outstream.write(storageIO.getVarHeader().getBytes()); - if (useChunkedTransfer) { - outstream.write(chunkClose); + // If a rangeHeader is not being requested, let's call that the normal case. + // Write the entire line of variable headers. Later, the rest of the file + // will be written. + if (ranges.isEmpty()) { + logger.fine("writing the entire variable header"); + outstream.write(storageIO.getVarHeader().getBytes()); + } else { + // Range requested. Since the output stream of a + // tabular file is made up of the varHeader and the body of + // the physical file, we should assume that the requested + // rangeHeader may span any portion of the combined stream. + // Thus we may or may not have to write the header, or a + // portion thereof. + int headerLength = storageIO.getVarHeader().getBytes().length; + if (offset >= headerLength) { + // We can skip the entire header. + // All we need to do is adjust the byte offset + // in the physical file; the number of bytes + // left to write stays unchanged, since we haven't + // written anything. + logger.fine("Skipping the variable header completely."); + offset -= headerLength; + } else { + // We need to write some portion of the header; + // Once we are done, we may or may not still have + // some bytes left to write from the main physical file. 
+ if (offset + leftToRead <= headerLength) { + // This is a more straightforward case - we just need to + // write a portion of the header, and then we are done! + logger.fine("Writing this many bytes of the variable header line: " + leftToRead); + outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, (int)offset + (int)leftToRead)); + // set "left to read" to zero, indicating that we are done: + leftToRead = 0; + } else { + // write the requested portion of the header: + logger.fine("Writing this many bytes of the variable header line: " + (headerLength - offset)); + outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, headerLength)); + // and adjust the file offset and remaining number of bytes accordingly: + leftToRead -= (headerLength - offset); + offset = 0; + } + + } } } } - while ((bufsize = instream.read(bffr)) != -1) { - if (useChunkedTransfer) { - String chunkSizeLine = String.format("%x\r\n", bufsize); - outstream.write(chunkSizeLine.getBytes()); + // Dynamic streams, etc. Normal operation. No leftToRead. + if (ranges.isEmpty()) { + logger.fine("Normal, non-range request of file id " + dataFile.getId()); + while ((bufsize = instream.read(bffr)) != -1) { + outstream.write(bffr, 0, bufsize); } - outstream.write(bffr, 0, bufsize); - if (useChunkedTransfer) { - outstream.write(chunkClose); + } else if (leftToRead > 0) { + // This is a rangeHeader request, and we still have bytes to read + // (for a tabular file, we may have already written enough + // bytes from the variable header!) + storageIO.setOffset(offset); + // Thinking about it, we could just do instream.skip(offset) + // here... But I would like to have this offset functionality + // in StorageIO, for any future cases where we may not + // be able to do that on the stream directly (?) -- L.A. + logger.fine("Range request of file id " + dataFile.getId()); + // Read a rangeHeader of bytes instead of the whole file. 
We'll count down as we write. + // For now we only support a single rangeHeader. + while ((bufsize = instream.read(bffr)) != -1) { + if ((leftToRead -= bufsize) > 0) { + // Just do a normal write. Potentially lots to go. Don't break. + outstream.write(bffr, 0, bufsize); + } else { + // Get those last bytes or bytes equal to bufsize. Last one. Then break. + outstream.write(bffr, 0, (int) leftToRead + bufsize); + break; + } } - } - if (useChunkedTransfer) { - String chunkClosing = "0\r\n\r\n"; - outstream.write(chunkClosing.getBytes()); } logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue()); @@ -585,4 +676,77 @@ private long getFileSize(DownloadInstance di, String extraHeader) { } return -1; } + + /** + * @param range "bytes 0-10" for example. Found in the "Range" HTTP header. + * @param fileSize File size in bytes. + * @throws RunTimeException on any problems processing the Range header. + */ + public List getRanges(String range, long fileSize) { + // Inspired by https://gist.github.com/davinkevin/b97e39d7ce89198774b4 + // via https://stackoverflow.com/questions/28427339/how-to-implement-http-byte-rangeHeader-requests-in-spring-mvc/28479001#28479001 + List ranges = new ArrayList<>(); + + if (range != null) { + logger.fine("Range header supplied: " + range); + + // Technically this regex supports multiple ranges. + // Below we have a check to enforce a single range. + if (!range.matches("^bytes=\\d*-\\d*(,\\d*-\\d*)*$")) { + throw new RuntimeException("The format is bytes=- where start and end are optional."); + } + + // The 6 is to remove "bytes=" + String[] parts = range.substring(6).split(","); + if (parts.length > 1) { + // Only allow a single range. + throw new RuntimeException("Only one range is allowed."); + } + // This loop is here in case we ever want to support multiple ranges. 
+ for (String part : parts) { + + long start = getRangeStart(part); + long end = getRangeEnd(part); + + if (start == -1) { + // start does not exist. Base start off of how many bytes from end. + start = fileSize - end; + end = fileSize - 1; + } else if (end == -1 || end > fileSize - 1) { + // Set end when it doesn't exist. + // Also, automatically set end to size of file if end is beyond + // the file size (rather than throwing an error). + end = fileSize - 1; + } + + if (start > end) { + throw new RuntimeException("Start is larger than end or size of file."); + } + + ranges.add(new Range(start, end)); + + } + } + + return ranges; + } + + /** + * @return Return a positive long or -1 if start does not exist. + */ + public static long getRangeStart(String part) { + // Get everything before the "-". + String start = part.substring(0, part.indexOf("-")); + return (start.length() > 0) ? Long.parseLong(start) : -1; + } + + /** + * @return Return a positive long or -1 if end does not exist. + */ + public static long getRangeEnd(String part) { + // Get everything after the "-". + String end = part.substring(part.indexOf("-") + 1, part.length()); + return (end.length() > 0) ? Long.parseLong(end) : -1; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java new file mode 100644 index 00000000000..2021219b1a9 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java @@ -0,0 +1,28 @@ +package edu.harvard.iq.dataverse.dataaccess; + +public class Range { + + // Used to set the offset, how far to skip into the file. + private final long start; + // Used to calculate the length. + private final long end; + + public Range(long start, long end) { + this.start = start; + this.end = end; + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; + } + + // Used to determine when to stop reading. 
+ public long getLength() { + return end - start + 1; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 2f66eec5f4c..b0e9648285c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -191,6 +191,12 @@ public boolean canWrite() { /*private int status;*/ private long size; + + /** + * Where in the file to seek to when reading (default is zero bytes, the + * start of the file). + */ + private long offset; private String mimeType; private String fileName; @@ -272,6 +278,10 @@ public long getSize() { return size; } + public long getOffset() { + return offset; + } + public InputStream getInputStream() throws IOException { return in; } @@ -381,6 +391,18 @@ public void setSize(long s) { size = s; } + // open() has already been called. Now we can skip, if need be. + public void setOffset(long offset) throws IOException { + InputStream inputStream = getInputStream(); + if (inputStream != null) { + inputStream.skip(offset); + // The skip has already been done. Why not record it. 
+ this.offset = offset; + } else { + throw new IOException("Could not skip into InputStream because it is null"); + } + } + public void setInputStream(InputStream is) { in = is; } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java new file mode 100644 index 00000000000..6de52951077 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java @@ -0,0 +1,148 @@ +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.dataaccess.Range; +import java.util.List; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import org.junit.Before; +import org.junit.Test; + +public class DownloadInstanceWriterTest { + + DownloadInstanceWriter diw; + + @Before + public void setUpClass() { + diw = new DownloadInstanceWriter(); + } + + // Get first 10 bytes. + @Test + public void testGetRange0to9of100() { + List ranges = diw.getRanges("bytes=0-9", 100); + assertEquals(0, ranges.get(0).getStart()); + assertEquals(9, ranges.get(0).getEnd()); + assertEquals(10, ranges.get(0).getLength()); + } + + // Don't start reading until 90th byte, get rest. + @Test + public void testGetRange90toNullof100() { + List ranges = diw.getRanges("bytes=90-", 100); + assertEquals(90, ranges.get(0).getStart()); + assertEquals(99, ranges.get(0).getEnd()); + assertEquals(10, ranges.get(0).getLength()); + } + + // Get last 10 bytes. 
+ @Test + public void testGetRangeNullto5of10() { + List ranges = diw.getRanges("bytes=-10", 100); + assertEquals(90, ranges.get(0).getStart()); + assertEquals(99, ranges.get(0).getEnd()); + assertEquals(10, ranges.get(0).getLength()); + } + + // Get last byte + @Test + public void testGetRange100toNullof101() { + List ranges = diw.getRanges("bytes=100-", 101); + assertEquals(100, ranges.get(0).getStart()); + assertEquals(100, ranges.get(0).getEnd()); + assertEquals(1, ranges.get(0).getLength()); + } + + // When you request a range beyond the size of the file we just + // give you what we can (rather than throwing an error). + @Test + public void testGetRangeBeyondFilesize() { + List ranges = diw.getRanges("bytes=100-149", 120); + assertEquals(100, ranges.get(0).getStart()); + assertEquals(119, ranges.get(0).getEnd()); + assertEquals(20, ranges.get(0).getLength()); + } + + // Attempt to get invalid range (start larger than end). + @Test + public void testGetRangeInvalidStartLargerThanEnd() { + Exception expectedException = null; + try { + List ranges = diw.getRanges("bytes=20-10", 100); + } catch (Exception ex) { + // "Start is larger than end or size of file." + System.out.println("exception: " + ex); + expectedException = ex; + } + assertNotNull(expectedException); + } + + // Attempt to get invalid range (multiple ranges). + @Test + public void testGetRangeInvalidMultipleRanges() { + Exception expectedException = null; + try { + List ranges = diw.getRanges("bytes=0-9,90-99", 100); + } catch (Exception ex) { + // "Only one range is allowed." + System.out.println("exception: " + ex); + expectedException = ex; + } + assertNotNull(expectedException); + } + + // Attempt to get invalid range (multiple ranges, beyond file size). + @Test + public void testGetRangeInvalidMultipleRangesBeyondFileSize() { + Exception expectedException = null; + try { + List ranges = diw.getRanges("bytes=0-9,90-99", 40); + } catch (Exception ex) { + // "Only one range is allowed." 
+ // We report the multiple ranges error before reporting the "beyond filesize" error. + System.out.println("exception: " + ex); + expectedException = ex; + } + assertNotNull(expectedException); + } + + // Test "junk" instead of "bytes=0-10" + @Test + public void testGetRangeInvalidJunk() { + Exception expectedException = null; + try { + List ranges = diw.getRanges("junk", 100); + } catch (Exception ex) { + // "The format is bytes=- where start and end are optional." + System.out.println("exception: " + ex); + expectedException = ex; + } + assertNotNull(expectedException); + } + + // Get first 10 bytes and last 10 bytes. Not currently supported. + @Test + public void testGetRanges0to0and90toNull() { + Exception expectedException = null; + try { + List ranges = diw.getRanges("bytes=0-9,-10", 100); + // These asserts on start, end, etc. don't actually + // run because we throw an expection that multiple + // ranges are not supported. + // + // first range + assertEquals(0, ranges.get(0).getStart()); + assertEquals(9, ranges.get(0).getEnd()); + assertEquals(10, ranges.get(0).getLength()); + // second range + assertEquals(90, ranges.get(1).getStart()); + assertEquals(99, ranges.get(1).getEnd()); + assertEquals(10, ranges.get(1).getLength()); + } catch (Exception ex) { + // Only one range is allowed. 
+ System.out.println("exception: " + ex); + expectedException = ex; + } + assertNotNull(expectedException); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 9fa06e28a0d..b82514ce083 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -11,8 +11,12 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import java.io.File; +import java.io.IOException; import static java.lang.Thread.sleep; import java.math.BigDecimal; +import java.nio.file.Path; +import java.nio.file.Paths; import java.text.MessageFormat; import java.util.Arrays; import java.util.Collections; @@ -27,6 +31,7 @@ import static javax.ws.rs.core.Response.Status.NOT_FOUND; import static javax.ws.rs.core.Response.Status.NO_CONTENT; import static javax.ws.rs.core.Response.Status.OK; +import static javax.ws.rs.core.Response.Status.REQUESTED_RANGE_NOT_SATISFIABLE; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; import static junit.framework.Assert.assertEquals; import org.hamcrest.CoreMatchers; @@ -38,6 +43,7 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import org.junit.Ignore; public class FilesIT { @@ -1579,5 +1585,203 @@ private void dashes(){ private void msgt(String m){ dashes(); msg(m); dashes(); } - + + @Test + public void testRange() throws IOException { + + Response createUser = UtilIT.createRandomUser(); +// createUser.prettyPrint(); + String authorUsername = UtilIT.getUsernameFromResponse(createUser); + String authorApiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverse = UtilIT.createRandomDataverse(authorApiToken); +// createDataverse.prettyPrint(); + createDataverse.then().assertThat() 
+ .statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, authorApiToken); +// createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId"); + + Path pathToTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file.txt"); + String contentOfTxt = "" + + "first is the worst\n" + + "second is the best\n" + + "third is the one with the hairy chest\n"; + java.nio.file.Files.write(pathToTxt, contentOfTxt.getBytes()); + + Response uploadFileTxt = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTxt.toString(), authorApiToken); +// uploadFileTxt.prettyPrint(); + uploadFileTxt.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("file.txt")); + + Integer fileIdTxt = JsonPath.from(uploadFileTxt.body().asString()).getInt("data.files[0].dataFile.id"); + + // Download the whole file. + Response downloadTxtNoArgs = UtilIT.downloadFile(fileIdTxt, null, null, null, authorApiToken); + downloadTxtNoArgs.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("first is the worst\n" + + "second is the best\n" + + "third is the one with the hairy chest\n")); + + // Download the first 10 bytes. + Response downloadTxtFirst10 = UtilIT.downloadFile(fileIdTxt, "0-9", null, null, authorApiToken); + downloadTxtFirst10.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("first is t")); + + // Download the last 6 bytes. 
+ Response downloadTxtLast6 = UtilIT.downloadFile(fileIdTxt, "-6", null, null, authorApiToken); + downloadTxtLast6.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("chest\n")); + + // Download some bytes from the middle. + Response downloadTxtMiddle = UtilIT.downloadFile(fileIdTxt, "09-19", null, null, authorApiToken); + downloadTxtMiddle.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("the worst\ns")); + + // Skip the first 9 bytes ("first is ") and download the rest. + Response downloadTxtSkipFirst9 = UtilIT.downloadFile(fileIdTxt, "9-", null, null, authorApiToken); + downloadTxtSkipFirst9.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("the worst\n" + + "second is the best\n" + + "third is the one with the hairy chest\n")); + + Path pathToCsv = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv"); + String contentOfCsv = "" + + "name,pounds,species\n" + + "Marshall,40,dog\n" + + "Tiger,17,cat\n" + + "Panther,21,cat\n"; + java.nio.file.Files.write(pathToCsv, contentOfCsv.getBytes()); + + Response uploadFileCsv = UtilIT.uploadFileViaNative(datasetId.toString(), pathToCsv.toString(), authorApiToken); +// uploadFileCsv.prettyPrint(); + uploadFileCsv.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("data.csv")); + + Integer fileIdCsv = JsonPath.from(uploadFileCsv.body().asString()).getInt("data.files[0].dataFile.id"); + + assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToCsv, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", authorApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + // Just the tabular file, not the original, no byte range. Vanilla. 
+ Response downloadFileNoArgs = UtilIT.downloadFile(fileIdCsv, null, null, null, authorApiToken); + downloadFileNoArgs.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("name\tpounds\tspecies\n" + + "\"Marshall\"\t40\t\"dog\"\n" + + "\"Tiger\"\t17\t\"cat\"\n" + + "\"Panther\"\t21\t\"cat\"\n")); + + // first 10 bytes of tabular format + Response downloadTabFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", null, null, authorApiToken); + downloadTabFirstTen.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("name\tpound")); + + // first 30 bytes of tabular format + Response downloadTabFirst30 = UtilIT.downloadFile(fileIdCsv, "0-29", null, null, authorApiToken); + downloadTabFirst30.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("name\tpounds\tspecies\n" + + "\"Marshall\"")); + + // last 16 bytes of tabular format + Response downloadTabLast16 = UtilIT.downloadFile(fileIdCsv, "-16", null, null, authorApiToken); + downloadTabLast16.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("nther\"\t21\t\"cat\"\n")); + + Response downloadTabMiddleBytesHeader = UtilIT.downloadFile(fileIdCsv, "1-7", null, null, authorApiToken); + downloadTabMiddleBytesHeader.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("ame\tpou")); + + Response downloadTabMiddleBytesBody = UtilIT.downloadFile(fileIdCsv, "31-43", null, null, authorApiToken); + downloadTabMiddleBytesBody.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("40\t\"dog\"\n" + + "\"Tig")); + + // Original version of tabular file (CSV in this case). 
+ Response downloadOrig = UtilIT.downloadFile(fileIdCsv, null, "original", null, authorApiToken); + downloadOrig.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("name,pounds,species\n" + + "Marshall,40,dog\n" + + "Tiger,17,cat\n" + + "Panther,21,cat\n")); + + // first ten bytes + Response downloadOrigFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", "original", null, authorApiToken); + downloadOrigFirstTen.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("name,pound")); + + // last ten bytes + Response downloadOrigLastTen = UtilIT.downloadFile(fileIdCsv, "-10", "original", null, authorApiToken); + downloadOrigLastTen.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("er,21,cat\n")); + + // middle bytes + Response downloadOrigMiddle = UtilIT.downloadFile(fileIdCsv, "29-39", "original", null, authorApiToken); + downloadOrigMiddle.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(equalTo("40,dog\nTige")); + + String pathToZipWithImage = "scripts/search/data/binary/trees.zip"; + Response uploadFileZipWithImage = UtilIT.uploadFileViaNative(datasetId.toString(), pathToZipWithImage, authorApiToken); +// uploadFileZipWithImage.prettyPrint(); + uploadFileZipWithImage.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("trees.png")); + + Integer fileIdPng = JsonPath.from(uploadFileZipWithImage.body().asString()).getInt("data.files[0].dataFile.id"); + + String trueOrWidthInPixels = "true"; + Response getFileThumbnailImageA = UtilIT.getFileThumbnail(fileIdPng.toString(), trueOrWidthInPixels, authorApiToken); + getFileThumbnailImageA.then().assertThat() + .contentType("image/png") + .statusCode(OK.getStatusCode()); + + // Yes, you can get a range of bytes from a thumbnail. 
+ String imageThumbPixels = "true"; + Response downloadThumbnail = UtilIT.downloadFile(fileIdPng, "0-149", null, imageThumbPixels, authorApiToken); +// downloadThumbnail.prettyPrint(); + downloadThumbnail.then().assertThat().statusCode(OK.getStatusCode()); + + Response multipleRangesNotSupported = UtilIT.downloadFile(fileIdTxt, "0-9,20-29", null, null, authorApiToken); + // "Error due to Range header: Only one range is allowed." + multipleRangesNotSupported.prettyPrint(); + multipleRangesNotSupported.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode()); + + Response startLargerThanEndError = UtilIT.downloadFile(fileIdTxt, "20-10", null, null, authorApiToken); + // "Error due to Range header: Start is larger than end or size of file." + startLargerThanEndError.prettyPrint(); + startLargerThanEndError.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode()); + + Response rangeBeyondFileSize = UtilIT.downloadFile(fileIdTxt, "88888-99999", null, null, authorApiToken); + // "Error due to Range header: Start is larger than end or size of file." 
+ rangeBeyondFileSize.prettyPrint(); + rangeBeyondFileSize.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode()); + +// Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken); +// publishDataverse.then().assertThat().statusCode(OK.getStatusCode()); +// Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken); +// publishDataset.then().assertThat().statusCode(OK.getStatusCode()); + + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 1240585929f..03145f4c01b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -725,13 +725,33 @@ static Response downloadFile(Integer fileId) { } static Response downloadFile(Integer fileId, String apiToken) { - return given() - /** - * Data Access API does not support X-Dataverse-key header - - * https://github.com/IQSS/dataverse/issues/2662 - */ - //.header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/access/datafile/" + fileId + "?key=" + apiToken); + String nullByteRange = null; + String nullFormat = null; + String nullImageThumb = null; + return downloadFile(fileId, nullByteRange, nullFormat, nullImageThumb, apiToken); + } + + static Response downloadFile(Integer fileId, String byteRange, String format, String imageThumb, String apiToken) { + RequestSpecification requestSpecification = given(); + if (byteRange != null) { + requestSpecification.header("Range", "bytes=" + byteRange); + } + String optionalFormat = ""; + if (format != null) { + optionalFormat = "&format=" + format; + } + String optionalImageThumb = ""; + if (imageThumb != null) { + optionalImageThumb = "&imageThumb=" + imageThumb; + } + /** + * Data Access API does not support X-Dataverse-key header - + * https://github.com/IQSS/dataverse/issues/2662 + * + * Actually, these days it does. We could switch. 
+ */ + //.header(API_TOKEN_HTTP_HEADER, apiToken) + return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken + optionalFormat + optionalImageThumb); } static Response downloadTabularFile(Integer fileId) {