From e82771a967dc8db0a6d39b289938cabf9995f796 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 3 Sep 2021 16:53:33 -0400
Subject: [PATCH 01/26] support HTTP "Range" header #6937

Support is not full. As noted in the docs, only a single range is
supported and the "If-Range" header is not supported.
---
 doc/sphinx-guides/source/api/dataaccess.rst   |  16 +++
 .../dataverse/api/DownloadInstanceWriter.java | 114 ++++++++++++++++-
 .../iq/dataverse/dataaccess/FileAccessIO.java |   1 +
 .../iq/dataverse/dataaccess/Range.java        |  30 +++++
 .../iq/dataverse/dataaccess/S3AccessIO.java   |   2 +
 .../iq/dataverse/dataaccess/StorageIO.java    |  14 +++
 .../api/DownloadInstanceWriterTest.java       | 116 ++++++++++++++++++
 .../edu/harvard/iq/dataverse/api/FilesIT.java |  49 +++++++-
 .../edu/harvard/iq/dataverse/api/UtilIT.java  |  24 ++--
 .../dataaccess/FileAccessIOTest.java          |  24 ++++
 10 files changed, 380 insertions(+), 10 deletions(-)
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
 create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst
index 55f7f021887..3973428a5a3 100755
--- a/doc/sphinx-guides/source/api/dataaccess.rst
+++ b/doc/sphinx-guides/source/api/dataaccess.rst
@@ -131,6 +131,22 @@ true            Generates a thumbnail image by rescaling to the default thumbnai
 ``N``           Rescales the image to ``N`` pixels wide. ``imageThumb=true`` and ``imageThumb=64`` are equivalent.
 ==============  ===========
 
+Headers:
+~~~~~~~~
+
+==============  ===========
+Header          Description
+==============  ===========
+Range           Download a specified byte range. Examples:
+
+                - ``bytes=0-9`` gets the first 10 bytes.
+                - ``bytes=10-19`` gets 20 bytes from the middle.
+                - ``bytes=-10`` gets the last 10 bytes.
+                - ``bytes=9-`` gets all bytes except the first 10.
+
+                Only a single range is supported. The "If-Range" header is not supported. For more on the "Range" header, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
+==============  ===========
+
 Multiple File ("bundle") download
 ---------------------------------
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 2621a5e0b09..c8c9de30215 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -80,7 +80,24 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
             StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, daReq);
 
             if (storageIO != null) {
+                List<Range> ranges = new ArrayList<>();
+                long fileSize = storageIO.getDataFile().getFilesize();
                 try {
+                    String range = di.getRequestHttpHeaders().getHeaderString("Range");
+                    long offset = 0;
+                    try {
+                        ranges = getRanges(range, fileSize);
+                    } catch (Exception ex) {
+                        logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
+                        // The message starts with "Datafile" because otherwise the message is not passed
+                        // to the user due to how WebApplicationExceptionHandler works.
+                        throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
+                    }
+                    if (!ranges.isEmpty()) {
+                        // For now we only support a single range.
+                        offset = ranges.get(0).getStart();
+                    }
+                    storageIO.setOffset(offset);
                     storageIO.open();
                 } catch (IOException ioex) {
                     //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
@@ -403,9 +420,14 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         long contentSize;
                         boolean useChunkedTransfer = false;
                         //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) {
-                        if ((contentSize = getContentSize(storageIO)) > 0) {
+                        if ((contentSize = getContentSize(storageIO)) > 0 && ranges.isEmpty()) {
                             logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
                             httpHeaders.add("Content-Length", contentSize);
+                        } else if (ranges.isEmpty()) {
+                            // For now we only support a single range.
+                            long rangeContentSize = ranges.get(0).getLength();
+                            logger.fine("Content size (Range header in use): " + rangeContentSize);
+                            httpHeaders.add("Content-Length", rangeContentSize);
                         } else {
                             //httpHeaders.add("Transfer-encoding", "chunked");
                             //useChunkedTransfer = true;
@@ -433,12 +455,26 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             }
                         }
 
+                        // Default to reading the whole file. We'll count down as we write.
+                        long leftToRead = fileSize;
+                        if (!ranges.isEmpty()) {
+                            // Read a range of bytes instead of the whole file.
+                            // For now we only support a single range.
+                            leftToRead = ranges.get(0).getLength();
+                        }
                         while ((bufsize = instream.read(bffr)) != -1) {
                             if (useChunkedTransfer) {
                                 String chunkSizeLine = String.format("%x\r\n", bufsize);
                                 outstream.write(chunkSizeLine.getBytes());
                             }
-                            outstream.write(bffr, 0, bufsize);
+                            if ((leftToRead -= bufsize) > 0) {
+                                // Just do a normal write. Potentially lots to go. Don't break.
+                                outstream.write(bffr, 0, bufsize);
+                            } else {
+                                // Get those last bytes or bytes equal to bufsize. Last one. Then break.
+                                outstream.write(bffr, 0, (int) leftToRead + bufsize);
+                                break;
+                            }
                             if (useChunkedTransfer) {
                                 outstream.write(chunkClose);
                             }
@@ -585,4 +621,78 @@ private long getFileSize(DownloadInstance di, String extraHeader) {
         }
         return -1;
     }
+
+    /**
+     * @param range "bytes 0-10" for example. Found in the "Range" HTTP header.
+     * @param fileSize File size in bytes.
+     * @throws RunTimeException on any problems processing the Range header.
+     */
+    public List<Range> getRanges(String range, long fileSize) {
+        // Inspired by https://gist.github.com/davinkevin/b97e39d7ce89198774b4
+        // via https://stackoverflow.com/questions/28427339/how-to-implement-http-byte-range-requests-in-spring-mvc/28479001#28479001
+        List<Range> ranges = new ArrayList<>();
+
+        if (range != null) {
+            logger.fine("Range header supplied: " + range);
+
+            // Technically this regex supports multiple ranges.
+            // Below we have a check to enforce a single range.
+            if (!range.matches("^bytes=\\d*-\\d*(,\\d*-\\d*)*$")) {
+                throw new RuntimeException("The format is bytes=<range-start>-<range-end> where start and end are optional.");
+            }
+
+            if (ranges.isEmpty()) {
+                // The 6 is to remove "bytes="
+
+                for (String part : range.substring(6).split(",")) {
+
+                    long start = getRangeStart(part);
+                    long end = getRangeEnd(part);
+
+                    if (start == -1) {
+                        // start does not exist. Base start off of how many bytes from end.
+                        start = fileSize - end;
+                        end = fileSize - 1;
+                    } else if (end == -1 || end > fileSize - 1) {
+                        // Set end when it doesn't exist.
+                        // Also, automatically set end to size of file if end is beyond
+                        // the file size (rather than throwing an error).
+                        end = fileSize - 1;
+                    }
+
+                    if (start > end) {
+                        throw new RuntimeException("Start is larger than end.");
+                    }
+
+                    if (ranges.size() < 1) {
+                        ranges.add(new Range(start, end));
+                    } else {
+                        throw new RuntimeException("Only one range is allowed.");
+                    }
+
+                }
+            }
+        }
+
+        return ranges;
+    }
+
+    /**
+     * @return Return a positive long or -1 if start does not exist.
+     */
+    public static long getRangeStart(String part) {
+        // Get everything before the "-".
+        String start = part.substring(0, part.indexOf("-"));
+        return (start.length() > 0) ? Long.parseLong(start) : -1;
+    }
+
+    /**
+     * @return Return a positive long or -1 if end does not exist.
+     */
+    public static long getRangeEnd(String part) {
+        // Get everything after the "-".
+        String end = part.substring(part.indexOf("-") + 1, part.length());
+        return (end.length() > 0) ? Long.parseLong(end) : -1;
+    }
+
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
index bd0549622f0..9c55caa3360 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
@@ -509,6 +509,7 @@ public FileInputStream openLocalFileAsInputStream () {
 
         try {
             in = new FileInputStream(getFileSystemPath().toFile());
+            in.skip(this.getOffset());
         } catch (IOException ex) {
             // We don't particularly care what the reason why we have
             // failed to access the file was.
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
new file mode 100644
index 00000000000..e07d6aaae94
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
@@ -0,0 +1,30 @@
+package edu.harvard.iq.dataverse.dataaccess;
+
+public class Range {
+
+    // Used to set the offset, how far to skip into the file.
+    private final long start;
+    // Used to calculate the length.
+    private final long end;
+    // Used to determine when to stop reading.
+    private final long length;
+
+    public Range(long start, long end) {
+        this.start = start;
+        this.end = end;
+        this.length = end - start + 1;
+    }
+
+    public long getStart() {
+        return start;
+    }
+
+    public long getEnd() {
+        return end;
+    }
+
+    public long getLength() {
+        return length;
+    }
+
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
index 80fbbd103e7..52bb2cd5895 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -282,6 +282,8 @@ public InputStream getInputStream() throws IOException {
 
         setChannel(Channels.newChannel(super.getInputStream()));
 
+        super.getInputStream().skip(super.getOffset());
+
         return super.getInputStream();
     }
     
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
index 2f66eec5f4c..0387561a075 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
@@ -191,6 +191,12 @@ public boolean canWrite() {
 
     /*private int status;*/
     private long size;
+
+    /**
+     * Where in the file to seek to when reading (default is zero bytes, the
+     * start of the file).
+     */
+    private long offset;
     
     private String mimeType;
     private String fileName;
@@ -272,6 +278,10 @@ public long getSize() {
         return size;
     }
 
+    public long getOffset() {
+        return offset;
+    }
+
     public InputStream getInputStream() throws IOException {
         return in;
     }
@@ -381,6 +391,10 @@ public void setSize(long s) {
         size = s;
     }
 
+    public void setOffset(long offset) {
+        this.offset = offset;
+    }
+
     public void setInputStream(InputStream is) {
         in = is;
     }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
new file mode 100644
index 00000000000..a59bc497c8b
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
@@ -0,0 +1,116 @@
+package edu.harvard.iq.dataverse.api;
+
+import edu.harvard.iq.dataverse.dataaccess.Range;
+import java.util.List;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import org.junit.Before;
+import org.junit.Test;
+
+public class DownloadInstanceWriterTest {
+
+    DownloadInstanceWriter diw;
+
+    @Before
+    public void setUpClass() {
+        diw = new DownloadInstanceWriter();
+    }
+
+    // Get first 10 bytes.
+    @Test
+    public void testGetRange0to9of100() {
+        List<Range> ranges = diw.getRanges("bytes=0-9", 100);
+        assertEquals(0, ranges.get(0).getStart());
+        assertEquals(9, ranges.get(0).getEnd());
+        assertEquals(10, ranges.get(0).getLength());
+    }
+
+    // Don't start reading until 90th byte, get rest.
+    @Test
+    public void testGetRange90toNullof100() {
+        List<Range> ranges = diw.getRanges("bytes=90-", 100);
+        assertEquals(90, ranges.get(0).getStart());
+        assertEquals(99, ranges.get(0).getEnd());
+        assertEquals(10, ranges.get(0).getLength());
+    }
+
+    // Get last 10 bytes.
+    @Test
+    public void testGetRangeNullto5of10() {
+        List<Range> ranges = diw.getRanges("bytes=-10", 100);
+        assertEquals(90, ranges.get(0).getStart());
+        assertEquals(99, ranges.get(0).getEnd());
+        assertEquals(10, ranges.get(0).getLength());
+    }
+
+    // Get last byte
+    @Test
+    public void testGetRange100toNullof101() {
+        List<Range> ranges = diw.getRanges("bytes=100-", 101);
+        assertEquals(100, ranges.get(0).getStart());
+        assertEquals(100, ranges.get(0).getEnd());
+        assertEquals(1, ranges.get(0).getLength());
+    }
+
+    // When you request a range beyond the size of the file we just
+    // give you what we can (rather than throwing an error). 
+    @Test
+    public void testGetRangeBeyondFilesize() {
+        List<Range> ranges = diw.getRanges("bytes=100-149", 120);
+        assertEquals(100, ranges.get(0).getStart());
+        assertEquals(119, ranges.get(0).getEnd());
+        assertEquals(20, ranges.get(0).getLength());
+    }
+
+    // Attempt to get invalid range (start larger than end).
+    @Test
+    public void testGetRangeInvalidStartLargerThanEnd() {
+        Exception expectedException = null;
+        try {
+            List<Range> ranges = diw.getRanges("bytes=20-10", 100);
+        } catch (Exception ex) {
+            System.out.println("exeption: " + ex);
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+    }
+
+    // Test "junk" instead of "bytes=0-10"
+    @Test
+    public void testGetRangeInvalidJunk() {
+        Exception expectedException = null;
+        try {
+            List<Range> ranges = diw.getRanges("junk", 100);
+        } catch (Exception ex) {
+            System.out.println("exeption: " + ex);
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+    }
+
+    // Get first 10 bytes and last 10 bytes. Not currently supported.
+    @Test
+    public void testGetRanges0to0and90toNull() {
+        Exception expectedException = null;
+        try {
+            List<Range> ranges = diw.getRanges("bytes=0-9,-10", 100);
+            // These asserts on start, end, etc. don't actually
+            // run because we throw an expection that multiple
+            // ranges are not supported.
+            //
+            // first range
+            assertEquals(0, ranges.get(0).getStart());
+            assertEquals(9, ranges.get(0).getEnd());
+            assertEquals(10, ranges.get(0).getLength());
+            // second range
+            assertEquals(90, ranges.get(1).getStart());
+            assertEquals(99, ranges.get(1).getEnd());
+            assertEquals(10, ranges.get(1).getLength());
+        } catch (Exception ex) {
+            System.out.println("exeption: " + ex);
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+    }
+
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index 9fa06e28a0d..c824fbf7182 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -38,6 +38,7 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import org.junit.Ignore;
 
 public class FilesIT {
 
@@ -1579,5 +1580,51 @@ private void dashes(){
     private void msgt(String m){
         dashes(); msg(m); dashes();
     }
-    
+
+    @Ignore
+    @Test
+    public void test2toNull() {
+        Integer localFile = 15;
+        Integer s3file = 16;
+        Integer fileId = 0;
+        fileId = localFile;
+        String nullApiToken = null;
+        String byteRange = null;
+        byteRange = "2-";
+        // java.lang.IndexOutOfBoundsException: Range [0, 0 + -2) out of bounds for length 32768
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+        downloadFile.prettyPrint();
+    }
+
+    @Ignore
+    @Test
+    public void test0to100() {
+        Integer localFile = 16;
+        Integer s3file = 11;
+        Integer fileId = 0;
+        fileId = localFile;
+        fileId = s3file;
+        String nullApiToken = null;
+        String byteRange = null;
+        byteRange = "0-9"; // first ten
+        byteRange = "-9"; // last ten
+        byteRange = "9-"; // last ten
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+        downloadFile.prettyPrint();
+    }
+
+    @Ignore
+    @Test
+    public void testMultipleRanges() {
+        Integer localFile = 16;
+        Integer s3file = 11;
+        Integer fileId = 0;
+        fileId = localFile;
+//        fileId = s3file;
+        String nullApiToken = null;
+        String byteRange = null;
+        byteRange = "0-9,90-99";
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+        downloadFile.prettyPrint();
+    }    
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
index d29d3761f22..a040c3506c3 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -720,13 +720,23 @@ static Response downloadFile(Integer fileId) {
     }
 
     static Response downloadFile(Integer fileId, String apiToken) {
-        return given()
-                /**
-                 * Data Access API does not support X-Dataverse-key header -
-                 * https://github.com/IQSS/dataverse/issues/2662
-                 */
-                //.header(API_TOKEN_HTTP_HEADER, apiToken)
-                .get("/api/access/datafile/" + fileId + "?key=" + apiToken);
+        String nullByteRange = null;
+        return downloadFile(fileId, nullByteRange, apiToken);
+    }
+
+    static Response downloadFile(Integer fileId, String byteRange, String apiToken) {
+        RequestSpecification requestSpecification = given();
+        if (byteRange != null) {
+            requestSpecification.header("Range", "bytes=" + byteRange);
+        }
+        /**
+         * Data Access API does not support X-Dataverse-key header -
+         * https://github.com/IQSS/dataverse/issues/2662
+         *
+         * Actually, these days it does. We could switch.
+         */
+        //.header(API_TOKEN_HTTP_HEADER, apiToken)
+        return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken);
     }
     
     static Response downloadTabularFile(Integer fileId) {
diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
index 286fe8a6595..ad31e4adee3 100644
--- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
@@ -269,6 +269,30 @@ public void testOpenLocalFileAsInputStream() throws IOException {
         assertNotNull(datasetAccess.openLocalFileAsOutputStream());
     }
 
+    /**
+     * Test of openLocalFileAsInputStream (with an offset) and
+     * openLocalFileAsOutputStream method, of class FileAccessIO.
+     *
+     * @throws java.io.IOException if test is broken
+     */
+    @Test
+    public void testOpenLocalFileAsInputStreamWithOffset() throws IOException {
+        // Start reading a few bytes in.
+        datasetAccess.setOffset(11);
+        BufferedReader br = new BufferedReader(new InputStreamReader(datasetAccess.openLocalFileAsInputStream()));
+        StringBuilder sb = new StringBuilder();
+        String line;
+        while ((line = br.readLine()) != null) {
+            sb.append(line);
+            sb.append('\n');
+        }
+        // Put offset back.
+        datasetAccess.setOffset(0);
+        // Instead of "This is a test string"
+        assertEquals("est string\n", sb.toString());
+        assertNotNull(datasetAccess.openLocalFileAsOutputStream());
+    }
+
     /**
      * Test of getAuxFileAsInputStream method, of class FileAccessIO.
      *

From 875d08de3583b63a63bb908059c94b838121f907 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Wed, 8 Sep 2021 14:03:17 -0400
Subject: [PATCH 02/26] avoid NPE when HttpHeaders is null #6937

This was null for aux files, for example.
---
 .../harvard/iq/dataverse/api/DownloadInstanceWriter.java   | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index c8c9de30215..0f5ccfadb0a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -43,6 +43,7 @@
 import javax.ws.rs.NotFoundException;
 import javax.ws.rs.RedirectionException;
 import javax.ws.rs.ServiceUnavailableException;
+import javax.ws.rs.core.HttpHeaders;
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
@@ -83,7 +84,11 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                 List<Range> ranges = new ArrayList<>();
                 long fileSize = storageIO.getDataFile().getFilesize();
                 try {
-                    String range = di.getRequestHttpHeaders().getHeaderString("Range");
+                    String range = null;
+                    HttpHeaders headers = di.getRequestHttpHeaders();
+                    if (headers != null) {
+                        range = headers.getHeaderString("Range");
+                    }
                     long offset = 0;
                     try {
                         ranges = getRanges(range, fileSize);

From fb8d5597f68cf0219b9111bfb23f2c784f1903af Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Wed, 8 Sep 2021 15:17:40 -0400
Subject: [PATCH 03/26] set file size properly for original tabular files #6937

---
 .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 0f5ccfadb0a..5537693c670 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -293,6 +293,8 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
 
                             if ("original".equals(di.getConversionParamValue())) {
                                 logger.fine("stored original of an ingested file requested");
+                                // Get orginal file size before storageIO is reassigned to prevent an NPE.
+                                fileSize = storageIO.getDataFile().getOriginalFileSize();
                                 storageIO = StoredOriginalFile.retreive(storageIO);
                             } else {
                                 // Other format conversions: 

From 809d20b7a15aaa31d9e332d07f54c13894038fc5 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Wed, 8 Sep 2021 15:51:19 -0400
Subject: [PATCH 04/26] send content size from range #6937

---
 .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 5537693c670..c3b3350a355 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -430,7 +430,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if ((contentSize = getContentSize(storageIO)) > 0 && ranges.isEmpty()) {
                             logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
                             httpHeaders.add("Content-Length", contentSize);
-                        } else if (ranges.isEmpty()) {
+                        } else if (!ranges.isEmpty()) {
                             // For now we only support a single range.
                             long rangeContentSize = ranges.get(0).getLength();
                             logger.fine("Content size (Range header in use): " + rangeContentSize);

From 29ea2564a96bf68790c2282108922b12971ae897 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Thu, 30 Sep 2021 16:52:30 -0400
Subject: [PATCH 05/26] move range check down so storageIO.getSize() works
 #6937

Also start writing real integration tests.

A FIXME now notes that ranges don't currently work well with tabular
files, due to how we store the header.
---
 .../dataverse/api/DownloadInstanceWriter.java | 109 +++++++++-------
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 116 ++++++++++++++++--
 .../edu/harvard/iq/dataverse/api/UtilIT.java  |  16 ++-
 3 files changed, 187 insertions(+), 54 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index c3b3350a355..c7b22f0e980 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -81,28 +81,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
             StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, daReq);
 
             if (storageIO != null) {
-                List<Range> ranges = new ArrayList<>();
-                long fileSize = storageIO.getDataFile().getFilesize();
                 try {
-                    String range = null;
-                    HttpHeaders headers = di.getRequestHttpHeaders();
-                    if (headers != null) {
-                        range = headers.getHeaderString("Range");
-                    }
-                    long offset = 0;
-                    try {
-                        ranges = getRanges(range, fileSize);
-                    } catch (Exception ex) {
-                        logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
-                        // The message starts with "Datafile" because otherwise the message is not passed
-                        // to the user due to how WebApplicationExceptionHandler works.
-                        throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
-                    }
-                    if (!ranges.isEmpty()) {
-                        // For now we only support a single range.
-                        offset = ranges.get(0).getStart();
-                    }
-                    storageIO.setOffset(offset);
                     storageIO.open();
                 } catch (IOException ioex) {
                     //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
@@ -293,8 +272,6 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
 
                             if ("original".equals(di.getConversionParamValue())) {
                                 logger.fine("stored original of an ingested file requested");
-                                // Get orginal file size before storageIO is reassigned to prevent an NPE.
-                                fileSize = storageIO.getDataFile().getOriginalFileSize();
                                 storageIO = StoredOriginalFile.retreive(storageIO);
                             } else {
                                 // Other format conversions: 
@@ -425,7 +402,32 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\"");
 
                         long contentSize;
+                        // This is hard-coded to false and could probably be removed some day,
+                        // but for now it's illustrative.
                         boolean useChunkedTransfer = false;
+
+                        // User may have requested a range of bytes.
+                        List<Range> ranges = new ArrayList<>();
+                        long fileSize = storageIO.getSize();
+                        String range = null;
+                        HttpHeaders headers = di.getRequestHttpHeaders();
+                        if (headers != null) {
+                            range = headers.getHeaderString("Range");
+                        }
+                        long offset = 0;
+                        try {
+                            ranges = getRanges(range, fileSize);
+                        } catch (Exception ex) {
+                            logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
+                            // The message starts with "Datafile" because otherwise the message is not passed
+                            // to the user due to how WebApplicationExceptionHandler works.
+                            throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
+                        }
+                        if (!ranges.isEmpty()) {
+                            // For now we only support a single range.
+                            offset = ranges.get(0).getStart();
+                        }
+                        storageIO.setOffset(offset);
                         //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) {
                         if ((contentSize = getContentSize(storageIO)) > 0 && ranges.isEmpty()) {
                             logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
@@ -436,6 +438,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             logger.fine("Content size (Range header in use): " + rangeContentSize);
                             httpHeaders.add("Content-Length", rangeContentSize);
                         } else {
+                            // Neither a range request, nor do we know the size. Must be dynamically
+                            // generated, such as a subsetting request.
+                            // Note the chunk lines below are commented out because Payara will see
+                            // that the header is not present and switch into chunk mode.
                             //httpHeaders.add("Transfer-encoding", "chunked");
                             //useChunkedTransfer = true;
                         }
@@ -450,11 +456,15 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         // any extra content, such as the variable header for the 
                         // subsettable files: 
                         if (storageIO.getVarHeader() != null) {
+                            logger.fine("storageIO.getVarHeader().getBytes().length: " + storageIO.getVarHeader().getBytes().length);
                             if (storageIO.getVarHeader().getBytes().length > 0) {
                                 if (useChunkedTransfer) {
                                     String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
                                     outstream.write(chunkSizeLine.getBytes());
                                 }
+                                // FIXME: check if ranges.isEmpty() here. These bytes, the header, is being written
+                                // even if we are only requesting the last bytes of a file. So you see all or part
+                                // of the header, even if you don't request it.
                                 outstream.write(storageIO.getVarHeader().getBytes());
                                 if (useChunkedTransfer) {
                                     outstream.write(chunkClose);
@@ -462,29 +472,42 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             }
                         }
 
-                        // Default to reading the whole file. We'll count down as we write.
-                        long leftToRead = fileSize;
-                        if (!ranges.isEmpty()) {
-                            // Read a range of bytes instead of the whole file.
-                            // For now we only support a single range.
-                            leftToRead = ranges.get(0).getLength();
-                        }
-                        while ((bufsize = instream.read(bffr)) != -1) {
-                            if (useChunkedTransfer) {
-                                String chunkSizeLine = String.format("%x\r\n", bufsize);
-                                outstream.write(chunkSizeLine.getBytes());
-                            }
-                            if ((leftToRead -= bufsize) > 0) {
-                                // Just do a normal write. Potentially lots to go. Don't break.
+                        // Dynamic streams, etc. Normal operation. No leftToRead.
+                        if (ranges.isEmpty()) {
+                            logger.fine("Normal, non-range request of file id " + dataFile.getId());
+                            while ((bufsize = instream.read(bffr)) != -1) {
+                                if (useChunkedTransfer) {
+                                    String chunkSizeLine = String.format("%x\r\n", bufsize);
+                                    outstream.write(chunkSizeLine.getBytes());
+                                }
                                 outstream.write(bffr, 0, bufsize);
-                            } else {
-                                // Get those last bytes or bytes equal to bufsize. Last one. Then break.
-                                outstream.write(bffr, 0, (int) leftToRead + bufsize);
-                                break;
+                                if (useChunkedTransfer) {
+                                    outstream.write(chunkClose);
+                                }
                             }
-                            if (useChunkedTransfer) {
-                                outstream.write(chunkClose);
+                        } else {
+                            logger.fine("Range request of file id " + dataFile.getId());
+                            // Read a range of bytes instead of the whole file. We'll count down as we write.
+                            // For now we only support a single range.
+                            long leftToRead = ranges.get(0).getLength();
+                            while ((bufsize = instream.read(bffr)) != -1) {
+                                if (useChunkedTransfer) {
+                                    String chunkSizeLine = String.format("%x\r\n", bufsize);
+                                    outstream.write(chunkSizeLine.getBytes());
+                                }
+                                if ((leftToRead -= bufsize) > 0) {
+                                    // Just do a normal write. Potentially lots to go. Don't break.
+                                    outstream.write(bffr, 0, bufsize);
+                                } else {
+                                    // Get those last bytes or bytes equal to bufsize. Last one. Then break.
+                                    outstream.write(bffr, 0, (int) leftToRead + bufsize);
+                                    break;
+                                }
+                                if (useChunkedTransfer) {
+                                    outstream.write(chunkClose);
+                                }
                             }
+
                         }
 
                         if (useChunkedTransfer) {
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index c824fbf7182..1d13559985b 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -11,8 +11,12 @@
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.BundleUtil;
 import edu.harvard.iq.dataverse.util.SystemConfig;
+import java.io.File;
+import java.io.IOException;
 import static java.lang.Thread.sleep;
 import java.math.BigDecimal;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.text.MessageFormat;
 import java.util.Arrays;
 import java.util.Collections;
@@ -1589,30 +1593,40 @@ public void test2toNull() {
         Integer fileId = 0;
         fileId = localFile;
         String nullApiToken = null;
+        String nullFormat = null;
+        String nullThumbnail = null;
         String byteRange = null;
         byteRange = "2-";
         // java.lang.IndexOutOfBoundsException: Range [0, 0 + -2) out of bounds for length 32768
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullFormat, nullThumbnail, nullApiToken);
         downloadFile.prettyPrint();
     }
 
-    @Ignore
+//    @Ignore
     @Test
     public void test0to100() {
         Integer localFile = 16;
+        Integer tabularLocalFile = 156;
         Integer s3file = 11;
         Integer fileId = 0;
         fileId = localFile;
-        fileId = s3file;
+//        fileId = s3file;
+        fileId = tabularLocalFile;
         String nullApiToken = null;
+        String format = null;
+        format = "original";
+        String nullThumbnail = null;
         String byteRange = null;
-        byteRange = "0-9"; // first ten
-        byteRange = "-9"; // last ten
-        byteRange = "9-"; // last ten
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+//        byteRange = "0-9"; // first ten
+        byteRange = "0-150"; 
+//        byteRange = "-9"; // last ten
+//        byteRange = "9-"; // last ten
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, format, nullThumbnail, nullApiToken);
         downloadFile.prettyPrint();
     }
 
+    // test Range header with thumbnails, etc.
+    
     @Ignore
     @Test
     public void testMultipleRanges() {
@@ -1622,9 +1636,95 @@ public void testMultipleRanges() {
         fileId = localFile;
 //        fileId = s3file;
         String nullApiToken = null;
+        String nullFormat = null;
+        String nullThumbnail = null;
         String byteRange = null;
         byteRange = "0-9,90-99";
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullApiToken);
+        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullFormat, nullThumbnail, nullApiToken);
         downloadFile.prettyPrint();
     }    
+
+    @Test
+    public void testRange() throws IOException {
+
+        Response createUser = UtilIT.createRandomUser();
+        createUser.prettyPrint();
+        String authorUsername = UtilIT.getUsernameFromResponse(createUser);
+        String authorApiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        Response createDataverse = UtilIT.createRandomDataverse(authorApiToken);
+        createDataverse.prettyPrint();
+        createDataverse.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
+
+        Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, authorApiToken);
+        createDataset.prettyPrint();
+        createDataset.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
+        String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId");
+
+        Path pathToCsv = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv");
+        String contentOfCsv = ""
+                + "name,pounds,species\n"
+                + "Marshall,40,dog\n"
+                + "Tiger,17,cat\n"
+                + "Panther,21,cat\n";
+        java.nio.file.Files.write(pathToCsv, contentOfCsv.getBytes());
+
+        Response uploadFileCsv = UtilIT.uploadFileViaNative(datasetId.toString(), pathToCsv.toString(), authorApiToken);
+        uploadFileCsv.prettyPrint();
+        uploadFileCsv.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.files[0].label", equalTo("data.csv"));
+
+        Integer fileIdCsv = JsonPath.from(uploadFileCsv.body().asString()).getInt("data.files[0].dataFile.id");
+
+        assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToCsv, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", authorApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));
+
+        
+        String byteRange = "0-9"; // first ten
+//        byteRange = "0-29"; // first thirty
+//        byteRange = "-40";
+        String format = null;
+//        format = "original";
+//        String nullApiToken = null;
+        Response downloadFile = UtilIT.downloadFile(fileIdCsv, byteRange, format, null, authorApiToken);
+        downloadFile.prettyPrint();
+
+        if (true) return;
+        Response uploadFile = UtilIT.uploadFile(datasetPid, "trees.zip", authorApiToken);
+        uploadFile.prettyPrint();
+        
+//        Response uploadFile = UtilIT.uploadFileViaNative(datasetPid, apiToken, authorApiToken);
+
+        Response getDatasetJson1 = UtilIT.nativeGetUsingPersistentId(datasetPid, authorApiToken);
+        getDatasetJson1.prettyPrint();
+        Long fileIdPng = JsonPath.from(getDatasetJson1.getBody().asString()).getLong("data.latestVersion.files[1].dataFile.id");
+        System.out.println("datafileId: " + fileIdPng);
+        getDatasetJson1.then().assertThat()
+                .statusCode(200);
+
+        String trueOrWidthInPixels = "true";
+        Response getFileThumbnailImageA = UtilIT.getFileThumbnail(fileIdPng.toString(), trueOrWidthInPixels, authorApiToken);
+        getFileThumbnailImageA.then().assertThat()
+                .contentType("image/png")
+                .statusCode(OK.getStatusCode());
+
+        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken);
+        publishDataverse.then().assertThat().statusCode(OK.getStatusCode());
+        Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken);
+        publishDataset.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Yes, you can get a range of bytes from a thumbnail.
+        String imageThumbPixels = "true";
+        Response downloadThumbnail = UtilIT.downloadFile(fileIdPng.intValue(), "0-149", null, imageThumbPixels, authorApiToken);
+        downloadThumbnail.prettyPrint();
+        downloadThumbnail.then().assertThat().statusCode(OK.getStatusCode());
+        
+
+    }
+
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
index a040c3506c3..94b47f250f3 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -721,14 +721,24 @@ static Response downloadFile(Integer fileId) {
 
     static Response downloadFile(Integer fileId, String apiToken) {
         String nullByteRange = null;
-        return downloadFile(fileId, nullByteRange, apiToken);
+        String nullFormat = null;
+        String nullImageThumb = null;
+        return downloadFile(fileId, nullByteRange, nullFormat, nullImageThumb, apiToken);
     }
 
-    static Response downloadFile(Integer fileId, String byteRange, String apiToken) {
+    static Response downloadFile(Integer fileId, String byteRange, String format, String imageThumb, String apiToken) {
         RequestSpecification requestSpecification = given();
         if (byteRange != null) {
             requestSpecification.header("Range", "bytes=" + byteRange);
         }
+        String optionalFormat = "";
+        if (format != null) {
+            optionalFormat = "&format=" + format;
+        }
+        String optionalImageThumb = "";
+        if (format != null) {
+            optionalImageThumb = "&imageThumb=" + imageThumb;
+        }
         /**
          * Data Access API does not support X-Dataverse-key header -
          * https://github.com/IQSS/dataverse/issues/2662
@@ -736,7 +746,7 @@ static Response downloadFile(Integer fileId, String byteRange, String apiToken)
          * Actually, these days it does. We could switch.
          */
         //.header(API_TOKEN_HTTP_HEADER, apiToken)
-        return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken);
+        return requestSpecification.get("/api/access/datafile/" + fileId + "?key=" + apiToken + optionalFormat + optionalImageThumb);
     }
     
     static Response downloadTabularFile(Integer fileId) {

From adab5546bb88be7920bd49520995d4bf8784db40 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Mon, 4 Oct 2021 16:06:02 -0400
Subject: [PATCH 06/26] vary variable header writes by range request #6937

If a range is not requested, write the whole variable header line.
Otherwise, make a reasonable effort to write what we can.
---
 .../dataverse/api/DownloadInstanceWriter.java | 33 ++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index c7b22f0e980..f13da0bce1f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -36,6 +36,7 @@
 import java.net.URISyntaxException;
 import java.net.URLEncoder;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -462,10 +463,34 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                     String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
                                     outstream.write(chunkSizeLine.getBytes());
                                 }
-                                // FIXME: check if ranges.isEmpty() here. These bytes, the header, is being written
-                                // even if we are only requesting the last bytes of a file. So you see all or part
-                                // of the header, even if you don't request it.
-                                outstream.write(storageIO.getVarHeader().getBytes());
+                                // If a range is not being requested, let's call that the normal case.
+                                // Write the entire line of variable headers. Later, the rest of the file
+                                // will be written.
+                                if (ranges.isEmpty()) {
+                                    outstream.write(storageIO.getVarHeader().getBytes());
+                                } else {
+                                    // Range requested. We should only write the header if the range specifies the
+                                    // start of the file or if the range includes some of the end of the header.
+                                    // For now we only support a single range.
+                                    Range range1 = ranges.get(0);
+                                    if (range1.getStart() == 0 & range1.getLength() >= storageIO.getVarHeader().getBytes().length) {
+                                        // Does the range requested include the whole length of the variable headers?
+                                        // If so, write those headers, if the range is the start of the file (index 0).
+                                        logger.fine("Writing entire variable header line.");
+                                        outstream.write(storageIO.getVarHeader().getBytes());
+                                    } else if (range1.getStart() == 0 & range1.getLength() < storageIO.getVarHeader().getBytes().length) {
+                                        // Is the user requesting a small range near the front, so small that we can't
+                                        // even write the full line of variable headers? Ok, we'll just write as much
+                                        // as we can, within the range.
+                                        logger.fine("Writing this many bytes of the variable header line: " + range1.getLength());
+                                        outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), 0, (int) range1.getLength()));
+                                    } else {
+                                        // TODO: Consider supporting weird scenarios:
+                                        // - The user is requesting the end or middle of the file and should end up with part or all of the variable headers.
+                                        // - ??
+                                        logger.fine("A range was requested but didn't start with 0. Skip printing of variable headers.");
+                                    }
+                                }
                                 if (useChunkedTransfer) {
                                     outstream.write(chunkClose);
                                 }

From 4724e116a4f04bfd39707141c1d4565dc2502f28 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Tue, 5 Oct 2021 16:07:57 -0400
Subject: [PATCH 07/26] get offset working again #6937

Offset stopped working in 29ea256. Before that commit we were relying on
setOffset() being called after open().

Now we can (and do) call setOffset() after open().
---
 .../iq/dataverse/dataaccess/FileAccessIO.java        |  1 -
 .../harvard/iq/dataverse/dataaccess/S3AccessIO.java  |  2 --
 .../harvard/iq/dataverse/dataaccess/StorageIO.java   | 12 ++++++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
index 9c55caa3360..bd0549622f0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
@@ -509,7 +509,6 @@ public FileInputStream openLocalFileAsInputStream () {
 
         try {
             in = new FileInputStream(getFileSystemPath().toFile());
-            in.skip(this.getOffset());
         } catch (IOException ex) {
             // We don't particularly care what the reason why we have
             // failed to access the file was.
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
index 52bb2cd5895..80fbbd103e7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -282,8 +282,6 @@ public InputStream getInputStream() throws IOException {
 
         setChannel(Channels.newChannel(super.getInputStream()));
 
-        super.getInputStream().skip(super.getOffset());
-
         return super.getInputStream();
     }
     
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
index 0387561a075..b0e9648285c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
@@ -391,8 +391,16 @@ public void setSize(long s) {
         size = s;
     }
 
-    public void setOffset(long offset) {
-        this.offset = offset;
+    // open() has already been called. Now we can skip, if need be.
+    public void setOffset(long offset) throws IOException {
+        InputStream inputStream = getInputStream();
+        if (inputStream != null) {
+            inputStream.skip(offset);
+            // The skip has already been done. Why not record it.
+            this.offset = offset;
+        } else {
+            throw new IOException("Could not skip into InputStream because it is null");
+        }
     }
 
     public void setInputStream(InputStream is) {

From f0f08776f14c79862cce6850835122f29154a88a Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Tue, 5 Oct 2021 16:23:46 -0400
Subject: [PATCH 08/26] delete old setOffset test (was for pre 4724e1) #6937

---
 .../dataaccess/FileAccessIOTest.java          | 24 -------------------
 1 file changed, 24 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
index ad31e4adee3..286fe8a6595 100644
--- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIOTest.java
@@ -269,30 +269,6 @@ public void testOpenLocalFileAsInputStream() throws IOException {
         assertNotNull(datasetAccess.openLocalFileAsOutputStream());
     }
 
-    /**
-     * Test of openLocalFileAsInputStream (with an offset) and
-     * openLocalFileAsOutputStream method, of class FileAccessIO.
-     *
-     * @throws java.io.IOException if test is broken
-     */
-    @Test
-    public void testOpenLocalFileAsInputStreamWithOffset() throws IOException {
-        // Start reading a few bytes in.
-        datasetAccess.setOffset(11);
-        BufferedReader br = new BufferedReader(new InputStreamReader(datasetAccess.openLocalFileAsInputStream()));
-        StringBuilder sb = new StringBuilder();
-        String line;
-        while ((line = br.readLine()) != null) {
-            sb.append(line);
-            sb.append('\n');
-        }
-        // Put offset back.
-        datasetAccess.setOffset(0);
-        // Instead of "This is a test string"
-        assertEquals("est string\n", sb.toString());
-        assertNotNull(datasetAccess.openLocalFileAsOutputStream());
-    }
-
     /**
      * Test of getAuxFileAsInputStream method, of class FileAccessIO.
      *

From b02209d168e464049efb2989b487ceb4d5a5ea9a Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Tue, 5 Oct 2021 16:25:08 -0400
Subject: [PATCH 09/26] add more offset/range API tests #6937

---
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 130 +++++++++++++-----
 1 file changed, 97 insertions(+), 33 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index 1d13559985b..ab40ed6118c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1602,7 +1602,7 @@ public void test2toNull() {
         downloadFile.prettyPrint();
     }
 
-//    @Ignore
+    @Ignore
     @Test
     public void test0to100() {
         Integer localFile = 16;
@@ -1648,24 +1648,73 @@ public void testMultipleRanges() {
     public void testRange() throws IOException {
 
         Response createUser = UtilIT.createRandomUser();
-        createUser.prettyPrint();
+//        createUser.prettyPrint();
         String authorUsername = UtilIT.getUsernameFromResponse(createUser);
         String authorApiToken = UtilIT.getApiTokenFromResponse(createUser);
 
         Response createDataverse = UtilIT.createRandomDataverse(authorApiToken);
-        createDataverse.prettyPrint();
+//        createDataverse.prettyPrint();
         createDataverse.then().assertThat()
                 .statusCode(CREATED.getStatusCode());
         String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
 
         Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, authorApiToken);
-        createDataset.prettyPrint();
+//        createDataset.prettyPrint();
         createDataset.then().assertThat()
                 .statusCode(CREATED.getStatusCode());
 
         Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
         String datasetPid = JsonPath.from(createDataset.asString()).getString("data.persistentId");
 
+        Path pathToTxt = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "file.txt");
+        String contentOfTxt = ""
+                + "first is the worst\n"
+                + "second is the best\n"
+                + "third is the one with the hairy chest\n";
+        java.nio.file.Files.write(pathToTxt, contentOfTxt.getBytes());
+
+        Response uploadFileTxt = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTxt.toString(), authorApiToken);
+//        uploadFileTxt.prettyPrint();
+        uploadFileTxt.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.files[0].label", equalTo("file.txt"));
+
+        Integer fileIdTxt = JsonPath.from(uploadFileTxt.body().asString()).getInt("data.files[0].dataFile.id");
+
+        // Download the whole file.
+        Response downloadTxtNoArgs = UtilIT.downloadFile(fileIdTxt, null, null, null, authorApiToken);
+        downloadTxtNoArgs.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("first is the worst\n"
+                        + "second is the best\n"
+                        + "third is the one with the hairy chest\n"));
+
+        // Download the first 10 bytes.
+        Response downloadTxtFirst10 = UtilIT.downloadFile(fileIdTxt, "0-9", null, null, authorApiToken);
+        downloadTxtFirst10.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("first is t"));
+
+        // Download the last 6 bytes.
+        Response downloadTxtLast6 = UtilIT.downloadFile(fileIdTxt, "-6", null, null, authorApiToken);
+        downloadTxtLast6.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("chest\n"));
+
+        // Download some bytes from the middle.
+        Response downloadTxtMiddle = UtilIT.downloadFile(fileIdTxt, "09-19", null, null, authorApiToken);
+        downloadTxtMiddle.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("the worst\ns"));
+
+        // Skip the first 10 bytes and download the rest.
+        Response downloadTxtSkipFirst10 = UtilIT.downloadFile(fileIdTxt, "9-", null, null, authorApiToken);
+        downloadTxtSkipFirst10.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("the worst\n"
+                        + "second is the best\n"
+                        + "third is the one with the hairy chest\n"));
+
         Path pathToCsv = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "data.csv");
         String contentOfCsv = ""
                 + "name,pounds,species\n"
@@ -1675,7 +1724,7 @@ public void testRange() throws IOException {
         java.nio.file.Files.write(pathToCsv, contentOfCsv.getBytes());
 
         Response uploadFileCsv = UtilIT.uploadFileViaNative(datasetId.toString(), pathToCsv.toString(), authorApiToken);
-        uploadFileCsv.prettyPrint();
+//        uploadFileCsv.prettyPrint();
         uploadFileCsv.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body("data.files[0].label", equalTo("data.csv"));
@@ -1684,28 +1733,44 @@ public void testRange() throws IOException {
 
         assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToCsv, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", authorApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));
 
-        
-        String byteRange = "0-9"; // first ten
-//        byteRange = "0-29"; // first thirty
-//        byteRange = "-40";
-        String format = null;
-//        format = "original";
-//        String nullApiToken = null;
-        Response downloadFile = UtilIT.downloadFile(fileIdCsv, byteRange, format, null, authorApiToken);
-        downloadFile.prettyPrint();
+        // Just the tabular file, not the original, no byte range. Vanilla.
+        Response downloadFileNoArgs = UtilIT.downloadFile(fileIdCsv, null, null, null, authorApiToken);
+        downloadFileNoArgs.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("name	pounds	species\n"
+                        + "\"Marshall\"	40	\"dog\"\n"
+                        + "\"Tiger\"	17	\"cat\"\n"
+                        + "\"Panther\"	21	\"cat\"\n"));
+
+        // Original version of tabular file (CSV in this case).
+        Response downloadFileOrig = UtilIT.downloadFile(fileIdCsv, null, "original", null, authorApiToken);
+        downloadFileOrig.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("name,pounds,species\n"
+                        + "Marshall,40,dog\n"
+                        + "Tiger,17,cat\n"
+                        + "Panther,21,cat\n"));
+
+        // first ten bytes
+        Response downloadFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", "original", null, authorApiToken);
+        downloadFirstTen.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("name,pound"));
 
-        if (true) return;
-        Response uploadFile = UtilIT.uploadFile(datasetPid, "trees.zip", authorApiToken);
-        uploadFile.prettyPrint();
-        
-//        Response uploadFile = UtilIT.uploadFileViaNative(datasetPid, apiToken, authorApiToken);
+        // last ten bytes
+        Response downloadLastTen = UtilIT.downloadFile(fileIdCsv, "-10", "original", null, authorApiToken);
+        downloadLastTen.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("er,21,cat\n"));
+
+        String pathToZipWithImage = "scripts/search/data/binary/trees.zip";
+        Response uploadFileZipWithImage = UtilIT.uploadFileViaNative(datasetId.toString(), pathToZipWithImage, authorApiToken);
+//        uploadFileZipWithImage.prettyPrint();
+        uploadFileZipWithImage.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.files[0].label", equalTo("trees.png"));
 
-        Response getDatasetJson1 = UtilIT.nativeGetUsingPersistentId(datasetPid, authorApiToken);
-        getDatasetJson1.prettyPrint();
-        Long fileIdPng = JsonPath.from(getDatasetJson1.getBody().asString()).getLong("data.latestVersion.files[1].dataFile.id");
-        System.out.println("datafileId: " + fileIdPng);
-        getDatasetJson1.then().assertThat()
-                .statusCode(200);
+        Integer fileIdPng = JsonPath.from(uploadFileZipWithImage.body().asString()).getInt("data.files[0].dataFile.id");
 
         String trueOrWidthInPixels = "true";
         Response getFileThumbnailImageA = UtilIT.getFileThumbnail(fileIdPng.toString(), trueOrWidthInPixels, authorApiToken);
@@ -1713,17 +1778,16 @@ public void testRange() throws IOException {
                 .contentType("image/png")
                 .statusCode(OK.getStatusCode());
 
-        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken);
-        publishDataverse.then().assertThat().statusCode(OK.getStatusCode());
-        Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken);
-        publishDataset.then().assertThat().statusCode(OK.getStatusCode());
-
         // Yes, you can get a range of bytes from a thumbnail.
         String imageThumbPixels = "true";
-        Response downloadThumbnail = UtilIT.downloadFile(fileIdPng.intValue(), "0-149", null, imageThumbPixels, authorApiToken);
-        downloadThumbnail.prettyPrint();
+        Response downloadThumbnail = UtilIT.downloadFile(fileIdPng, "0-149", null, imageThumbPixels, authorApiToken);
+//        downloadThumbnail.prettyPrint();
         downloadThumbnail.then().assertThat().statusCode(OK.getStatusCode());
-        
+
+//        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken);
+//        publishDataverse.then().assertThat().statusCode(OK.getStatusCode());
+//        Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken);
+//        publishDataset.then().assertThat().statusCode(OK.getStatusCode());
 
     }
 

From 3d237a803e52d662130436f91bc8f7bf30360417 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Tue, 5 Oct 2021 16:35:32 -0400
Subject: [PATCH 10/26] add release note for HTTP Range header #6937

---
 doc/release-notes/6937-range.md | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 doc/release-notes/6937-range.md

diff --git a/doc/release-notes/6937-range.md b/doc/release-notes/6937-range.md
new file mode 100644
index 00000000000..2ce3e30b9aa
--- /dev/null
+++ b/doc/release-notes/6937-range.md
@@ -0,0 +1,10 @@
+### Support for HTTP "Range" Header for Partial File Downloads
+
+Dataverse now supports the HTTP "Range" header, which allows users to download parts of a file. Here are some examples:
+
+- `bytes=0-9` gets the first 10 bytes.
+- `bytes=10-19` gets 20 bytes from the middle.
+- `bytes=-10` gets the last 10 bytes.
+- `bytes=9-` gets all bytes except the first 10.
+
+Only a single range is supported. For more information, see the [Data Access API](https://guides.dataverse.org/en/latest/api/dataaccess.html) section of the API Guide.

From a57e6298efc71bd246e0a50809e90debdb4636b8 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Wed, 6 Oct 2021 10:07:26 -0400
Subject: [PATCH 11/26] try to make reviewdog happy by replacing tabs with \t
 #6937

---
 src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index ab40ed6118c..fc9ae2b4fdc 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1737,10 +1737,10 @@ public void testRange() throws IOException {
         Response downloadFileNoArgs = UtilIT.downloadFile(fileIdCsv, null, null, null, authorApiToken);
         downloadFileNoArgs.then().assertThat()
                 .statusCode(OK.getStatusCode())
-                .body(equalTo("name	pounds	species\n"
-                        + "\"Marshall\"	40	\"dog\"\n"
-                        + "\"Tiger\"	17	\"cat\"\n"
-                        + "\"Panther\"	21	\"cat\"\n"));
+                .body(equalTo("name\tpounds\tspecies\n"
+                        + "\"Marshall\"\t40\t\"dog\"\n"
+                        + "\"Tiger\"\t17\t\"cat\"\n"
+                        + "\"Panther\"\t21\t\"cat\"\n"));
 
         // Original version of tabular file (CSV in this case).
         Response downloadFileOrig = UtilIT.downloadFile(fileIdCsv, null, "original", null, authorApiToken);

From e6233d8814f8905ebd08f5e604d0181e0766e546 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Wed, 6 Oct 2021 16:13:05 -0400
Subject: [PATCH 12/26] remove redundant ranges.isEmpty() check #6937

Just above `ranges` is initialized to `new ArrayList<>()`.
---
 .../dataverse/api/DownloadInstanceWriter.java | 51 +++++++++----------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index f13da0bce1f..ee27d14e409 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -696,36 +696,33 @@ public List<Range> getRanges(String range, long fileSize) {
                 throw new RuntimeException("The format is bytes=<range-start>-<range-end> where start and end are optional.");
             }
 
-            if (ranges.isEmpty()) {
-                // The 6 is to remove "bytes="
-
-                for (String part : range.substring(6).split(",")) {
-
-                    long start = getRangeStart(part);
-                    long end = getRangeEnd(part);
-
-                    if (start == -1) {
-                        // start does not exist. Base start off of how many bytes from end.
-                        start = fileSize - end;
-                        end = fileSize - 1;
-                    } else if (end == -1 || end > fileSize - 1) {
-                        // Set end when it doesn't exist.
-                        // Also, automatically set end to size of file if end is beyond
-                        // the file size (rather than throwing an error).
-                        end = fileSize - 1;
-                    }
-
-                    if (start > end) {
-                        throw new RuntimeException("Start is larger than end.");
-                    }
+            // The 6 is to remove "bytes="
+            for (String part : range.substring(6).split(",")) {
+
+                long start = getRangeStart(part);
+                long end = getRangeEnd(part);
+
+                if (start == -1) {
+                    // start does not exist. Base start off of how many bytes from end.
+                    start = fileSize - end;
+                    end = fileSize - 1;
+                } else if (end == -1 || end > fileSize - 1) {
+                    // Set end when it doesn't exist.
+                    // Also, automatically set end to size of file if end is beyond
+                    // the file size (rather than throwing an error).
+                    end = fileSize - 1;
+                }
 
-                    if (ranges.size() < 1) {
-                        ranges.add(new Range(start, end));
-                    } else {
-                        throw new RuntimeException("Only one range is allowed.");
-                    }
+                if (start > end) {
+                    throw new RuntimeException("Start is larger than end.");
+                }
 
+                if (ranges.size() < 1) {
+                    ranges.add(new Range(start, end));
+                } else {
+                    throw new RuntimeException("Only one range is allowed.");
                 }
+
             }
         }
 

From ed4ec86ad3ba3ecfd86a47f4a1209f0b38bc6041 Mon Sep 17 00:00:00 2001
From: Leonid Andreev <leonid@hmdc.harvard.edu>
Date: Thu, 7 Oct 2021 12:23:24 -0400
Subject: [PATCH 13/26] rearranged the handling of the "range requst on a
 tabular file" special case. (#6937)

---
 .../dataverse/api/DownloadInstanceWriter.java | 85 +++++++++++++------
 1 file changed, 57 insertions(+), 28 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index ee27d14e409..0ea67d5bccf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -416,6 +416,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             range = headers.getHeaderString("Range");
                         }
                         long offset = 0;
+                        long leftToRead = -1L; 
+                        // Moving the "left to read" var. here; - since we may need 
+                        // to start counting our range bytes outside the main .write()
+                        // loop, if it's a tabular file with a header. 
                         try {
                             ranges = getRanges(range, fileSize);
                         } catch (Exception ex) {
@@ -427,9 +431,9 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if (!ranges.isEmpty()) {
                             // For now we only support a single range.
                             offset = ranges.get(0).getStart();
+                            leftToRead = ranges.get(0).getLength();
                         }
-                        storageIO.setOffset(offset);
-                        //if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) {
+                        
                         if ((contentSize = getContentSize(storageIO)) > 0 && ranges.isEmpty()) {
                             logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
                             httpHeaders.add("Content-Length", contentSize);
@@ -452,6 +456,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         int bufsize;
                         byte[] bffr = new byte[4 * 8192];
                         byte[] chunkClose = "\r\n".getBytes();
+                                                
 
                         // before writing out any bytes from the input stream, flush
                         // any extra content, such as the variable header for the 
@@ -459,36 +464,52 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if (storageIO.getVarHeader() != null) {
                             logger.fine("storageIO.getVarHeader().getBytes().length: " + storageIO.getVarHeader().getBytes().length);
                             if (storageIO.getVarHeader().getBytes().length > 0) {
-                                if (useChunkedTransfer) {
-                                    String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
-                                    outstream.write(chunkSizeLine.getBytes());
-                                }
+                                //if (useChunkedTransfer) {
+                                //    String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
+                                //    outstream.write(chunkSizeLine.getBytes());
+                                //}
                                 // If a range is not being requested, let's call that the normal case.
                                 // Write the entire line of variable headers. Later, the rest of the file
                                 // will be written.
                                 if (ranges.isEmpty()) {
+                                    logger.info("writing the entire variable header");
                                     outstream.write(storageIO.getVarHeader().getBytes());
                                 } else {
-                                    // Range requested. We should only write the header if the range specifies the
-                                    // start of the file or if the range includes some of the end of the header.
-                                    // For now we only support a single range.
-                                    Range range1 = ranges.get(0);
-                                    if (range1.getStart() == 0 & range1.getLength() >= storageIO.getVarHeader().getBytes().length) {
-                                        // Does the range requested include the whole length of the variable headers?
-                                        // If so, write those headers, if the range is the start of the file (index 0).
-                                        logger.fine("Writing entire variable header line.");
-                                        outstream.write(storageIO.getVarHeader().getBytes());
-                                    } else if (range1.getStart() == 0 & range1.getLength() < storageIO.getVarHeader().getBytes().length) {
-                                        // Is the user requesting a small range near the front, so small that we can't
-                                        // even write the full line of variable headers? Ok, we'll just write as much
-                                        // as we can, within the range.
-                                        logger.fine("Writing this many bytes of the variable header line: " + range1.getLength());
-                                        outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), 0, (int) range1.getLength()));
+                                    // Range requested. Since the output stream of a 
+                                    // tabular file is made up of the varHeader and the body of 
+                                    // the physical file, we should assume that the requested 
+                                    // range may span any portion of the combined stream.
+                                    // Thus we may or may not to write the header, or a 
+                                    // portion thereof. 
+                                    logger.info("Skipping the variable header completely.");
+                                    int headerLength = storageIO.getVarHeader().getBytes().length;
+                                    if (offset >= headerLength) {
+                                        // We can skip the entire header. 
+                                        // All we need to do is adjust the byte offset 
+                                        // in the physical file; the number of bytes
+                                        // left to write stays unchanged, since we haven't
+                                        // written anything.
+                                        offset -= storageIO.getVarHeader().getBytes().length;
                                     } else {
-                                        // TODO: Consider supporting weird scenarios:
-                                        // - The user is requesting the end or middle of the file and should end up with part or all of the variable headers.
-                                        // - ??
-                                        logger.fine("A range was requested but didn't start with 0. Skip printing of variable headers.");
+                                        // We need to write some portion of the header; 
+                                        // Once we are done, we may or may not still have 
+                                        // some bytes left to write from the main physical file.
+                                        if (offset + leftToRead <= headerLength) {
+                                            // This is a more straightforward case - we just need to 
+                                            // write a portion of the header, and then we are done!
+                                            logger.info("Writing this many bytes of the variable header line: " + leftToRead);
+                                            outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, (int)offset + (int)leftToRead));
+                                            // set "left to read" to zero, indicating that we are done:
+                                            leftToRead = 0; 
+                                        } else {
+                                            // write the requested portion of the header:
+                                            logger.info("Writing this many bytes of the variable header line: " + (headerLength - offset));
+                                            outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, headerLength));
+                                            // and adjust the file offset and remaining number of bytes accordingly: 
+                                            leftToRead -= (headerLength - offset);
+                                            offset = 0;
+                                        }
+                                        
                                     }
                                 }
                                 if (useChunkedTransfer) {
@@ -510,11 +531,19 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                     outstream.write(chunkClose);
                                 }
                             }
-                        } else {
-                            logger.fine("Range request of file id " + dataFile.getId());
+                        } else if (leftToRead > 0) {
+                            // This is a range request, and we still have bytes to read 
+                            // (for a tabular file, we may have already written enough
+                            // bytes from the variable header!)
+                            storageIO.setOffset(offset);
+                            // Thinking about it, we could just do instream.skip(offset) 
+                            // here... But I would like to have this offset functionality 
+                            // in StorageIO, for any future cases where we may not 
+                            // be able to do that on the stream directly -- L.A.
+                            logger.info("Range request of file id " + dataFile.getId());
                             // Read a range of bytes instead of the whole file. We'll count down as we write.
                             // For now we only support a single range.
-                            long leftToRead = ranges.get(0).getLength();
+                            //long leftToRead = ranges.get(0).getLength();
                             while ((bufsize = instream.read(bffr)) != -1) {
                                 if (useChunkedTransfer) {
                                     String chunkSizeLine = String.format("%x\r\n", bufsize);

From 3e8b193b3fa2971380dfd4be91e0cee33d639e4e Mon Sep 17 00:00:00 2001
From: Leonid Andreev <leonid@hmdc.harvard.edu>
Date: Thu, 7 Oct 2021 12:41:26 -0400
Subject: [PATCH 14/26] minor/cosmetic change (#6937)

---
 .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 0ea67d5bccf..92bf78ae03e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -489,7 +489,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                         // in the physical file; the number of bytes
                                         // left to write stays unchanged, since we haven't
                                         // written anything.
-                                        offset -= storageIO.getVarHeader().getBytes().length;
+                                        offset -= headerLength;
                                     } else {
                                         // We need to write some portion of the header; 
                                         // Once we are done, we may or may not still have 
@@ -539,7 +539,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             // Thinking about it, we could just do instream.skip(offset) 
                             // here... But I would like to have this offset functionality 
                             // in StorageIO, for any future cases where we may not 
-                            // be able to do that on the stream directly -- L.A.
+                            // be able to do that on the stream directly (?) -- L.A.
                             logger.info("Range request of file id " + dataFile.getId());
                             // Read a range of bytes instead of the whole file. We'll count down as we write.
                             // For now we only support a single range.

From 12900b739cdc470dbc66b40a74072dd04d841e90 Mon Sep 17 00:00:00 2001
From: Leonid Andreev <leonid@hmdc.harvard.edu>
Date: Thu, 7 Oct 2021 12:45:23 -0400
Subject: [PATCH 15/26] couple more cosmetic changes in comments/logging (the
 .info() logging will need to be muted of course) (#6937)

---
 .../edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 92bf78ae03e..3baeb099c40 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -479,9 +479,8 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                     // tabular file is made up of the varHeader and the body of 
                                     // the physical file, we should assume that the requested 
                                     // range may span any portion of the combined stream.
-                                    // Thus we may or may not to write the header, or a 
+                                    // Thus we may or may not have to write the header, or a 
                                     // portion thereof. 
-                                    logger.info("Skipping the variable header completely.");
                                     int headerLength = storageIO.getVarHeader().getBytes().length;
                                     if (offset >= headerLength) {
                                         // We can skip the entire header. 
@@ -489,6 +488,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                         // in the physical file; the number of bytes
                                         // left to write stays unchanged, since we haven't
                                         // written anything.
+                                        logger.info("Skipping the variable header completely.");
                                         offset -= headerLength;
                                     } else {
                                         // We need to write some portion of the header; 

From f6b4a06097f5840a3693a6f1ad39825208a0d03d Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Thu, 7 Oct 2021 13:38:32 -0400
Subject: [PATCH 16/26] add more range API tests #6937

downloadTabLast10 is commented out because it doesn't yet pass.
---
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 50 ++++++++++++++++---
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index fc9ae2b4fdc..c951c5667c5 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1742,9 +1742,41 @@ public void testRange() throws IOException {
                         + "\"Tiger\"\t17\t\"cat\"\n"
                         + "\"Panther\"\t21\t\"cat\"\n"));
 
+        // first 10 bytes of tabular format
+        Response downloadTabFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", null, null, authorApiToken);
+        downloadTabFirstTen.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("name\tpound"));
+
+        // first 30 bytes of tabular format
+        Response downloadTabFirst30 = UtilIT.downloadFile(fileIdCsv, "0-29", null, null, authorApiToken);
+        downloadTabFirst30.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("name\tpounds\tspecies\n"
+                        + "\"Marshall\""));
+
+//        // FIXME: This should be something like "er 21 cat" (the last line) like downloadOrigLastTen
+//        // FIXME: but right now it's showing "17 cat" (the second to last line).
+//        // last 10 bytes of tabular format
+//        Response downloadTabLast10 = UtilIT.downloadFile(fileIdCsv, "-10", null, null, authorApiToken);
+//        downloadTabLast10.then().assertThat()
+//                .statusCode(OK.getStatusCode())
+//                .body(equalTo("er\"\t21\t\"cat\"\n"));
+
+        Response downloadTabMiddleBytesHeader = UtilIT.downloadFile(fileIdCsv, "1-7", null, null, authorApiToken);
+        downloadTabMiddleBytesHeader.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("ame\tpou"));
+
+        Response downloadTabMiddleBytesBody = UtilIT.downloadFile(fileIdCsv, "31-43", null, null, authorApiToken);
+        downloadTabMiddleBytesBody.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("40\t\"dog\"\n"
+                        + "\"Tig"));
+
         // Original version of tabular file (CSV in this case).
-        Response downloadFileOrig = UtilIT.downloadFile(fileIdCsv, null, "original", null, authorApiToken);
-        downloadFileOrig.then().assertThat()
+        Response downloadOrig = UtilIT.downloadFile(fileIdCsv, null, "original", null, authorApiToken);
+        downloadOrig.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body(equalTo("name,pounds,species\n"
                         + "Marshall,40,dog\n"
@@ -1752,17 +1784,23 @@ public void testRange() throws IOException {
                         + "Panther,21,cat\n"));
 
         // first ten bytes
-        Response downloadFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", "original", null, authorApiToken);
-        downloadFirstTen.then().assertThat()
+        Response downloadOrigFirstTen = UtilIT.downloadFile(fileIdCsv, "0-9", "original", null, authorApiToken);
+        downloadOrigFirstTen.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body(equalTo("name,pound"));
 
         // last ten bytes
-        Response downloadLastTen = UtilIT.downloadFile(fileIdCsv, "-10", "original", null, authorApiToken);
-        downloadLastTen.then().assertThat()
+        Response downloadOrigLastTen = UtilIT.downloadFile(fileIdCsv, "-10", "original", null, authorApiToken);
+        downloadOrigLastTen.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body(equalTo("er,21,cat\n"));
 
+        // middle bytes
+        Response downloadOrigMiddle = UtilIT.downloadFile(fileIdCsv, "29-39", "original", null, authorApiToken);
+        downloadOrigMiddle.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("40,dog\nTige"));
+
         String pathToZipWithImage = "scripts/search/data/binary/trees.zip";
         Response uploadFileZipWithImage = UtilIT.uploadFileViaNative(datasetId.toString(), pathToZipWithImage, authorApiToken);
 //        uploadFileZipWithImage.prettyPrint();

From 90786ce41cfecfadc730ef1b2a04aa67805bb377 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Thu, 7 Oct 2021 14:05:04 -0400
Subject: [PATCH 17/26] fix byte count in docs and release note #6937

---
 doc/release-notes/6937-range.md             | 2 +-
 doc/sphinx-guides/source/api/dataaccess.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/release-notes/6937-range.md b/doc/release-notes/6937-range.md
index 2ce3e30b9aa..2c592d0c251 100644
--- a/doc/release-notes/6937-range.md
+++ b/doc/release-notes/6937-range.md
@@ -3,7 +3,7 @@
 Dataverse now supports the HTTP "Range" header, which allows users to download parts of a file. Here are some examples:
 
 - `bytes=0-9` gets the first 10 bytes.
-- `bytes=10-19` gets 20 bytes from the middle.
+- `bytes=10-19` gets 10 bytes from the middle.
 - `bytes=-10` gets the last 10 bytes.
 - `bytes=9-` gets all bytes except the first 10.
 
diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst
index 3973428a5a3..3f99740f267 100755
--- a/doc/sphinx-guides/source/api/dataaccess.rst
+++ b/doc/sphinx-guides/source/api/dataaccess.rst
@@ -140,7 +140,7 @@ Header          Description
 Range           Download a specified byte range. Examples:
 
                 - ``bytes=0-9`` gets the first 10 bytes.
-                - ``bytes=10-19`` gets 20 bytes from the middle.
+                - ``bytes=10-19`` gets 10 bytes from the middle.
                 - ``bytes=-10`` gets the last 10 bytes.
                 - ``bytes=9-`` gets all bytes except the first 10.
 

From a62da5dcfe740630b1060ba84697734505f41e55 Mon Sep 17 00:00:00 2001
From: Leonid Andreev <leonid@hmdc.harvard.edu>
Date: Thu, 7 Oct 2021 23:00:11 -0400
Subject: [PATCH 18/26] as agreed - some cleanup: removing the old, unused
 legacy code for creating chunked-encoded stream. (#6937)

---
 .../dataverse/api/DownloadInstanceWriter.java | 37 +------------------
 1 file changed, 1 insertion(+), 36 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 3baeb099c40..47a17e475d9 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -403,10 +403,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         httpHeaders.add("Content-Type", mimeType + "; name=\"" + finalFileName + "\"");
 
                         long contentSize;
-                        // This is hard-coded to false and could probably be removed some day,
-                        // but for now it's illustrative.
-                        boolean useChunkedTransfer = false;
-
+                        
                         // User may have requested a range of bytes.
                         List<Range> ranges = new ArrayList<>();
                         long fileSize = storageIO.getSize();
@@ -445,18 +442,12 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         } else {
                             // Neither a range request, nor do we know the size. Must be dynamically
                             // generated, such as a subsetting request.
-                            // Note the chunk lines below are commented out because Payara will see
-                            // that the header is not present and switch into chunk mode.
-                            //httpHeaders.add("Transfer-encoding", "chunked");
-                            //useChunkedTransfer = true;
                         }
 
                         // (the httpHeaders map must be modified *before* writing any
                         // data in the output stream!)
                         int bufsize;
                         byte[] bffr = new byte[4 * 8192];
-                        byte[] chunkClose = "\r\n".getBytes();
-                                                
 
                         // before writing out any bytes from the input stream, flush
                         // any extra content, such as the variable header for the 
@@ -464,10 +455,6 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if (storageIO.getVarHeader() != null) {
                             logger.fine("storageIO.getVarHeader().getBytes().length: " + storageIO.getVarHeader().getBytes().length);
                             if (storageIO.getVarHeader().getBytes().length > 0) {
-                                //if (useChunkedTransfer) {
-                                //    String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
-                                //    outstream.write(chunkSizeLine.getBytes());
-                                //}
                                 // If a range is not being requested, let's call that the normal case.
                                 // Write the entire line of variable headers. Later, the rest of the file
                                 // will be written.
@@ -512,9 +499,6 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                         
                                     }
                                 }
-                                if (useChunkedTransfer) {
-                                    outstream.write(chunkClose);
-                                }
                             }
                         }
 
@@ -522,14 +506,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if (ranges.isEmpty()) {
                             logger.fine("Normal, non-range request of file id " + dataFile.getId());
                             while ((bufsize = instream.read(bffr)) != -1) {
-                                if (useChunkedTransfer) {
-                                    String chunkSizeLine = String.format("%x\r\n", bufsize);
-                                    outstream.write(chunkSizeLine.getBytes());
-                                }
                                 outstream.write(bffr, 0, bufsize);
-                                if (useChunkedTransfer) {
-                                    outstream.write(chunkClose);
-                                }
                             }
                         } else if (leftToRead > 0) {
                             // This is a range request, and we still have bytes to read 
@@ -545,10 +522,6 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             // For now we only support a single range.
                             //long leftToRead = ranges.get(0).getLength();
                             while ((bufsize = instream.read(bffr)) != -1) {
-                                if (useChunkedTransfer) {
-                                    String chunkSizeLine = String.format("%x\r\n", bufsize);
-                                    outstream.write(chunkSizeLine.getBytes());
-                                }
                                 if ((leftToRead -= bufsize) > 0) {
                                     // Just do a normal write. Potentially lots to go. Don't break.
                                     outstream.write(bffr, 0, bufsize);
@@ -557,18 +530,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                     outstream.write(bffr, 0, (int) leftToRead + bufsize);
                                     break;
                                 }
-                                if (useChunkedTransfer) {
-                                    outstream.write(chunkClose);
-                                }
                             }
 
                         }
 
-                        if (useChunkedTransfer) {
-                            String chunkClosing = "0\r\n\r\n";
-                            outstream.write(chunkClosing.getBytes());
-                        }
-
                         logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue());
 
                         // Downloads of thumbnail images (scaled down, low-res versions of graphic image files) and 

From 65f1704686395158010c50dcd1a329d9da81979a Mon Sep 17 00:00:00 2001
From: Leonid Andreev <leonid@hmdc.harvard.edu>
Date: Thu, 7 Oct 2021 23:36:14 -0400
Subject: [PATCH 19/26] final (hopefully) rearrangement of case logic (#6937)

---
 .../dataverse/api/DownloadInstanceWriter.java | 78 ++++++++++---------
 1 file changed, 43 insertions(+), 35 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 47a17e475d9..1b5aeaef30c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -404,44 +404,52 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
 
                         long contentSize;
                         
-                        // User may have requested a range of bytes.
+                        // User may have requested a rangeHeader of bytes.
+                        // Ranges are only supported when the size of the content 
+                        // stream is known (i.e., it's not a dynamically generated 
+                        // stream. 
                         List<Range> ranges = new ArrayList<>();
-                        long fileSize = storageIO.getSize();
-                        String range = null;
+                        String rangeHeader = null;
                         HttpHeaders headers = di.getRequestHttpHeaders();
                         if (headers != null) {
-                            range = headers.getHeaderString("Range");
+                            rangeHeader = headers.getHeaderString("Range");
                         }
                         long offset = 0;
                         long leftToRead = -1L; 
                         // Moving the "left to read" var. here; - since we may need 
-                        // to start counting our range bytes outside the main .write()
+                        // to start counting our rangeHeader bytes outside the main .write()
                         // loop, if it's a tabular file with a header. 
-                        try {
-                            ranges = getRanges(range, fileSize);
-                        } catch (Exception ex) {
-                            logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
-                            // The message starts with "Datafile" because otherwise the message is not passed
-                            // to the user due to how WebApplicationExceptionHandler works.
-                            throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
-                        }
-                        if (!ranges.isEmpty()) {
-                            // For now we only support a single range.
-                            offset = ranges.get(0).getStart();
-                            leftToRead = ranges.get(0).getLength();
-                        }
                         
-                        if ((contentSize = getContentSize(storageIO)) > 0 && ranges.isEmpty()) {
-                            logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
-                            httpHeaders.add("Content-Length", contentSize);
-                        } else if (!ranges.isEmpty()) {
-                            // For now we only support a single range.
-                            long rangeContentSize = ranges.get(0).getLength();
-                            logger.fine("Content size (Range header in use): " + rangeContentSize);
-                            httpHeaders.add("Content-Length", rangeContentSize);
+                        if ((contentSize = getContentSize(storageIO)) > 0) {
+                            try {
+                                ranges = getRanges(rangeHeader, contentSize);
+                            } catch (Exception ex) {
+                                logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
+                                // The message starts with "Datafile" because otherwise the message is not passed
+                                // to the user due to how WebApplicationExceptionHandler works.
+                                throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
+                            }
+                            
+                            if (ranges.isEmpty()) {
+                                logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
+                                httpHeaders.add("Content-Length", contentSize);
+                            } else  {
+                                // For now we only support a single rangeHeader.
+                                long rangeContentSize = ranges.get(0).getLength();
+                                logger.fine("Content size (Range header in use): " + rangeContentSize);
+                                httpHeaders.add("Content-Length", rangeContentSize);
+                                
+                                offset = ranges.get(0).getStart();
+                                leftToRead = rangeContentSize;
+                            }
                         } else {
-                            // Neither a range request, nor do we know the size. Must be dynamically
-                            // generated, such as a subsetting request.
+                            // Content size unknown, must be a dynamically
+                            // generated stream, such as a subsetting request.
+                            // We do NOT want to support rangeHeader requests on such streams:
+                            if (rangeHeader != null) {
+                                throw new NotFoundException("Range headers are not supported on dynamically-generated content, such as tabular subsetting.");
+                            }
+                            
                         }
 
                         // (the httpHeaders map must be modified *before* writing any
@@ -455,7 +463,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         if (storageIO.getVarHeader() != null) {
                             logger.fine("storageIO.getVarHeader().getBytes().length: " + storageIO.getVarHeader().getBytes().length);
                             if (storageIO.getVarHeader().getBytes().length > 0) {
-                                // If a range is not being requested, let's call that the normal case.
+                                // If a rangeHeader is not being requested, let's call that the normal case.
                                 // Write the entire line of variable headers. Later, the rest of the file
                                 // will be written.
                                 if (ranges.isEmpty()) {
@@ -465,7 +473,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                     // Range requested. Since the output stream of a 
                                     // tabular file is made up of the varHeader and the body of 
                                     // the physical file, we should assume that the requested 
-                                    // range may span any portion of the combined stream.
+                                    // rangeHeader may span any portion of the combined stream.
                                     // Thus we may or may not have to write the header, or a 
                                     // portion thereof. 
                                     int headerLength = storageIO.getVarHeader().getBytes().length;
@@ -509,7 +517,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                 outstream.write(bffr, 0, bufsize);
                             }
                         } else if (leftToRead > 0) {
-                            // This is a range request, and we still have bytes to read 
+                            // This is a rangeHeader request, and we still have bytes to read 
                             // (for a tabular file, we may have already written enough
                             // bytes from the variable header!)
                             storageIO.setOffset(offset);
@@ -518,8 +526,8 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             // in StorageIO, for any future cases where we may not 
                             // be able to do that on the stream directly (?) -- L.A.
                             logger.info("Range request of file id " + dataFile.getId());
-                            // Read a range of bytes instead of the whole file. We'll count down as we write.
-                            // For now we only support a single range.
+                            // Read a rangeHeader of bytes instead of the whole file. We'll count down as we write.
+                            // For now we only support a single rangeHeader.
                             //long leftToRead = ranges.get(0).getLength();
                             while ((bufsize = instream.read(bffr)) != -1) {
                                 if ((leftToRead -= bufsize) > 0) {
@@ -678,14 +686,14 @@ private long getFileSize(DownloadInstance di, String extraHeader) {
      */
     public List<Range> getRanges(String range, long fileSize) {
         // Inspired by https://gist.github.com/davinkevin/b97e39d7ce89198774b4
-        // via https://stackoverflow.com/questions/28427339/how-to-implement-http-byte-range-requests-in-spring-mvc/28479001#28479001
+        // via https://stackoverflow.com/questions/28427339/how-to-implement-http-byte-rangeHeader-requests-in-spring-mvc/28479001#28479001
         List<Range> ranges = new ArrayList<>();
 
         if (range != null) {
             logger.fine("Range header supplied: " + range);
 
             // Technically this regex supports multiple ranges.
-            // Below we have a check to enforce a single range.
+            // Below we have a check to enforce a single rangeHeader.
             if (!range.matches("^bytes=\\d*-\\d*(,\\d*-\\d*)*$")) {
                 throw new RuntimeException("The format is bytes=<range-start>-<range-end> where start and end are optional.");
             }

From cea1a140f9fa6beceb9e8148f33529e3b71950d0 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 8 Oct 2021 10:47:11 -0400
Subject: [PATCH 20/26] enable "last bytes from tabular" test (code now works)
 #6937

---
 .../java/edu/harvard/iq/dataverse/api/FilesIT.java   | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index c951c5667c5..66d720da411 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1755,13 +1755,11 @@ public void testRange() throws IOException {
                 .body(equalTo("name\tpounds\tspecies\n"
                         + "\"Marshall\""));
 
-//        // FIXME: This should be something like "er 21 cat" (the last line) like downloadOrigLastTen
-//        // FIXME: but right now it's showing "17 cat" (the second to last line).
-//        // last 10 bytes of tabular format
-//        Response downloadTabLast10 = UtilIT.downloadFile(fileIdCsv, "-10", null, null, authorApiToken);
-//        downloadTabLast10.then().assertThat()
-//                .statusCode(OK.getStatusCode())
-//                .body(equalTo("er\"\t21\t\"cat\"\n"));
+        // last 16 bytes of tabular format
+        Response downloadTabLast16 = UtilIT.downloadFile(fileIdCsv, "-16", null, null, authorApiToken);
+        downloadTabLast16.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body(equalTo("nther\"\t21\t\"cat\"\n"));
 
         Response downloadTabMiddleBytesHeader = UtilIT.downloadFile(fileIdCsv, "1-7", null, null, authorApiToken);
         downloadTabMiddleBytesHeader.then().assertThat()

From bdec095c2dbf9d576c71f2f7829a4b0f05f09c44 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 8 Oct 2021 10:48:03 -0400
Subject: [PATCH 21/26] reduce logging for range code #6937

---
 .../iq/dataverse/api/DownloadInstanceWriter.java       | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 1b5aeaef30c..4bf4bb92125 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -467,7 +467,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                 // Write the entire line of variable headers. Later, the rest of the file
                                 // will be written.
                                 if (ranges.isEmpty()) {
-                                    logger.info("writing the entire variable header");
+                                    logger.fine("writing the entire variable header");
                                     outstream.write(storageIO.getVarHeader().getBytes());
                                 } else {
                                     // Range requested. Since the output stream of a 
@@ -483,7 +483,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                         // in the physical file; the number of bytes
                                         // left to write stays unchanged, since we haven't
                                         // written anything.
-                                        logger.info("Skipping the variable header completely.");
+                                        logger.fine("Skipping the variable header completely.");
                                         offset -= headerLength;
                                     } else {
                                         // We need to write some portion of the header; 
@@ -492,13 +492,13 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                         if (offset + leftToRead <= headerLength) {
                                             // This is a more straightforward case - we just need to 
                                             // write a portion of the header, and then we are done!
-                                            logger.info("Writing this many bytes of the variable header line: " + leftToRead);
+                                            logger.fine("Writing this many bytes of the variable header line: " + leftToRead);
                                             outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, (int)offset + (int)leftToRead));
                                             // set "left to read" to zero, indicating that we are done:
                                             leftToRead = 0; 
                                         } else {
                                             // write the requested portion of the header:
-                                            logger.info("Writing this many bytes of the variable header line: " + (headerLength - offset));
+                                            logger.fine("Writing this many bytes of the variable header line: " + (headerLength - offset));
                                             outstream.write(Arrays.copyOfRange(storageIO.getVarHeader().getBytes(), (int)offset, headerLength));
                                             // and adjust the file offset and remaining number of bytes accordingly: 
                                             leftToRead -= (headerLength - offset);
@@ -525,7 +525,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             // here... But I would like to have this offset functionality 
                             // in StorageIO, for any future cases where we may not 
                             // be able to do that on the stream directly (?) -- L.A.
-                            logger.info("Range request of file id " + dataFile.getId());
+                            logger.fine("Range request of file id " + dataFile.getId());
                             // Read a rangeHeader of bytes instead of the whole file. We'll count down as we write.
                             // For now we only support a single rangeHeader.
                             //long leftToRead = ranges.get(0).getLength();

From b688d94c8439dd70ce0aeefbd5124d753c798159 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 8 Oct 2021 12:09:00 -0400
Subject: [PATCH 22/26] more tests and cleanup #6937

---
 .../dataverse/api/DownloadInstanceWriter.java |  3 +-
 .../api/DownloadInstanceWriterTest.java       | 13 ++++
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 69 +++----------------
 3 files changed, 24 insertions(+), 61 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index 4bf4bb92125..d64a6065bba 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -457,7 +457,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                         int bufsize;
                         byte[] bffr = new byte[4 * 8192];
 
-                        // before writing out any bytes from the input stream, flush
+                        // Before writing out any bytes from the input stream, write
                         // any extra content, such as the variable header for the 
                         // subsettable files: 
                         if (storageIO.getVarHeader() != null) {
@@ -528,7 +528,6 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                             logger.fine("Range request of file id " + dataFile.getId());
                             // Read a rangeHeader of bytes instead of the whole file. We'll count down as we write.
                             // For now we only support a single rangeHeader.
-                            //long leftToRead = ranges.get(0).getLength();
                             while ((bufsize = instream.read(bffr)) != -1) {
                                 if ((leftToRead -= bufsize) > 0) {
                                     // Just do a normal write. Potentially lots to go. Don't break.
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
index a59bc497c8b..56a94883e16 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
@@ -75,6 +75,19 @@ public void testGetRangeInvalidStartLargerThanEnd() {
         assertNotNull(expectedException);
     }
 
+    // Attempt to get invalid range (multiple ranges).
+    @Test
+    public void testGetRangeInvalidMultipleRanges() {
+        Exception expectedException = null;
+        try {
+            List<Range> ranges = diw.getRanges("bytes=0-9,90-99", 100);
+        } catch (Exception ex) {
+            System.out.println("exeption: " + ex);
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+    }
+
     // Test "junk" instead of "bytes=0-10"
     @Test
     public void testGetRangeInvalidJunk() {
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index 66d720da411..d76294f7a28 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -1585,65 +1585,6 @@ private void msgt(String m){
         dashes(); msg(m); dashes();
     }
 
-    @Ignore
-    @Test
-    public void test2toNull() {
-        Integer localFile = 15;
-        Integer s3file = 16;
-        Integer fileId = 0;
-        fileId = localFile;
-        String nullApiToken = null;
-        String nullFormat = null;
-        String nullThumbnail = null;
-        String byteRange = null;
-        byteRange = "2-";
-        // java.lang.IndexOutOfBoundsException: Range [0, 0 + -2) out of bounds for length 32768
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullFormat, nullThumbnail, nullApiToken);
-        downloadFile.prettyPrint();
-    }
-
-    @Ignore
-    @Test
-    public void test0to100() {
-        Integer localFile = 16;
-        Integer tabularLocalFile = 156;
-        Integer s3file = 11;
-        Integer fileId = 0;
-        fileId = localFile;
-//        fileId = s3file;
-        fileId = tabularLocalFile;
-        String nullApiToken = null;
-        String format = null;
-        format = "original";
-        String nullThumbnail = null;
-        String byteRange = null;
-//        byteRange = "0-9"; // first ten
-        byteRange = "0-150"; 
-//        byteRange = "-9"; // last ten
-//        byteRange = "9-"; // last ten
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, format, nullThumbnail, nullApiToken);
-        downloadFile.prettyPrint();
-    }
-
-    // test Range header with thumbnails, etc.
-    
-    @Ignore
-    @Test
-    public void testMultipleRanges() {
-        Integer localFile = 16;
-        Integer s3file = 11;
-        Integer fileId = 0;
-        fileId = localFile;
-//        fileId = s3file;
-        String nullApiToken = null;
-        String nullFormat = null;
-        String nullThumbnail = null;
-        String byteRange = null;
-        byteRange = "0-9,90-99";
-        Response downloadFile = UtilIT.downloadFile(fileId, byteRange, nullFormat, nullThumbnail, nullApiToken);
-        downloadFile.prettyPrint();
-    }    
-
     @Test
     public void testRange() throws IOException {
 
@@ -1820,6 +1761,16 @@ public void testRange() throws IOException {
 //        downloadThumbnail.prettyPrint();
         downloadThumbnail.then().assertThat().statusCode(OK.getStatusCode());
 
+        Response multipleRangesNotSupported = UtilIT.downloadFile(fileIdTxt, "0-9,20-29", null, null, authorApiToken);
+        // Datafile download error due to Range header: Only one range is allowed.
+//        multipleRangesNotSupported.prettyPrint();
+        multipleRangesNotSupported.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        Response startLargerThanEndError = UtilIT.downloadFile(fileIdTxt, "20-10", null, null, authorApiToken);
+        // Datafile download error due to Range header: Start is larger than end.
+        startLargerThanEndError.prettyPrint();
+        startLargerThanEndError.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
 //        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken);
 //        publishDataverse.then().assertThat().statusCode(OK.getStatusCode());
 //        Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", authorApiToken);

From eba6563854dba8698836d22c2142997910939b5b Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Thu, 14 Oct 2021 16:40:56 -0400
Subject: [PATCH 23/26] add curl example for Range header #6937

---
 doc/sphinx-guides/source/api/dataaccess.rst | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst
index 3f99740f267..c22b1d8c442 100755
--- a/doc/sphinx-guides/source/api/dataaccess.rst
+++ b/doc/sphinx-guides/source/api/dataaccess.rst
@@ -147,6 +147,25 @@ Range           Download a specified byte range. Examples:
                 Only a single range is supported. The "If-Range" header is not supported. For more on the "Range" header, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
 ==============  ===========
 
+Examples
+~~~~~~~~
+
+A curl example of using the ``Range`` header to download the first 10 bytes of a file using its file id (database id):
+
+.. code-block:: bash
+
+  export SERVER_URL=https://demo.dataverse.org
+  export FILE_ID=42
+  export RANGE=0-9
+
+  curl -H "Range:bytes=$RANGE" $SERVER_URL/api/access/datafile/$FILE_ID
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -H "Range:bytes=0-9" https://demo.dataverse.org/api/access/datafile/42
+
 Multiple File ("bundle") download
 ---------------------------------
 

From 3024d6766725c6eab8f39215fc3bbad5939ced34 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 15 Oct 2021 11:19:23 -0400
Subject: [PATCH 24/26] clean up range header error handling #6937

Most importantly, this commit introduces using status code 416
(Range Not Satisfiable) when the range is invalid.
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range
says, "If the ranges are invalid, the server returns the 416 Range Not
Satisfiable error."

I also rearranged the logic around testing for a single range. Now it
fails fast and reports that only one range is supported rather than
potentially showing an error about the range being beyond the file size,
for example.

I also changed the error message
"Start is larger than end" to
"Start is larger than end or size of file" to more accurately reflect
what the check is doing.

Finally, I did some cleanup and added additional tests.
---
 .../dataverse/api/DownloadInstanceWriter.java | 23 ++++++++--------
 .../api/DownloadInstanceWriterTest.java       | 27 ++++++++++++++++---
 .../edu/harvard/iq/dataverse/api/FilesIT.java | 16 +++++++----
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index d64a6065bba..80b3f988953 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -41,6 +41,7 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.inject.Inject;
+import javax.ws.rs.ClientErrorException;
 import javax.ws.rs.NotFoundException;
 import javax.ws.rs.RedirectionException;
 import javax.ws.rs.ServiceUnavailableException;
@@ -425,9 +426,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
                                 ranges = getRanges(rangeHeader, contentSize);
                             } catch (Exception ex) {
                                 logger.fine("Exception caught processing Range header: " + ex.getLocalizedMessage());
-                                // The message starts with "Datafile" because otherwise the message is not passed
-                                // to the user due to how WebApplicationExceptionHandler works.
-                                throw new NotFoundException("Datafile download error due to Range header: " + ex.getLocalizedMessage());
+                                throw new ClientErrorException("Error due to Range header: " + ex.getLocalizedMessage(), Response.Status.REQUESTED_RANGE_NOT_SATISFIABLE);
                             }
                             
                             if (ranges.isEmpty()) {
@@ -692,13 +691,19 @@ public List<Range> getRanges(String range, long fileSize) {
             logger.fine("Range header supplied: " + range);
 
             // Technically this regex supports multiple ranges.
-            // Below we have a check to enforce a single rangeHeader.
+            // Below we have a check to enforce a single range.
             if (!range.matches("^bytes=\\d*-\\d*(,\\d*-\\d*)*$")) {
                 throw new RuntimeException("The format is bytes=<range-start>-<range-end> where start and end are optional.");
             }
 
             // The 6 is to remove "bytes="
-            for (String part : range.substring(6).split(",")) {
+            String[] parts = range.substring(6).split(",");
+            if (parts.length > 1) {
+                // Only allow a single range.
+                throw new RuntimeException("Only one range is allowed.");
+            }
+            // This loop is here in case we ever want to support multiple ranges.
+            for (String part : parts) {
 
                 long start = getRangeStart(part);
                 long end = getRangeEnd(part);
@@ -715,14 +720,10 @@ public List<Range> getRanges(String range, long fileSize) {
                 }
 
                 if (start > end) {
-                    throw new RuntimeException("Start is larger than end.");
+                    throw new RuntimeException("Start is larger than end or size of file.");
                 }
 
-                if (ranges.size() < 1) {
-                    ranges.add(new Range(start, end));
-                } else {
-                    throw new RuntimeException("Only one range is allowed.");
-                }
+                ranges.add(new Range(start, end));
 
             }
         }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
index 56a94883e16..6de52951077 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriterTest.java
@@ -69,7 +69,8 @@ public void testGetRangeInvalidStartLargerThanEnd() {
         try {
             List<Range> ranges = diw.getRanges("bytes=20-10", 100);
         } catch (Exception ex) {
-            System.out.println("exeption: " + ex);
+            // "Start is larger than end or size of file."
+            System.out.println("exception: " + ex);
             expectedException = ex;
         }
         assertNotNull(expectedException);
@@ -82,7 +83,23 @@ public void testGetRangeInvalidMultipleRanges() {
         try {
             List<Range> ranges = diw.getRanges("bytes=0-9,90-99", 100);
         } catch (Exception ex) {
-            System.out.println("exeption: " + ex);
+            // "Only one range is allowed."
+            System.out.println("exception: " + ex);
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+    }
+
+    // Attempt to get invalid range (multiple ranges, beyond file size).
+    @Test
+    public void testGetRangeInvalidMultipleRangesBeyondFileSize() {
+        Exception expectedException = null;
+        try {
+            List<Range> ranges = diw.getRanges("bytes=0-9,90-99", 40);
+        } catch (Exception ex) {
+            // "Only one range is allowed."
+            // We report the multiple ranges error before reporting the "beyond filesize" error.
+            System.out.println("exception: " + ex);
             expectedException = ex;
         }
         assertNotNull(expectedException);
@@ -95,7 +112,8 @@ public void testGetRangeInvalidJunk() {
         try {
             List<Range> ranges = diw.getRanges("junk", 100);
         } catch (Exception ex) {
-            System.out.println("exeption: " + ex);
+            // "The format is bytes=<range-start>-<range-end> where start and end are optional."
+            System.out.println("exception: " + ex);
             expectedException = ex;
         }
         assertNotNull(expectedException);
@@ -120,7 +138,8 @@ public void testGetRanges0to0and90toNull() {
             assertEquals(99, ranges.get(1).getEnd());
             assertEquals(10, ranges.get(1).getLength());
         } catch (Exception ex) {
-            System.out.println("exeption: " + ex);
+            // Only one range is allowed.
+            System.out.println("exception: " + ex);
             expectedException = ex;
         }
         assertNotNull(expectedException);
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
index d76294f7a28..b82514ce083 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
@@ -31,6 +31,7 @@
 import static javax.ws.rs.core.Response.Status.NOT_FOUND;
 import static javax.ws.rs.core.Response.Status.NO_CONTENT;
 import static javax.ws.rs.core.Response.Status.OK;
+import static javax.ws.rs.core.Response.Status.REQUESTED_RANGE_NOT_SATISFIABLE;
 import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
 import static junit.framework.Assert.assertEquals;
 import org.hamcrest.CoreMatchers;
@@ -1762,14 +1763,19 @@ public void testRange() throws IOException {
         downloadThumbnail.then().assertThat().statusCode(OK.getStatusCode());
 
         Response multipleRangesNotSupported = UtilIT.downloadFile(fileIdTxt, "0-9,20-29", null, null, authorApiToken);
-        // Datafile download error due to Range header: Only one range is allowed.
-//        multipleRangesNotSupported.prettyPrint();
-        multipleRangesNotSupported.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+        // "Error due to Range header: Only one range is allowed."
+        multipleRangesNotSupported.prettyPrint();
+        multipleRangesNotSupported.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode());
 
         Response startLargerThanEndError = UtilIT.downloadFile(fileIdTxt, "20-10", null, null, authorApiToken);
-        // Datafile download error due to Range header: Start is larger than end.
+        // "Error due to Range header: Start is larger than end or size of file."
         startLargerThanEndError.prettyPrint();
-        startLargerThanEndError.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+        startLargerThanEndError.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode());
+
+        Response rangeBeyondFileSize = UtilIT.downloadFile(fileIdTxt, "88888-99999", null, null, authorApiToken);
+        // "Error due to Range header: Start is larger than end or size of file."
+        rangeBeyondFileSize.prettyPrint();
+        rangeBeyondFileSize.then().assertThat().statusCode(REQUESTED_RANGE_NOT_SATISFIABLE.getStatusCode());
 
 //        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, authorApiToken);
 //        publishDataverse.then().assertThat().statusCode(OK.getStatusCode());

From 22d5ffb89188deab4b3ee3a89c1e5420ec5ea9b6 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Fri, 22 Oct 2021 16:25:46 -0400
Subject: [PATCH 25/26] move logic to getLength per code review #6937

---
 .../java/edu/harvard/iq/dataverse/dataaccess/Range.java     | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
index e07d6aaae94..2021219b1a9 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/Range.java
@@ -6,13 +6,10 @@ public class Range {
     private final long start;
     // Used to calculate the length.
     private final long end;
-    // Used to determine when to stop reading.
-    private final long length;
 
     public Range(long start, long end) {
         this.start = start;
         this.end = end;
-        this.length = end - start + 1;
     }
 
     public long getStart() {
@@ -23,8 +20,9 @@ public long getEnd() {
         return end;
     }
 
+    // Used to determine when to stop reading.
     public long getLength() {
-        return length;
+        return end - start + 1;
     }
 
 }

From 07ebf9a431880ac304bb75799bfd1afb77502184 Mon Sep 17 00:00:00 2001
From: Philip Durbin <philip_durbin@harvard.edu>
Date: Mon, 8 Nov 2021 08:56:09 -0500
Subject: [PATCH 26/26] specify 5.9 as version #6937

---
 doc/release-notes/6937-range.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/release-notes/6937-range.md b/doc/release-notes/6937-range.md
index 2c592d0c251..94a12ae704c 100644
--- a/doc/release-notes/6937-range.md
+++ b/doc/release-notes/6937-range.md
@@ -7,4 +7,4 @@ Dataverse now supports the HTTP "Range" header, which allows users to download p
 - `bytes=-10` gets the last 10 bytes.
 - `bytes=9-` gets all bytes except the first 10.
 
-Only a single range is supported. For more information, see the [Data Access API](https://guides.dataverse.org/en/latest/api/dataaccess.html) section of the API Guide.
+Only a single range is supported. For more information, see the [Data Access API](https://guides.dataverse.org/en/5.9/api/dataaccess.html) section of the API Guide.