diff --git a/rust/parquet/src/lib.rs b/rust/parquet/src/lib.rs index 8d23e89c3d0..07c72adeccb 100644 --- a/rust/parquet/src/lib.rs +++ b/rust/parquet/src/lib.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#![feature(bufreader_seek_relative)] +#![feature(seek_convenience)] #![feature(specialization)] #![allow(dead_code)] #![allow(non_camel_case_types)] diff --git a/rust/parquet/src/util/io.rs b/rust/parquet/src/util/io.rs index b36a710722f..3da95e411d3 100644 --- a/rust/parquet/src/util/io.rs +++ b/rust/parquet/src/util/io.rs @@ -47,8 +47,10 @@ pub struct FileSource { impl FileSource { /// Creates new file reader with start and length from a file handle pub fn new(fd: &R, start: u64, length: usize) -> Self { + let reader = BufReader::new(fd.try_clone().unwrap()); + Self { - reader: Mutex::new(BufReader::new(fd.try_clone().unwrap())), + reader: Mutex::new(reader), start, end: start + length as u64, } @@ -65,7 +67,14 @@ impl Read for FileSource { let bytes_to_read = cmp::min(buf.len(), (self.end - self.start) as usize); let buf = &mut buf[0..bytes_to_read]; - reader.seek(SeekFrom::Start(self.start as u64))?; + let pos = reader.stream_position()?; + let seek_offset = self.start as i64 - pos as i64; + if seek_offset != 0 { + // BufReader::seek will discard its internal buffer on every seek. + // Using seek_relative will retain the buffer if the seek position + // lands within the buffer bounds. + reader.seek_relative(seek_offset)?; + } let res = reader.read(buf); if let Ok(bytes_read) = res { self.start += bytes_read as u64;