From b2eb4d906ec1d8e832fe9f642eeed55dcc073b60 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 11:40:34 +0800 Subject: [PATCH 01/46] Save work Signed-off-by: Xuanwo --- core/src/layers/complete.rs | 12 ++++++------ core/src/raw/oio/read/into_seekable_read_by_range.rs | 10 ++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index 82e8066e4424..36502fff6cfa 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -116,10 +116,10 @@ use crate::*; pub struct CompleteLayer; impl Layer for CompleteLayer { - type LayeredAccessor = CompleteReaderAccessor; + type LayeredAccessor = CompleteAccessor; fn layer(&self, inner: A) -> Self::LayeredAccessor { - CompleteReaderAccessor { + CompleteAccessor { meta: inner.info(), inner: Arc::new(inner), } @@ -127,18 +127,18 @@ impl Layer for CompleteLayer { } /// Provide complete wrapper for backend. -pub struct CompleteReaderAccessor { +pub struct CompleteAccessor { meta: AccessorInfo, inner: Arc, } -impl Debug for CompleteReaderAccessor { +impl Debug for CompleteAccessor { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.inner.fmt(f) } } -impl CompleteReaderAccessor { +impl CompleteAccessor { fn new_unsupported_error(&self, op: impl Into<&'static str>) -> Error { let scheme = self.meta.scheme(); let op = op.into(); @@ -351,7 +351,7 @@ impl CompleteReaderAccessor { } #[async_trait] -impl LayeredAccessor for CompleteReaderAccessor { +impl LayeredAccessor for CompleteAccessor { type Inner = A; type Reader = CompleteReader; type BlockingReader = CompleteReader; diff --git a/core/src/raw/oio/read/into_seekable_read_by_range.rs b/core/src/raw/oio/read/into_seekable_read_by_range.rs index 26ac62ba79d9..5b0c28823c91 100644 --- a/core/src/raw/oio/read/into_seekable_read_by_range.rs +++ b/core/src/raw/oio/read/into_seekable_read_by_range.rs @@ -46,17 +46,17 @@ use crate::*; pub fn into_seekable_read_by_range( acc: 
Arc, path: &str, - reader: R, - offset: u64, - size: u64, + op: OpRead, ) -> ByRangeSeekableReader { ByRangeSeekableReader { acc, path: path.to_string(), + op, + offset, size, cur: 0, - state: State::Reading(reader), + state: State::::Idle, last_seek_pos: None, } } @@ -65,6 +65,7 @@ pub fn into_seekable_read_by_range( pub struct ByRangeSeekableReader { acc: Arc, path: String, + op: OpRead, offset: u64, size: u64, @@ -81,6 +82,7 @@ pub struct ByRangeSeekableReader { enum State { Idle, + Stating(BoxFuture<'static, Result>), Sending(BoxFuture<'static, Result<(RpRead, R)>>), Reading(R), } From 3556f64151e833085334389f4e6eae0d56aea034 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 17:30:32 +0800 Subject: [PATCH 02/46] refactor by range Signed-off-by: Xuanwo --- core/src/layers/complete.rs | 49 +-- .../oio/read/into_seekable_read_by_range.rs | 396 +++++++++++++----- core/src/raw/ops.rs | 17 + 3 files changed, 323 insertions(+), 139 deletions(-) diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index 36502fff6cfa..c0fe7b4809ff 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -162,9 +162,7 @@ impl CompleteAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let range = args.range(); - let (rp, r) = self.inner.read(path, args).await?; - let content_length = rp.metadata().content_length(); + let (rp, r) = self.inner.read(path, args.clone()).await?; match (seekable, streamable) { (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), @@ -173,24 +171,7 @@ impl CompleteAccessor { Ok((rp, CompleteReader::NeedStreamable(r))) } _ => { - let (offset, size) = match (range.offset(), range.size()) { - (Some(offset), _) => (offset, content_length), - (None, None) => (0, content_length), - (None, Some(size)) => { - // TODO: we can read content range to calculate - // the total content length. 
- let om = self.inner.stat(path, OpStat::new()).await?.into_metadata(); - let total_size = om.content_length(); - let (offset, size) = if size > total_size { - (0, total_size) - } else { - (total_size - size, size) - }; - - (offset, size) - } - }; - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, r, offset, size); + let r = oio::into_seekable_read_by_range(self.inner.clone(), path, args); if streamable { Ok((rp, CompleteReader::NeedSeekable(r))) @@ -215,9 +196,7 @@ impl CompleteAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let range = args.range(); - let (rp, r) = self.inner.blocking_read(path, args)?; - let content_length = rp.metadata().content_length(); + let (rp, r) = self.inner.blocking_read(path, args.clone())?; match (seekable, streamable) { (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), @@ -226,27 +205,7 @@ impl CompleteAccessor { Ok((rp, CompleteReader::NeedStreamable(r))) } _ => { - let (offset, size) = match (range.offset(), range.size()) { - (Some(offset), _) => (offset, content_length), - (None, None) => (0, content_length), - (None, Some(size)) => { - // TODO: we can read content range to calculate - // the total content length. - let om = self - .inner - .blocking_stat(path, OpStat::new())? 
- .into_metadata(); - let total_size = om.content_length(); - let (offset, size) = if size > total_size { - (0, total_size) - } else { - (total_size - size, size) - }; - - (offset, size) - } - }; - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, r, offset, size); + let r = oio::into_seekable_read_by_range(self.inner.clone(), path, args); if streamable { Ok((rp, CompleteReader::NeedSeekable(r))) diff --git a/core/src/raw/oio/read/into_seekable_read_by_range.rs b/core/src/raw/oio/read/into_seekable_read_by_range.rs index 5b0c28823c91..2cfb0c2de8e5 100644 --- a/core/src/raw/oio/read/into_seekable_read_by_range.rs +++ b/core/src/raw/oio/read/into_seekable_read_by_range.rs @@ -48,9 +48,15 @@ pub fn into_seekable_read_by_range( path: &str, op: OpRead, ) -> ByRangeSeekableReader { + // Normalize range like `..` into `0..` to make sure offset is valid. + let (offset, size) = match (op.range().offset(), op.range().size()) { + (None, None) => (Some(0), None), + v => v, + }; + ByRangeSeekableReader { acc, - path: path.to_string(), + path: Arc::new(path.to_string()), op, offset, @@ -64,11 +70,11 @@ pub fn into_seekable_read_by_range( /// ByRangeReader that can do seek on non-seekable reader. pub struct ByRangeSeekableReader { acc: Arc, - path: String, + path: Arc, op: OpRead, - offset: u64, - size: u64, + offset: Option, + size: Option, cur: u64, state: State, @@ -82,9 +88,9 @@ pub struct ByRangeSeekableReader { enum State { Idle, - Stating(BoxFuture<'static, Result>), - Sending(BoxFuture<'static, Result<(RpRead, R)>>), - Reading(R), + SendStat(BoxFuture<'static, Result>), + SendRead(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), } /// Safety: State will only be accessed under &mut. @@ -94,30 +100,49 @@ impl ByRangeSeekableReader where A: Accessor, { - /// calculate the seek position. - /// - /// This operation will not update the `self.cur`. 
- fn seek_pos(&self, pos: SeekFrom) -> Result { - if let Some(last_pos) = self.last_seek_pos { - return Ok(last_pos); - } + /// Fill current reader's range by total_size. + fn fill_range(&mut self, total_size: u64) -> Result<()> { + (self.offset, self.size) = match (self.offset, self.size) { + (None, Some(size)) => { + if size > total_size { + return Err(Error::new( + ErrorKind::InvalidInput, + "read to a negative or overflowing position is invalid", + )); + } - let (base, amt) = match pos { - SeekFrom::Start(n) => (0, n as i64), - SeekFrom::End(n) => (self.size as i64, n), - SeekFrom::Current(n) => (self.cur as i64, n), - }; + (Some(total_size - size), Some(size)) + } + (Some(offset), None) => { + // It's valid for reader to seek to a position that out of the content length. + // We should return `Ok(0)` instead of an error at this case to align fs behavior. + let size = total_size.checked_sub(offset).unwrap_or_default(); - let n = match base.checked_add(amt) { - Some(n) if n >= 0 => n as u64, - _ => { - return Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )) + (Some(offset), Some(size)) + } + (Some(offset), Some(size)) => (Some(offset), Some(size)), + (None, None) => { + unreachable!("fill_range should not reach this case after normalization") } }; - Ok(n) + + Ok(()) + } + + /// Calculate the current range, maybe sent as next read request. + /// + /// # Panics + /// + /// Offset must be normalized before calling this function. + /// + /// - `..` should be transformed into `0..` + /// - `..size` should be transformed into `(total-size)..total`. 
+ fn calculate_range(&self) -> BytesRange { + let offset = self + .offset + .expect("offset must be set before calculating range"); + + BytesRange::new(Some(offset + self.cur), self.size.map(|v| v - self.cur)) } } @@ -129,13 +154,37 @@ where fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { let acc = self.acc.clone(); let path = self.path.clone(); - let op = OpRead::default().with_range(BytesRange::new( - Some(self.offset + self.cur), - Some(self.size - self.cur), - )); + + let mut op = self.op.clone(); + // cur != 0 means we have read some data out, we should convert + // the op into deterministic to avoid ETag changes. + if self.cur != 0 { + op = op.into_deterministic(); + } + // Alter OpRead with correct calculated range. + op = op.with_range(self.calculate_range()); Box::pin(async move { acc.read(&path, op).await }) } + + fn stat_future(&self) -> BoxFuture<'static, Result> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // Handle if-match and if-none-match correctly. + let mut args = OpStat::default(); + // TODO: stat should support range to check if ETag matches. + if self.op.range().is_full() { + if let Some(v) = self.op.if_match() { + args = args.with_if_match(v); + } + if let Some(v) = self.op.if_none_match() { + args = args.with_if_none_match(v); + } + } + + Box::pin(async move { acc.stat(&path, args).await }) + } } impl ByRangeSeekableReader @@ -146,13 +195,37 @@ where fn read_action(&self) -> Result<(RpRead, R)> { let acc = self.acc.clone(); let path = self.path.clone(); - let op = OpRead::default().with_range(BytesRange::new( - Some(self.offset + self.cur), - Some(self.size - self.cur), - )); + + let mut op = self.op.clone(); + // cur != 0 means we have read some data out, we should convert + // the op into deterministic to avoid ETag changes. + if self.cur != 0 { + op = op.into_deterministic(); + } + // Alter OpRead with correct calculated range. 
+ op = op.with_range(self.calculate_range()); acc.blocking_read(&path, op) } + + fn stat_action(&self) -> Result { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // Handle if-match and if-none-match correctly. + let mut args = OpStat::default(); + // TODO: stat should support range to check if ETag matches. + if self.op.range().is_full() { + if let Some(v) = self.op.if_match() { + args = args.with_if_match(v); + } + if let Some(v) = self.op.if_none_match() { + args = args.with_if_none_match(v); + } + } + + acc.blocking_stat(&path, args) + } } impl oio::Read for ByRangeSeekableReader @@ -163,17 +236,41 @@ where fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { match &mut self.state { State::Idle => { - if self.cur >= self.size { + // Sanity check for normal cases. + if buf.is_empty() || self.cur > self.size.unwrap_or(u64::MAX) { return Poll::Ready(Ok(0)); } - self.state = State::Sending(self.read_future()); + self.state = if self.offset.is_none() { + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + State::SendStat(self.stat_future()) + } else { + State::SendRead(self.read_future()) + }; + + self.poll_read(cx, buf) + } + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + self.state = State::Idle; self.poll_read(cx, buf) } - State::Sending(fut) => { - // TODO - // - // we can use RpRead returned here to correct size. 
+ State::SendRead(fut) => { let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { // If read future returns an error, we should reset // state to Idle so that we can retry it. @@ -181,10 +278,10 @@ where err })?; - self.state = State::Reading(r); + self.state = State::Read(r); self.poll_read(cx, buf) } - State::Reading(r) => match ready!(Pin::new(r).poll_read(cx, buf)) { + State::Read(r) => match ready!(Pin::new(r).poll_read(cx, buf)) { Ok(0) => { // Reset state to Idle after all data has been consumed. self.state = State::Idle; @@ -202,35 +299,65 @@ where } } - fn poll_seek(&mut self, _: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let seek_pos = self.seek_pos(pos)?; - self.last_seek_pos = Some(seek_pos); - + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { match &mut self.state { State::Idle => { + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + self.state = State::SendStat(self.stat_future()); + return self.poll_seek(cx, pos); + } + } + SeekFrom::Current(n) => (self.cur as i64, n), + }; + + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + ))) + } + }; + self.cur = seek_pos; - self.last_seek_pos = None; Poll::Ready(Ok(self.cur)) } - State::Sending(_) => { + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + self.state = State::Idle; + self.poll_seek(cx, pos) + } + State::SendRead(_) => { // It's impossible for us to go into this state while // poll_seek. We can just drop this future and check state. 
self.state = State::Idle; - - self.cur = seek_pos; - self.last_seek_pos = None; - Poll::Ready(Ok(self.cur)) + self.poll_seek(cx, pos) } - State::Reading(_) => { - if seek_pos == self.cur { - self.last_seek_pos = None; + State::Read(_) => { + // There is an optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. + if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { return Poll::Ready(Ok(self.cur)); } self.state = State::Idle; - self.cur = seek_pos; - self.last_seek_pos = None; - Poll::Ready(Ok(self.cur)) + self.poll_seek(cx, pos) } } } @@ -238,17 +365,36 @@ where fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { match &mut self.state { State::Idle => { - if self.cur >= self.size { + // Sanity check for normal cases. + if self.cur > self.size.unwrap_or(u64::MAX) { return Poll::Ready(None); } - self.state = State::Sending(self.read_future()); + self.state = if self.offset.is_none() { + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + State::SendStat(self.stat_future()) + } else { + State::SendRead(self.read_future()) + }; + self.poll_next(cx) } - State::Sending(fut) => { - // TODO - // - // we can use RpRead returned here to correct size. + State::SendStat(fut) => { + let rp = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If stat future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + self.state = State::Idle; + self.poll_next(cx) + } + State::SendRead(fut) => { let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { // If read future returns an error, we should reset // state to Idle so that we can retry it. 
@@ -256,10 +402,10 @@ where err })?; - self.state = State::Reading(r); + self.state = State::Read(r); self.poll_next(cx) } - State::Reading(r) => match ready!(Pin::new(r).poll_next(cx)) { + State::Read(r) => match ready!(Pin::new(r).poll_next(cx)) { Some(Ok(bs)) => { self.cur += bs.len() as u64; Poll::Ready(Some(Ok(bs))) @@ -285,15 +431,25 @@ where fn read(&mut self, buf: &mut [u8]) -> Result { match &mut self.state { State::Idle => { - if self.cur >= self.size { + // Sanity check for normal cases. + if buf.is_empty() || self.cur > self.size.unwrap_or(u64::MAX) { return Ok(0); } + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + if self.offset.is_none() { + let rp = self.stat_action()?; + + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + } + let (_, r) = self.read_action()?; - self.state = State::Reading(r); + self.state = State::Read(r); self.read(buf) } - State::Reading(r) => { + State::Read(r) => { match r.read(buf) { Ok(0) => { // Reset state to Idle after all data has been consumed. 
@@ -310,31 +466,64 @@ where } } } - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + } + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") } } } fn seek(&mut self, pos: SeekFrom) -> Result { - let seek_pos = self.seek_pos(pos)?; - match &mut self.state { State::Idle => { + let (base, amt) = match pos { + SeekFrom::Start(n) => (0, n as i64), + SeekFrom::End(n) => { + if let Some(size) = self.size { + (size as i64, n) + } else { + let rp = self.stat_action()?; + let length = rp.into_metadata().content_length(); + self.fill_range(length)?; + + let size = self.size.expect("size must be valid after fill_range"); + (size as i64, n) + } + } + SeekFrom::Current(n) => (self.cur as i64, n), + }; + + let seek_pos = match base.checked_add(amt) { + Some(n) if n >= 0 => n as u64, + _ => { + return Err(Error::new( + ErrorKind::InvalidInput, + "invalid seek to a negative or overflowing position", + )); + } + }; + self.cur = seek_pos; Ok(self.cur) } - State::Reading(_) => { - if seek_pos == self.cur { + State::Read(_) => { + // There is an optimization here that we can calculate if users trying to seek + // the same position, for example, `reader.seek(SeekFrom::Current(0))`. + // In this case, we can just return current position without dropping reader. 
+ if pos == SeekFrom::Current(0) || pos == SeekFrom::Start(self.cur) { return Ok(self.cur); } self.state = State::Idle; - self.cur = seek_pos; - Ok(self.cur) + self.seek(pos) + } + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") } - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") } } } @@ -342,18 +531,33 @@ where fn next(&mut self) -> Option> { match &mut self.state { State::Idle => { - if self.cur >= self.size { + // Sanity check for normal cases. + if self.cur > self.size.unwrap_or(u64::MAX) { return None; } + // Offset is none means we are doing tailing reading. + // we should stat first to get the correct offset. + if self.offset.is_none() { + let rp = match self.stat_action() { + Ok(rp) => rp, + Err(err) => return Some(Err(err)), + }; + + let length = rp.into_metadata().content_length(); + if let Err(err) = self.fill_range(length) { + return Some(Err(err)); + } + } + let r = match self.read_action() { Ok((_, r)) => r, Err(err) => return Some(Err(err)), }; - self.state = State::Reading(r); + self.state = State::Read(r); self.next() } - State::Reading(r) => match r.next() { + State::Read(r) => match r.next() { Some(Ok(bs)) => { self.cur += bs.len() as u64; Some(Ok(bs)) @@ -367,8 +571,11 @@ where None } }, - State::Sending(_) => { - unreachable!("It's invalid to go into State::Sending for BlockingRead, please report this bug") + State::SendStat(_) => { + unreachable!("It's invalid to go into State::SendStat for BlockingRead, please report this bug") + } + State::SendRead(_) => { + unreachable!("It's invalid to go into State::SendRead for BlockingRead, please report this bug") } } } @@ -483,11 +690,11 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); 
- let r = MockReader { - inner: futures::io::Cursor::new(bs.to_vec()), - }; - let mut r = - Box::new(into_seekable_read_by_range(acc, "x", r, 0, bs.len() as u64)) as oio::Reader; + let mut r = Box::new(into_seekable_read_by_range( + acc, + "x", + OpRead::default().with_range(BytesRange::from(..)), + )) as oio::Reader; let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; @@ -518,10 +725,11 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); - let r = MockReader { - inner: futures::io::Cursor::new(bs[4096..4096 + 4096].to_vec()), - }; - let mut r = Box::new(into_seekable_read_by_range(acc, "x", r, 4096, 4096)) as oio::Reader; + let mut r = Box::new(into_seekable_read_by_range( + acc, + "x", + OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), + )) as oio::Reader; let mut buf = Vec::new(); r.read_to_end(&mut buf).await?; diff --git a/core/src/raw/ops.rs b/core/src/raw/ops.rs index 1e60d329eb91..0eb3937c6a8c 100644 --- a/core/src/raw/ops.rs +++ b/core/src/raw/ops.rs @@ -274,6 +274,23 @@ impl OpRead { Self::default() } + /// The into_deterministic function transforms the OpRead into a deterministic version. + /// + /// This API is utilized because it allows for internal optimizations such as dividing read + /// ranges or retrying the read request from where it failed. In these scenarios, the expected + /// `ETag` value differs from what users specify in `If-Match` or `If-None-Match`.Therefore, + /// we need to eliminate these conditional headers to ensure that the read operation is + /// deterministic. + /// + /// This API is not intended to be used by users and should never be exposed. + pub(crate) fn into_deterministic(self) -> Self { + Self { + if_match: None, + if_none_match: None, + ..self + } + } + /// Create a new OpRead with range. 
pub fn with_range(mut self, range: BytesRange) -> Self { self.br = range; From 4f65839d6fd67db5281f01c1713c321102ff08d7 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 17:36:04 +0800 Subject: [PATCH 03/46] Remove last_seek_pos Signed-off-by: Xuanwo --- core/src/raw/oio/read/into_seekable_read_by_range.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/core/src/raw/oio/read/into_seekable_read_by_range.rs b/core/src/raw/oio/read/into_seekable_read_by_range.rs index 2cfb0c2de8e5..0092863a63a4 100644 --- a/core/src/raw/oio/read/into_seekable_read_by_range.rs +++ b/core/src/raw/oio/read/into_seekable_read_by_range.rs @@ -63,7 +63,6 @@ pub fn into_seekable_read_by_range( size, cur: 0, state: State::::Idle, - last_seek_pos: None, } } @@ -77,13 +76,6 @@ pub struct ByRangeSeekableReader { size: Option, cur: u64, state: State, - - /// Seek operation could return Pending which may lead - /// `SeekFrom::Current(off)` been input multiple times. - /// - /// So we need to store the last seek pos to make sure - /// we always seek to the right position. - last_seek_pos: Option, } enum State { From c9a2d2618c3587748f51e3d9c396da660822dc67 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 17:38:13 +0800 Subject: [PATCH 04/46] Remove not used code Signed-off-by: Xuanwo --- core/src/raw/oio/read/cloneable_read.rs | 140 ------------------------ core/src/raw/oio/read/mod.rs | 6 - 2 files changed, 146 deletions(-) delete mode 100644 core/src/raw/oio/read/cloneable_read.rs diff --git a/core/src/raw/oio/read/cloneable_read.rs b/core/src/raw/oio/read/cloneable_read.rs deleted file mode 100644 index a5fe921847dd..000000000000 --- a/core/src/raw/oio/read/cloneable_read.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io::SeekFrom; -use std::sync::Arc; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; - -use crate::raw::*; -use crate::*; - -/// Convert given reader into a wrapper with `std::sync::Mutex` for `Send + Sync + Clone`. -pub fn into_cloneable_reader_within_std(reader: R) -> CloneableReaderWithinStd { - CloneableReaderWithinStd(Arc::new(std::sync::Mutex::new(reader))) -} - -/// CloneableReaderWithinStd is a Send + Sync + Clone with `std::sync::Mutex` wrapper of input -/// reader. -/// -/// Caller can clone this reader but only one thread can calling `oio::Read` API at the -/// same time, otherwise, we will return error if lock block happened. -pub struct CloneableReaderWithinStd(Arc>); - -impl CloneableReaderWithinStd { - /// Consume self to get inner reader. 
- pub fn into_inner(self) -> Arc> { - self.0 - } -} - -impl Clone for CloneableReaderWithinStd { - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - -impl oio::Read for CloneableReaderWithinStd { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_read(cx, buf), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_seek(cx, pos), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self.0.try_lock() { - Ok(mut this) => this.poll_next(cx), - Err(_) => Poll::Ready(Some(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - )))), - } - } -} - -/// Convert given reader into a wrapper with `tokio::sync::Mutex` for `Send + Sync + Clone`. -pub fn into_cloneable_reader_within_tokio(reader: R) -> CloneableReaderWithinTokio { - CloneableReaderWithinTokio(Arc::new(tokio::sync::Mutex::new(reader))) -} - -/// CloneableReaderWithinTokio is a Send + Sync + Clone with `tokio::sync::Mutex` wrapper of input -/// reader. -/// -/// Caller can clone this reader but only one thread can calling `oio::Read` API at the -/// same time, otherwise, we will return error if lock block happened. -pub struct CloneableReaderWithinTokio(Arc>); - -impl CloneableReaderWithinTokio { - /// Consume self to get inner reader. 
- pub fn into_inner(self) -> Arc> { - self.0 - } -} - -impl Clone for CloneableReaderWithinTokio { - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - -impl oio::Read for CloneableReaderWithinTokio { - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_read(cx, buf), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - match self.0.try_lock() { - Ok(mut this) => this.poll_seek(cx, pos), - Err(_) => Poll::Ready(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - match self.0.try_lock() { - Ok(mut this) => this.poll_next(cx), - Err(_) => Poll::Ready(Some(Err(Error::new( - ErrorKind::Unexpected, - "the cloneable reader is expected to have only one owner, but it's not", - )))), - } - } -} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 841dfdd1aa52..1415d630ed1e 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -38,9 +38,3 @@ pub use into_read_from_file::FromFileReader; mod into_read_from_stream; pub use into_read_from_stream::into_read_from_stream; pub use into_read_from_stream::FromStreamReader; - -mod cloneable_read; -pub use cloneable_read::into_cloneable_reader_within_std; -pub use cloneable_read::into_cloneable_reader_within_tokio; -pub use cloneable_read::CloneableReaderWithinStd; -pub use cloneable_read::CloneableReaderWithinTokio; From e52ba54ad93b25a3545c24ec1593fef296318dd8 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 17:47:00 +0800 Subject: [PATCH 05/46] Polish API Signed-off-by: Xuanwo --- core/src/layers/complete.rs | 10 +- core/src/raw/oio/read/mod.rs | 5 +- 
...eekable_read_by_range.rs => range_read.rs} | 92 ++++++++++--------- 3 files changed, 55 insertions(+), 52 deletions(-) rename core/src/raw/oio/read/{into_seekable_read_by_range.rs => range_read.rs} (93%) diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index c0fe7b4809ff..903b64e6b611 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -29,10 +29,10 @@ use bytes::Bytes; use crate::raw::oio::into_flat_page; use crate::raw::oio::into_hierarchy_page; -use crate::raw::oio::ByRangeSeekableReader; use crate::raw::oio::Entry; use crate::raw::oio::FlatPager; use crate::raw::oio::HierarchyPager; +use crate::raw::oio::RangeReader; use crate::raw::oio::StreamableReader; use crate::raw::*; use crate::*; @@ -171,7 +171,7 @@ impl CompleteAccessor { Ok((rp, CompleteReader::NeedStreamable(r))) } _ => { - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, args); + let r = RangeReader::new(self.inner.clone(), path, args); if streamable { Ok((rp, CompleteReader::NeedSeekable(r))) @@ -205,7 +205,7 @@ impl CompleteAccessor { Ok((rp, CompleteReader::NeedStreamable(r))) } _ => { - let r = oio::into_seekable_read_by_range(self.inner.clone(), path, args); + let r = RangeReader::new(self.inner.clone(), path, args); if streamable { Ok((rp, CompleteReader::NeedSeekable(r))) @@ -547,9 +547,9 @@ impl LayeredAccessor for CompleteAccessor { pub enum CompleteReader { AlreadyComplete(R), - NeedSeekable(ByRangeSeekableReader), + NeedSeekable(RangeReader), NeedStreamable(StreamableReader), - NeedBoth(StreamableReader>), + NeedBoth(StreamableReader>), } impl oio::Read for CompleteReader diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 1415d630ed1e..f7d9717829fd 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -27,9 +27,8 @@ mod into_streamable_read; pub use into_streamable_read::into_streamable_read; pub use into_streamable_read::StreamableReader; -mod 
into_seekable_read_by_range; -pub use into_seekable_read_by_range::into_seekable_read_by_range; -pub use into_seekable_read_by_range::ByRangeSeekableReader; +mod range_read; +pub use range_read::RangeReader; mod into_read_from_file; pub use into_read_from_file::into_read_from_file; diff --git a/core/src/raw/oio/read/into_seekable_read_by_range.rs b/core/src/raw/oio/read/range_read.rs similarity index 93% rename from core/src/raw/oio/read/into_seekable_read_by_range.rs rename to core/src/raw/oio/read/range_read.rs index 0092863a63a4..870285e4267e 100644 --- a/core/src/raw/oio/read/into_seekable_read_by_range.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -29,45 +29,16 @@ use futures::future::BoxFuture; use crate::raw::*; use crate::*; -/// Convert given reader into [`oio::Reader`] by range. +/// RangeReader that can do seek on non-seekable reader. /// -/// # Input +/// `oio::Reader` requires the underlying reader to be seekable, but some services like s3, gcs +/// doesn't support seek natively. RangeReader implement seek by read_with_range. We will start +/// a new read request with the correct range when seek is called. /// -/// The input is an Accessor will may return a non-seekable reader. -/// -/// # Output -/// -/// The output is a reader that can be seek by range. -/// -/// # Notes -/// -/// This operation is not zero cost. If the accessor already returns a -/// seekable reader, please don't use this. -pub fn into_seekable_read_by_range( - acc: Arc, - path: &str, - op: OpRead, -) -> ByRangeSeekableReader { - // Normalize range like `..` into `0..` to make sure offset is valid. - let (offset, size) = match (op.range().offset(), op.range().size()) { - (None, None) => (Some(0), None), - v => v, - }; - - ByRangeSeekableReader { - acc, - path: Arc::new(path.to_string()), - op, - - offset, - size, - cur: 0, - state: State::::Idle, - } -} - -/// ByRangeReader that can do seek on non-seekable reader. 
-pub struct ByRangeSeekableReader { +/// The `seek` operation on `RangeReader` is zero cost and purely in-memory. But calling `seek` +/// while there is a pending read request will cancel the request and start a new one. This could +/// add extra cost to the read operation. +pub struct RangeReader { acc: Arc, path: Arc, op: OpRead, @@ -88,10 +59,43 @@ enum State { /// Safety: State will only be accessed under &mut. unsafe impl Sync for State {} -impl ByRangeSeekableReader +impl RangeReader where A: Accessor, { + /// Create a new [`oio::Reader`] by range support. + /// + /// # Input + /// + /// The input is an Accessor will may return a non-seekable reader. + /// + /// # Output + /// + /// The output is a reader that can be seek by range. + /// + /// # Notes + /// + /// This operation is not zero cost. If the accessor already returns a + /// seekable reader, please don't use this. + pub fn new(acc: Arc, path: &str, op: OpRead) -> RangeReader { + // Normalize range like `..` into `0..` to make sure offset is valid. + let (offset, size) = match (op.range().offset(), op.range().size()) { + (None, None) => (Some(0), None), + v => v, + }; + + RangeReader { + acc, + path: Arc::new(path.to_string()), + op, + + offset, + size, + cur: 0, + state: State::::Idle, + } + } + /// Fill current reader's range by total_size. 
fn fill_range(&mut self, total_size: u64) -> Result<()> { (self.offset, self.size) = match (self.offset, self.size) { @@ -138,7 +142,7 @@ where } } -impl ByRangeSeekableReader +impl RangeReader where A: Accessor, R: oio::Read, @@ -179,7 +183,7 @@ where } } -impl ByRangeSeekableReader +impl RangeReader where A: Accessor, R: oio::BlockingRead, @@ -220,7 +224,7 @@ where } } -impl oio::Read for ByRangeSeekableReader +impl oio::Read for RangeReader where A: Accessor, R: oio::Read, @@ -415,7 +419,7 @@ where } } -impl oio::BlockingRead for ByRangeSeekableReader +impl oio::BlockingRead for RangeReader where A: Accessor, R: oio::BlockingRead, @@ -682,7 +686,7 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); - let mut r = Box::new(into_seekable_read_by_range( + let mut r = Box::new(RangeReader::new( acc, "x", OpRead::default().with_range(BytesRange::from(..)), @@ -717,7 +721,7 @@ mod tests { let (bs, _) = gen_bytes(); let acc = Arc::new(MockReadService::new(bs.clone())); - let mut r = Box::new(into_seekable_read_by_range( + let mut r = Box::new(RangeReader::new( acc, "x", OpRead::default().with_range(BytesRange::from(4096..4096 + 4096)), From 2f091123fd50d6f0d3a06222f834589a4baf6c02 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 20:25:59 +0800 Subject: [PATCH 06/46] Implement file reader Signed-off-by: Xuanwo --- core/src/raw/oio/read/file_read.rs | 186 ++++++++++++++++++ core/src/raw/oio/read/into_read_from_file.rs | 192 ------------------- core/src/raw/oio/read/mod.rs | 5 +- core/src/services/fs/backend.rs | 116 +++++------ core/src/services/hdfs/backend.rs | 88 +++++---- core/src/services/sftp/backend.rs | 90 ++++++--- core/src/services/sftp/utils.rs | 73 ------- 7 files changed, 345 insertions(+), 405 deletions(-) create mode 100644 core/src/raw/oio/read/file_read.rs delete mode 100644 core/src/raw/oio/read/into_read_from_file.rs diff --git a/core/src/raw/oio/read/file_read.rs 
b/core/src/raw/oio/read/file_read.rs new file mode 100644 index 000000000000..852e7c47d8f1 --- /dev/null +++ b/core/src/raw/oio/read/file_read.rs @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::cmp; +use std::io::Read; +use std::io::Seek; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::ready; +use std::task::Context; +use std::task::Poll; + +use bytes::Bytes; +use futures::AsyncRead; +use futures::AsyncSeek; + +use crate::raw::*; +use crate::*; + +/// FileReader implements [`oio::Read`] via `AsyncRead + AsyncSeek`. +pub struct FileReader { + inner: R, + + start: u64, + end: Option, + + offset: u64, +} + +impl FileReader { + /// Create a new FileReader. + /// + /// # Notes + /// + /// It's required that input reader's cursor is at the input `start` of the file. 
+ pub fn new(fd: R, start: u64, end: Option) -> FileReader { + FileReader { + inner: fd, + start, + end, + + offset: start, + } + } + + fn calculate_position(&self, pos: SeekFrom) -> Result { + match pos { + SeekFrom::Start(n) => { + if n < self.start { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + + Ok(SeekFrom::Start(self.start + n)) + } + SeekFrom::End(n) => { + let end = if let Some(end) = self.end { + end as i64 + n + } else { + n + }; + + if self.start as i64 + end < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + + Ok(SeekFrom::End(end)) + } + SeekFrom::Current(n) => { + if self.offset as i64 + n < self.start as i64 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + + Ok(SeekFrom::Current(n)) + } + } + } +} + +impl oio::Read for FileReader +where + R: AsyncRead + AsyncSeek + Unpin + Send + Sync, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let size = if let Some(end) = self.end { + if self.offset >= end { + return Poll::Ready(Ok(0)); + } + cmp::min(buf.len(), (end - self.offset) as usize) + } else { + buf.len() + }; + + let n = + ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf[..size])).map_err(|err| { + Error::new(ErrorKind::Unexpected, "read data from FileReader").set_source(err) + })?; + self.offset += n as u64; + Poll::Ready(Ok(n)) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + let pos = self.calculate_position(pos)?; + + let cur = ready!(Pin::new(&mut self.inner).poll_seek(cx, pos)).map_err(|err| { + Error::new(ErrorKind::Unexpected, "seek data from FileReader").set_source(err) + })?; + + self.offset = cur; + Poll::Ready(Ok(self.offset - self.start)) + } 
+ + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let _ = cx; + + Poll::Ready(Some(Err(Error::new( + ErrorKind::Unsupported, + "output reader doesn't support next", + )))) + } +} + +impl oio::BlockingRead for FileReader +where + R: Read + Seek + Send + Sync + 'static, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + let size = if let Some(end) = self.end { + if self.offset >= end { + return Ok(0); + } + cmp::min(buf.len(), (end - self.offset) as usize) + } else { + buf.len() + }; + + let n = self.inner.read(&mut buf[..size]).map_err(|err| { + Error::new(ErrorKind::Unexpected, "read data from FileReader").set_source(err) + })?; + self.offset += n as u64; + Ok(n) + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + let pos = self.calculate_position(pos)?; + + let cur = self.inner.seek(pos).map_err(|err| { + Error::new(ErrorKind::Unexpected, "seek data from FileReader").set_source(err) + })?; + + self.offset = cur; + Ok(self.offset - self.start) + } + + fn next(&mut self) -> Option> { + Some(Err(Error::new( + ErrorKind::Unsupported, + "output reader doesn't support iterating", + ))) + } +} diff --git a/core/src/raw/oio/read/into_read_from_file.rs b/core/src/raw/oio/read/into_read_from_file.rs deleted file mode 100644 index f005ac73721d..000000000000 --- a/core/src/raw/oio/read/into_read_from_file.rs +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cmp; -use std::io::Read; -use std::io::Seek; -use std::io::SeekFrom; -use std::pin::Pin; -use std::task::ready; -use std::task::Context; -use std::task::Poll; - -use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncSeek; - -use crate::raw::*; -use crate::*; - -/// Convert given file into [`oio::Reader`]. -pub fn into_read_from_file(fd: R, start: u64, end: u64) -> FromFileReader { - FromFileReader { - inner: fd, - start, - end, - offset: 0, - } -} - -/// FromFileReader is a wrapper of input fd to implement [`oio::Read`]. -pub struct FromFileReader { - inner: R, - - start: u64, - end: u64, - offset: u64, -} - -impl FromFileReader { - pub(crate) fn current_size(&self) -> i64 { - debug_assert!(self.offset >= self.start, "offset must in range"); - self.end as i64 - self.offset as i64 - } -} - -impl oio::Read for FromFileReader -where - R: AsyncRead + AsyncSeek + Unpin + Send + Sync, -{ - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - if self.current_size() <= 0 { - return Poll::Ready(Ok(0)); - } - - let max = cmp::min(buf.len() as u64, self.current_size() as u64) as usize; - // TODO: we can use pread instead. - let n = - ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf[..max])).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - self.offset += n as u64; - Poll::Ready(Ok(n)) - } - - /// TODO: maybe we don't need to do seek really, just call pread instead. 
- /// - /// We need to wait for tokio's pread support. - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let (base, offset) = match pos { - SeekFrom::Start(n) => (self.start as i64, n as i64), - SeekFrom::End(n) => (self.end as i64, n), - SeekFrom::Current(n) => (self.offset as i64, n), - }; - - match base.checked_add(offset) { - // Seek to position like `-123` is invalid. - Some(n) if n < 0 => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative or overflowing position is invalid", - ) - .with_context("position", n.to_string()))), - // Seek to position before the start of current file is invalid. - Some(n) if n < self.start as i64 => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "seek to a position before start of file is invalid", - ) - .with_context("position", n.to_string()) - .with_context("start", self.start.to_string()))), - Some(n) => { - let cur = - ready!(Pin::new(&mut self.inner).poll_seek(cx, SeekFrom::Start(n as u64))) - .map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - - self.offset = cur; - Poll::Ready(Ok(self.offset - self.start)) - } - None => Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - ))), - } - } - - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; - - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support next", - )))) - } -} - -impl oio::BlockingRead for FromFileReader -where - R: Read + Seek + Send + Sync + 'static, -{ - fn read(&mut self, buf: &mut [u8]) -> Result { - if self.current_size() <= 0 { - return Ok(0); - } - - let max = cmp::min(buf.len() as u64, self.current_size() as u64) as usize; - // TODO: we can use pread instead. 
- let n = self.inner.read(&mut buf[..max]).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - self.offset += n as u64; - Ok(n) - } - - /// TODO: maybe we don't need to do seek really, just call pread instead. - /// - /// We need to wait for tokio's pread support. - fn seek(&mut self, pos: SeekFrom) -> Result { - let (base, offset) = match pos { - SeekFrom::Start(n) => (self.start as i64, n as i64), - SeekFrom::End(n) => (self.end as i64, n), - SeekFrom::Current(n) => (self.offset as i64, n), - }; - - match base.checked_add(offset) { - Some(n) if n < 0 => Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )), - Some(n) => { - let cur = self.inner.seek(SeekFrom::Start(n as u64)).map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FdReader") - .with_context("source", "FdReader") - .set_source(err) - })?; - - self.offset = cur; - Ok(self.offset - self.start) - } - None => Err(Error::new( - ErrorKind::InvalidInput, - "invalid seek to a negative or overflowing position", - )), - } - } - - fn next(&mut self) -> Option> { - Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support iterating", - ))) - } -} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index f7d9717829fd..78e888865458 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -30,9 +30,8 @@ pub use into_streamable_read::StreamableReader; mod range_read; pub use range_read::RangeReader; -mod into_read_from_file; -pub use into_read_from_file::into_read_from_file; -pub use into_read_from_file::FromFileReader; +mod file_read; +pub use file_read::FileReader; mod into_read_from_stream; pub use into_read_from_stream::into_read_from_stream; diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 40ada10f2194..9af013cf1832 100644 --- 
a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; use std::collections::HashMap; use std::io::SeekFrom; use std::path::Path; @@ -248,8 +247,8 @@ impl FsBackend { #[async_trait] impl Accessor for FsBackend { - type Reader = oio::FromFileReader>; - type BlockingReader = oio::FromFileReader; + type Reader = oio::FileReader>; + type BlockingReader = oio::FileReader; type Writer = FsWriter; type BlockingWriter = FsWriter; type Pager = Option>; @@ -306,7 +305,7 @@ impl Accessor for FsBackend { /// /// Benchmark could be found [here](https://gist.github.com/Xuanwo/48f9cfbc3022ea5f865388bb62e1a70f) async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use oio::ReadExt; + use tokio::io::AsyncSeekExt; let p = self.root.join(path.trim_end_matches('/')); @@ -316,7 +315,7 @@ impl Accessor for FsBackend { .await .map_err(parse_io_error)?; - let total_length = if self.enable_path_check { + if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to slash normalization. let meta = f.metadata().await.map_err(parse_io_error)?; if meta.is_dir() != path.ends_with('/') { @@ -331,41 +330,36 @@ impl Accessor for FsBackend { "given path is a directory", )); } + } - meta.len() - } else { - use tokio::io::AsyncSeekExt; - - f.seek(SeekFrom::End(0)).await.map_err(parse_io_error)? - }; - - let f = Compat::new(f); - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. - (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. 
- (None, None) => (0, total_length), + let (start, end) = match (args.range().offset(), args.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = f + .seek(SeekFrom::End(size as i64)) + .await + .map_err(parse_io_error)?; + (start, Some(start + size)) + } + (Some(offset), None) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, Some(size)) + } }; - let mut r = oio::into_read_from_file(f, start, end); + let r = oio::FileReader::new(Compat::new(f), start, end); - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0)).await?; - - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(0), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -510,7 +504,7 @@ impl Accessor for FsBackend { } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use oio::BlockingRead; + use std::io::Seek; let p = self.root.join(path.trim_end_matches('/')); @@ -519,7 +513,7 @@ impl Accessor for FsBackend { .open(p) .map_err(parse_io_error)?; - let total_length = if self.enable_path_check { + if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to slash normalization. let meta = f.metadata().map_err(parse_io_error)?; if meta.is_dir() != path.ends_with('/') { @@ -534,39 +528,27 @@ impl Accessor for FsBackend { "given path is a directory", )); } + } - meta.len() - } else { - use std::io::Seek; - - f.seek(SeekFrom::End(0)).map_err(parse_io_error)? - }; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. 
- (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. - (None, None) => (0, total_length), + let (start, end) = match (args.range().offset(), args.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = f.seek(SeekFrom::End(size as i64)).map_err(parse_io_error)?; + (start, Some(start + size)) + } + (Some(offset), None) => { + let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + (start, Some(size)) + } }; - let mut r: oio::FromFileReader = oio::into_read_from_file(f, start, end); - - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0))?; + let r = oio::FileReader::new(f, start, end); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(0), r)) } fn blocking_write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index a6078c89ebe3..4d093656bb53 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::io; @@ -160,8 +159,8 @@ unsafe impl Sync for HdfsBackend {} #[async_trait] impl Accessor for HdfsBackend { - type Reader = oio::FromFileReader; - type BlockingReader = oio::FromFileReader; + type Reader = oio::FileReader; + type BlockingReader = oio::FileReader; type Writer = HdfsWriter; type BlockingWriter = HdfsWriter; type Pager = Option; @@ -205,14 +204,11 @@ impl Accessor for HdfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use oio::ReadExt; + use futures::AsyncSeekExt; let p = build_rooted_abs_path(&self.root, path); - // This will be addressed by https://github.com/apache/incubator-opendal/issues/506 - let meta = self.client.metadata(&p).map_err(parse_io_error)?; - - let f = self + let mut f = self .client .open_file() .read(true) @@ -220,23 +216,34 @@ impl Accessor for HdfsBackend { .await .map_err(parse_io_error)?; - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, meta.len())), - // Read from offset. - (Some(offset), None) => (offset, meta.len()), - // Read the last size bytes. - (None, Some(size)) => (meta.len() - size, meta.len()), - // Read the whole file. 
- (None, None) => (0, meta.len()), + let (start, end) = match (args.range().offset(), args.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = f + .seek(SeekFrom::End(size as i64)) + .await + .map_err(parse_io_error)?; + (start, Some(start + size)) + } + (Some(offset), None) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, Some(size)) + } }; - let mut r = oio::into_read_from_file(f, start, end); - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0)).await?; + let r = oio::FileReader::new(f, start, end); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(0), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -344,37 +351,36 @@ impl Accessor for HdfsBackend { } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use oio::BlockingRead; + use std::io::Seek; let p = build_rooted_abs_path(&self.root, path); - // This will be addressed by https://github.com/apache/incubator-opendal/issues/506 - let meta = self.client.metadata(&p).map_err(parse_io_error)?; - - let f = self + let mut f = self .client .open_file() .read(true) .open(&p) .map_err(parse_io_error)?; - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, meta.len())), - // Read from offset. - (Some(offset), None) => (offset, meta.len()), - // Read the last size bytes. - (None, Some(size)) => (meta.len() - size, meta.len()), - // Read the whole file. 
- (None, None) => (0, meta.len()), + let (start, end) = match (args.range().offset(), args.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = f.seek(SeekFrom::End(size as i64)).map_err(parse_io_error)?; + (start, Some(start + size)) + } + (Some(offset), None) => { + let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + (start, Some(size)) + } }; - let mut r = oio::into_read_from_file(f, start, end); - // Rewind to make sure we are on the correct offset. - r.seek(SeekFrom::Start(0))?; + let r = oio::FileReader::new(f, start, end); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(0), r)) } fn blocking_write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 0f9b7f3bd6fe..774221534543 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -15,25 +15,27 @@ // specific language governing permissions and limitations // under the License. 
-use std::cmp::min; +use async_compat::Compat; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; +use std::io::SeekFrom; use std::path::Path; use std::path::PathBuf; +use std::pin::Pin; use async_trait::async_trait; use futures::StreamExt; use log::debug; use openssh::KnownHosts; use openssh::SessionBuilder; +use openssh_sftp_client::file::TokioCompatFile; use openssh_sftp_client::Sftp; use openssh_sftp_client::SftpOptions; use super::error::is_not_found; use super::error::is_sftp_protocol_error; use super::pager::SftpPager; -use super::utils::SftpReader; use super::writer::SftpWriter; use crate::raw::*; use crate::*; @@ -224,7 +226,7 @@ impl Debug for SftpBackend { #[async_trait] impl Accessor for SftpBackend { - type Reader = SftpReader; + type Reader = oio::FileReader>>>; type BlockingReader = (); type Writer = SftpWriter; type BlockingWriter = (); @@ -285,41 +287,51 @@ impl Accessor for SftpBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + use tokio::io::AsyncSeekExt; + let client = self.connect().await?; let mut fs = client.fs(); fs.set_cwd(&self.root); let path = fs.canonicalize(path).await?; - let mut file = client.open(path.as_path()).await?; - - let total_length = file.metadata().await?.len().ok_or(Error::new( - ErrorKind::NotFound, - format!("file not found: {}", path.to_str().unwrap()).as_str(), - ))?; - - let br = args.range(); - let (start, end) = match (br.offset(), br.size()) { - // Read a specific range. - (Some(offset), Some(size)) => (offset, min(offset + size, total_length)), - // Read from offset. - (Some(offset), None) => (offset, total_length), - // Read the last size bytes. - (None, Some(size)) => ( - if total_length > size { - total_length - size - } else { - 0 - }, - total_length, - ), - // Read the whole file. 
- (None, None) => (0, total_length), + let mut f = client.open(path.as_path()).await?; + + let (start, end) = match (args.range().offset(), args.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = f + .seek(SeekFrom::End(size as i64)) + .await + .map_err(parse_io_error)?; + (start, Some(start + size)) + } + (Some(offset), None) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = f + .seek(SeekFrom::Start(offset)) + .await + .map_err(parse_io_error)?; + (start, Some(size)) + } }; - let r = SftpReader::new(file, start, end).await?; + // Sorry for the ugly code... + // + // - `f` is a openssh file. + // - `TokioCompatFile::new(f)` makes it implements tokio AsyncRead + AsyncSeek for openssh File. + // - `Compat::new(f)` make it compatible to `futures::AsyncRead + futures::AsyncSeek`. + // - `Box::pin(x)` to make sure this reader implements `Unpin`, since `TokioCompatFile` is not. + // - `oio::FileReader::new(x)` makes it a `oio::FileReader` which implements `oio::Read`. + let r = oio::FileReader::new(Box::pin(Compat::new(TokioCompatFile::new(f))), start, end); - Ok((RpRead::new(end - start), r)) + Ok((RpRead::new(0), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -546,3 +558,23 @@ async fn connect_sftp( Ok(sftp) } + +/// Parse all io related errors. 
+pub fn parse_io_error(err: std::io::Error) -> Error { + use std::io::ErrorKind::*; + + let (kind, retryable) = match err.kind() { + NotFound => (ErrorKind::NotFound, false), + PermissionDenied => (ErrorKind::PermissionDenied, false), + Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), + _ => (ErrorKind::Unexpected, true), + }; + + let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); + + if retryable { + err = err.set_temporary(); + } + + err +} diff --git a/core/src/services/sftp/utils.rs b/core/src/services/sftp/utils.rs index fc8bb8a6f64c..5e627b89adc1 100644 --- a/core/src/services/sftp/utils.rs +++ b/core/src/services/sftp/utils.rs @@ -15,83 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::io::SeekFrom; -use std::pin::Pin; -use std::task::Context; -use std::task::Poll; - -use async_compat::Compat; -use futures::AsyncBufRead; -use futures::AsyncRead; -use futures::AsyncSeek; -use openssh_sftp_client::file::File; -use openssh_sftp_client::file::TokioCompatFile; use openssh_sftp_client::metadata::MetaData as SftpMeta; -use crate::raw::oio; -use crate::raw::oio::FromFileReader; -use crate::raw::oio::ReadExt; use crate::EntryMode; use crate::Metadata; -use crate::Result; - -pub struct SftpReaderInner { - file: Pin>>, -} -pub type SftpReader = FromFileReader; - -impl SftpReaderInner { - pub async fn new(file: File) -> Self { - let file = Compat::new(file.into()); - Self { - file: Box::pin(file), - } - } -} - -impl SftpReader { - /// Create a new reader from a file, starting at the given offset and ending at the given offset. 
- pub async fn new(file: File, start: u64, end: u64) -> Result { - let file = SftpReaderInner::new(file).await; - let mut r = oio::into_read_from_file(file, start, end); - r.seek(SeekFrom::Start(0)).await?; - Ok(r) - } -} - -impl AsyncRead for SftpReaderInner { - fn poll_read( - self: Pin<&mut Self>, - cx: &mut Context, - buf: &mut [u8], - ) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_read(cx, buf) - } -} - -impl AsyncBufRead for SftpReaderInner { - fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_fill_buf(cx) - } - - fn consume(self: Pin<&mut Self>, amt: usize) { - let this = self.get_mut(); - Pin::new(&mut this.file).consume(amt) - } -} - -impl AsyncSeek for SftpReaderInner { - fn poll_seek( - self: Pin<&mut Self>, - cx: &mut Context, - pos: SeekFrom, - ) -> Poll> { - let this = self.get_mut(); - Pin::new(&mut this.file).poll_seek(cx, pos) - } -} impl From for Metadata { fn from(meta: SftpMeta) -> Self { From 196620246e9f3e523c0721ac56ce885db3e07d7d Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 20:31:26 +0800 Subject: [PATCH 07/46] Fix tests Signed-off-by: Xuanwo --- core/tests/behavior/blocking_copy.rs | 16 +++++++++++++--- core/tests/behavior/blocking_rename.rs | 16 +++++++++++++--- core/tests/behavior/copy.rs | 21 +++++++++++++++++---- core/tests/behavior/rename.rs | 16 +++++++++++++--- 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/core/tests/behavior/blocking_copy.rs b/core/tests/behavior/blocking_copy.rs index 822369a229f8..53b26e8f0d67 100644 --- a/core/tests/behavior/blocking_copy.rs +++ b/core/tests/behavior/blocking_copy.rs @@ -16,6 +16,7 @@ // under the License. 
use anyhow::Result; +use sha2::{Digest, Sha256}; use crate::*; @@ -50,7 +51,10 @@ pub fn test_blocking_copy_file(op: BlockingOperator) -> Result<()> { op.copy(&source_path, &target_path)?; let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); @@ -137,7 +141,10 @@ pub fn test_blocking_copy_nested(op: BlockingOperator) -> Result<()> { op.copy(&source_path, &target_path)?; let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); @@ -160,7 +167,10 @@ pub fn test_blocking_copy_overwrite(op: BlockingOperator) -> Result<()> { op.copy(&source_path, &target_path)?; let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); diff --git a/core/tests/behavior/blocking_rename.rs b/core/tests/behavior/blocking_rename.rs index 398a3f4dacae..24c1bc559b61 100644 --- a/core/tests/behavior/blocking_rename.rs +++ b/core/tests/behavior/blocking_rename.rs @@ -16,6 +16,7 @@ // under the License. 
use anyhow::Result; +use sha2::{Digest, Sha256}; use crate::*; @@ -53,7 +54,10 @@ pub fn test_blocking_rename_file(op: BlockingOperator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); @@ -143,7 +147,10 @@ pub fn test_blocking_rename_nested(op: BlockingOperator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); @@ -169,7 +176,10 @@ pub fn test_blocking_rename_overwrite(op: BlockingOperator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).expect("delete must succeed"); op.delete(&target_path).expect("delete must succeed"); diff --git a/core/tests/behavior/copy.rs b/core/tests/behavior/copy.rs index b8a4573ba395..7d65a2e6c78f 100644 --- a/core/tests/behavior/copy.rs +++ b/core/tests/behavior/copy.rs @@ -16,6 +16,7 @@ // under the License. 
use anyhow::Result; +use sha2::{Digest, Sha256}; use crate::*; @@ -51,7 +52,10 @@ pub async fn test_copy_file_with_ascii_name(op: Operator) -> Result<()> { op.copy(&source_path, &target_path).await?; let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must succeed"); op.delete(&target_path).await.expect("delete must succeed"); @@ -68,7 +72,10 @@ pub async fn test_copy_file_with_non_ascii_name(op: Operator) -> Result<()> { op.copy(source_path, target_path).await?; let target_content = op.read(target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(source_path).await.expect("delete must succeed"); op.delete(target_path).await.expect("delete must succeed"); @@ -159,7 +166,10 @@ pub async fn test_copy_nested(op: Operator) -> Result<()> { op.copy(&source_path, &target_path).await?; let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must succeed"); op.delete(&target_path).await.expect("delete must succeed"); @@ -182,7 +192,10 @@ pub async fn test_copy_overwrite(op: Operator) -> Result<()> { op.copy(&source_path, &target_path).await?; let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must 
succeed"); op.delete(&target_path).await.expect("delete must succeed"); diff --git a/core/tests/behavior/rename.rs b/core/tests/behavior/rename.rs index ac4974c8f401..8880e16c89ad 100644 --- a/core/tests/behavior/rename.rs +++ b/core/tests/behavior/rename.rs @@ -16,6 +16,7 @@ // under the License. use anyhow::Result; +use sha2::{Digest, Sha256}; use crate::*; @@ -53,7 +54,10 @@ pub async fn test_rename_file(op: Operator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must succeed"); op.delete(&target_path).await.expect("delete must succeed"); @@ -147,7 +151,10 @@ pub async fn test_rename_nested(op: Operator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must succeed"); op.delete(&target_path).await.expect("delete must succeed"); @@ -173,7 +180,10 @@ pub async fn test_rename_overwrite(op: Operator) -> Result<()> { assert_eq!(err.kind(), ErrorKind::NotFound); let target_content = op.read(&target_path).await.expect("read must succeed"); - assert_eq!(target_content, source_content); + assert_eq!( + format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(&source_content)), + ); op.delete(&source_path).await.expect("delete must succeed"); op.delete(&target_path).await.expect("delete must succeed"); From c431480b8a353fee103a3f616ec4364b341df96e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 21:01:44 +0800 Subject: [PATCH 08/46] 
Make clippy happy Signed-off-by: Xuanwo --- core/src/raw/mod.rs | 3 ++ core/src/raw/oio/read/futures_read.rs | 67 ++++++++++++++++++++++++++ core/src/raw/oio/read/mod.rs | 3 ++ core/src/raw/std_io_util.rs | 48 ++++++++++++++++++ core/tests/behavior/blocking_copy.rs | 6 +-- core/tests/behavior/blocking_rename.rs | 6 +-- core/tests/behavior/copy.rs | 8 +-- core/tests/behavior/rename.rs | 6 +-- 8 files changed, 134 insertions(+), 13 deletions(-) create mode 100644 core/src/raw/oio/read/futures_read.rs create mode 100644 core/src/raw/std_io_util.rs diff --git a/core/src/raw/mod.rs b/core/src/raw/mod.rs index 313c5d04f629..34f1a0c67052 100644 --- a/core/src/raw/mod.rs +++ b/core/src/raw/mod.rs @@ -59,6 +59,9 @@ pub use chrono_util::*; mod tokio_util; pub use tokio_util::*; +mod std_io_util; +pub use std_io_util::*; + // Expose as a pub mod to avoid confusing. pub mod adapters; pub mod oio; diff --git a/core/src/raw/oio/read/futures_read.rs b/core/src/raw/oio/read/futures_read.rs new file mode 100644 index 000000000000..03b52bbf74ee --- /dev/null +++ b/core/src/raw/oio/read/futures_read.rs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use futures::AsyncRead; +use futures::AsyncSeek; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::{Context, Poll}; + +/// FuturesReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. +pub struct FuturesReader { + inner: R, +} + +impl FuturesReader { + /// Create a new futures reader. + pub fn new(inner: R) -> Self { + Self { inner } + } +} + +impl oio::Read for FuturesReader +where + R: AsyncRead + AsyncSeek + Unpin + Send + Sync, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + Pin::new(&mut self.inner).poll_read(cx, buf).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "FuturesReader") + }) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + Pin::new(&mut self.inner).poll_seek(cx, pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "FuturesReader") + }) + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let _ = cx; + + Poll::Ready(Some(Err(Error::new( + ErrorKind::Unsupported, + "FuturesReader doesn't support poll_next", + )))) + } +} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 78e888865458..9ccbebdc335c 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -36,3 +36,6 @@ pub use file_read::FileReader; mod into_read_from_stream; pub use into_read_from_stream::into_read_from_stream; pub use into_read_from_stream::FromStreamReader; + +mod futures_read; +pub use futures_read::FuturesReader; diff --git a/core/src/raw/std_io_util.rs b/core/src/raw/std_io_util.rs new file mode 100644 index 000000000000..a36e1e47f6cc --- /dev/null +++ b/core/src/raw/std_io_util.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::*; + +/// Parse std io error into opendal::Error. +/// +/// # TODO +/// +/// Add `NotADirectory` and `IsADirectory` once they are stable. +/// +/// ref: +pub fn new_std_io_error(err: std::io::Error) -> Error { + use std::io::ErrorKind::*; + + let (kind, retryable) = match err.kind() { + NotFound => (ErrorKind::NotFound, false), + PermissionDenied => (ErrorKind::PermissionDenied, false), + AlreadyExists => (ErrorKind::AlreadyExists, false), + InvalidInput => (ErrorKind::InvalidInput, false), + Unsupported => (ErrorKind::Unsupported, false), + + Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), + _ => (ErrorKind::Unexpected, true), + }; + + let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); + + if retryable { + err = err.set_temporary(); + } + + err +} diff --git a/core/tests/behavior/blocking_copy.rs b/core/tests/behavior/blocking_copy.rs index 53b26e8f0d67..9188e35efcbe 100644 --- a/core/tests/behavior/blocking_copy.rs +++ b/core/tests/behavior/blocking_copy.rs @@ -52,7 +52,7 @@ pub fn test_blocking_copy_file(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", 
Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -142,7 +142,7 @@ pub fn test_blocking_copy_nested(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -168,7 +168,7 @@ pub fn test_blocking_copy_overwrite(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); diff --git a/core/tests/behavior/blocking_rename.rs b/core/tests/behavior/blocking_rename.rs index 24c1bc559b61..56a71dead99b 100644 --- a/core/tests/behavior/blocking_rename.rs +++ b/core/tests/behavior/blocking_rename.rs @@ -55,7 +55,7 @@ pub fn test_blocking_rename_file(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -148,7 +148,7 @@ pub fn test_blocking_rename_nested(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -177,7 +177,7 @@ pub fn test_blocking_rename_overwrite(op: BlockingOperator) -> Result<()> { let target_content = op.read(&target_path).expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); diff --git 
a/core/tests/behavior/copy.rs b/core/tests/behavior/copy.rs index 7d65a2e6c78f..501ea33b9352 100644 --- a/core/tests/behavior/copy.rs +++ b/core/tests/behavior/copy.rs @@ -53,7 +53,7 @@ pub async fn test_copy_file_with_ascii_name(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -73,7 +73,7 @@ pub async fn test_copy_file_with_non_ascii_name(op: Operator) -> Result<()> { let target_content = op.read(target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -167,7 +167,7 @@ pub async fn test_copy_nested(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -193,7 +193,7 @@ pub async fn test_copy_overwrite(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); diff --git a/core/tests/behavior/rename.rs b/core/tests/behavior/rename.rs index 8880e16c89ad..9ed9172e32fc 100644 --- a/core/tests/behavior/rename.rs +++ b/core/tests/behavior/rename.rs @@ -55,7 +55,7 @@ pub async fn test_rename_file(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", 
Sha256::digest(&source_content)), ); @@ -152,7 +152,7 @@ pub async fn test_rename_nested(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); @@ -181,7 +181,7 @@ pub async fn test_rename_overwrite(op: Operator) -> Result<()> { let target_content = op.read(&target_path).await.expect("read must succeed"); assert_eq!( - format!("{:x}", Sha256::digest(&target_content)), + format!("{:x}", Sha256::digest(target_content)), format!("{:x}", Sha256::digest(&source_content)), ); From 048cce063e3ef2c1e13d27717f6b5ebae9f1f9f0 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 21:08:20 +0800 Subject: [PATCH 09/46] Remove parse_io_error Signed-off-by: Xuanwo --- core/src/layers/madsim.rs | 4 -- core/src/services/fs/backend.rs | 81 +++++++++++++------------- core/src/services/fs/error.rs | 41 ------------- core/src/services/fs/mod.rs | 1 - core/src/services/fs/pager.rs | 9 ++- core/src/services/fs/writer.rs | 17 +++--- core/src/services/hdfs/backend.rs | 54 +++++++++-------- core/src/services/hdfs/error.rs | 45 -------------- core/src/services/hdfs/mod.rs | 1 - core/src/services/hdfs/writer.rs | 11 ++-- core/src/services/memcached/ascii.rs | 26 ++++----- core/src/services/memcached/backend.rs | 6 +- core/src/services/sftp/backend.rs | 26 +-------- core/src/services/sftp/error.rs | 1 + core/src/services/sftp/writer.rs | 9 ++- 15 files changed, 113 insertions(+), 219 deletions(-) delete mode 100644 core/src/services/fs/error.rs delete mode 100644 core/src/services/hdfs/error.rs diff --git a/core/src/layers/madsim.rs b/core/src/layers/madsim.rs index d10451774ac2..6e2a3b6a36c1 100644 --- a/core/src/layers/madsim.rs +++ b/core/src/layers/madsim.rs @@ -346,10 +346,6 @@ impl oio::Page for MadsimPager { } } -fn parse_io_error(e: std::io::Error) -> Error { - 
Error::new(ErrorKind::Unexpected, "madsim error") -} - /// A simulated server.This an experimental feature, docs are not ready yet. #[derive(Default, Clone)] pub struct MadsimServer; diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 9af013cf1832..524c4ef5e578 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -26,7 +26,6 @@ use chrono::DateTime; use log::debug; use uuid::Uuid; -use super::error::parse_io_error; use super::pager::FsPager; use super::writer::FsWriter; use crate::raw::*; @@ -211,7 +210,7 @@ impl FsBackend { })? .to_path_buf(); - std::fs::create_dir_all(parent).map_err(parse_io_error)?; + std::fs::create_dir_all(parent).map_err(new_std_io_error)?; Ok(p) } @@ -239,7 +238,7 @@ impl FsBackend { tokio::fs::create_dir_all(&parent) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(p) } @@ -290,7 +289,7 @@ impl Accessor for FsBackend { tokio::fs::create_dir_all(&p) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } @@ -313,11 +312,11 @@ impl Accessor for FsBackend { .read(true) .open(&p) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to slash normalization. 
- let meta = f.metadata().await.map_err(parse_io_error)?; + let meta = f.metadata().await.map_err(new_std_io_error)?; if meta.is_dir() != path.ends_with('/') { return Err(Error::new( ErrorKind::NotFound, @@ -338,21 +337,21 @@ impl Accessor for FsBackend { let start = f .seek(SeekFrom::End(size as i64)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(start + size)) } (Some(offset), None) => { let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, None) } (Some(offset), Some(size)) => { let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(size)) } }; @@ -372,7 +371,7 @@ impl Accessor for FsBackend { if op.append() && tokio::fs::try_exists(&target_path) .await - .map_err(parse_io_error)? + .map_err(new_std_io_error)? { (target_path, None) } else { @@ -395,7 +394,7 @@ impl Accessor for FsBackend { let f = open_options .open(tmp_path.as_ref().unwrap_or(&target_path)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok((RpWrite::new(), FsWriter::new(target_path, tmp_path, f))) } @@ -404,11 +403,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - tokio::fs::metadata(&from).await.map_err(parse_io_error)?; + tokio::fs::metadata(&from).await.map_err(new_std_io_error)?; let to = Self::ensure_write_abs_path(&self.root, to.trim_end_matches('/')).await?; - tokio::fs::copy(from, to).await.map_err(parse_io_error)?; + tokio::fs::copy(from, to).await.map_err(new_std_io_error)?; Ok(RpCopy::default()) } @@ -417,11 +416,13 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - tokio::fs::metadata(&from).await.map_err(parse_io_error)?; + tokio::fs::metadata(&from).await.map_err(new_std_io_error)?; 
let to = Self::ensure_write_abs_path(&self.root, to.trim_end_matches('/')).await?; - tokio::fs::rename(from, to).await.map_err(parse_io_error)?; + tokio::fs::rename(from, to) + .await + .map_err(new_std_io_error)?; Ok(RpRename::default()) } @@ -429,7 +430,7 @@ impl Accessor for FsBackend { async fn stat(&self, path: &str, _: OpStat) -> Result { let p = self.root.join(path.trim_end_matches('/')); - let meta = tokio::fs::metadata(&p).await.map_err(parse_io_error)?; + let meta = tokio::fs::metadata(&p).await.map_err(new_std_io_error)?; if self.enable_path_check && meta.is_dir() != path.ends_with('/') { return Err(Error::new( @@ -450,7 +451,7 @@ impl Accessor for FsBackend { .with_last_modified( meta.modified() .map(DateTime::from) - .map_err(parse_io_error)?, + .map_err(new_std_io_error)?, ); Ok(RpStat::new(m)) @@ -464,15 +465,15 @@ impl Accessor for FsBackend { match meta { Ok(meta) => { if meta.is_dir() { - tokio::fs::remove_dir(&p).await.map_err(parse_io_error)?; + tokio::fs::remove_dir(&p).await.map_err(new_std_io_error)?; } else { - tokio::fs::remove_file(&p).await.map_err(parse_io_error)?; + tokio::fs::remove_file(&p).await.map_err(new_std_io_error)?; } Ok(RpDelete::default()) } Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(RpDelete::default()), - Err(err) => Err(parse_io_error(err)), + Err(err) => Err(new_std_io_error(err)), } } @@ -485,7 +486,7 @@ impl Accessor for FsBackend { return if e.kind() == std::io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) }; } }; @@ -498,7 +499,7 @@ impl Accessor for FsBackend { fn blocking_create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = self.root.join(path.trim_end_matches('/')); - std::fs::create_dir_all(p).map_err(parse_io_error)?; + std::fs::create_dir_all(p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } @@ -511,11 +512,11 @@ impl Accessor for FsBackend { let mut f = std::fs::OpenOptions::new() .read(true) .open(p) - 
.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; if self.enable_path_check { // Get fs metadata of file at given path, ensuring it is not a false-positive due to slash normalization. - let meta = f.metadata().map_err(parse_io_error)?; + let meta = f.metadata().map_err(new_std_io_error)?; if meta.is_dir() != path.ends_with('/') { return Err(Error::new( ErrorKind::NotFound, @@ -533,15 +534,17 @@ impl Accessor for FsBackend { let (start, end) = match (args.range().offset(), args.range().size()) { (None, None) => (0, None), (None, Some(size)) => { - let start = f.seek(SeekFrom::End(size as i64)).map_err(parse_io_error)?; + let start = f + .seek(SeekFrom::End(size as i64)) + .map_err(new_std_io_error)?; (start, Some(start + size)) } (Some(offset), None) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; (start, None) } (Some(offset), Some(size)) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; (start, Some(size)) } }; @@ -561,7 +564,7 @@ impl Accessor for FsBackend { if op.append() && Path::new(&target_path) .try_exists() - .map_err(parse_io_error)? + .map_err(new_std_io_error)? 
{ (target_path, None) } else { @@ -584,7 +587,7 @@ impl Accessor for FsBackend { let f = f .open(tmp_path.as_ref().unwrap_or(&target_path)) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok((RpWrite::new(), FsWriter::new(target_path, tmp_path, f))) } @@ -593,11 +596,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - std::fs::metadata(&from).map_err(parse_io_error)?; + std::fs::metadata(&from).map_err(new_std_io_error)?; let to = Self::blocking_ensure_write_abs_path(&self.root, to.trim_end_matches('/'))?; - std::fs::copy(from, to).map_err(parse_io_error)?; + std::fs::copy(from, to).map_err(new_std_io_error)?; Ok(RpCopy::default()) } @@ -606,11 +609,11 @@ impl Accessor for FsBackend { let from = self.root.join(from.trim_end_matches('/')); // try to get the metadata of the source file to ensure it exists - std::fs::metadata(&from).map_err(parse_io_error)?; + std::fs::metadata(&from).map_err(new_std_io_error)?; let to = Self::blocking_ensure_write_abs_path(&self.root, to.trim_end_matches('/'))?; - std::fs::rename(from, to).map_err(parse_io_error)?; + std::fs::rename(from, to).map_err(new_std_io_error)?; Ok(RpRename::default()) } @@ -618,7 +621,7 @@ impl Accessor for FsBackend { fn blocking_stat(&self, path: &str, _: OpStat) -> Result { let p = self.root.join(path.trim_end_matches('/')); - let meta = std::fs::metadata(p).map_err(parse_io_error)?; + let meta = std::fs::metadata(p).map_err(new_std_io_error)?; if self.enable_path_check && meta.is_dir() != path.ends_with('/') { return Err(Error::new( @@ -639,7 +642,7 @@ impl Accessor for FsBackend { .with_last_modified( meta.modified() .map(DateTime::from) - .map_err(parse_io_error)?, + .map_err(new_std_io_error)?, ); Ok(RpStat::new(m)) @@ -653,15 +656,15 @@ impl Accessor for FsBackend { match meta { Ok(meta) => { if meta.is_dir() { - std::fs::remove_dir(&p).map_err(parse_io_error)?; + 
std::fs::remove_dir(&p).map_err(new_std_io_error)?; } else { - std::fs::remove_file(&p).map_err(parse_io_error)?; + std::fs::remove_file(&p).map_err(new_std_io_error)?; } Ok(RpDelete::default()) } Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(RpDelete::default()), - Err(err) => Err(parse_io_error(err)), + Err(err) => Err(new_std_io_error(err)), } } @@ -674,7 +677,7 @@ impl Accessor for FsBackend { return if e.kind() == std::io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) }; } }; diff --git a/core/src/services/fs/error.rs b/core/src/services/fs/error.rs deleted file mode 100644 index de9f710ce773..000000000000 --- a/core/src/services/fs/error.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::io; - -use crate::Error; -use crate::ErrorKind; - -/// Parse all io related errors. 
-pub fn parse_io_error(err: io::Error) -> Error { - use io::ErrorKind::*; - - let (kind, retryable) = match err.kind() { - NotFound => (ErrorKind::NotFound, false), - PermissionDenied => (ErrorKind::PermissionDenied, false), - Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), - _ => (ErrorKind::Unexpected, true), - }; - - let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); - - if retryable { - err = err.set_temporary(); - } - - err -} diff --git a/core/src/services/fs/mod.rs b/core/src/services/fs/mod.rs index aa2a5fca1f65..28aae0814c1c 100644 --- a/core/src/services/fs/mod.rs +++ b/core/src/services/fs/mod.rs @@ -18,6 +18,5 @@ mod backend; pub use backend::FsBuilder as Fs; -mod error; mod pager; mod writer; diff --git a/core/src/services/fs/pager.rs b/core/src/services/fs/pager.rs index f15ac8eaa9cb..1c1e1fcce9ac 100644 --- a/core/src/services/fs/pager.rs +++ b/core/src/services/fs/pager.rs @@ -20,7 +20,6 @@ use std::path::PathBuf; use async_trait::async_trait; -use super::error::parse_io_error; use crate::raw::*; use crate::EntryMode; use crate::Metadata; @@ -49,7 +48,7 @@ impl oio::Page for FsPager { let mut oes: Vec = Vec::with_capacity(self.size); for _ in 0..self.size { - let de = match self.rd.next_entry().await.map_err(parse_io_error)? { + let de = match self.rd.next_entry().await.map_err(new_std_io_error)? { Some(de) => de, None => break, }; @@ -67,7 +66,7 @@ impl oio::Page for FsPager { // (no extra system calls needed), but some Unix platforms may // require the equivalent call to symlink_metadata to learn about // the target file type. 
- let file_type = de.file_type().await.map_err(parse_io_error)?; + let file_type = de.file_type().await.map_err(new_std_io_error)?; let d = if file_type.is_file() { oio::Entry::new(&rel_path, Metadata::new(EntryMode::FILE)) @@ -91,7 +90,7 @@ impl oio::BlockingPage for FsPager { for _ in 0..self.size { let de = match self.rd.next() { - Some(de) => de.map_err(parse_io_error)?, + Some(de) => de.map_err(new_std_io_error)?, None => break, }; @@ -108,7 +107,7 @@ impl oio::BlockingPage for FsPager { // (no extra system calls needed), but some Unix platforms may // require the equivalent call to symlink_metadata to learn about // the target file type. - let file_type = de.file_type().map_err(parse_io_error)?; + let file_type = de.file_type().map_err(new_std_io_error)?; let d = if file_type.is_file() { oio::Entry::new(&rel_path, Metadata::new(EntryMode::FILE)) diff --git a/core/src/services/fs/writer.rs b/core/src/services/fs/writer.rs index d1283d4ca8c1..bd41f5611338 100644 --- a/core/src/services/fs/writer.rs +++ b/core/src/services/fs/writer.rs @@ -28,7 +28,6 @@ use futures::FutureExt; use tokio::io::AsyncWrite; use tokio::io::AsyncWriteExt; -use super::error::parse_io_error; use crate::raw::*; use crate::*; @@ -64,7 +63,7 @@ impl oio::Write for FsWriter { Pin::new(f) .poll_write_vectored(cx, &bs.vectored_chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { @@ -79,13 +78,13 @@ impl oio::Write for FsWriter { let tmp_path = self.tmp_path.clone(); let target_path = self.target_path.clone(); self.fut = Some(Box::pin(async move { - f.flush().await.map_err(parse_io_error)?; - f.sync_all().await.map_err(parse_io_error)?; + f.flush().await.map_err(new_std_io_error)?; + f.sync_all().await.map_err(new_std_io_error)?; if let Some(tmp_path) = &tmp_path { tokio::fs::rename(tmp_path, &target_path) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; } Ok(()) @@ -107,7 +106,7 @@ impl oio::Write for 
FsWriter { if let Some(tmp_path) = &tmp_path { tokio::fs::remove_file(tmp_path) .await - .map_err(parse_io_error) + .map_err(new_std_io_error) } else { Err(Error::new( ErrorKind::Unsupported, @@ -124,15 +123,15 @@ impl oio::BlockingWrite for FsWriter { let f = self.f.as_mut().expect("FsWriter must be initialized"); f.write_vectored(&bs.vectored_chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn close(&mut self) -> Result<()> { if let Some(f) = self.f.take() { - f.sync_all().map_err(parse_io_error)?; + f.sync_all().map_err(new_std_io_error)?; if let Some(tmp_path) = &self.tmp_path { - std::fs::rename(tmp_path, &self.target_path).map_err(parse_io_error)?; + std::fs::rename(tmp_path, &self.target_path).map_err(new_std_io_error)?; } } diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index 4d093656bb53..a7fb23f0cfc0 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -25,7 +25,6 @@ use std::sync::Arc; use async_trait::async_trait; use log::debug; -use super::error::parse_io_error; use super::pager::HdfsPager; use super::writer::HdfsWriter; use crate::raw::*; @@ -127,14 +126,14 @@ impl Builder for HdfsBuilder { builder = builder.with_user(user.as_str()); } - let client = builder.connect().map_err(parse_io_error)?; + let client = builder.connect().map_err(new_std_io_error)?; // Create root dir if not exist. if let Err(e) = client.metadata(&root) { if e.kind() == io::ErrorKind::NotFound { debug!("root {} is not exist, creating now", root); - client.create_dir(&root).map_err(parse_io_error)? + client.create_dir(&root).map_err(new_std_io_error)? 
} } @@ -198,7 +197,7 @@ impl Accessor for HdfsBackend { async fn create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = build_rooted_abs_path(&self.root, path); - self.client.create_dir(&p).map_err(parse_io_error)?; + self.client.create_dir(&p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } @@ -214,7 +213,7 @@ impl Accessor for HdfsBackend { .read(true) .async_open(&p) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let (start, end) = match (args.range().offset(), args.range().size()) { (None, None) => (0, None), @@ -222,21 +221,21 @@ impl Accessor for HdfsBackend { let start = f .seek(SeekFrom::End(size as i64)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(start + size)) } (Some(offset), None) => { let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, None) } (Some(offset), Some(size)) => { let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(size)) } }; @@ -262,7 +261,7 @@ impl Accessor for HdfsBackend { self.client .create_dir(&parent.to_string_lossy()) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let mut open_options = self.client.open_file(); open_options.create(true); @@ -272,7 +271,10 @@ impl Accessor for HdfsBackend { open_options.write(true); } - let f = open_options.async_open(&p).await.map_err(parse_io_error)?; + let f = open_options + .async_open(&p) + .await + .map_err(new_std_io_error)?; Ok((RpWrite::new(), HdfsWriter::new(f))) } @@ -280,7 +282,7 @@ impl Accessor for HdfsBackend { async fn stat(&self, path: &str, _: OpStat) -> Result { let p = build_rooted_abs_path(&self.root, path); - let meta = self.client.metadata(&p).map_err(parse_io_error)?; + let meta = self.client.metadata(&p).map_err(new_std_io_error)?; let mode = if meta.is_dir() { EntryMode::DIR @@ -305,7 +307,7 @@ impl Accessor for HdfsBackend { return if err.kind() 
== io::ErrorKind::NotFound { Ok(RpDelete::default()) } else { - Err(parse_io_error(err)) + Err(new_std_io_error(err)) }; } @@ -318,7 +320,7 @@ impl Accessor for HdfsBackend { self.client.remove_file(&p) }; - result.map_err(parse_io_error)?; + result.map_err(new_std_io_error)?; Ok(RpDelete::default()) } @@ -332,7 +334,7 @@ impl Accessor for HdfsBackend { return if e.kind() == io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) } } }; @@ -345,7 +347,7 @@ impl Accessor for HdfsBackend { fn blocking_create_dir(&self, path: &str, _: OpCreateDir) -> Result { let p = build_rooted_abs_path(&self.root, path); - self.client.create_dir(&p).map_err(parse_io_error)?; + self.client.create_dir(&p).map_err(new_std_io_error)?; Ok(RpCreateDir::default()) } @@ -360,20 +362,22 @@ impl Accessor for HdfsBackend { .open_file() .read(true) .open(&p) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let (start, end) = match (args.range().offset(), args.range().size()) { (None, None) => (0, None), (None, Some(size)) => { - let start = f.seek(SeekFrom::End(size as i64)).map_err(parse_io_error)?; + let start = f + .seek(SeekFrom::End(size as i64)) + .map_err(new_std_io_error)?; (start, Some(start + size)) } (Some(offset), None) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; (start, None) } (Some(offset), Some(size)) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(parse_io_error)?; + let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; (start, Some(size)) } }; @@ -399,7 +403,7 @@ impl Accessor for HdfsBackend { self.client .create_dir(&parent.to_string_lossy()) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; let f = self .client @@ -407,7 +411,7 @@ impl Accessor for HdfsBackend { .create(true) .write(true) .open(&p) - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; 
Ok((RpWrite::new(), HdfsWriter::new(f))) } @@ -415,7 +419,7 @@ impl Accessor for HdfsBackend { fn blocking_stat(&self, path: &str, _: OpStat) -> Result { let p = build_rooted_abs_path(&self.root, path); - let meta = self.client.metadata(&p).map_err(parse_io_error)?; + let meta = self.client.metadata(&p).map_err(new_std_io_error)?; let mode = if meta.is_dir() { EntryMode::DIR @@ -440,7 +444,7 @@ impl Accessor for HdfsBackend { return if err.kind() == io::ErrorKind::NotFound { Ok(RpDelete::default()) } else { - Err(parse_io_error(err)) + Err(new_std_io_error(err)) }; } @@ -453,7 +457,7 @@ impl Accessor for HdfsBackend { self.client.remove_file(&p) }; - result.map_err(parse_io_error)?; + result.map_err(new_std_io_error)?; Ok(RpDelete::default()) } @@ -467,7 +471,7 @@ impl Accessor for HdfsBackend { return if e.kind() == io::ErrorKind::NotFound { Ok((RpList::default(), None)) } else { - Err(parse_io_error(e)) + Err(new_std_io_error(e)) } } }; diff --git a/core/src/services/hdfs/error.rs b/core/src/services/hdfs/error.rs deleted file mode 100644 index f97fada23a2b..000000000000 --- a/core/src/services/hdfs/error.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use std::io; - -use crate::Error; -use crate::ErrorKind; - -/// Parse all path related errors. -/// -/// ## Notes -/// -/// Skip utf-8 check to allow invalid path input. -pub fn parse_io_error(err: io::Error) -> Error { - use io::ErrorKind::*; - - let (kind, retryable) = match err.kind() { - NotFound => (ErrorKind::NotFound, false), - PermissionDenied => (ErrorKind::PermissionDenied, false), - Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), - _ => (ErrorKind::Unexpected, true), - }; - - let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); - - if retryable { - err = err.set_temporary(); - } - - err -} diff --git a/core/src/services/hdfs/mod.rs b/core/src/services/hdfs/mod.rs index c1e98a3b6288..996a654fac3d 100644 --- a/core/src/services/hdfs/mod.rs +++ b/core/src/services/hdfs/mod.rs @@ -18,6 +18,5 @@ mod backend; pub use backend::HdfsBuilder as Hdfs; -mod error; mod pager; mod writer; diff --git a/core/src/services/hdfs/writer.rs b/core/src/services/hdfs/writer.rs index a436a8416e4d..4990df40a99e 100644 --- a/core/src/services/hdfs/writer.rs +++ b/core/src/services/hdfs/writer.rs @@ -23,7 +23,6 @@ use std::task::Poll; use async_trait::async_trait; use futures::AsyncWrite; -use super::error::parse_io_error; use crate::raw::*; use crate::*; @@ -42,7 +41,7 @@ impl oio::Write for HdfsWriter { fn poll_write(&mut self, cx: &mut Context<'_>, bs: &dyn oio::WriteBuf) -> Poll> { Pin::new(&mut self.f) .poll_write(cx, bs.chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_abort(&mut self, _: &mut Context<'_>) -> Poll> { @@ -53,17 +52,19 @@ impl oio::Write for HdfsWriter { } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.f).poll_close(cx).map_err(parse_io_error) + Pin::new(&mut self.f) + .poll_close(cx) + .map_err(new_std_io_error) } } impl oio::BlockingWrite for HdfsWriter { fn write(&mut self, bs: &dyn oio::WriteBuf) -> Result { - 
self.f.write(bs.chunk()).map_err(parse_io_error) + self.f.write(bs.chunk()).map_err(new_std_io_error) } fn close(&mut self) -> Result<()> { - self.f.flush().map_err(parse_io_error)?; + self.f.flush().map_err(new_std_io_error)?; Ok(()) } diff --git a/core/src/services/memcached/ascii.rs b/core/src/services/memcached/ascii.rs index 12ba7589d70c..6a790889396c 100644 --- a/core/src/services/memcached/ascii.rs +++ b/core/src/services/memcached/ascii.rs @@ -21,7 +21,7 @@ use tokio::io::AsyncWriteExt; use tokio::io::BufReader; use tokio::net::TcpStream; -use super::backend::parse_io_error; +use crate::raw::*; use crate::*; pub struct Connection { @@ -43,8 +43,8 @@ impl Connection { writer .write_all(&[b"get ", key.as_bytes(), b"\r\n"].concat()) .await - .map_err(parse_io_error)?; - writer.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + writer.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -71,7 +71,7 @@ impl Connection { self.io .read_exact(&mut buffer) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; // Read the trailing header self.read_line().await?; // \r\n @@ -85,10 +85,10 @@ impl Connection { self.io .write_all(header.as_bytes()) .await - .map_err(parse_io_error)?; - self.io.write_all(val).await.map_err(parse_io_error)?; - self.io.write_all(b"\r\n").await.map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.write_all(val).await.map_err(new_std_io_error)?; + self.io.write_all(b"\r\n").await.map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -110,8 +110,8 @@ impl Connection { self.io .write_all(header.as_bytes()) .await - .map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let 
header = self.read_header().await?; @@ -132,8 +132,8 @@ impl Connection { self.io .write_all(b"version\r\n") .await - .map_err(parse_io_error)?; - self.io.flush().await.map_err(parse_io_error)?; + .map_err(new_std_io_error)?; + self.io.flush().await.map_err(new_std_io_error)?; // Read response header let header = self.read_header().await?; @@ -151,7 +151,7 @@ impl Connection { async fn read_line(&mut self) -> Result<&[u8]> { let Self { io, buf } = self; buf.clear(); - io.read_until(b'\n', buf).await.map_err(parse_io_error)?; + io.read_until(b'\n', buf).await.map_err(new_std_io_error)?; if buf.last().copied() != Some(b'\n') { return Err(Error::new( ErrorKind::ContentIncomplete, diff --git a/core/src/services/memcached/backend.rs b/core/src/services/memcached/backend.rs index 2731a70a15ee..91c127de8a77 100644 --- a/core/src/services/memcached/backend.rs +++ b/core/src/services/memcached/backend.rs @@ -250,7 +250,7 @@ impl bb8::ManageConnection for MemcacheConnectionManager { async fn connect(&self) -> std::result::Result { let conn = TcpStream::connect(&self.address) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; Ok(ascii::Connection::new(conn)) } @@ -262,7 +262,3 @@ impl bb8::ManageConnection for MemcacheConnectionManager { false } } - -pub fn parse_io_error(err: std::io::Error) -> Error { - Error::new(ErrorKind::Unexpected, &err.kind().to_string()).set_source(err) -} diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 774221534543..258c77066aef 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -303,21 +303,21 @@ impl Accessor for SftpBackend { let start = f .seek(SeekFrom::End(size as i64)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(start + size)) } (Some(offset), None) => { let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, None) } (Some(offset), Some(size)) => { 
let start = f .seek(SeekFrom::Start(offset)) .await - .map_err(parse_io_error)?; + .map_err(new_std_io_error)?; (start, Some(size)) } }; @@ -558,23 +558,3 @@ async fn connect_sftp( Ok(sftp) } - -/// Parse all io related errors. -pub fn parse_io_error(err: std::io::Error) -> Error { - use std::io::ErrorKind::*; - - let (kind, retryable) = match err.kind() { - NotFound => (ErrorKind::NotFound, false), - PermissionDenied => (ErrorKind::PermissionDenied, false), - Interrupted | UnexpectedEof | TimedOut | WouldBlock => (ErrorKind::Unexpected, true), - _ => (ErrorKind::Unexpected, true), - }; - - let mut err = Error::new(kind, &err.kind().to_string()).set_source(err); - - if retryable { - err = err.set_temporary(); - } - - err -} diff --git a/core/src/services/sftp/error.rs b/core/src/services/sftp/error.rs index 9c0124e77576..adb8d079e924 100644 --- a/core/src/services/sftp/error.rs +++ b/core/src/services/sftp/error.rs @@ -45,6 +45,7 @@ impl From for Error { } } +/// REMOVE ME: it's not allowed to impl for Error. 
impl From for Error { fn from(e: SshError) -> Self { Error::new(ErrorKind::Unexpected, "ssh error").set_source(e) diff --git a/core/src/services/sftp/writer.rs b/core/src/services/sftp/writer.rs index c6a2aa6bef33..c80d8de9b5c5 100644 --- a/core/src/services/sftp/writer.rs +++ b/core/src/services/sftp/writer.rs @@ -45,11 +45,14 @@ impl oio::Write for SftpWriter { self.file .as_mut() .poll_write(cx, bs.chunk()) - .map_err(parse_io_error) + .map_err(new_std_io_error) } fn poll_close(&mut self, cx: &mut Context<'_>) -> Poll> { - self.file.as_mut().poll_shutdown(cx).map_err(parse_io_error) + self.file + .as_mut() + .poll_shutdown(cx) + .map_err(new_std_io_error) } fn poll_abort(&mut self, _: &mut Context<'_>) -> Poll> { @@ -60,6 +63,6 @@ impl oio::Write for SftpWriter { } } -fn parse_io_error(err: std::io::Error) -> Error { +fn new_std_io_error(err: std::io::Error) -> Error { Error::new(ErrorKind::Unexpected, "read from sftp").set_source(err) } From 08d757a4d3895ef084edc302f0b1079828e5928c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 23:14:37 +0800 Subject: [PATCH 10/46] Implement tokio_read Signed-off-by: Xuanwo --- core/src/raw/oio/read/mod.rs | 3 + core/src/raw/oio/read/tokio_read.rs | 88 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 core/src/raw/oio/read/tokio_read.rs diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 9ccbebdc335c..e5f66a538cee 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -39,3 +39,6 @@ pub use into_read_from_stream::FromStreamReader; mod futures_read; pub use futures_read::FuturesReader; + +mod tokio_read; +pub use tokio_read::TokioReader; diff --git a/core/src/raw/oio/read/tokio_read.rs b/core/src/raw/oio/read/tokio_read.rs new file mode 100644 index 000000000000..966973bdc3c8 --- /dev/null +++ b/core/src/raw/oio/read/tokio_read.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use std::io::SeekFrom; +use std::pin::Pin; +use std::task::{ready, Context, Poll}; +use tokio::io::AsyncSeek; +use tokio::io::{AsyncRead, ReadBuf}; + +/// TokioReader implements [`oio::Read`] via [`AsyncRead`] + [`AsyncSeek`]. +pub struct TokioReader { + inner: R, + + seek_pos: Option, +} + +impl TokioReader { + /// Create a new tokio reader.
+ pub fn new(inner: R) -> Self { + Self { + inner, + seek_pos: None, + } + } +} + +impl oio::Read for TokioReader +where + R: AsyncRead + AsyncSeek + Unpin + Send + Sync, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let mut buf = ReadBuf::new(buf); + + ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf)).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Read) + .with_context("source", "TokioReader") + })?; + + Poll::Ready(Ok(buf.filled().len())) + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + if self.seek_pos != Some(pos) { + Pin::new(&mut self.inner).start_seek(pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "TokioReader") + })?; + self.seek_pos = Some(pos) + } + + // NOTE: don't return error by `?` here, we need to reset seek_pos. + let pos = ready!(Pin::new(&mut self.inner).poll_complete(cx).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::Seek) + .with_context("source", "TokioReader") + })); + self.seek_pos = None; + Poll::Ready(pos) + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + let _ = cx; + + Poll::Ready(Some(Err(Error::new( + ErrorKind::Unsupported, + "TokioReader doesn't support poll_next", + )))) + } +} From 0ce3b2eb92093eb4c537e913b2b47e1d99da6293 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Wed, 25 Oct 2023 23:21:32 +0800 Subject: [PATCH 11/46] Imple,emt std_read Signed-off-by: Xuanwo --- core/src/raw/oio/read/api.rs | 2 +- core/src/raw/oio/read/mod.rs | 3 ++ core/src/raw/oio/read/std_read.rs | 62 +++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 core/src/raw/oio/read/std_read.rs diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index da32259301e8..79fd77c33778 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -269,7 +269,7 @@ pub type 
BlockingReader = Box; /// /// `Read` is required to be implemented, `Seek` and `Iterator` /// is optional. We use `Read` to make users life easier. -pub trait BlockingRead: Send + Sync + 'static { +pub trait BlockingRead: Send + Sync { /// Read synchronously. fn read(&mut self, buf: &mut [u8]) -> Result; diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index e5f66a538cee..16510636a122 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -42,3 +42,6 @@ pub use futures_read::FuturesReader; mod tokio_read; pub use tokio_read::TokioReader; + +mod std_read; +pub use std_read::StdReader; diff --git a/core/src/raw/oio/read/std_read.rs b/core/src/raw/oio/read/std_read.rs new file mode 100644 index 000000000000..926b9abb358c --- /dev/null +++ b/core/src/raw/oio/read/std_read.rs @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use std::io::Seek; +use std::io::{Read, SeekFrom}; + +/// StdReader implements [`oio::BlockingRead`] via [`Read`] + [`Seek`]. +pub struct StdReader { + inner: R, +} + +impl StdReader { + /// Create a new std reader.
+ pub fn new(inner: R) -> Self { + Self { inner } + } +} + +impl oio::BlockingRead for StdReader +where + R: Read + Seek + Send + Sync, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + self.inner.read(buf).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::BlockingRead) + .with_context("source", "StdReader") + }) + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + self.inner.seek(pos).map_err(|err| { + new_std_io_error(err) + .with_operation(oio::ReadOperation::BlockingSeek) + .with_context("source", "StdReader") + }) + } + + fn next(&mut self) -> Option> { + Some(Err(Error::new( + ErrorKind::Unsupported, + "StdReader doesn't support poll_next", + ))) + } +} From 0c28f67d3f4970fe29dfe7059cda71ed3f0c71e1 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 17:01:03 +0800 Subject: [PATCH 12/46] Add file reader support Signed-off-by: Xuanwo --- core/src/raw/oio/buf/adaptive.rs | 140 ++++++++ core/src/raw/oio/buf/mod.rs | 3 + core/src/raw/oio/read/file_read.rs | 546 +++++++++++++++++++++++------ core/src/services/fs/backend.rs | 67 +--- core/src/services/hdfs/backend.rs | 65 +--- core/src/services/sftp/backend.rs | 41 +-- 6 files changed, 599 insertions(+), 263 deletions(-) create mode 100644 core/src/raw/oio/buf/adaptive.rs diff --git a/core/src/raw/oio/buf/adaptive.rs b/core/src/raw/oio/buf/adaptive.rs new file mode 100644 index 000000000000..eb5b72b045c3 --- /dev/null +++ b/core/src/raw/oio/buf/adaptive.rs @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use bytes::{Bytes, BytesMut}; +use std::cmp; +use tokio::io::ReadBuf; + +/// The default minimum adaptive buffer size is 8 KiB. +const DEFAULT_MIN_BUFFER_SIZE: usize = 8192; + +/// The default maximum adaptive buffer size is 4 MiB. +/// +/// We will not grow the buffer beyond this size. +const DEFAULT_MAX_BUFFER_SIZE: usize = 4 * 1024 * 1024; + +/// AdaptiveBuf is inspired by hyper [ReadStrategy](https://github.com/hyperium/hyper/blob/master/src/proto/h1/io.rs#L26). +/// +/// We build this adaptive buf to make our internal buf grow and shrink automatically based on IO +/// throughput. +pub struct AdaptiveBuf { + /// The underlying buffer. + buffer: BytesMut, + + next: usize, + decrease_now: bool, +} + +impl Default for AdaptiveBuf { + fn default() -> Self { + Self { + buffer: BytesMut::default(), + next: DEFAULT_MIN_BUFFER_SIZE, + decrease_now: false, + } + } +} + +impl AdaptiveBuf { + /// reserve will reserve the buffer to the next size. + pub fn reserve(&mut self) { + if self.buffer.capacity() < self.next { + self.buffer.reserve(self.next); + } + } + + /// Returning the initialized part of the buffer. + pub fn initialized_mut<'a>(&'a mut self) -> ReadBuf<'a> { + let dst = self.buffer.spare_capacity_mut(); + let length = dst.len(); + let mut buf = ReadBuf::uninit(dst); + + // Safety: we make sure that we only return the initialized part of the buffer. + unsafe { + buf.assume_init(length); + } + buf + } + + /// Records the number of bytes read from the underlying IO. 
+ pub fn record(&mut self, read: usize) { + if read >= self.next { + // Growing if we uses the whole buffer. + self.next = cmp::min(self.next.saturating_mul(2), DEFAULT_MAX_BUFFER_SIZE); + self.decrease_now = false; + } else { + // Shrinking if we uses less than half of the buffer. + let decr_to = self.next.saturating_div(2); + if read < decr_to { + if self.decrease_now { + self.next = cmp::max(decr_to, DEFAULT_MIN_BUFFER_SIZE); + self.decrease_now = false; + } else { + // Mark decrease_now as true to shrink the buffer next time. + self.decrease_now = true; + } + } else { + // Mark decrease_now as false to keep current buffer size. + self.decrease_now = false; + } + } + } + + /// Splits the buffer into two at the given index. + /// + /// # Safety + /// + /// It's required that buffer has been filled with given bytes. + pub fn split(&mut self, n: usize) -> Bytes { + unsafe { self.buffer.set_len(n) } + self.buffer.split().freeze() + } +} + +#[cfg(tests)] +mod tests { + use super::*; + + #[test] + fn read_strategy_adaptive_decrements() { + let mut huf = AdaptiveBuf::default(); + huf.record(8192); + assert_eq!(huf.next, 16384); + + huf.record(1); + assert_eq!( + huf.next, 16384, + "first smaller record doesn't decrement yet" + ); + huf.record(8192); + assert_eq!(huf.next, 16384, "record was with range"); + + huf.record(1); + assert_eq!( + huf.next, 16384, + "in-range record should make this the 'first' again" + ); + + huf.record(1); + assert_eq!(huf.next, 8192, "second smaller record decrements"); + + huf.record(1); + assert_eq!(huf.next, 8192, "first doesn't decrement"); + huf.record(1); + assert_eq!(huf.next, 8192, "doesn't decrement under minimum"); + } +} diff --git a/core/src/raw/oio/buf/mod.rs b/core/src/raw/oio/buf/mod.rs index dfd3663e56f8..abc8bf3286de 100644 --- a/core/src/raw/oio/buf/mod.rs +++ b/core/src/raw/oio/buf/mod.rs @@ -20,3 +20,6 @@ pub use chunked_bytes::ChunkedBytes; mod write_buf; pub use write_buf::WriteBuf; + +mod adaptive; +pub use 
adaptive::AdaptiveBuf; diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index 852e7c47d8f1..b06e65fef8e9 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -16,171 +16,495 @@ // under the License. use std::cmp; -use std::io::Read; -use std::io::Seek; + use std::io::SeekFrom; use std::pin::Pin; +use std::sync::Arc; use std::task::ready; use std::task::Context; use std::task::Poll; use bytes::Bytes; -use futures::AsyncRead; -use futures::AsyncSeek; +use futures::future::BoxFuture; +use futures::Future; use crate::raw::*; use crate::*; -/// FileReader implements [`oio::Read`] via `AsyncRead + AsyncSeek`. -pub struct FileReader { - inner: R, +/// FileReader that implement range read and streamable read on seekable reader. +/// +/// `oio::Reader` requires the underlying reader to handle range correctly and have streamable support. +/// But some services like `fs`, `hdfs` only have seek support. FileReader implements range and stream +/// support based on `seek`. We will maintain the correct range for give file and implement streamable +/// operations based on [`oio::AdaptiveBuf`]. +pub struct FileReader { + acc: Arc, + path: Arc, + op: OpRead, + + offset: Option, + size: Option, + cur: u64, - start: u64, - end: Option, + buf: oio::AdaptiveBuf, + state: State, +} - offset: u64, +enum State { + Idle, + Send(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), } -impl FileReader { +/// Safety: State will only be accessed under &mut. +unsafe impl Sync for State {} + +impl FileReader +where + A: Accessor, +{ /// Create a new FileReader. /// /// # Notes /// /// It's required that input reader's cursor is at the input `start` of the file. 
- pub fn new(fd: R, start: u64, end: Option) -> FileReader { + pub fn new(acc: Arc, path: &str, op: OpRead) -> FileReader { FileReader { - inner: fd, - start, - end, + acc, + path: Arc::new(path.to_string()), + op, - offset: start, + offset: None, + size: None, + cur: 0, + buf: oio::AdaptiveBuf::default(), + state: State::::Idle, } } +} - fn calculate_position(&self, pos: SeekFrom) -> Result { - match pos { - SeekFrom::Start(n) => { - if n < self.start { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } +impl FileReader +where + A: Accessor, + R: oio::Read, +{ + fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { + let acc = self.acc.clone(); + let path = self.path.clone(); + + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); - Ok(SeekFrom::Start(self.start + n)) + Box::pin(async move { acc.read(&path, op).await }) + } +} + +impl oio::Read for FileReader +where + A: Accessor, + R: oio::Read, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_read(cx, buf) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_read(cx, buf) } - SeekFrom::End(n) => { - let end = if let Some(end) = self.end { - end as i64 + n + State::Read(r) => { + // We should know where to start read the data. 
+ if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::End(size as i64)))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; + } + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Poll::Ready(Ok(0)); + } + cmp::min(buf.len(), (size - self.cur) as usize) } else { - n + buf.len() }; - if self.start as i64 + end < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); + match ready!(r.poll_read(cx, &mut buf[..size])) { + Ok(0) => Poll::Ready(Ok(0)), + Ok(n) => { + self.cur += n as u64; + Poll::Ready(Ok(n)) + } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Poll::Ready(Err(err)), } + } + } + } - Ok(SeekFrom::End(end)) + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_seek(cx, pos) } - SeekFrom::Current(n) => { - if self.offset as i64 + n < self.start as i64 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we can retry it. 
+ self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_seek(cx, pos) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::End(-(size as i64))))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; } - - Ok(SeekFrom::Current(n)) + let pos = calculate_position(self.offset, self.size, self.cur, pos)?; + let cur = ready!(r.poll_seek(cx, pos))?; + self.cur = cur - self.offset.unwrap(); + Poll::Ready(Ok(cur)) } } } -} -impl oio::Read for FileReader -where - R: AsyncRead + AsyncSeek + Unpin + Send + Sync, -{ - fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { - let size = if let Some(end) = self.end { - if self.offset >= end { - return Poll::Ready(Ok(0)); - } - cmp::min(buf.len(), (end - self.offset) as usize) - } else { - buf.len() - }; - - let n = - ready!(Pin::new(&mut self.inner).poll_read(cx, &mut buf[..size])).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FileReader").set_source(err) - })?; - self.offset += n as u64; - Poll::Ready(Ok(n)) - } - - fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { - let pos = self.calculate_position(pos)?; + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_next(cx) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If send future returns an error, we should reset + // state to Idle so that we 
can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_next(cx) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::End(size as i64)))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; + } - let cur = ready!(Pin::new(&mut self.inner).poll_seek(cx, pos)).map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FileReader").set_source(err) - })?; + self.buf.reserve(); - self.offset = cur; - Poll::Ready(Ok(self.offset - self.start)) - } + let mut buf = self.buf.initialized_mut(); + let buf = buf.initialized_mut(); - fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { - let _ = cx; + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Poll::Ready(None); + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; - Poll::Ready(Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support next", - )))) + match ready!(r.poll_read(cx, &mut buf[..size])) { + Ok(0) => Poll::Ready(None), + Ok(n) => { + self.cur += n as u64; + self.buf.record(n); + Poll::Ready(Some(Ok(self.buf.split(n)))) + } + // We don't need to reset state here since it's ok to poll the same reader. 
+ Err(err) => Poll::Ready(Some(Err(err))), + } + } + } } } -impl oio::BlockingRead for FileReader +impl oio::BlockingRead for FileReader where - R: Read + Seek + Send + Sync + 'static, + A: Accessor, + R: oio::BlockingRead, { fn read(&mut self, buf: &mut [u8]) -> Result { - let size = if let Some(end) = self.end { - if self.offset >= end { - return Ok(0); - } - cmp::min(buf.len(), (end - self.offset) as usize) - } else { - buf.len() - }; - - let n = self.inner.read(&mut buf[..size]).map_err(|err| { - Error::new(ErrorKind::Unexpected, "read data from FileReader").set_source(err) - })?; - self.offset += n as u64; - Ok(n) + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); + + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.state = State::Read(r); + self.read(buf) + } + + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = r.seek(SeekFrom::End(size as i64))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; + } + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return Ok(0); + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => Ok(0), + Ok(n) => { + self.cur += n as u64; + Ok(n) + } + // We don't need to reset state here since it's ok to poll the same reader. 
+ Err(err) => Err(err), + } + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } } fn seek(&mut self, pos: SeekFrom) -> Result { - let pos = self.calculate_position(pos)?; - - let cur = self.inner.seek(pos).map_err(|err| { - Error::new(ErrorKind::Unexpected, "seek data from FileReader").set_source(err) - })?; + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. + let op = self.op.clone().with_range(BytesRange::from(..)); - self.offset = cur; - Ok(self.offset - self.start) + let (_, r) = self.acc.blocking_read(&self.path, op)?; + self.state = State::Read(r); + self.seek(pos) + } + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = r.seek(SeekFrom::End(-(size as i64)))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; + } + let pos = calculate_position(self.offset, self.size, self.cur, pos)?; + let cur = r.seek(pos)?; + self.cur = cur - self.offset.unwrap(); + Ok(self.cur) + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } } fn next(&mut self) -> Option> { - Some(Err(Error::new( - ErrorKind::Unsupported, - "output reader doesn't support iterating", - ))) + match &mut self.state { + State::Idle => { + // FileReader doesn't support range, we will always use full range to open a file. 
+ let op = self.op.clone().with_range(BytesRange::from(..)); + + let r = match self.acc.blocking_read(&self.path, op) { + Ok((_, r)) => r, + Err(err) => return Some(Err(err)), + }; + self.state = State::Read(r); + self.next() + } + + State::Read(r) => { + // We should know where to start read the data. + if self.offset.is_none() { + let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = match r.seek(SeekFrom::End(size as i64)) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + }; + (start, Some(size)) + } + (Some(offset), None) => { + let start = match r.seek(SeekFrom::Start(offset)) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + }; + (start, None) + } + (Some(offset), Some(size)) => { + let start = match r.seek(SeekFrom::Start(offset)) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + }; + (start, Some(size)) + } + }; + self.offset = Some(offset); + self.size = size; + } + + self.buf.reserve(); + + let mut buf = self.buf.initialized_mut(); + let buf = buf.initialized_mut(); + + let size = if let Some(size) = self.size { + // Sanity check. + if self.cur >= size { + return None; + } + cmp::min(buf.len(), (size - self.cur) as usize) + } else { + buf.len() + }; + + match r.read(&mut buf[..size]) { + Ok(0) => None, + Ok(n) => { + self.cur += n as u64; + self.buf.record(n); + Some(Ok(self.buf.split(n))) + } + // We don't need to reset state here since it's ok to poll the same reader. + Err(err) => Some(Err(err)), + } + } + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } +} + +/// Calculate the actual position that we should seek to. 
+fn calculate_position( + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, +) -> Result { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. + Ok(SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + if let Some(size) = size { + if size as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. + Ok(SeekFrom::Start(offset + (size as i64 + n) as u64)) + } else { + // size unknown means we can forward seek end to underlying reader directly. + Ok(SeekFrom::End(n)) + } + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + Ok(SeekFrom::Start(offset + (cur as i64 + n) as u64)) + } } } diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 524c4ef5e578..034d75e033e3 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -16,11 +16,9 @@ // under the License. 
use std::collections::HashMap; -use std::io::SeekFrom; use std::path::Path; use std::path::PathBuf; -use async_compat::Compat; use async_trait::async_trait; use chrono::DateTime; use log::debug; @@ -246,8 +244,8 @@ impl FsBackend { #[async_trait] impl Accessor for FsBackend { - type Reader = oio::FileReader>; - type BlockingReader = oio::FileReader; + type Reader = oio::TokioReader; + type BlockingReader = oio::StdReader; type Writer = FsWriter; type BlockingWriter = FsWriter; type Pager = Option>; @@ -262,7 +260,6 @@ impl Accessor for FsBackend { read: true, read_can_seek: true, - read_with_range: true, write: true, write_can_empty: true, @@ -303,12 +300,10 @@ impl Accessor for FsBackend { /// - open file first, and than use `seek`. (100ns) /// /// Benchmark could be found [here](https://gist.github.com/Xuanwo/48f9cfbc3022ea5f865388bb62e1a70f) - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use tokio::io::AsyncSeekExt; - + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = self.root.join(path.trim_end_matches('/')); - let mut f = tokio::fs::OpenOptions::new() + let f = tokio::fs::OpenOptions::new() .read(true) .open(&p) .await @@ -331,33 +326,7 @@ impl Accessor for FsBackend { } } - let (start, end) = match (args.range().offset(), args.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = f - .seek(SeekFrom::End(size as i64)) - .await - .map_err(new_std_io_error)?; - (start, Some(start + size)) - } - (Some(offset), None) => { - let start = f - .seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = f - .seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, Some(size)) - } - }; - - let r = oio::FileReader::new(Compat::new(f), start, end); - + let r = oio::TokioReader::new(f); Ok((RpRead::new(0), r)) } @@ -504,12 +473,10 @@ impl Accessor for FsBackend 
{ Ok(RpCreateDir::default()) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use std::io::Seek; - + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = self.root.join(path.trim_end_matches('/')); - let mut f = std::fs::OpenOptions::new() + let f = std::fs::OpenOptions::new() .read(true) .open(p) .map_err(new_std_io_error)?; @@ -531,25 +498,7 @@ impl Accessor for FsBackend { } } - let (start, end) = match (args.range().offset(), args.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = f - .seek(SeekFrom::End(size as i64)) - .map_err(new_std_io_error)?; - (start, Some(start + size)) - } - (Some(offset), None) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; - (start, Some(size)) - } - }; - - let r = oio::FileReader::new(f, start, end); + let r = oio::StdReader::new(f); Ok((RpRead::new(0), r)) } diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index a7fb23f0cfc0..fed635b15e4d 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -18,7 +18,6 @@ use std::collections::HashMap; use std::fmt::Debug; use std::io; -use std::io::SeekFrom; use std::path::PathBuf; use std::sync::Arc; @@ -158,8 +157,8 @@ unsafe impl Sync for HdfsBackend {} #[async_trait] impl Accessor for HdfsBackend { - type Reader = oio::FileReader; - type BlockingReader = oio::FileReader; + type Reader = oio::FuturesReader; + type BlockingReader = oio::StdReader; type Writer = HdfsWriter; type BlockingWriter = HdfsWriter; type Pager = Option; @@ -174,7 +173,6 @@ impl Accessor for HdfsBackend { read: true, read_can_seek: true, - read_with_range: true, write: true, // TODO: wait for https://github.com/apache/incubator-opendal/pull/2715 @@ -202,12 +200,10 
@@ impl Accessor for HdfsBackend { Ok(RpCreateDir::default()) } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use futures::AsyncSeekExt; - + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let p = build_rooted_abs_path(&self.root, path); - let mut f = self + let f = self .client .open_file() .read(true) @@ -215,32 +211,7 @@ impl Accessor for HdfsBackend { .await .map_err(new_std_io_error)?; - let (start, end) = match (args.range().offset(), args.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = f - .seek(SeekFrom::End(size as i64)) - .await - .map_err(new_std_io_error)?; - (start, Some(start + size)) - } - (Some(offset), None) => { - let start = f - .seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = f - .seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, Some(size)) - } - }; - - let r = oio::FileReader::new(f, start, end); + let r = oio::FuturesReader::new(f); Ok((RpRead::new(0), r)) } @@ -352,37 +323,17 @@ impl Accessor for HdfsBackend { Ok(RpCreateDir::default()) } - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - use std::io::Seek; - + fn blocking_read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::BlockingReader)> { let p = build_rooted_abs_path(&self.root, path); - let mut f = self + let f = self .client .open_file() .read(true) .open(&p) .map_err(new_std_io_error)?; - let (start, end) = match (args.range().offset(), args.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = f - .seek(SeekFrom::End(size as i64)) - .map_err(new_std_io_error)?; - (start, Some(start + size)) - } - (Some(offset), None) => { - let start = f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = 
f.seek(SeekFrom::Start(offset)).map_err(new_std_io_error)?; - (start, Some(size)) - } - }; - - let r = oio::FileReader::new(f, start, end); + let r = oio::StdReader::new(f); Ok((RpRead::new(0), r)) } diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 258c77066aef..5c67dab7664a 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -15,11 +15,9 @@ // specific language governing permissions and limitations // under the License. -use async_compat::Compat; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; -use std::io::SeekFrom; use std::path::Path; use std::path::PathBuf; use std::pin::Pin; @@ -226,7 +224,7 @@ impl Debug for SftpBackend { #[async_trait] impl Accessor for SftpBackend { - type Reader = oio::FileReader>>>; + type Reader = oio::TokioReader>>; type BlockingReader = (); type Writer = SftpWriter; type BlockingWriter = (); @@ -241,7 +239,6 @@ impl Accessor for SftpBackend { stat: true, read: true, - read_with_range: true, read_can_seek: true, write: true, @@ -286,50 +283,22 @@ impl Accessor for SftpBackend { return Ok(RpCreateDir::default()); } - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - use tokio::io::AsyncSeekExt; - + async fn read(&self, path: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { let client = self.connect().await?; let mut fs = client.fs(); fs.set_cwd(&self.root); let path = fs.canonicalize(path).await?; - let mut f = client.open(path.as_path()).await?; - - let (start, end) = match (args.range().offset(), args.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = f - .seek(SeekFrom::End(size as i64)) - .await - .map_err(new_std_io_error)?; - (start, Some(start + size)) - } - (Some(offset), None) => { - let start = f - .seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = f - 
.seek(SeekFrom::Start(offset)) - .await - .map_err(new_std_io_error)?; - (start, Some(size)) - } - }; + let f = client.open(path.as_path()).await?; // Sorry for the ugly code... // // - `f` is a openssh file. // - `TokioCompatFile::new(f)` makes it implements tokio AsyncRead + AsyncSeek for openssh File. - // - `Compat::new(f)` make it compatible to `futures::AsyncRead + futures::AsyncSeek`. // - `Box::pin(x)` to make sure this reader implements `Unpin`, since `TokioCompatFile` is not. - // - `oio::FileReader::new(x)` makes it a `oio::FileReader` which implements `oio::Read`. - let r = oio::FileReader::new(Box::pin(Compat::new(TokioCompatFile::new(f))), start, end); + // - `oio::TokioReader::new(x)` makes it a `oio::TokioReader` which implements `oio::Read`. + let r = oio::TokioReader::new(Box::pin(TokioCompatFile::new(f))); Ok((RpRead::new(0), r)) } From ca51f047e9e21167f07786cec049d6efcdfac427 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 17:05:59 +0800 Subject: [PATCH 13/46] Cleanup Signed-off-by: Xuanwo --- core/src/raw/oio/buf/adaptive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/raw/oio/buf/adaptive.rs b/core/src/raw/oio/buf/adaptive.rs index eb5b72b045c3..63d6cdb09860 100644 --- a/core/src/raw/oio/buf/adaptive.rs +++ b/core/src/raw/oio/buf/adaptive.rs @@ -58,7 +58,7 @@ impl AdaptiveBuf { } /// Returning the initialized part of the buffer. 
- pub fn initialized_mut<'a>(&'a mut self) -> ReadBuf<'a> { + pub fn initialized_mut(&mut self) -> ReadBuf { let dst = self.buffer.spare_capacity_mut(); let length = dst.len(); let mut buf = ReadBuf::uninit(dst); From 7c6d21958a578a8ec28a4146a74c8e8f48078321 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 17:08:11 +0800 Subject: [PATCH 14/46] Fix attr Signed-off-by: Xuanwo --- core/src/raw/oio/buf/adaptive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/raw/oio/buf/adaptive.rs b/core/src/raw/oio/buf/adaptive.rs index 63d6cdb09860..02dce54fec2e 100644 --- a/core/src/raw/oio/buf/adaptive.rs +++ b/core/src/raw/oio/buf/adaptive.rs @@ -105,7 +105,7 @@ impl AdaptiveBuf { } } -#[cfg(tests)] +#[cfg(test)] mod tests { use super::*; From c88362cf94838c2afc6488215dcd5fb0b252db86 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 21:18:18 +0800 Subject: [PATCH 15/46] Save Signed-off-by: Xuanwo --- core/src/layers/complete.rs | 41 ++-- core/src/layers/madsim.rs | 5 +- core/src/layers/prometheus.rs | 38 ++-- core/src/layers/retry.rs | 2 +- core/src/raw/adapters/kv/backend.rs | 5 +- core/src/raw/adapters/typed_kv/backend.rs | 5 +- core/src/raw/oio/read/api.rs | 130 +++++++++++- core/src/raw/oio/read/lazy_read.rs | 198 ++++++++++++++++++ core/src/raw/oio/read/mod.rs | 3 + core/src/raw/oio/read/range_read.rs | 2 +- core/src/raw/rps.rs | 25 +-- core/src/services/azblob/backend.rs | 6 +- core/src/services/azdls/backend.rs | 5 +- core/src/services/azfile/backend.rs | 5 +- core/src/services/cos/backend.rs | 5 +- core/src/services/dropbox/backend.rs | 5 +- core/src/services/fs/backend.rs | 4 +- core/src/services/ftp/backend.rs | 14 +- core/src/services/gcs/backend.rs | 3 +- core/src/services/gdrive/backend.rs | 18 +- core/src/services/ghac/backend.rs | 5 +- core/src/services/hdfs/backend.rs | 4 +- core/src/services/http/backend.rs | 5 +- core/src/services/ipfs/backend.rs | 5 +- core/src/services/ipmfs/backend.rs | 5 +- 
core/src/services/obs/backend.rs | 5 +- core/src/services/onedrive/backend.rs | 5 +- core/src/services/oss/backend.rs | 5 +- core/src/services/s3/backend.rs | 5 +- core/src/services/sftp/backend.rs | 2 +- core/src/services/supabase/backend.rs | 5 +- core/src/services/vercel_artifacts/backend.rs | 5 +- core/src/services/wasabi/backend.rs | 5 +- core/src/services/webdav/backend.rs | 5 +- core/src/services/webhdfs/backend.rs | 5 +- core/src/types/operator/blocking_operator.rs | 23 +- core/src/types/operator/operator.rs | 33 +-- 37 files changed, 417 insertions(+), 229 deletions(-) create mode 100644 core/src/raw/oio/read/lazy_read.rs diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index 903b64e6b611..0a0a0bd3fdaf 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -27,13 +27,13 @@ use std::task::Poll; use async_trait::async_trait; use bytes::Bytes; -use crate::raw::oio::into_flat_page; -use crate::raw::oio::into_hierarchy_page; use crate::raw::oio::Entry; use crate::raw::oio::FlatPager; use crate::raw::oio::HierarchyPager; use crate::raw::oio::RangeReader; use crate::raw::oio::StreamableReader; +use crate::raw::oio::{into_flat_page, FileReader}; +use crate::raw::oio::{into_hierarchy_page, LazyReader}; use crate::raw::*; use crate::*; @@ -162,22 +162,24 @@ impl CompleteAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let (rp, r) = self.inner.read(path, args.clone()).await?; - match (seekable, streamable) { - (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), + (true, true) => { + let r = LazyReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::AlreadyComplete(r))) + } (true, false) => { - let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedStreamable(r))) + let r = FileReader::new(self.inner.clone(), path, args); + + Ok((RpRead::new(), CompleteReader::NeedStreamable(r))) } _ => { let r = 
RangeReader::new(self.inner.clone(), path, args); if streamable { - Ok((rp, CompleteReader::NeedSeekable(r))) + Ok((RpRead::new(), CompleteReader::NeedSeekable(r))) } else { let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedBoth(r))) + Ok((RpRead::new(), CompleteReader::NeedBoth(r))) } } } @@ -196,22 +198,23 @@ impl CompleteAccessor { let seekable = capability.read_can_seek; let streamable = capability.read_can_next; - let (rp, r) = self.inner.blocking_read(path, args.clone())?; - match (seekable, streamable) { - (true, true) => Ok((rp, CompleteReader::AlreadyComplete(r))), + (true, true) => { + let r = LazyReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::AlreadyComplete(r))) + } (true, false) => { - let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedStreamable(r))) + let r = FileReader::new(self.inner.clone(), path, args); + Ok((RpRead::new(), CompleteReader::NeedStreamable(r))) } _ => { let r = RangeReader::new(self.inner.clone(), path, args); if streamable { - Ok((rp, CompleteReader::NeedSeekable(r))) + Ok((RpRead::new(), CompleteReader::NeedSeekable(r))) } else { let r = oio::into_streamable_read(r, 256 * 1024); - Ok((rp, CompleteReader::NeedBoth(r))) + Ok((RpRead::new(), CompleteReader::NeedBoth(r))) } } } @@ -546,9 +549,9 @@ impl LayeredAccessor for CompleteAccessor { } pub enum CompleteReader { - AlreadyComplete(R), + AlreadyComplete(LazyReader), NeedSeekable(RangeReader), - NeedStreamable(StreamableReader), + NeedStreamable(FileReader), NeedBoth(StreamableReader>), } @@ -788,7 +791,7 @@ mod tests { } async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { - Ok((RpRead::new(0), Box::new(()))) + Ok((RpRead::new(), Box::new(()))) } async fn write(&self, _: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/layers/madsim.rs b/core/src/layers/madsim.rs index 6e2a3b6a36c1..1d1253bafd3d 100644 --- a/core/src/layers/madsim.rs 
+++ b/core/src/layers/madsim.rs @@ -191,10 +191,7 @@ impl LayeredAccessor for MadsimAccessor { .downcast::() .expect("fail to downcast response to ReadResponse"); let content_length = resp.data.as_ref().map(|b| b.len()).unwrap_or(0); - Ok(( - RpRead::new(content_length as u64), - MadsimReader { data: resp.data }, - )) + Ok((RpRead::new(), MadsimReader { data: resp.data })) } #[cfg(not(madsim))] { diff --git a/core/src/layers/prometheus.rs b/core/src/layers/prometheus.rs index 4692da0a68c7..f26a4af2bf3b 100644 --- a/core/src/layers/prometheus.rs +++ b/core/src/layers/prometheus.rs @@ -320,28 +320,18 @@ impl LayeredAccessor for PrometheusAccessor { .with_label_values(&labels) .start_timer(); - let read_res = self - .inner - .read(path, args) - .map(|v| { - v.map(|(rp, r)| { - self.stats - .bytes_total - .with_label_values(&labels) - .observe(rp.metadata().content_length() as f64); - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Read, - self.stats.clone(), - self.scheme, - &path.to_string(), - ), - ) - }) - }) - .await; + let read_res = self.inner.read(path, args).await.map(|(rp, r)| { + ( + rp, + PrometheusMetricWrapper::new( + r, + Operation::Read, + self.stats.clone(), + self.scheme, + &path.to_string(), + ), + ) + }); timer.observe_duration(); read_res.map_err(|e| { self.stats.increment_errors_total(Operation::Read, e.kind()); @@ -546,10 +536,6 @@ impl LayeredAccessor for PrometheusAccessor { .with_label_values(&labels) .start_timer(); let result = self.inner.blocking_read(path, args).map(|(rp, r)| { - self.stats - .bytes_total - .with_label_values(&labels) - .observe(rp.metadata().content_length() as f64); ( rp, PrometheusMetricWrapper::new( diff --git a/core/src/layers/retry.rs b/core/src/layers/retry.rs index 67239d3fd5ee..9ae6c877d7de 100644 --- a/core/src/layers/retry.rs +++ b/core/src/layers/retry.rs @@ -1164,7 +1164,7 @@ mod tests { async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { Ok(( - RpRead::new(13), + 
RpRead::new(), MockReader { attempt: self.attempt.clone(), pos: 0, diff --git a/core/src/raw/adapters/kv/backend.rs b/core/src/raw/adapters/kv/backend.rs index 94db4de8dc5e..799dd1be5415 100644 --- a/core/src/raw/adapters/kv/backend.rs +++ b/core/src/raw/adapters/kv/backend.rs @@ -129,8 +129,7 @@ impl Accessor for Backend { let bs = self.apply_range(bs, args.range()); - let length = bs.len(); - Ok((RpRead::new(length as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { @@ -142,7 +141,7 @@ impl Accessor for Backend { }; let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(bs.len() as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/raw/adapters/typed_kv/backend.rs b/core/src/raw/adapters/typed_kv/backend.rs index d5313b8e0097..ca872346a7a8 100644 --- a/core/src/raw/adapters/typed_kv/backend.rs +++ b/core/src/raw/adapters/typed_kv/backend.rs @@ -135,8 +135,7 @@ impl Accessor for Backend { let bs = self.apply_range(bs, args.range()); - let length = bs.len(); - Ok((RpRead::new(length as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { @@ -149,7 +148,7 @@ impl Accessor for Backend { }; let bs = self.apply_range(bs, args.range()); - Ok((RpRead::new(bs.len() as u64), oio::Cursor::from(bs))) + Ok((RpRead::new(), oio::Cursor::from(bs))) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index 79fd77c33778..436c8447fd53 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -17,10 +17,10 @@ use std::fmt::Display; use std::fmt::Formatter; -use std::io; use 
std::pin::Pin; -use std::task::Context; use std::task::Poll; +use std::task::{ready, Context}; +use std::{cmp, io}; use bytes::Bytes; use futures::Future; @@ -198,6 +198,16 @@ pub trait ReadExt: Read { fn next(&mut self) -> NextFuture<'_, Self> { NextFuture { reader: self } } + + /// Build a future for `read_to_end`. + fn read_to_end<'a>(&'a mut self, buf: &'a mut Vec) -> ReadToEndFuture<'a, Self> { + let start = buf.len(); + ReadToEndFuture { + reader: self, + buf, + start, + } + } } /// Make this future `!Unpin` for compatibility with async trait methods. @@ -256,6 +266,82 @@ where } } +/// Make this future `!Unpin` for compatibility with async trait methods. +#[pin_project(!Unpin)] +pub struct ReadToEndFuture<'a, R: Read + Unpin + ?Sized> { + reader: &'a mut R, + buf: &'a mut Vec, + start: usize, +} + +impl Future for ReadToEndFuture<'_, R> +where + R: Read + Unpin + ?Sized, +{ + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + + let mut g = ReadToEndGuard { + len: this.buf.len(), + buf: this.buf, + next: MIN_READ_TO_END_GROW_SIZE, + }; + + loop { + if g.buf.capacity() - g.buf.len() < g.next { + g.buf.reserve(g.next); + unsafe { + g.buf.set_len(g.buf.capacity()); + } + } + + let buf = &mut g.buf[g.len..]; + match ready!(this.reader.poll_read(cx, buf)) { + Ok(0) => return Poll::Ready(Ok(g.len - *this.start)), + Ok(n) => { + g.next = if n >= g.next { + cmp::min(g.next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= g.next / 2 { + g.next + } else { + cmp::max(g.next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + }; + // We can't allow bogus values from read. If it is too large, the returned vec could have its length + // set past its capacity, or if it overflows the vec could be shortened which could create an invalid + // string if this is called via read_to_string. 
+ assert!(n <= buf.len()); + g.len += n; + } + Err(e) => return Poll::Ready(Err(e)), + } + } + } +} + +const MIN_READ_TO_END_GROW_SIZE: usize = 8 * 1024; +const MAX_READ_TO_END_GROW_SIZE: usize = 4 * 1024 * 1024; + +/// ReadToEndGuard makes sure that the buf length is maintained correctly. +struct ReadToEndGuard<'a> { + buf: &'a mut Vec, + /// Store the real length of buf. + len: usize, + next: usize, +} + +impl Drop for ReadToEndGuard<'_> { + /// # Safety + /// + /// We make sure that the length of buf is maintained correctly. + fn drop(&mut self) { + unsafe { + self.buf.set_len(self.len); + } + } +} + /// BlockingReader is a boxed dyn `BlockingRead`. pub type BlockingReader = Box; @@ -278,6 +364,46 @@ pub trait BlockingRead: Send + Sync { /// Iterating [`Bytes`] from underlying reader. fn next(&mut self) -> Option>; + + /// Read all data of current reader to the end of buf. + fn read_to_end(&mut self, buf: &mut Vec) -> Result { + let start_len = buf.len(); + let mut g = ReadToEndGuard { + len: buf.len(), + buf, + next: MIN_READ_TO_END_GROW_SIZE, + }; + + loop { + if g.buf.capacity() - g.buf.len() < g.next { + g.buf.reserve(g.next); + unsafe { + g.buf.set_len(g.buf.capacity()); + } + } + + let buf = &mut g.buf[g.len..]; + match self.read(buf) { + Ok(0) => return Ok(g.len - start_len), + Ok(n) => { + g.next = if n >= g.next { + cmp::min(g.next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= g.next / 2 { + g.next + } else { + cmp::max(g.next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + }; + + // We can't allow bogus values from read. If it is too large, the returned vec could have its length + // set past its capacity, or if it overflows the vec could be shortened which could create an invalid + // string if this is called via read_to_string. 
+ assert!(n <= buf.len()); + g.len += n; + } + Err(e) => return Err(e), + } + } + } } impl BlockingRead for () { diff --git a/core/src/raw/oio/read/lazy_read.rs b/core/src/raw/oio/read/lazy_read.rs new file mode 100644 index 000000000000..89705deff534 --- /dev/null +++ b/core/src/raw/oio/read/lazy_read.rs @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::raw::*; +use crate::*; +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::Future; +use std::io::SeekFrom; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{ready, Context, Poll}; + +/// LazyReader implements [`oio::Read`] in a lazy way. +/// +/// The real requests are send when users calling read or seek. +pub struct LazyReader { + acc: Arc, + path: Arc, + op: OpRead, + state: State, +} + +enum State { + Idle, + Send(BoxFuture<'static, Result<(RpRead, R)>>), + Read(R), +} + +/// Safety: State will only be accessed under &mut. +unsafe impl Sync for State {} + +impl LazyReader +where + A: Accessor, +{ + /// Create a new [`oio::Reader`] with lazy support. 
+ pub fn new(acc: Arc, path: &str, op: OpRead) -> LazyReader { + LazyReader { + acc, + path: Arc::new(path.to_string()), + op, + + state: State::::Idle, + } + } +} + +impl LazyReader +where + A: Accessor, + R: oio::Read, +{ + fn read_future(&self) -> BoxFuture<'static, Result<(RpRead, R)>> { + let acc = self.acc.clone(); + let path = self.path.clone(); + let op = self.op.clone(); + + Box::pin(async move { acc.read(&path, op).await }) + } +} + +impl oio::Read for LazyReader +where + A: Accessor, + R: oio::Read, +{ + fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_read(cx, buf) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_read(cx, buf) + } + State::Read(r) => r.poll_read(cx, buf), + } + } + + fn poll_seek(&mut self, cx: &mut Context<'_>, pos: SeekFrom) -> Poll> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_seek(cx, pos) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. + self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_seek(cx, pos) + } + State::Read(r) => r.poll_seek(cx, pos), + } + } + + fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + match &mut self.state { + State::Idle => { + self.state = State::Send(self.read_future()); + self.poll_next(cx) + } + State::Send(fut) => { + let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + // If read future returns an error, we should reset + // state to Idle so that we can retry it. 
+ self.state = State::Idle; + err + })?; + self.state = State::Read(r); + self.poll_next(cx) + } + State::Read(r) => r.poll_next(cx), + } + } +} + +impl oio::BlockingRead for LazyReader +where + A: Accessor, + R: oio::BlockingRead, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + match &mut self.state { + State::Idle => { + let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; + self.state = State::Read(r); + self.read(buf) + } + State::Read(r) => r.read(buf), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn seek(&mut self, pos: SeekFrom) -> Result { + match &mut self.state { + State::Idle => { + let (_, r) = self.acc.blocking_read(&self.path, self.op.clone())?; + self.state = State::Read(r); + self.seek(pos) + } + State::Read(r) => r.seek(pos), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } + + fn next(&mut self) -> Option> { + match &mut self.state { + State::Idle => { + let r = match self.acc.blocking_read(&self.path, self.op.clone()) { + Ok((_, r)) => r, + Err(err) => return Some(Err(err)), + }; + self.state = State::Read(r); + self.next() + } + State::Read(r) => r.next(), + State::Send(_) => { + unreachable!( + "It's invalid to go into State::Send for BlockingRead, please report this bug" + ) + } + } + } +} diff --git a/core/src/raw/oio/read/mod.rs b/core/src/raw/oio/read/mod.rs index 16510636a122..5f7d5d93a163 100644 --- a/core/src/raw/oio/read/mod.rs +++ b/core/src/raw/oio/read/mod.rs @@ -45,3 +45,6 @@ pub use tokio_read::TokioReader; mod std_read; pub use std_read::StdReader; + +mod lazy_read; +pub use lazy_read::LazyReader; diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index 870285e4267e..e6967cea535b 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -637,7 +637,7 @@ mod tests 
{ let bs = args.range().apply_on_bytes(self.data.clone()); Ok(( - RpRead::new(bs.len() as u64), + RpRead::new(), MockReader { inner: futures::io::Cursor::new(bs.into()), }, diff --git a/core/src/raw/rps.rs b/core/src/raw/rps.rs index bc45d1457b6e..17e4470aec18 100644 --- a/core/src/raw/rps.rs +++ b/core/src/raw/rps.rs @@ -98,31 +98,12 @@ impl From for Request { /// Reply for `read` operation. #[derive(Debug, Clone)] -pub struct RpRead { - meta: Metadata, -} +pub struct RpRead {} impl RpRead { /// Create a new reply for `read`. - pub fn new(content_length: u64) -> Self { - RpRead { - meta: Metadata::new(EntryMode::FILE).with_content_length(content_length), - } - } - - /// Create reply read with existing metadata. - pub fn with_metadata(meta: Metadata) -> Self { - RpRead { meta } - } - - /// Get a ref of metadata. - pub fn metadata(&self) -> &Metadata { - &self.meta - } - - /// Consume reply to get the meta. - pub fn into_metadata(self) -> Metadata { - self.meta + pub fn new() -> Self { + RpRead {} } } diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index de6d5e3e943a..375a375d81bb 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -586,11 +586,7 @@ impl Accessor for AzblobBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 9351a25221e3..1ab176255610 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -292,10 +292,7 @@ impl Accessor for AzdlsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = 
parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index c7d8ad41cb91..78af5035d7a1 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -310,10 +310,7 @@ impl Accessor for AzfileBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/cos/backend.rs b/core/src/services/cos/backend.rs index d376abcb7ab6..5ffe4be230a1 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -332,10 +332,7 @@ impl Accessor for CosBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/dropbox/backend.rs b/core/src/services/dropbox/backend.rs index 8400b3300cb1..bbe3b0b18a34 100644 --- a/core/src/services/dropbox/backend.rs +++ b/core/src/services/dropbox/backend.rs @@ -97,10 +97,7 @@ impl Accessor for DropboxBackend { let resp = self.core.dropbox_get(path, args).await?; let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | 
StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/fs/backend.rs b/core/src/services/fs/backend.rs index 034d75e033e3..4b0d14207b1d 100644 --- a/core/src/services/fs/backend.rs +++ b/core/src/services/fs/backend.rs @@ -327,7 +327,7 @@ impl Accessor for FsBackend { } let r = oio::TokioReader::new(f); - Ok((RpRead::new(0), r)) + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -500,7 +500,7 @@ impl Accessor for FsBackend { let r = oio::StdReader::new(f); - Ok((RpRead::new(0), r)) + Ok((RpRead::new(), r)) } fn blocking_write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { diff --git a/core/src/services/ftp/backend.rs b/core/src/services/ftp/backend.rs index 2fe8ffe9a8c9..0fe5a17784b2 100644 --- a/core/src/services/ftp/backend.rs +++ b/core/src/services/ftp/backend.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; @@ -319,37 +318,38 @@ impl Accessor for FtpBackend { return Ok(RpCreateDir::default()); } + /// TODO: migrate to FileReader maybe? 
async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { let mut ftp_stream = self.ftp_connect(Operation::Read).await?; let meta = self.ftp_stat(path).await?; let br = args.range(); - let (r, size): (Box, _) = match (br.offset(), br.size()) { + let r: Box = match (br.offset(), br.size()) { (Some(offset), Some(size)) => { ftp_stream.resume_transfer(offset as usize).await?; let ds = ftp_stream.retr_as_stream(path).await?.take(size); - (Box::new(ds), min(size, meta.size() as u64 - offset)) + Box::new(ds) } (Some(offset), None) => { ftp_stream.resume_transfer(offset as usize).await?; let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), meta.size() as u64 - offset) + Box::new(ds) } (None, Some(size)) => { ftp_stream .resume_transfer((meta.size() as u64 - size) as usize) .await?; let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), size) + Box::new(ds) } (None, None) => { let ds = ftp_stream.retr_as_stream(path).await?; - (Box::new(ds), meta.size() as u64) + Box::new(ds) } }; - Ok((RpRead::new(size), FtpReader::new(r, ftp_stream))) + Ok((RpRead::new(), FtpReader::new(r, ftp_stream))) } async fn write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index e65fb155de91..c667920d9680 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -389,8 +389,7 @@ impl Accessor for GcsBackend { let resp = self.core.gcs_get_object(path, &args).await?; if resp.status().is_success() { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) + Ok((RpRead::new(), resp.into_body())) } else { Err(parse_error(resp).await?) 
} diff --git a/core/src/services/gdrive/backend.rs b/core/src/services/gdrive/backend.rs index 879eab7cc27a..53ab999ea032 100644 --- a/core/src/services/gdrive/backend.rs +++ b/core/src/services/gdrive/backend.rs @@ -118,26 +118,12 @@ impl Accessor for GdriveBackend { } async fn read(&self, path: &str, _args: OpRead) -> Result<(RpRead, Self::Reader)> { - // We need to request for metadata and body separately here. - // Request for metadata first to check if the file exists. - let resp = self.core.gdrive_stat(path).await?; + let resp = self.core.gdrive_get(path).await?; let status = resp.status(); match status { - StatusCode::OK => { - let body = resp.into_body().bytes().await?; - let meta = self.parse_metadata(body)?; - - let resp = self.core.gdrive_get(path).await?; - - let status = resp.status(); - - match status { - StatusCode::OK => Ok((RpRead::with_metadata(meta), resp.into_body())), - _ => Err(parse_error(resp).await?), - } - } + StatusCode::OK => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ghac/backend.rs b/core/src/services/ghac/backend.rs index 83c6a46d6a7c..941d89ce71fc 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -350,10 +350,7 @@ impl Accessor for GhacBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/hdfs/backend.rs b/core/src/services/hdfs/backend.rs index fed635b15e4d..a52d9c2842f6 100644 --- a/core/src/services/hdfs/backend.rs +++ b/core/src/services/hdfs/backend.rs @@ -213,7 +213,7 @@ impl Accessor for HdfsBackend { let r = oio::FuturesReader::new(f); - Ok((RpRead::new(0), r)) + Ok((RpRead::new(), r)) } async fn 
write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -335,7 +335,7 @@ impl Accessor for HdfsBackend { let r = oio::StdReader::new(f); - Ok((RpRead::new(0), r)) + Ok((RpRead::new(), r)) } fn blocking_write(&self, path: &str, _: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index af50631cca47..2877d5aa20a3 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -237,10 +237,7 @@ impl Accessor for HttpBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ipfs/backend.rs b/core/src/services/ipfs/backend.rs index ebf1e327e881..6a557d6a476e 100644 --- a/core/src/services/ipfs/backend.rs +++ b/core/src/services/ipfs/backend.rs @@ -194,10 +194,7 @@ impl Accessor for IpfsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ipmfs/backend.rs b/core/src/services/ipmfs/backend.rs index e7999767b93d..303b1db8df3c 100644 --- a/core/src/services/ipmfs/backend.rs +++ b/core/src/services/ipmfs/backend.rs @@ -112,10 +112,7 @@ impl Accessor for IpmfsBackend { let status = resp.status(); match status { - StatusCode::OK => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK => Ok((RpRead::new(), resp.into_body())), _ 
=> Err(parse_error(resp).await?), } } diff --git a/core/src/services/obs/backend.rs b/core/src/services/obs/backend.rs index aa5a86864c63..18bcefb52f79 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -359,10 +359,7 @@ impl Accessor for ObsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/onedrive/backend.rs b/core/src/services/onedrive/backend.rs index 0a7952e4a27f..8b1959b9ba08 100644 --- a/core/src/services/onedrive/backend.rs +++ b/core/src/services/onedrive/backend.rs @@ -93,10 +93,7 @@ impl Accessor for OnedriveBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/oss/backend.rs b/core/src/services/oss/backend.rs index 8ce0d65d899b..043993699ba8 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -472,10 +472,7 @@ impl Accessor for OssBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index f9e160502e8b..e6b86298207d 100644 --- a/core/src/services/s3/backend.rs +++ 
b/core/src/services/s3/backend.rs @@ -969,10 +969,7 @@ impl Accessor for S3Backend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/sftp/backend.rs b/core/src/services/sftp/backend.rs index 5c67dab7664a..792e27f49644 100644 --- a/core/src/services/sftp/backend.rs +++ b/core/src/services/sftp/backend.rs @@ -300,7 +300,7 @@ impl Accessor for SftpBackend { // - `oio::TokioReader::new(x)` makes it a `oio::TokioReader` which implements `oio::Read`. let r = oio::TokioReader::new(Box::pin(TokioCompatFile::new(f))); - Ok((RpRead::new(0), r)) + Ok((RpRead::new(), r)) } async fn write(&self, path: &str, op: OpWrite) -> Result<(RpWrite, Self::Writer)> { diff --git a/core/src/services/supabase/backend.rs b/core/src/services/supabase/backend.rs index a5d0d3db1053..b96bbd3efd4a 100644 --- a/core/src/services/supabase/backend.rs +++ b/core/src/services/supabase/backend.rs @@ -215,10 +215,7 @@ impl Accessor for SupabaseBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/vercel_artifacts/backend.rs b/core/src/services/vercel_artifacts/backend.rs index 9a0ae95cf719..432b82b68fd9 100644 --- a/core/src/services/vercel_artifacts/backend.rs +++ b/core/src/services/vercel_artifacts/backend.rs @@ -74,10 +74,7 @@ impl Accessor for VercelArtifactsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - 
let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/wasabi/backend.rs b/core/src/services/wasabi/backend.rs index a7a41042f037..c2f746e77578 100644 --- a/core/src/services/wasabi/backend.rs +++ b/core/src/services/wasabi/backend.rs @@ -738,10 +738,7 @@ impl Accessor for WasabiBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 4c4cf6b834a2..24c80a3da193 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -267,10 +267,7 @@ impl Accessor for WebdavBackend { let resp = self.webdav_get(path, args).await?; let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 4adbc6ab9e93..3863b2dd38cb 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -465,10 +465,7 @@ impl Accessor for WebhdfsBackend { let range = args.range(); let resp = self.webhdfs_read_file(path, range).await?; match resp.status() { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - 
Ok((RpRead::with_metadata(meta), resp.into_body())) - } + StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/types/operator/blocking_operator.rs b/core/src/types/operator/blocking_operator.rs index 3dfa0d121ae9..f55a1c48a82c 100644 --- a/core/src/types/operator/blocking_operator.rs +++ b/core/src/types/operator/blocking_operator.rs @@ -15,11 +15,10 @@ // specific language governing permissions and limitations // under the License. -use std::io::Read; - use bytes::Bytes; use super::operator_functions::*; +use crate::raw::oio::BlockingRead; use crate::raw::oio::WriteBuf; use crate::raw::*; use crate::*; @@ -339,22 +338,12 @@ impl BlockingOperator { ); } - let (rp, mut s) = inner.blocking_read(&path, args)?; - let mut buffer = Vec::with_capacity(rp.into_metadata().content_length() as usize); + let (_, mut s) = inner.blocking_read(&path, args)?; - match s.read_to_end(&mut buffer) { - Ok(n) => { - buffer.truncate(n); - Ok(buffer) - } - Err(err) => Err( - Error::new(ErrorKind::Unexpected, "blocking read_with failed") - .with_operation("BlockingOperator::read_with") - .with_context("service", inner.info().scheme().into_static()) - .with_context("path", &path) - .set_source(err), - ), - } + let mut buf = Vec::new(); + s.read_to_end(&mut buf)?; + + Ok(buf) }, )) } diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index 00bb13e10b2e..1abb1f0276f4 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -20,15 +20,13 @@ use std::time::Duration; use bytes::Buf; use bytes::Bytes; use futures::stream; -use futures::AsyncReadExt; use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; -use tokio::io::ReadBuf; use super::BlockingOperator; use crate::operator_futures::*; -use crate::raw::oio::WriteExt; +use crate::raw::oio::{ReadExt, WriteExt}; use crate::raw::*; use crate::*; @@ -368,32 +366,11 
@@ impl Operator { .with_context("path", &path)); } - let br = args.range(); - let (rp, mut s) = inner.read(&path, args).await?; + let (_, mut s) = inner.read(&path, args).await?; + let mut buf = Vec::new(); + s.read_to_end(&mut buf).await?; - let length = rp.into_metadata().content_length() as usize; - let mut buffer = Vec::with_capacity(length); - - let dst = buffer.spare_capacity_mut(); - let mut buf = ReadBuf::uninit(dst); - - // Safety: the input buffer is created with_capacity(length). - unsafe { buf.assume_init(length) }; - - // TODO: use native read api - s.read_exact(buf.initialized_mut()).await.map_err(|err| { - Error::new(ErrorKind::Unexpected, "read from storage") - .with_operation("read") - .with_context("service", inner.info().scheme().into_static()) - .with_context("path", &path) - .with_context("range", br.to_string()) - .set_source(err) - })?; - - // Safety: read_exact makes sure this buffer has been filled. - unsafe { buffer.set_len(length) } - - Ok(buffer) + Ok(buf) }; Box::pin(fut) From b8fb852d4c570568d6c89c76c94a85a18f241a3b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 21:23:08 +0800 Subject: [PATCH 16/46] make rustc happy Signed-off-by: Xuanwo --- core/src/raw/oio/read/api.rs | 8 ++++++++ core/src/raw/rps.rs | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index 436c8447fd53..579a4858d95d 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -292,6 +292,10 @@ where loop { if g.buf.capacity() - g.buf.len() < g.next { g.buf.reserve(g.next); + // # Safety + // + // We make sure that the length of buf is maintained correctly. + #[deny(clippy::uninit_vec)] unsafe { g.buf.set_len(g.buf.capacity()); } @@ -377,6 +381,10 @@ pub trait BlockingRead: Send + Sync { loop { if g.buf.capacity() - g.buf.len() < g.next { g.buf.reserve(g.next); + // # Safety + // + // We make sure that the length of buf is maintained correctly. 
+ #[deny(clippy::uninit_vec)] unsafe { g.buf.set_len(g.buf.capacity()); } diff --git a/core/src/raw/rps.rs b/core/src/raw/rps.rs index 17e4470aec18..10b3c7159477 100644 --- a/core/src/raw/rps.rs +++ b/core/src/raw/rps.rs @@ -97,13 +97,13 @@ impl From for Request { } /// Reply for `read` operation. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct RpRead {} impl RpRead { /// Create a new reply for `read`. pub fn new() -> Self { - RpRead {} + RpRead::default() } } From f91d61e60c3598f99937b5d35da4989db740067e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 21:24:48 +0800 Subject: [PATCH 17/46] Fix typo Signed-off-by: Xuanwo --- core/src/raw/oio/read/api.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index 579a4858d95d..3fd1459da510 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -295,7 +295,7 @@ where // # Safety // // We make sure that the length of buf is maintained correctly. - #[deny(clippy::uninit_vec)] + #[allow(clippy::uninit_vec)] unsafe { g.buf.set_len(g.buf.capacity()); } @@ -384,7 +384,7 @@ pub trait BlockingRead: Send + Sync { // # Safety // // We make sure that the length of buf is maintained correctly. 
- #[deny(clippy::uninit_vec)] + #[allow(clippy::uninit_vec)] unsafe { g.buf.set_len(g.buf.capacity()); } From ef9aab9c061648b08c351766ef37ddd12d0012c8 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 22:08:20 +0800 Subject: [PATCH 18/46] fix read to end Signed-off-by: Xuanwo --- core/src/raw/oio/read/api.rs | 102 ++++++++++++++++------------------- 1 file changed, 45 insertions(+), 57 deletions(-) diff --git a/core/src/raw/oio/read/api.rs b/core/src/raw/oio/read/api.rs index 3fd1459da510..0b9aaf7703cf 100644 --- a/core/src/raw/oio/read/api.rs +++ b/core/src/raw/oio/read/api.rs @@ -206,6 +206,8 @@ pub trait ReadExt: Read { reader: self, buf, start, + length: start, + next: MIN_READ_TO_END_GROW_SIZE, } } } @@ -266,12 +268,19 @@ where } } +/// The MIN read to end grow size. +const MIN_READ_TO_END_GROW_SIZE: usize = 8 * 1024; +/// The MAX read to end grow size. +const MAX_READ_TO_END_GROW_SIZE: usize = 4 * 1024 * 1024; + /// Make this future `!Unpin` for compatibility with async trait methods. #[pin_project(!Unpin)] pub struct ReadToEndFuture<'a, R: Read + Unpin + ?Sized> { reader: &'a mut R, buf: &'a mut Vec, start: usize, + length: usize, + next: usize, } impl Future for ReadToEndFuture<'_, R> @@ -283,40 +292,39 @@ where fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let this = self.project(); - let mut g = ReadToEndGuard { - len: this.buf.len(), - buf: this.buf, - next: MIN_READ_TO_END_GROW_SIZE, - }; - loop { - if g.buf.capacity() - g.buf.len() < g.next { - g.buf.reserve(g.next); + if this.buf.capacity() == *this.length { + this.buf.reserve(*this.next); // # Safety // // We make sure that the length of buf is maintained correctly. 
#[allow(clippy::uninit_vec)] unsafe { - g.buf.set_len(g.buf.capacity()); + this.buf.set_len(this.buf.capacity()); } } - let buf = &mut g.buf[g.len..]; + let buf = &mut this.buf[*this.length..]; match ready!(this.reader.poll_read(cx, buf)) { - Ok(0) => return Poll::Ready(Ok(g.len - *this.start)), + Ok(0) => { + unsafe { + this.buf.set_len(*this.length); + } + return Poll::Ready(Ok(*this.length - *this.start)); + } Ok(n) => { - g.next = if n >= g.next { - cmp::min(g.next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) - } else if n >= g.next / 2 { - g.next + *this.next = if n >= *this.next { + cmp::min((*this.next).saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= *this.next / 2 { + *this.next } else { - cmp::max(g.next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + cmp::max((*this.next).saturating_div(2), MIN_READ_TO_END_GROW_SIZE) }; // We can't allow bogus values from read. If it is too large, the returned vec could have its length // set past its capacity, or if it overflows the vec could be shortened which could create an invalid // string if this is called via read_to_string. assert!(n <= buf.len()); - g.len += n; + *this.length += n; } Err(e) => return Poll::Ready(Err(e)), } @@ -324,28 +332,6 @@ where } } -const MIN_READ_TO_END_GROW_SIZE: usize = 8 * 1024; -const MAX_READ_TO_END_GROW_SIZE: usize = 4 * 1024 * 1024; - -/// ReadToEndGuard makes sure that the buf length is maintained correctly. -struct ReadToEndGuard<'a> { - buf: &'a mut Vec, - /// Store the real length of buf. - len: usize, - next: usize, -} - -impl Drop for ReadToEndGuard<'_> { - /// # Safety - /// - /// We make sure that the length of buf is maintained correctly. - fn drop(&mut self) { - unsafe { - self.buf.set_len(self.len); - } - } -} - /// BlockingReader is a boxed dyn `BlockingRead`. pub type BlockingReader = Box; @@ -371,42 +357,44 @@ pub trait BlockingRead: Send + Sync { /// Read all data of current reader to the end of buf. 
fn read_to_end(&mut self, buf: &mut Vec) -> Result { - let start_len = buf.len(); - let mut g = ReadToEndGuard { - len: buf.len(), - buf, - next: MIN_READ_TO_END_GROW_SIZE, - }; + let start = buf.len(); + let mut next = MAX_READ_TO_END_GROW_SIZE; + let mut length = start; loop { - if g.buf.capacity() - g.buf.len() < g.next { - g.buf.reserve(g.next); + if buf.capacity() == length { + buf.reserve(next); // # Safety // // We make sure that the length of buf is maintained correctly. #[allow(clippy::uninit_vec)] unsafe { - g.buf.set_len(g.buf.capacity()); + buf.set_len(buf.capacity()); } } - let buf = &mut g.buf[g.len..]; - match self.read(buf) { - Ok(0) => return Ok(g.len - start_len), + let bs = &mut buf[length..]; + match self.read(bs) { + Ok(0) => { + unsafe { + buf.set_len(length); + } + return Ok(length - start); + } Ok(n) => { - g.next = if n >= g.next { - cmp::min(g.next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) - } else if n >= g.next / 2 { - g.next + next = if n >= next { + cmp::min(next.saturating_mul(2), MAX_READ_TO_END_GROW_SIZE) + } else if n >= next / 2 { + next } else { - cmp::max(g.next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) + cmp::max(next.saturating_div(2), MIN_READ_TO_END_GROW_SIZE) }; // We can't allow bogus values from read. If it is too large, the returned vec could have its length // set past its capacity, or if it overflows the vec could be shortened which could create an invalid // string if this is called via read_to_string. 
assert!(n <= buf.len()); - g.len += n; + length += n; } Err(e) => return Err(e), } From 07be9dc2db9df1fd18a398d23478431c59300af8 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 22:37:08 +0800 Subject: [PATCH 19/46] Fix 416 not handled Signed-off-by: Xuanwo --- core/src/raw/http_util/body.rs | 16 +++++++++++++++- core/src/raw/oio/read/range_read.rs | 8 ++++---- core/src/services/azblob/backend.rs | 1 + core/src/services/cos/backend.rs | 1 + core/src/services/gcs/backend.rs | 2 ++ core/src/services/http/backend.rs | 1 + core/src/services/obs/backend.rs | 1 + core/src/services/oss/backend.rs | 1 + core/src/services/s3/backend.rs | 1 + 9 files changed, 27 insertions(+), 5 deletions(-) diff --git a/core/src/raw/http_util/body.rs b/core/src/raw/http_util/body.rs index 474f7489780d..8b7d5da5af09 100644 --- a/core/src/raw/http_util/body.rs +++ b/core/src/raw/http_util/body.rs @@ -78,6 +78,16 @@ impl IncomingAsyncBody { } } + /// Create an empty IncomingAsyncBody. + pub(crate) fn empty() -> Self { + Self { + inner: Box::new(()), + size: Some(0), + consumed: 0, + chunk: None, + } + } + /// Consume the entire body. 
pub async fn consume(mut self) -> Result<()> { use oio::ReadExt; @@ -145,7 +155,7 @@ impl IncomingAsyncBody { impl oio::Read for IncomingAsyncBody { fn poll_read(&mut self, cx: &mut Context<'_>, mut buf: &mut [u8]) -> Poll> { - if buf.is_empty() { + if buf.is_empty() || self.size == Some(0) { return Poll::Ready(Ok(0)); } @@ -179,6 +189,10 @@ impl oio::Read for IncomingAsyncBody { } fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + if self.size == Some(0) { + return Poll::Ready(None); + } + if let Some(bs) = self.chunk.take() { return Poll::Ready(Some(Ok(bs))); } diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index e6967cea535b..205b89a6f23a 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -233,7 +233,7 @@ where match &mut self.state { State::Idle => { // Sanity check for normal cases. - if buf.is_empty() || self.cur > self.size.unwrap_or(u64::MAX) { + if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { return Poll::Ready(Ok(0)); } @@ -362,7 +362,7 @@ where match &mut self.state { State::Idle => { // Sanity check for normal cases. - if self.cur > self.size.unwrap_or(u64::MAX) { + if self.cur >= self.size.unwrap_or(u64::MAX) { return Poll::Ready(None); } @@ -428,7 +428,7 @@ where match &mut self.state { State::Idle => { // Sanity check for normal cases. - if buf.is_empty() || self.cur > self.size.unwrap_or(u64::MAX) { + if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { return Ok(0); } @@ -528,7 +528,7 @@ where match &mut self.state { State::Idle => { // Sanity check for normal cases. 
- if self.cur > self.size.unwrap_or(u64::MAX) { + if self.cur >= self.size.unwrap_or(u64::MAX) { return None; } diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index 375a375d81bb..6e8dd69ee322 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -587,6 +587,7 @@ impl Accessor for AzblobBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/cos/backend.rs b/core/src/services/cos/backend.rs index 5ffe4be230a1..aef237a1acd4 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -333,6 +333,7 @@ impl Accessor for CosBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index c667920d9680..40f7f6a9b247 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -390,6 +390,8 @@ impl Accessor for GcsBackend { if resp.status().is_success() { Ok((RpRead::new(), resp.into_body())) + } else if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { + Ok((RpRead::new(), IncomingAsyncBody::empty())) } else { Err(parse_error(resp).await?) 
} diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index 2877d5aa20a3..6b238886259a 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -238,6 +238,7 @@ impl Accessor for HttpBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/obs/backend.rs b/core/src/services/obs/backend.rs index 18bcefb52f79..98e51b9dbe2c 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -360,6 +360,7 @@ impl Accessor for ObsBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/oss/backend.rs b/core/src/services/oss/backend.rs index 043993699ba8..12f2d2a256b8 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -473,6 +473,7 @@ impl Accessor for OssBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index e6b86298207d..eeadf18d7ec1 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -970,6 +970,7 @@ impl Accessor for S3Backend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } From 48291521884399f104d603e574fe705ab3b5efe9 Mon Sep 17 00:00:00 2001 From: 
Xuanwo Date: Thu, 26 Oct 2023 22:39:19 +0800 Subject: [PATCH 20/46] Fix dbfs Signed-off-by: Xuanwo --- core/src/services/dbfs/backend.rs | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/core/src/services/dbfs/backend.rs b/core/src/services/dbfs/backend.rs index c01acb043ae6..be4822309840 100644 --- a/core/src/services/dbfs/backend.rs +++ b/core/src/services/dbfs/backend.rs @@ -200,32 +200,9 @@ impl Accessor for DbfsBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut meta = Metadata::new(EntryMode::FILE); - - if let Some(length) = args.range().size() { - meta.set_content_length(length); - } else { - let stat_resp = self.core.dbfs_get_status(path).await?; - meta = parse_into_metadata(path, stat_resp.headers())?; - let decoded_response = - serde_json::from_slice::(&stat_resp.into_body().bytes().await?) - .map_err(new_json_deserialize_error)?; - meta.set_last_modified(parse_datetime_from_from_timestamp_millis( - decoded_response.modification_time, - )?); - meta.set_mode(if decoded_response.is_dir { - EntryMode::DIR - } else { - EntryMode::FILE - }); - if !decoded_response.is_dir { - meta.set_content_length(decoded_response.file_size as u64); - } - } - let op = DbfsReader::new(self.core.clone(), args, path.to_string()); - Ok((RpRead::with_metadata(meta), op)) + Ok((RpRead::new(), op)) } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { From da8511b3378f19f5ccfd6858d0e1127e6635ce65 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 23:16:03 +0800 Subject: [PATCH 21/46] fix webdav Signed-off-by: Xuanwo --- core/src/services/webdav/backend.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 24c80a3da193..1e0e3b76e7b6 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -268,6 +268,7 @@ impl Accessor for 
WebdavBackend { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } From a81b140bf51304a21d3879245446ce4d294cad1c Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 26 Oct 2023 23:46:12 +0800 Subject: [PATCH 22/46] Fix complete test Signed-off-by: Xuanwo --- core/src/layers/complete.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/layers/complete.rs b/core/src/layers/complete.rs index 222f503486dc..c5e593a47460 100644 --- a/core/src/layers/complete.rs +++ b/core/src/layers/complete.rs @@ -791,7 +791,7 @@ mod tests { } async fn read(&self, _: &str, _: OpRead) -> Result<(RpRead, Self::Reader)> { - Ok((RpRead::new(), Box::new(()))) + Ok((RpRead::new(), Box::new(oio::Cursor::new()))) } async fn write(&self, _: &str, _: OpWrite) -> Result<(RpWrite, Self::Writer)> { From 9e23d28fd45b134a6795c405bc569569d55d4044 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 08:30:18 +0800 Subject: [PATCH 23/46] Update workflow Signed-off-by: Xuanwo --- .github/workflows/ci.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f00b6fdcbed..134d759e7b3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,6 +63,7 @@ jobs: with: need-rocksdb: true need-protoc: true + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Checkout python env uses: actions/setup-python@v4 @@ -91,6 +92,7 @@ jobs: with: need-rocksdb: true need-protoc: true + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Checkout python env uses: actions/setup-python@v4 @@ -140,6 +142,8 @@ jobs: - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup + with: + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Build run: cargo build -p opendal -p 
oli -p object_store_opendal @@ -162,6 +166,7 @@ jobs: with: need-rocksdb: true need-protoc: true + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Build run: cargo build --all-features @@ -190,12 +195,8 @@ jobs: uses: ./.github/actions/setup with: need-rocksdb: true - - - name: Install Protoc - uses: arduino/setup-protoc@v2 - with: - version: "23.4" - repo-token: ${{ secrets.GITHUB_TOKEN }} + need-protoc: true + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Build shell: bash @@ -267,6 +268,7 @@ jobs: uses: ./.github/actions/setup with: need-nextest: true + - name: Test run: cargo nextest run --no-fail-fast --features layers-all && cargo test --doc env: From 72cd9819b1b31e4276fe343d246203328d631813 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 17:26:01 +0800 Subject: [PATCH 24/46] Update fuzz Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 14 ++++++++------ core/fuzz/utils.rs | 7 ++++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index d6eb19c1c0f0..d4af06d60c97 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -44,6 +44,7 @@ enum ReadAction { #[derive(Debug, Clone)] struct FuzzInput { + path: String, size: usize, range: BytesRange, actions: Vec, @@ -109,6 +110,7 @@ impl Arbitrary<'_> for FuzzInput { } Ok(FuzzInput { + path: uuid::Uuid::new_v4().to_string(), size: total_size, range, actions, @@ -221,12 +223,13 @@ impl ReadChecker { } async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { - let path = uuid::Uuid::new_v4().to_string(); - let mut checker = ReadChecker::new(input.size, input.range); - op.write(&path, checker.raw_data.clone()).await?; + op.write(&input.path, checker.raw_data.clone()).await?; - let mut o = op.reader_with(&path).range(input.range.to_range()).await?; + let mut o = op + .reader_with(&input.path) + .range(input.range.to_range()) + .await?; for action in input.actions { match action { @@ -248,7 +251,6 @@ async 
fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { } } - op.delete(&path).await?; Ok(()) } @@ -261,7 +263,7 @@ fuzz_target!(|input: FuzzInput| { runtime.block_on(async { fuzz_reader(op, input.clone()) .await - .unwrap_or_else(|_| panic!("fuzz reader must succeed")); + .unwrap_or_else(|err| panic!("fuzz reader must succeed: {err:?}")); }) } }); diff --git a/core/fuzz/utils.rs b/core/fuzz/utils.rs index 37598fdb3713..9cf962f62fa0 100644 --- a/core/fuzz/utils.rs +++ b/core/fuzz/utils.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use opendal::layers::RetryLayer; use std::env; use std::str::FromStr; @@ -44,5 +45,9 @@ pub fn init_service() -> Option { }) .collect(); - Some(Operator::via_map(scheme, envs).unwrap_or_else(|_| panic!("init {} must succeed", scheme))) + Some( + Operator::via_map(scheme, envs) + .unwrap_or_else(|_| panic!("init {} must succeed", scheme)) + .layer(RetryLayer::default()), + ) } From 3ae3ad9fff4c960a7d7e240c5ca9e109276fce74 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 17:26:25 +0800 Subject: [PATCH 25/46] Add new fuzz test Signed-off-by: Xuanwo --- core/src/raw/oio/read/range_read.rs | 32 +++--- core/tests/behavior/fuzz.rs | 154 +++++++++++++++++++++++++++- 2 files changed, 166 insertions(+), 20 deletions(-) diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index 205b89a6f23a..28cff072e44c 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -230,13 +230,13 @@ where R: oio::Read, { fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + // Sanity check for normal cases. + if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { + return Poll::Ready(Ok(0)); + } + match &mut self.state { State::Idle => { - // Sanity check for normal cases. 
- if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { - return Poll::Ready(Ok(0)); - } - self.state = if self.offset.is_none() { // Offset is none means we are doing tailing reading. // we should stat first to get the correct offset. @@ -300,6 +300,7 @@ where State::Idle => { let (base, amt) = match pos { SeekFrom::Start(n) => (0, n as i64), + SeekFrom::Current(n) => (self.cur as i64, n), SeekFrom::End(n) => { if let Some(size) = self.size { (size as i64, n) @@ -308,7 +309,6 @@ where return self.poll_seek(cx, pos); } } - SeekFrom::Current(n) => (self.cur as i64, n), }; let seek_pos = match base.checked_add(amt) { @@ -359,13 +359,13 @@ where } fn poll_next(&mut self, cx: &mut Context<'_>) -> Poll>> { + // Sanity check for normal cases. + if self.cur >= self.size.unwrap_or(u64::MAX) { + return Poll::Ready(None); + } + match &mut self.state { State::Idle => { - // Sanity check for normal cases. - if self.cur >= self.size.unwrap_or(u64::MAX) { - return Poll::Ready(None); - } - self.state = if self.offset.is_none() { // Offset is none means we are doing tailing reading. // we should stat first to get the correct offset. @@ -425,13 +425,13 @@ where R: oio::BlockingRead, { fn read(&mut self, buf: &mut [u8]) -> Result { + // Sanity check for normal cases. + if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { + return Ok(0); + } + match &mut self.state { State::Idle => { - // Sanity check for normal cases. - if buf.is_empty() || self.cur >= self.size.unwrap_or(u64::MAX) { - return Ok(0); - } - // Offset is none means we are doing tailing reading. // we should stat first to get the correct offset. if self.offset.is_none() { diff --git a/core/tests/behavior/fuzz.rs b/core/tests/behavior/fuzz.rs index b8f47fdfbed8..3ccd51a7d5bc 100644 --- a/core/tests/behavior/fuzz.rs +++ b/core/tests/behavior/fuzz.rs @@ -15,18 +15,23 @@ // specific language governing permissions and limitations // under the License. 
-use std::io; use std::io::SeekFrom; use std::vec; use anyhow::Result; -use futures::AsyncSeekExt; +use bytes::Bytes; use log::debug; +use opendal::raw::oio::ReadExt; use crate::*; pub fn behavior_fuzz_tests(op: &Operator) -> Vec { - async_trials!(op, test_fuzz_issue_2717) + async_trials!( + op, + test_fuzz_issue_2717, + test_fuzz_pr_3395_case_1, + test_fuzz_pr_3395_case_2 + ) } /// This fuzz test is to reproduce . @@ -86,7 +91,148 @@ pub async fn test_fuzz_issue_2717(op: Operator) -> Result<()> { // Perform a seek let result = r.seek(SeekFrom::End(-2)).await; assert!(result.is_err()); - assert_eq!(result.unwrap_err().kind(), io::ErrorKind::InvalidInput); + assert_eq!(result.unwrap_err().kind(), ErrorKind::InvalidInput); + + Ok(()) +} + +/// This fuzz test is to reproduce bug inside . +/// +/// The simplified cases could be seen as: +/// +/// ``` +/// FuzzInput { +/// path: "06ae5d93-c0e9-43f2-ae5a-225cfaaa40a0", +/// size: 1, +/// range: BytesRange( +/// Some( +/// 0, +/// ), +/// None, +/// ), +/// actions: [ +/// Seek( +/// Current( +/// 1, +/// ), +/// ), +/// Next, +/// Seek( +/// End( +/// -1, +/// ), +/// ), +/// Read { +/// size: 0, +/// }, +/// Read { +/// size: 0, +/// }, +/// ], +/// } +/// ``` +pub async fn test_fuzz_pr_3395_case_1(op: Operator) -> Result<()> { + let cap = op.info().full_capability(); + + if !(cap.read && cap.write & cap.read_with_range) { + return Ok(()); + } + + let path = uuid::Uuid::new_v4().to_string(); + debug!("Generate a random file: {}", &path); + let content = gen_fixed_bytes(1); + + op.write(&path, content.clone()) + .await + .expect("write must succeed"); + + let mut r = op.reader_with(&path).range(0..).await?; + + let pos = r.seek(SeekFrom::Current(1)).await?; + assert_eq!(pos, 1); + + let bs = r.next().await.transpose()?; + assert!(bs.is_none()); + + let pos = r.seek(SeekFrom::End(-1)).await?; + assert_eq!(pos, 0); + + let mut buf = vec![0; 0]; + let n = r.read(&mut buf).await?; + assert_eq!(n, 0); + + let mut buf = 
vec![0; 0]; + let n = r.read(&mut buf).await?; + assert_eq!(n, 0); + + Ok(()) +} + +/// This fuzz test is to reproduce bug inside . +/// +/// The simplified cases could be seen as: +/// +/// ``` +/// FuzzInput { +/// path: "e6056989-7c7c-4075-b975-5ae380884333", +/// size: 1, +/// range: BytesRange( +/// Some( +/// 0, +/// ), +/// None, +/// ), +/// actions: [ +/// Next, +/// Seek( +/// Current( +/// 1, +/// ), +/// ), +/// Next, +/// Seek( +/// End( +/// 0, +/// ), +/// ), +/// Read { +/// size: 0, +/// }, +/// ], +/// } +/// ``` +pub async fn test_fuzz_pr_3395_case_2(op: Operator) -> Result<()> { + let cap = op.info().full_capability(); + + if !(cap.read && cap.write & cap.read_with_range) { + return Ok(()); + } + + let path = uuid::Uuid::new_v4().to_string(); + debug!("Generate a random file: {}", &path); + let content = gen_fixed_bytes(1); + + op.write(&path, content.clone()) + .await + .expect("write must succeed"); + + let mut r = op.reader_with(&path).range(0..).await?; + + let bs = r.next().await.transpose()?; + assert_eq!(bs, Some(Bytes::from(content.clone()))); + + let pos = r.seek(SeekFrom::Current(1)).await?; + assert_eq!(pos, 2); + + let bs = r.next().await.transpose()?; + assert!(bs.is_none()); + + let pos = r.seek(SeekFrom::End(0)).await?; + assert_eq!(pos, 1); + + let mut buf = vec![0; 0]; + let n = r.read(&mut buf).await?; + assert_eq!(n, 0); Ok(()) } From 64c1f8f0daedfeef1b6438bbed9fe73eec783d7d Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 17:34:20 +0800 Subject: [PATCH 26/46] Remove file after succeed Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index d4af06d60c97..f00fa9151bf2 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -251,6 +251,7 @@ async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { } } + op.delete(&input.path).await?; Ok(()) } From 821616ca3e9b62331c9373f503d2aadbba24285d 
Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 20:13:27 +0800 Subject: [PATCH 27/46] Polish logging Signed-off-by: Xuanwo --- core/src/layers/logging.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index 457fc2612725..81b1cfb2c965 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -991,17 +991,20 @@ impl Drop for LoggingReader { impl oio::Read for LoggingReader { fn poll_read(&mut self, cx: &mut Context<'_>, buf: &mut [u8]) -> Poll> { + let buf_size = buf.len(); + match self.inner.poll_read(cx, buf) { Poll::Ready(res) => match res { Ok(n) => { self.read += n as u64; trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read {}B ", + "service={} operation={} path={} read={} -> buf size: {}B, read {}B ", self.ctx.scheme, ReadOperation::Read, self.path, self.read, + buf_size, n ); Poll::Ready(Ok(n)) @@ -1011,7 +1014,7 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> read failed: {}", self.ctx.scheme, ReadOperation::Read, self.path, @@ -1025,11 +1028,12 @@ impl oio::Read for LoggingReader { Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read pending", + "service={} operation={} path={} read={} -> buf size: {}B, read pending", self.ctx.scheme, ReadOperation::Read, self.path, - self.read + self.read, + buf_size ); Poll::Pending } @@ -1042,7 +1046,7 @@ impl oio::Read for LoggingReader { Ok(n) => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data seek to offset {n}", + "service={} operation={} path={} read={} -> seek to {pos:?}, current offset {n}", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1055,7 +1059,7 @@ impl oio::Read for LoggingReader { log!( target: 
LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> seek to {pos:?} failed: {}", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1069,7 +1073,7 @@ impl oio::Read for LoggingReader { Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data seek pending", + "service={} operation={} path={} read={} -> seek to {pos:?} pending", self.ctx.scheme, ReadOperation::Seek, self.path, @@ -1087,7 +1091,7 @@ impl oio::Read for LoggingReader { self.read += bs.len() as u64; trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read {}B", + "service={} operation={} path={} read={} -> next {}B", self.ctx.scheme, ReadOperation::Next, self.path, @@ -1101,7 +1105,7 @@ impl oio::Read for LoggingReader { log!( target: LOGGING_TARGET, lvl, - "service={} operation={} path={} read={} -> data read failed: {}", + "service={} operation={} path={} read={} -> next failed: {}", self.ctx.scheme, ReadOperation::Next, self.path, @@ -1116,7 +1120,7 @@ impl oio::Read for LoggingReader { Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> data read pending", + "service={} operation={} path={} read={} -> next pending", self.ctx.scheme, ReadOperation::Next, self.path, From 7b37178a66b67b74d12f0f810333c118d5943717 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 20:13:45 +0800 Subject: [PATCH 28/46] Polish fuzz Signed-off-by: Xuanwo --- core/fuzz/Cargo.toml | 4 ++++ core/fuzz/fuzz_reader.rs | 40 ++++++++++++++++++++++++++++++++++++++-- core/fuzz/utils.rs | 3 ++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml index 8020dfaf49c3..036433934ba1 100644 --- a/core/fuzz/Cargo.toml +++ b/core/fuzz/Cargo.toml @@ -35,6 +35,10 @@ rand = "0.8" sha2 = { version = "0.10.6" } tokio = { version = "1", features = ["full"] } uuid = { version = "1", 
features = ["v4"] } +tracing-subscriber = { version = "0.3", features = [ + "env-filter", + "tracing-log", +] } [[bin]] name = "fuzz_reader" diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index f00fa9151bf2..71ecca94086e 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -17,6 +17,7 @@ #![no_main] +use std::fmt::{Debug, Formatter}; use std::io::SeekFrom; use bytes::Bytes; @@ -35,14 +36,14 @@ mod utils; const MAX_DATA_SIZE: usize = 16 * 1024 * 1024; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] enum ReadAction { Read { size: usize }, Seek(SeekFrom), Next, } -#[derive(Debug, Clone)] +#[derive(Clone)] struct FuzzInput { path: String, size: usize, @@ -50,6 +51,36 @@ struct FuzzInput { actions: Vec, } +impl Debug for FuzzInput { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut actions = self.actions.clone(); + // Remove all tailing Read(0) entry. + let mut pre = None; + let empty = ReadAction::Read { size: 0 }; + actions.retain(|e| match &pre { + None => { + pre = Some(e.clone()); + true + } + Some(entry) => { + if entry == &empty { + false + } else { + pre = Some(e.clone()); + true + } + } + }); + + f.debug_struct("FuzzInput") + .field("path", &self.path) + .field("size", &self.size) + .field("range", &self.range.to_string()) + .field("actions", &actions) + .finish() + } +} + impl Arbitrary<'_> for FuzzInput { fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { let total_size = u.int_in_range(1..=MAX_DATA_SIZE)?; @@ -257,6 +288,11 @@ async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { fuzz_target!(|input: FuzzInput| { let _ = dotenvy::dotenv(); + let _ = tracing_subscriber::fmt() + .pretty() + .with_test_writer() + .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) + .try_init(); let runtime = tokio::runtime::Runtime::new().expect("init runtime must succeed"); diff --git a/core/fuzz/utils.rs b/core/fuzz/utils.rs index 9cf962f62fa0..e83e4c18473c 
100644 --- a/core/fuzz/utils.rs +++ b/core/fuzz/utils.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use opendal::layers::RetryLayer; +use opendal::layers::{LoggingLayer, RetryLayer}; use std::env; use std::str::FromStr; @@ -48,6 +48,7 @@ pub fn init_service() -> Option { Some( Operator::via_map(scheme, envs) .unwrap_or_else(|_| panic!("init {} must succeed", scheme)) + .layer(LoggingLayer::default()) .layer(RetryLayer::default()), ) } From 91886ca914fa0df47115e2a445fef2a6af5f107d Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 20:15:49 +0800 Subject: [PATCH 29/46] Save lock Signed-off-by: Xuanwo --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 2466fc2c5c3b..ffdcd89150be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4325,6 +4325,7 @@ dependencies = [ "rand 0.8.5", "sha2", "tokio", + "tracing-subscriber", "uuid", ] From 1fa27ab9519e942a4a02c55d59353fc5abff130a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 21:05:50 +0800 Subject: [PATCH 30/46] Fix build Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 3 +- core/src/raw/oio/buf/adaptive.rs | 6 + core/src/raw/oio/read/file_read.rs | 268 ++++++++++++----------------- 3 files changed, 119 insertions(+), 158 deletions(-) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index 71ecca94086e..d140ed112d6d 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -239,7 +239,8 @@ impl ReadChecker { "{:x}", Sha256::digest(&self.ranged_data[self.cur..self.cur + output.len()]) ), - "check next failed: output bs is different with expected bs", + "check next failed: output bs is different with expected bs, current: {}, output length: {}", + self.cur, output.len(), ); // update the current position diff --git a/core/src/raw/oio/buf/adaptive.rs b/core/src/raw/oio/buf/adaptive.rs index 02dce54fec2e..abebcb36f6c3 100644 --- a/core/src/raw/oio/buf/adaptive.rs +++ 
b/core/src/raw/oio/buf/adaptive.rs @@ -59,6 +59,12 @@ impl AdaptiveBuf { /// Returning the initialized part of the buffer. pub fn initialized_mut(&mut self) -> ReadBuf { + assert_eq!( + self.buffer.len(), + 0, + "buffer must be empty before initialized_mut" + ); + let dst = self.buffer.spare_capacity_mut(); let length = dst.len(); let mut buf = ReadBuf::uninit(dst); diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index b06e65fef8e9..ca95691eaa0f 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -81,6 +81,49 @@ where state: State::::Idle, } } + + /// Calculate the actual position that we should seek to. + fn calculate_position( + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, + ) -> Result { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. + Ok(SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + if let Some(size) = size { + if size as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. + Ok(SeekFrom::Start(offset + (size as i64 + n) as u64)) + } else { + // size unknown means we can forward seek end to underlying reader directly. + Ok(SeekFrom::End(n)) + } + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + Ok(SeekFrom::Start(offset + (cur as i64 + n) as u64)) + } + } + } } impl FileReader @@ -97,6 +140,58 @@ where Box::pin(async move { acc.read(&path, op).await }) } + + /// calculate_offset will make sure that the offset has been set. 
+ fn poll_offset( + cx: &mut Context<'_>, + r: &mut R, + range: BytesRange, + ) -> Poll, Option)>> { + let (offset, size) = match (range.offset(), range.size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::End(-(size as i64))))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; + (start, Some(size)) + } + }; + + Poll::Ready(Ok((Some(offset), size))) + } +} + +impl FileReader +where + A: Accessor, + R: oio::BlockingRead, +{ + /// calculate_offset will make sure that the offset has been set. + fn calculate_offset(r: &mut R, range: BytesRange) -> Result<(Option, Option)> { + let (offset, size) = match (range.offset(), range.size()) { + (None, None) => (0, None), + (None, Some(size)) => { + let start = r.seek(SeekFrom::End(-(size as i64)))?; + (start, Some(size)) + } + (Some(offset), None) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, None) + } + (Some(offset), Some(size)) => { + let start = r.seek(SeekFrom::Start(offset))?; + (start, Some(size)) + } + }; + + Ok((Some(offset), size)) + } } impl oio::Read for FileReader @@ -123,24 +218,9 @@ where State::Read(r) => { // We should know where to start read the data. 
if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::End(size as i64)))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; } + let size = if let Some(size) = self.size { // Sanity check. if self.cur >= size { @@ -183,28 +263,13 @@ where State::Read(r) => { // We should know where to start read the data. if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::End(-(size as i64))))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; } - let pos = calculate_position(self.offset, self.size, self.cur, pos)?; + + let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; let cur = ready!(r.poll_seek(cx, pos))?; self.cur = cur - self.offset.unwrap(); - Poll::Ready(Ok(cur)) + Poll::Ready(Ok(self.cur)) } } } @@ -228,23 +293,7 @@ where State::Read(r) => { // We should know where to start read the data. 
if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::End(size as i64)))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = ready!(r.poll_seek(cx, SeekFrom::Start(offset)))?; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; } self.buf.reserve(); @@ -296,24 +345,9 @@ where State::Read(r) => { // We should know where to start read the data. if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = r.seek(SeekFrom::End(size as i64))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; } + let size = if let Some(size) = self.size { // Sanity check. if self.cur >= size { @@ -355,25 +389,10 @@ where State::Read(r) => { // We should know where to start read the data. 
if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = r.seek(SeekFrom::End(-(size as i64)))?; - (start, Some(size)) - } - (Some(offset), None) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, None) - } - (Some(offset), Some(size)) => { - let start = r.seek(SeekFrom::Start(offset))?; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; } - let pos = calculate_position(self.offset, self.size, self.cur, pos)?; + + let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; let cur = r.seek(pos)?; self.cur = cur - self.offset.unwrap(); Ok(self.cur) @@ -403,32 +422,10 @@ where State::Read(r) => { // We should know where to start read the data. if self.offset.is_none() { - let (offset, size) = match (self.op.range().offset(), self.op.range().size()) { - (None, None) => (0, None), - (None, Some(size)) => { - let start = match r.seek(SeekFrom::End(size as i64)) { - Ok(v) => v, - Err(err) => return Some(Err(err)), - }; - (start, Some(size)) - } - (Some(offset), None) => { - let start = match r.seek(SeekFrom::Start(offset)) { - Ok(v) => v, - Err(err) => return Some(Err(err)), - }; - (start, None) - } - (Some(offset), Some(size)) => { - let start = match r.seek(SeekFrom::Start(offset)) { - Ok(v) => v, - Err(err) => return Some(Err(err)), - }; - (start, Some(size)) - } - }; - self.offset = Some(offset); - self.size = size; + (self.offset, self.size) = match Self::calculate_offset(r, self.op.range()) { + Ok(v) => v, + Err(err) => return Some(Err(err)), + } } self.buf.reserve(); @@ -465,46 +462,3 @@ where } } } - -/// Calculate the actual position that we should seek to. 
-fn calculate_position( - offset: Option, - size: Option, - cur: u64, - pos: SeekFrom, -) -> Result { - let offset = offset.expect("offset should be set for calculate_position"); - - match pos { - SeekFrom::Start(n) => { - // It's valid for user to seek outsides end of the file. - Ok(SeekFrom::Start(offset + n)) - } - SeekFrom::End(n) => { - if let Some(size) = size { - if size as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - // size is known, we can convert SeekFrom::End into SeekFrom::Start. - Ok(SeekFrom::Start(offset + (size as i64 + n) as u64)) - } else { - // size unknown means we can forward seek end to underlying reader directly. - Ok(SeekFrom::End(n)) - } - } - SeekFrom::Current(n) => { - if cur as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - Ok(SeekFrom::Start(offset + (cur as i64 + n) as u64)) - } - } -} From 0dfe270b56ed9f12130537bc279a9d41fbb9a814 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 21:15:13 +0800 Subject: [PATCH 31/46] Fix seek check Signed-off-by: Xuanwo --- core/src/raw/oio/read/file_read.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index ca95691eaa0f..9bf363d3921e 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -268,6 +268,13 @@ where let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; let cur = ready!(r.poll_seek(cx, pos))?; + if cur < self.offset.unwrap() { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}")))); + } self.cur = cur - self.offset.unwrap(); Poll::Ready(Ok(self.cur)) } @@ -394,6 +401,14 @@ where 
let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; let cur = r.seek(pos)?; + if cur < self.offset.unwrap() { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + self.cur = cur - self.offset.unwrap(); Ok(self.cur) } From 2079ee0a6f2809de7c52e0f52be34c88f1ff96c7 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 21:17:17 +0800 Subject: [PATCH 32/46] Fix fuzz reader Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index d140ed112d6d..127afc0623e8 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -175,17 +175,8 @@ impl ReadChecker { } } - fn check_read(&mut self, n: usize, output: &[u8]) { - if n == 0 { - assert_eq!( - output.len(), - 0, - "check read failed: output bs is not empty when read size is 0" - ); - return; - } - - let expected = &self.ranged_data[self.cur..self.cur + n]; + fn check_read(&mut self, output: &[u8]) { + let expected = &self.ranged_data[self.cur..self.cur + output.len()]; // Check the read result assert_eq!( @@ -195,7 +186,7 @@ impl ReadChecker { ); // Update the current position - self.cur += n; + self.cur += output.len(); } fn check_seek(&mut self, seek_from: SeekFrom, output: Result) { @@ -268,7 +259,7 @@ async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { ReadAction::Read { size } => { let mut buf = vec![0; size]; let n = o.read(&mut buf).await?; - checker.check_read(n, &buf[..n]); + checker.check_read(&buf[..n]); } ReadAction::Seek(seek_from) => { From b556408d7bec748faa5376c444328dec517390c9 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 21:27:36 +0800 Subject: [PATCH 33/46] Polish fuzz reader Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) 
diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index 127afc0623e8..d57a649a462e 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -175,7 +175,22 @@ impl ReadChecker { } } - fn check_read(&mut self, output: &[u8]) { + fn check_read(&mut self, buf_size: usize, output: &[u8]) { + if buf_size == 0 { + assert_eq!( + output.len(), + 0, + "check read failed: output must be empty if buf_size is 0" + ); + } + + if buf_size > 0 && output.len() == 0 { + assert!( + self.cur >= self.ranged_data.len(), + "check read failed: no data read means cur must outsides of ranged_data", + ); + } + let expected = &self.ranged_data[self.cur..self.cur + output.len()]; // Check the read result @@ -259,7 +274,7 @@ async fn fuzz_reader(op: Operator, input: FuzzInput) -> Result<()> { ReadAction::Read { size } => { let mut buf = vec![0; size]; let n = o.read(&mut buf).await?; - checker.check_read(&buf[..n]); + checker.check_read(size, &buf[..n]); } ReadAction::Seek(seek_from) => { From d690f4dbf07710389d12e5b4c2f5acd2fbef5867 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 21:52:35 +0800 Subject: [PATCH 34/46] Fix fuzzer Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index d57a649a462e..11c399c38ed1 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -182,6 +182,16 @@ impl ReadChecker { 0, "check read failed: output must be empty if buf_size is 0" ); + return; + } + + if self.cur >= self.ranged_data.len() { + assert_eq!( + output.len(), + 0, + "check read failed: cur outsides of ranged_data, output must be empty" + ); + return; } if buf_size > 0 && output.len() == 0 { @@ -189,6 +199,7 @@ impl ReadChecker { self.cur >= self.ranged_data.len(), "check read failed: no data read means cur must outsides of ranged_data", ); + return; } let expected = &self.ranged_data[self.cur..self.cur + output.len()]; From 
680879ecc1ae7ea6cd08b0c77125445335e7cda7 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 22:51:45 +0800 Subject: [PATCH 35/46] Fix seek Signed-off-by: Xuanwo --- core/fuzz/fuzz_reader.rs | 34 ++---- core/src/raw/oio/read/file_read.rs | 174 +++++++++++++++++++---------- 2 files changed, 121 insertions(+), 87 deletions(-) diff --git a/core/fuzz/fuzz_reader.rs b/core/fuzz/fuzz_reader.rs index 11c399c38ed1..799d6f52bc78 100644 --- a/core/fuzz/fuzz_reader.rs +++ b/core/fuzz/fuzz_reader.rs @@ -54,23 +54,9 @@ struct FuzzInput { impl Debug for FuzzInput { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut actions = self.actions.clone(); - // Remove all tailing Read(0) entry. - let mut pre = None; + // Remove all Read(0) entry. let empty = ReadAction::Read { size: 0 }; - actions.retain(|e| match &pre { - None => { - pre = Some(e.clone()); - true - } - Some(entry) => { - if entry == &empty { - false - } else { - pre = Some(e.clone()); - true - } - } - }); + actions.retain(|e| e != &empty); f.debug_struct("FuzzInput") .field("path", &self.path) @@ -185,16 +171,7 @@ impl ReadChecker { return; } - if self.cur >= self.ranged_data.len() { - assert_eq!( - output.len(), - 0, - "check read failed: cur outsides of ranged_data, output must be empty" - ); - return; - } - - if buf_size > 0 && output.len() == 0 { + if buf_size > 0 && output.is_empty() { assert!( self.cur >= self.ranged_data.len(), "check read failed: no data read means cur must outsides of ranged_data", @@ -202,6 +179,11 @@ impl ReadChecker { return; } + assert!( + self.cur + output.len() <= self.ranged_data.len(), + "check read failed: cur + output length must be less than ranged_data length, cur: {}, output: {}, ranged_data: {}", self.cur, output.len(), self.ranged_data.len(), + ); + let expected = &self.ranged_data[self.cur..self.cur + output.len()]; // Check the read result diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index 
9bf363d3921e..a5367075a135 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -81,49 +81,6 @@ where state: State::::Idle, } } - - /// Calculate the actual position that we should seek to. - fn calculate_position( - offset: Option, - size: Option, - cur: u64, - pos: SeekFrom, - ) -> Result { - let offset = offset.expect("offset should be set for calculate_position"); - - match pos { - SeekFrom::Start(n) => { - // It's valid for user to seek outsides end of the file. - Ok(SeekFrom::Start(offset + n)) - } - SeekFrom::End(n) => { - if let Some(size) = size { - if size as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - // size is known, we can convert SeekFrom::End into SeekFrom::Start. - Ok(SeekFrom::Start(offset + (size as i64 + n) as u64)) - } else { - // size unknown means we can forward seek end to underlying reader directly. - Ok(SeekFrom::End(n)) - } - } - SeekFrom::Current(n) => { - if cur as i64 + n < 0 { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); - } - Ok(SeekFrom::Start(offset + (cur as i64 + n) as u64)) - } - } - } } impl FileReader @@ -165,6 +122,49 @@ where Poll::Ready(Ok((Some(offset), size))) } + + fn poll_seek_inner( + cx: &mut Context<'_>, + r: &mut R, + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, + ) -> Poll> { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. 
+ r.poll_seek(cx, SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + let size = + size.expect("size should be set for calculate_position when seek with end"); + if size as i64 + n < 0 { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}")))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. + let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); + r.poll_seek(cx, pos) + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Poll::Ready(Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}")))); + } + let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); + r.poll_seek(cx, pos) + } + } + } } impl FileReader @@ -192,6 +192,48 @@ where Ok((Some(offset), size)) } + + fn seek_inner( + r: &mut R, + offset: Option, + size: Option, + cur: u64, + pos: SeekFrom, + ) -> Result { + let offset = offset.expect("offset should be set for calculate_position"); + + match pos { + SeekFrom::Start(n) => { + // It's valid for user to seek outsides end of the file. + r.seek(SeekFrom::Start(offset + n)) + } + SeekFrom::End(n) => { + let size = + size.expect("size should be set for calculate_position when seek with end"); + if size as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + // size is known, we can convert SeekFrom::End into SeekFrom::Start. 
+ let pos = SeekFrom::Start(offset + (size as i64 + n) as u64); + r.seek(pos) + } + SeekFrom::Current(n) => { + if cur as i64 + n < 0 { + return Err(Error::new( + ErrorKind::InvalidInput, + "seek to a negative position is invalid", + ) + .with_context("position", format!("{pos:?}"))); + } + let pos = SeekFrom::Start(offset + (cur as i64 + n) as u64); + r.seek(pos) + } + } + } } impl oio::Read for FileReader @@ -265,17 +307,26 @@ where if self.offset.is_none() { (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; } + // Fetch size when seek end. + if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let current_offset = self.offset.unwrap() + self.cur; - let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; - let cur = ready!(r.poll_seek(cx, pos))?; - if cur < self.offset.unwrap() { - return Poll::Ready(Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}")))); + let size = ready!(r.poll_seek(cx, SeekFrom::End(0)))?; + self.size = Some(size); + + // Reset cursor. + ready!(r.poll_seek(cx, SeekFrom::Start(current_offset)))?; } - self.cur = cur - self.offset.unwrap(); + + let pos = ready!(Self::poll_seek_inner( + cx, + r, + self.offset, + self.size, + self.cur, + pos + ))?; + self.cur = pos - self.offset.unwrap(); Poll::Ready(Ok(self.cur)) } } @@ -398,18 +449,19 @@ where if self.offset.is_none() { (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; } + // Fetch size when seek end. 
+ if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { + let current_offset = self.offset.unwrap() + self.cur; - let pos = Self::calculate_position(self.offset, self.size, self.cur, pos)?; - let cur = r.seek(pos)?; - if cur < self.offset.unwrap() { - return Err(Error::new( - ErrorKind::InvalidInput, - "seek to a negative position is invalid", - ) - .with_context("position", format!("{pos:?}"))); + let size = r.seek(SeekFrom::End(0))?; + self.size = Some(size); + + // Reset cursor. + r.seek(SeekFrom::Start(current_offset))?; } - self.cur = cur - self.offset.unwrap(); + let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos)?; + self.cur = pos - self.offset.unwrap(); Ok(self.cur) } State::Send(_) => { From d035748dfc2ddab36f1379e7771f930053eeddce Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 22:57:39 +0800 Subject: [PATCH 36/46] Fix Signed-off-by: Xuanwo --- core/src/raw/oio/read/file_read.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index a5367075a135..11bb4dd4f6eb 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -141,7 +141,7 @@ where SeekFrom::End(n) => { let size = size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < 0 { + if size as i64 + n < offset as i64 { return Poll::Ready(Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", @@ -210,7 +210,7 @@ where SeekFrom::End(n) => { let size = size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < 0 { + if size as i64 + n < offset as i64 { return Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", From 355c9085b8bfe4abffad3e99be4ea6d9f31b0231 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 23:26:56 +0800 Subject: [PATCH 37/46] Fix dirty seek Signed-off-by: Xuanwo --- 
core/src/raw/oio/read/file_read.rs | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/core/src/raw/oio/read/file_read.rs b/core/src/raw/oio/read/file_read.rs index 11bb4dd4f6eb..2c403f4a688d 100644 --- a/core/src/raw/oio/read/file_read.rs +++ b/core/src/raw/oio/read/file_read.rs @@ -48,6 +48,8 @@ pub struct FileReader { buf: oio::AdaptiveBuf, state: State, + /// Do we need to reset our cursor? + seek_dirty: bool, } enum State { @@ -79,6 +81,7 @@ where cur: 0, buf: oio::AdaptiveBuf::default(), state: State::::Idle, + seek_dirty: false, } } } @@ -141,7 +144,7 @@ where SeekFrom::End(n) => { let size = size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < offset as i64 { + if size as i64 + n < 0 { return Poll::Ready(Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", @@ -210,7 +213,7 @@ where SeekFrom::End(n) => { let size = size.expect("size should be set for calculate_position when seek with end"); - if size as i64 + n < offset as i64 { + if size as i64 + n < 0 { return Err(Error::new( ErrorKind::InvalidInput, "seek to a negative position is invalid", @@ -307,15 +310,18 @@ where if self.offset.is_none() { (self.offset, self.size) = ready!(Self::poll_offset(cx, r, self.op.range()))?; } + // Fetch size when seek end. + let current_offset = self.offset.unwrap() + self.cur; if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let current_offset = self.offset.unwrap() + self.cur; - let size = ready!(r.poll_seek(cx, SeekFrom::End(0)))?; - self.size = Some(size); - + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { // Reset cursor. ready!(r.poll_seek(cx, SeekFrom::Start(current_offset)))?; + self.seek_dirty = false; } let pos = ready!(Self::poll_seek_inner( @@ -450,14 +456,16 @@ where (self.offset, self.size) = Self::calculate_offset(r, self.op.range())?; } // Fetch size when seek end. 
+ let current_offset = self.offset.unwrap() + self.cur; if matches!(pos, SeekFrom::End(_)) && self.size.is_none() { - let current_offset = self.offset.unwrap() + self.cur; - let size = r.seek(SeekFrom::End(0))?; - self.size = Some(size); - + self.size = Some(size - self.offset.unwrap()); + self.seek_dirty = true; + } + if self.seek_dirty { // Reset cursor. r.seek(SeekFrom::Start(current_offset))?; + self.seek_dirty = false; } let pos = Self::seek_inner(r, self.offset, self.size, self.cur, pos)?; From c55d62e5cde8a644462391825b4ff73c590d73f6 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 23:38:25 +0800 Subject: [PATCH 38/46] Fix ghac Signed-off-by: Xuanwo --- core/src/services/ghac/backend.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/services/ghac/backend.rs b/core/src/services/ghac/backend.rs index 941d89ce71fc..5b9c7c27db6c 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -351,6 +351,7 @@ impl Accessor for GhacBackend { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } From b62f7bcaa8b64e6d6323bda3f46320c2fcc0f86a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 27 Oct 2023 23:39:22 +0800 Subject: [PATCH 39/46] Fix http Signed-off-by: Xuanwo --- core/src/services/azdls/backend.rs | 1 + core/src/services/azfile/backend.rs | 1 + core/src/services/webhdfs/backend.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 1ab176255610..435d6a57424c 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -293,6 +293,7 @@ impl Accessor for AzdlsBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + 
StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index 78af5035d7a1..f49ea127b136 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -311,6 +311,7 @@ impl Accessor for AzfileBackend { match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 3863b2dd38cb..165a9d363c5f 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -466,6 +466,7 @@ impl Accessor for WebhdfsBackend { let resp = self.webhdfs_read_file(path, range).await?; match resp.status() { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } From caabbe67d97081b1603b8ec3550191f53ffd109f Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 15:06:25 +0800 Subject: [PATCH 40/46] Add size hint for rp read Signed-off-by: Xuanwo --- core/src/raw/rps.rs | 25 ++++++++++++++++++++++++- core/src/services/azblob/backend.rs | 5 ++++- core/src/services/azdls/backend.rs | 5 ++++- core/src/services/azfile/backend.rs | 5 ++++- core/src/services/cos/backend.rs | 5 ++++- core/src/services/gcs/backend.rs | 3 ++- core/src/services/gdrive/backend.rs | 5 ++++- core/src/services/ghac/backend.rs | 5 ++++- core/src/services/http/backend.rs | 5 ++++- core/src/services/obs/backend.rs | 5 ++++- core/src/services/onedrive/backend.rs | 6 ++++-- core/src/services/oss/backend.rs | 5 ++++- core/src/services/s3/backend.rs | 5 ++++- core/src/services/webdav/backend.rs 
| 5 ++++- core/src/services/webhdfs/backend.rs | 5 ++++- 15 files changed, 78 insertions(+), 16 deletions(-) diff --git a/core/src/raw/rps.rs b/core/src/raw/rps.rs index 10b3c7159477..4cba4b8cc8b5 100644 --- a/core/src/raw/rps.rs +++ b/core/src/raw/rps.rs @@ -98,13 +98,36 @@ impl From for Request { /// Reply for `read` operation. #[derive(Debug, Clone, Default)] -pub struct RpRead {} +pub struct RpRead { + /// Size is the size of the reader returned by this read operation. + /// + /// - `Some(size)` means the reader has at most size bytes. + /// - `None` means the reader has unknown size. + /// + /// It's ok to leave size as empty, but it's recommended to set size if possible. We will use + /// this size as hint to do some optimization like avoid an extra stat or read. + size: Option, +} impl RpRead { /// Create a new reply for `read`. pub fn new() -> Self { RpRead::default() } + + /// Got the size of the reader returned by this read operation. + /// + /// - `Some(size)` means the reader has at most size bytes. + /// - `None` means the reader has unknown size. + pub fn size(&self) -> Option { + self.size + } + + /// Set the size of the reader returned by this read operation. + pub fn with_size(mut self, size: Option) -> Self { + self.size = size; + self + } } /// Reply for `batch` operation. 
diff --git a/core/src/services/azblob/backend.rs b/core/src/services/azblob/backend.rs index 6e8dd69ee322..6e46e1b35771 100644 --- a/core/src/services/azblob/backend.rs +++ b/core/src/services/azblob/backend.rs @@ -586,7 +586,10 @@ impl Accessor for AzblobBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/azdls/backend.rs b/core/src/services/azdls/backend.rs index 435d6a57424c..ad03274a2c11 100644 --- a/core/src/services/azdls/backend.rs +++ b/core/src/services/azdls/backend.rs @@ -292,7 +292,10 @@ impl Accessor for AzdlsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/azfile/backend.rs b/core/src/services/azfile/backend.rs index 256243624bfe..106ec405d2a8 100644 --- a/core/src/services/azfile/backend.rs +++ b/core/src/services/azfile/backend.rs @@ -316,7 +316,10 @@ impl Accessor for AzfileBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), 
IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/cos/backend.rs b/core/src/services/cos/backend.rs index aef237a1acd4..e5a52171803e 100644 --- a/core/src/services/cos/backend.rs +++ b/core/src/services/cos/backend.rs @@ -332,7 +332,10 @@ impl Accessor for CosBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/gcs/backend.rs b/core/src/services/gcs/backend.rs index 40f7f6a9b247..0522a8d5c143 100644 --- a/core/src/services/gcs/backend.rs +++ b/core/src/services/gcs/backend.rs @@ -389,7 +389,8 @@ impl Accessor for GcsBackend { let resp = self.core.gcs_get_object(path, &args).await?; if resp.status().is_success() { - Ok((RpRead::new(), resp.into_body())) + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) } else if resp.status() == StatusCode::RANGE_NOT_SATISFIABLE { Ok((RpRead::new(), IncomingAsyncBody::empty())) } else { diff --git a/core/src/services/gdrive/backend.rs b/core/src/services/gdrive/backend.rs index 53ab999ea032..4e1274d217e5 100644 --- a/core/src/services/gdrive/backend.rs +++ b/core/src/services/gdrive/backend.rs @@ -123,7 +123,10 @@ impl Accessor for GdriveBackend { let status = resp.status(); match status { - StatusCode::OK => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/ghac/backend.rs b/core/src/services/ghac/backend.rs index 
5b9c7c27db6c..425cee86bad1 100644 --- a/core/src/services/ghac/backend.rs +++ b/core/src/services/ghac/backend.rs @@ -350,7 +350,10 @@ impl Accessor for GhacBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/http/backend.rs b/core/src/services/http/backend.rs index 6b238886259a..233237f8721f 100644 --- a/core/src/services/http/backend.rs +++ b/core/src/services/http/backend.rs @@ -237,7 +237,10 @@ impl Accessor for HttpBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/obs/backend.rs b/core/src/services/obs/backend.rs index 98e51b9dbe2c..311af30a6a4d 100644 --- a/core/src/services/obs/backend.rs +++ b/core/src/services/obs/backend.rs @@ -359,7 +359,10 @@ impl Accessor for ObsBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/onedrive/backend.rs 
b/core/src/services/onedrive/backend.rs index 8b1959b9ba08..95e052654184 100644 --- a/core/src/services/onedrive/backend.rs +++ b/core/src/services/onedrive/backend.rs @@ -93,8 +93,10 @@ impl Accessor for OnedriveBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), - + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } _ => Err(parse_error(resp).await?), } } diff --git a/core/src/services/oss/backend.rs b/core/src/services/oss/backend.rs index bf39396ba7a9..93323b3a3156 100644 --- a/core/src/services/oss/backend.rs +++ b/core/src/services/oss/backend.rs @@ -474,7 +474,10 @@ impl Accessor for OssBackend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/s3/backend.rs b/core/src/services/s3/backend.rs index eeadf18d7ec1..f2157c5dc6f5 100644 --- a/core/src/services/s3/backend.rs +++ b/core/src/services/s3/backend.rs @@ -969,7 +969,10 @@ impl Accessor for S3Backend { let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 
1e0e3b76e7b6..93e0a70ca264 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -267,7 +267,10 @@ impl Accessor for WebdavBackend { let resp = self.webdav_get(path, args).await?; let status = resp.status(); match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 165a9d363c5f..6c11ebc33618 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -465,7 +465,10 @@ impl Accessor for WebhdfsBackend { let range = args.range(); let resp = self.webhdfs_read_file(path, range).await?; match resp.status() { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let size = parse_content_length(resp.headers())?; + Ok((RpRead::new().with_size(size), resp.into_body())) + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } From 13f4124e34de0623f5cb07b8146f8b4fcbec220a Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 15:18:58 +0800 Subject: [PATCH 41/46] set size for reader Signed-off-by: Xuanwo --- core/src/raw/oio/read/range_read.rs | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index 28cff072e44c..004aea2a8021 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -267,13 +267,19 @@ where self.poll_read(cx, buf) } State::SendRead(fut) => { - let (_, 
r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { // If read future returns an error, we should reset // state to Idle so that we can retry it. self.state = State::Idle; err })?; + // Set size if read returns size hint. + if let Some(size) = rp.size() { + if self.size.is_none() { + self.size = Some(size + self.cur); + } + } self.state = State::Read(r); self.poll_read(cx, buf) } @@ -391,13 +397,19 @@ where self.poll_next(cx) } State::SendRead(fut) => { - let (_, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { + let (rp, r) = ready!(Pin::new(fut).poll(cx)).map_err(|err| { // If read future returns an error, we should reset // state to Idle so that we can retry it. self.state = State::Idle; err })?; + // Set size if read returns size hint. + if let Some(size) = rp.size() { + if self.size.is_none() { + self.size = Some(size + self.cur); + } + } self.state = State::Read(r); self.poll_next(cx) } @@ -441,7 +453,14 @@ where self.fill_range(length)?; } - let (_, r) = self.read_action()?; + let (rp, r) = self.read_action()?; + // Set size if read returns size hint. 
+ if let Some(size) = rp.size() { + if self.size.is_none() { + self.size = Some(size + self.cur); + } + } + self.state = State::Read(r); self.read(buf) } From 96b73bb3487d59e8332a7fd74168a4ba572f932e Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 16:15:38 +0800 Subject: [PATCH 42/46] Fix webhdfs Signed-off-by: Xuanwo --- core/src/services/webhdfs/backend.rs | 12 ++++++++++++ core/src/services/webhdfs/error.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/core/src/services/webhdfs/backend.rs b/core/src/services/webhdfs/backend.rs index 6c11ebc33618..db72a346736d 100644 --- a/core/src/services/webhdfs/backend.rs +++ b/core/src/services/webhdfs/backend.rs @@ -28,6 +28,7 @@ use log::debug; use tokio::sync::OnceCell; use super::error::parse_error; +use super::error::parse_error_msg; use super::message::BooleanResp; use super::message::DirectoryListingWrapper; use super::message::FileStatusType; @@ -469,6 +470,17 @@ impl Accessor for WebhdfsBackend { let size = parse_content_length(resp.headers())?; Ok((RpRead::new().with_size(size), resp.into_body())) } + // WebHDFS will returns 403 when range is outside of the end. + StatusCode::FORBIDDEN => { + let (parts, body) = resp.into_parts(); + let bs = body.bytes().await?; + let s = String::from_utf8_lossy(&bs); + if s.contains("out of the range") { + Ok((RpRead::new(), IncomingAsyncBody::empty())) + } else { + Err(parse_error_msg(parts, &s)?) 
+ } + } StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } diff --git a/core/src/services/webhdfs/error.rs b/core/src/services/webhdfs/error.rs index 449725e632e3..d243b84811d1 100644 --- a/core/src/services/webhdfs/error.rs +++ b/core/src/services/webhdfs/error.rs @@ -46,7 +46,7 @@ pub(super) async fn parse_error(resp: Response) -> Result Result { +pub(super) fn parse_error_msg(parts: Parts, body: &str) -> Result { let (kind, retryable) = match parts.status { StatusCode::NOT_FOUND => (ErrorKind::NotFound, false), StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => (ErrorKind::PermissionDenied, false), From 187c079e9eeb362f022c8151d1c24ab5d9d9ad99 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 16:15:44 +0800 Subject: [PATCH 43/46] Fix dropbox Signed-off-by: Xuanwo --- core/src/services/dropbox/backend.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/services/dropbox/backend.rs b/core/src/services/dropbox/backend.rs index bbe3b0b18a34..e250ea76c0c8 100644 --- a/core/src/services/dropbox/backend.rs +++ b/core/src/services/dropbox/backend.rs @@ -98,6 +98,7 @@ impl Accessor for DropboxBackend { let status = resp.status(); match status { StatusCode::OK | StatusCode::PARTIAL_CONTENT => Ok((RpRead::new(), resp.into_body())), + StatusCode::RANGE_NOT_SATISFIABLE => Ok((RpRead::new(), IncomingAsyncBody::empty())), _ => Err(parse_error(resp).await?), } } From ca1ffea3943d95b7f36c697247154f13b869af98 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 16:26:03 +0800 Subject: [PATCH 44/46] Add opendal log level to trace Signed-off-by: Xuanwo --- .github/actions/setup/action.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup/action.yaml b/.github/actions/setup/action.yaml index d217c3fc73e7..bde6031d41dc 100644 --- a/.github/actions/setup/action.yaml +++ b/.github/actions/setup/action.yaml @@ -42,7 +42,7 @@ runs: # Enable 
backtraces echo "RUST_BACKTRACE=1" >> $GITHUB_ENV # Enable logging - echo "RUST_LOG=opendal=debug" >> $GITHUB_ENV + echo "RUST_LOG=opendal=trace" >> $GITHUB_ENV # Enable sparse index echo "CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse" >> $GITHUB_ENV From 10dda265ca625a4c2e027e89c5784076afd71f0b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Mon, 30 Oct 2023 16:49:24 +0800 Subject: [PATCH 45/46] Don't set size if it's 0 Signed-off-by: Xuanwo --- core/src/layers/logging.rs | 16 +++++++++++++--- core/src/raw/oio/read/range_read.rs | 7 ++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/core/src/layers/logging.rs b/core/src/layers/logging.rs index 81b1cfb2c965..78ebc8d4ccc1 100644 --- a/core/src/layers/logging.rs +++ b/core/src/layers/logging.rs @@ -1091,7 +1091,7 @@ impl oio::Read for LoggingReader { self.read += bs.len() as u64; trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next {}B", + "service={} operation={} path={} read={} -> next returns {}B", self.ctx.scheme, ReadOperation::Next, self.path, @@ -1115,12 +1115,22 @@ impl oio::Read for LoggingReader { } Poll::Ready(Some(Err(err))) } - None => Poll::Ready(None), + None => { + trace!( + target: LOGGING_TARGET, + "service={} operation={} path={} read={} -> next returns None", + self.ctx.scheme, + ReadOperation::Next, + self.path, + self.read, + ); + Poll::Ready(None) + } }, Poll::Pending => { trace!( target: LOGGING_TARGET, - "service={} operation={} path={} read={} -> next pending", + "service={} operation={} path={} read={} -> next returns pending", self.ctx.scheme, ReadOperation::Next, self.path, diff --git a/core/src/raw/oio/read/range_read.rs b/core/src/raw/oio/read/range_read.rs index 004aea2a8021..0447aba8f4a8 100644 --- a/core/src/raw/oio/read/range_read.rs +++ b/core/src/raw/oio/read/range_read.rs @@ -276,7 +276,7 @@ where // Set size if read returns size hint. 
if let Some(size) = rp.size() { - if self.size.is_none() { + if size != 0 && self.size.is_none() { self.size = Some(size + self.cur); } } @@ -406,7 +406,7 @@ where // Set size if read returns size hint. if let Some(size) = rp.size() { - if self.size.is_none() { + if size != 0 && self.size.is_none() { self.size = Some(size + self.cur); } } @@ -454,9 +454,10 @@ where } let (rp, r) = self.read_action()?; + // Set size if read returns size hint. if let Some(size) = rp.size() { - if self.size.is_none() { + if size != 0 && self.size.is_none() { self.size = Some(size + self.cur); } } From de7c3d32b319eb7a3f8458e524531e0f2b275759 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 31 Oct 2023 13:38:53 +0800 Subject: [PATCH 46/46] format toml Signed-off-by: Xuanwo --- .github/workflows/behavior_test.yml | 1 + .typos.toml | 2 +- bin/oay/Cargo.toml | 7 +++- bindings/c/Cargo.toml | 2 +- bindings/cpp/Cargo.toml | 6 +-- bindings/dotnet/Cargo.toml | 1 - bindings/haskell/Cargo.toml | 2 +- bindings/java/Cargo.toml | 40 ++++++++++---------- bindings/python/Cargo.toml | 39 +++++++++---------- core/Cargo.toml | 2 +- core/edge/file_write_on_full_disk/Cargo.toml | 6 +-- core/fuzz/Cargo.toml | 2 +- integrations/dav-server/Cargo.toml | 5 +-- 13 files changed, 60 insertions(+), 55 deletions(-) diff --git a/.github/workflows/behavior_test.yml b/.github/workflows/behavior_test.yml index 861a7f981900..d1cb64ab7ae3 100644 --- a/.github/workflows/behavior_test.yml +++ b/.github/workflows/behavior_test.yml @@ -102,6 +102,7 @@ jobs: with: os: ${{ matrix.os }} cases: ${{ toJson(matrix.cases) }} + test_binding_python: name: binding_python / ${{ matrix.os }} needs: [plan] diff --git a/.typos.toml b/.typos.toml index 5b2bca99b5a6..df205aecc397 100644 --- a/.typos.toml +++ b/.typos.toml @@ -18,8 +18,8 @@ [default.extend-words] # Random strings. "Dum" = "Dum" -"ba" = "ba" "Hel" = "Hel" +"ba" = "ba" "hellow" = "hellow" # Showed up in examples. 
"thw" = "thw" diff --git a/bin/oay/Cargo.toml b/bin/oay/Cargo.toml index 87c07a4648af..5a907dfcee60 100644 --- a/bin/oay/Cargo.toml +++ b/bin/oay/Cargo.toml @@ -33,7 +33,12 @@ version.workspace = true default = ["frontends-webdav", "frontends-s3"] frontends-s3 = [] -frontends-webdav = ["dep:dav-server", "dep:dav-server-opendalfs", "dep:bytes", "dep:futures-util"] +frontends-webdav = [ + "dep:dav-server", + "dep:dav-server-opendalfs", + "dep:bytes", + "dep:futures-util", +] [dependencies] anyhow = "1" diff --git a/bindings/c/Cargo.toml b/bindings/c/Cargo.toml index a0f31bc75f9b..86532f8acd4d 100644 --- a/bindings/c/Cargo.toml +++ b/bindings/c/Cargo.toml @@ -36,6 +36,6 @@ cbindgen = "0.25.0" [dependencies] bytes = "1.4.0" +once_cell = "1.17.1" opendal.workspace = true tokio = { version = "1.27", features = ["fs", "macros", "rt-multi-thread"] } -once_cell = "1.17.1" diff --git a/bindings/cpp/Cargo.toml b/bindings/cpp/Cargo.toml index 058de3a2078f..40e90680eecb 100644 --- a/bindings/cpp/Cargo.toml +++ b/bindings/cpp/Cargo.toml @@ -24,17 +24,17 @@ edition.workspace = true homepage.workspace = true license.workspace = true repository.workspace = true -version.workspace = true rust-version.workspace = true +version.workspace = true [lib] crate-type = ["staticlib"] [dependencies] -opendal.workspace = true -cxx = "1.0" anyhow = "1.0" chrono = "0.4" +cxx = "1.0" +opendal.workspace = true [build-dependencies] cxx-build = "1.0" diff --git a/bindings/dotnet/Cargo.toml b/bindings/dotnet/Cargo.toml index 11a6a3c5a250..e6a320bc463f 100644 --- a/bindings/dotnet/Cargo.toml +++ b/bindings/dotnet/Cargo.toml @@ -27,7 +27,6 @@ license.workspace = true repository.workspace = true rust-version.workspace = true - [lib] crate-type = ["cdylib"] doc = false diff --git a/bindings/haskell/Cargo.toml b/bindings/haskell/Cargo.toml index 637ee3d8e196..5c1021f076e7 100644 --- a/bindings/haskell/Cargo.toml +++ b/bindings/haskell/Cargo.toml @@ -24,8 +24,8 @@ edition.workspace = true 
homepage.workspace = true license.workspace = true repository.workspace = true -version.workspace = true rust-version.workspace = true +version.workspace = true [lib] crate-type = ["cdylib"] diff --git a/bindings/java/Cargo.toml b/bindings/java/Cargo.toml index c5e7707b96bf..15bf36620525 100644 --- a/bindings/java/Cargo.toml +++ b/bindings/java/Cargo.toml @@ -86,22 +86,23 @@ services-all = [ ] # Default services provided by opendal. -services-azblob = [ "opendal/services-azblob" ] -services-azdls = [ "opendal/services-azdls" ] -services-cos = [ "opendal/services-cos" ] -services-fs = [ "opendal/services-fs" ] -services-gcs = [ "opendal/services-gcs" ] -services-ghac = [ "opendal/services-ghac" ] -services-http = [ "opendal/services-http" ] -services-ipmfs = [ "opendal/services-ipmfs" ] -services-memory = [ "opendal/services-memory" ] -services-obs = [ "opendal/services-obs" ] -services-oss = [ "opendal/services-oss" ] -services-s3 = [ "opendal/services-s3" ] -services-webdav = [ "opendal/services-webdav" ] -services-webhdfs = [ "opendal/services-webhdfs" ] +services-azblob = ["opendal/services-azblob"] +services-azdls = ["opendal/services-azdls"] +services-cos = ["opendal/services-cos"] +services-fs = ["opendal/services-fs"] +services-gcs = ["opendal/services-gcs"] +services-ghac = ["opendal/services-ghac"] +services-http = ["opendal/services-http"] +services-ipmfs = ["opendal/services-ipmfs"] +services-memory = ["opendal/services-memory"] +services-obs = ["opendal/services-obs"] +services-oss = ["opendal/services-oss"] +services-s3 = ["opendal/services-s3"] +services-webdav = ["opendal/services-webdav"] +services-webhdfs = ["opendal/services-webhdfs"] # Optional services provided by opendal. 
+services-azfile = ["opendal/services-azfile"] services-cacache = ["opendal/services-cacache"] services-dashmap = ["opendal/services-dashmap"] services-dropbox = ["opendal/services-dropbox"] @@ -114,6 +115,8 @@ services-ipfs = ["opendal/services-ipfs"] services-memcached = ["opendal/services-memcached"] services-mini-moka = ["opendal/services-mini-moka"] services-moka = ["opendal/services-moka"] +services-mongodb = ["opendal/services-mongodb"] +services-mysql = ["opendal/services-mysql"] services-onedrive = ["opendal/services-onedrive"] services-persy = ["opendal/services-persy"] services-postgresql = ["opendal/services-postgresql"] @@ -123,28 +126,25 @@ services-redis-rustls = ["opendal/services-redis-rustls"] services-rocksdb = ["opendal/services-rocksdb"] services-sftp = ["opendal/services-sftp"] services-sled = ["opendal/services-sled"] +services-sqlite = ["opendal/services-sqlite"] services-supabase = ["opendal/services-supabase"] services-tikv = ["opendal/services-tikv"] services-vercel-artifacts = ["opendal/services-vercel-artifacts"] services-wasabi = ["opendal/services-wasabi"] -services-mysql = ["opendal/services-mysql"] -services-mongodb = ["opendal/services-mongodb"] -services-sqlite = ["opendal/services-sqlite"] -services-azfile = ["opendal/services-azfile"] [dependencies] anyhow = "1.0.71" jni = "0.21.1" num_cpus = "1.15.0" once_cell = "1.17.1" -tokio = { version = "1.28.1", features = ["full"] } opendal = { workspace = true } +tokio = { version = "1.28.1", features = ["full"] } # This is not optimal. See also the Cargo issue: # https://github.com/rust-lang/cargo/issues/1197#issuecomment-1641086954 [target.'cfg(unix)'.dependencies.opendal] -workspace = true features = [ # Depend on "openssh" which depends on "tokio-pipe" that is unavailable on Windows. 
"services-sftp", ] +workspace = true diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index a2ef50b5bf81..fa636592ce57 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -27,7 +27,6 @@ repository.workspace = true rust-version.workspace = true version.workspace = true - [features] # Enable all opendal default feature by default. default = [ @@ -50,6 +49,7 @@ default = [ services-all = [ "default", + "services-azfile", "services-cacache", "services-dashmap", "services-dropbox", @@ -82,20 +82,21 @@ services-all = [ ] # Default services provided by opendal. -services-azblob = [ "opendal/services-azblob" ] -services-azdls = [ "opendal/services-azdls" ] -services-cos = [ "opendal/services-cos" ] -services-fs = [ "opendal/services-fs" ] -services-gcs = [ "opendal/services-gcs" ] -services-ghac = [ "opendal/services-ghac" ] -services-http = [ "opendal/services-http" ] -services-ipmfs = [ "opendal/services-ipmfs" ] -services-memory = [ "opendal/services-memory" ] -services-obs = [ "opendal/services-obs" ] -services-oss = [ "opendal/services-oss" ] -services-s3 = [ "opendal/services-s3" ] -services-webdav = [ "opendal/services-webdav" ] -services-webhdfs = [ "opendal/services-webhdfs" ] +services-azblob = ["opendal/services-azblob"] +services-azdls = ["opendal/services-azdls"] +services-azfile = ["opendal/services-azfile"] +services-cos = ["opendal/services-cos"] +services-fs = ["opendal/services-fs"] +services-gcs = ["opendal/services-gcs"] +services-ghac = ["opendal/services-ghac"] +services-http = ["opendal/services-http"] +services-ipmfs = ["opendal/services-ipmfs"] +services-memory = ["opendal/services-memory"] +services-obs = ["opendal/services-obs"] +services-oss = ["opendal/services-oss"] +services-s3 = ["opendal/services-s3"] +services-webdav = ["opendal/services-webdav"] +services-webhdfs = ["opendal/services-webhdfs"] # Optional services provided by opendal. 
services-cacache = ["opendal/services-cacache"] @@ -110,6 +111,8 @@ services-ipfs = ["opendal/services-ipfs"] services-memcached = ["opendal/services-memcached"] services-mini-moka = ["opendal/services-mini-moka"] services-moka = ["opendal/services-moka"] +services-mongodb = ["opendal/services-mongodb"] +services-mysql = ["opendal/services-mysql"] services-onedrive = ["opendal/services-onedrive"] services-persy = ["opendal/services-persy"] services-postgresql = ["opendal/services-postgresql"] @@ -119,13 +122,11 @@ services-redis-rustls = ["opendal/services-redis-rustls"] services-rocksdb = ["opendal/services-rocksdb"] services-sftp = ["opendal/services-sftp"] services-sled = ["opendal/services-sled"] +services-sqlite = ["opendal/services-sqlite"] services-supabase = ["opendal/services-supabase"] services-tikv = ["opendal/services-tikv"] services-vercel-artifacts = ["opendal/services-vercel-artifacts"] services-wasabi = ["opendal/services-wasabi"] -services-mysql = ["opendal/services-mysql"] -services-mongodb = ["opendal/services-mongodb"] -services-sqlite = ["opendal/services-sqlite"] [lib] crate-type = ["cdylib"] @@ -136,4 +137,4 @@ futures = "0.3.28" opendal.workspace = true pyo3 = "0.19" pyo3-asyncio = { version = "0.19", features = ["tokio-runtime"] } -tokio = "1" \ No newline at end of file +tokio = "1" diff --git a/core/Cargo.toml b/core/Cargo.toml index 2bf456813148..a0a1d76a4900 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -234,6 +234,7 @@ chrono = { version = "0.4.28", default-features = false, features = [ ] } dashmap = { version = "5.4", optional = true } dirs = { version = "5.0.1", optional = true } +dotenvy = { version = "0.15", optional = true } etcd-client = { version = "0.11", optional = true, features = ["tls"] } flagset = "0.4" foundationdb = { version = "0.8.0", features = [ @@ -295,7 +296,6 @@ tokio = "1.27" tokio-postgres = { version = "0.7.8", optional = true } tracing = { version = "0.1", optional = true } uuid = { version = "1", 
features = ["serde", "v4"] } -dotenvy = { version = "0.15", optional = true } [dev-dependencies] criterion = { version = "0.4", features = ["async", "async_tokio"] } diff --git a/core/edge/file_write_on_full_disk/Cargo.toml b/core/edge/file_write_on_full_disk/Cargo.toml index aefee22d47b0..7cb9e4a3b39a 100644 --- a/core/edge/file_write_on_full_disk/Cargo.toml +++ b/core/edge/file_write_on_full_disk/Cargo.toml @@ -16,13 +16,13 @@ # under the License. [package] -name = "edge_test_file_write_on_full_disk" edition = "2021" -version = "0.0.0" +name = "edge_test_file_write_on_full_disk" publish = false +version = "0.0.0" [dependencies] futures = "0.3" opendal = { workspace = true } -tokio = { version = "1", features = ["full"] } rand = "0.8" +tokio = { version = "1", features = ["full"] } diff --git a/core/fuzz/Cargo.toml b/core/fuzz/Cargo.toml index f0a3ad726dc4..fbc45cc1fa0e 100644 --- a/core/fuzz/Cargo.toml +++ b/core/fuzz/Cargo.toml @@ -32,11 +32,11 @@ dotenvy = "0.15.6" libfuzzer-sys = "0.4" opendal = { path = "..", features = ["tests"] } tokio = { version = "1", features = ["full"] } -uuid = { version = "1", features = ["v4"] } tracing-subscriber = { version = "0.3", features = [ "env-filter", "tracing-log", ] } +uuid = { version = "1", features = ["v4"] } [[bin]] name = "fuzz_reader" diff --git a/integrations/dav-server/Cargo.toml b/integrations/dav-server/Cargo.toml index 5bb32948c749..911812f47214 100644 --- a/integrations/dav-server/Cargo.toml +++ b/integrations/dav-server/Cargo.toml @@ -29,10 +29,10 @@ version.workspace = true [dependencies] anyhow = "1" -chrono = "0.4.28" -dirs = "5.0.0" bytes = { version = "1.4.0" } +chrono = "0.4.28" dav-server = { version = "0.5.5" } +dirs = "5.0.0" futures = "0.3" futures-util = { version = "0.3.16" } opendal.workspace = true @@ -44,4 +44,3 @@ tokio = { version = "1.27", features = [ "rt-multi-thread", "io-std", ] } -