From b8a2c82edf8f9374a0f4b7d92ccaa1183a590d77 Mon Sep 17 00:00:00 2001 From: yansong Date: Tue, 2 May 2023 02:03:36 +0800 Subject: [PATCH 01/41] fix: add redirect logic --- core/src/services/webdav/backend.rs | 39 ++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 78b6463ad68d..3e8856a51b2a 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -27,6 +27,7 @@ use http::Request; use http::Response; use http::StatusCode; use log::debug; +use reqwest::Url; use super::error::parse_error; use super::list_response::Multistatus; @@ -199,7 +200,7 @@ impl Builder for WebdavBuilder { Some(v) => v, None => { return Err(Error::new(ErrorKind::ConfigInvalid, "endpoint is empty") - .with_context("service", Scheme::Webdav)) + .with_context("service", Scheme::Webdav)); } }; @@ -308,6 +309,42 @@ impl Accessor for WebdavBackend { let meta = parse_into_metadata(path, resp.headers())?; Ok((RpRead::with_metadata(meta), resp.into_body())) } + StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { + // if server returns redirect HTTP status, then redirect it + let url = parse_location(resp.headers())? + // no location means invalid redirect response + .ok_or( + Error::new( + ErrorKind::Unexpected, + &format!("no location header in redirect response."), + ).with_operation(Operation::Read) + )?; + + // first the url in location should be valid + let redirected_url = Url::parse(url).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("redirected url({url}) is not valid."), + ).with_operation(Operation::Read).set_source(e) + })?; + + let original_url = Url::parse(&self.endpoint).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("original url({}) is not valid.", &self.endpoint), + ).with_operation(Operation::Read).set_source(e) + })?; + // then the redirected url should have the same origin with original url + // this is basic security check + if original_url != redirected_url { + return Err(Error::new( + ErrorKind::Unexpected, + &format!("origin of original url({}) doesn't match with redirect url({}).", &self.endpoint, url), + ).with_operation(Operation::Read)); + } + // if original_url + return self.read(redirected_url.path(), args).await; + } _ => Err(parse_error(resp).await?), } } From f6d5f4f1337bde3c68d09f25b10b9c119d2d567a Mon Sep 17 00:00:00 2001 From: yansong Date: Mon, 8 May 2023 23:37:52 +0800 Subject: [PATCH 02/41] refactor: webdav redirect logic refactor --- Cargo.lock | 12 ++++ core/Cargo.toml | 1 + core/src/services/webdav/backend.rs | 106 +++++++++++++++------------- 3 files changed, 70 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a986e386195..ff3768c2055c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,6 +217,17 @@ dependencies = [ "event-listener", ] +[[package]] +name = "async-recursion" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.12", +] + [[package]] name = "async-std" version = "1.12.0" @@ -2621,6 +2632,7 @@ version = "0.34.0" dependencies = [ "anyhow", "async-compat", + "async-recursion", "async-tls", "async-trait", "backon", diff --git a/core/Cargo.toml b/core/Cargo.toml index 28be62b86e42..a1a4b12509c1 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -214,6 +214,7 @@ suppaftp = { version = "4.5", default-features = false, features = [ tokio = "1.27" tracing = { version = "0.1", optional = true } uuid = { version = "1", features = ["serde", "v4"] } +async-recursion = "1.0.4" [dev-dependencies] criterion = { version = "0.4", features = ["async", "async_tokio"] } diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 3e8856a51b2a..19b97dffddcc 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -28,6 +28,7 @@ use http::Response; use http::StatusCode; use log::debug; use reqwest::Url; +use async_recursion::async_recursion; use super::error::parse_error; use super::list_response::Multistatus; @@ -300,53 +301,7 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let resp = self.webdav_get(path, args.range()).await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } - StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { - // if server returns redirect HTTP status, then redirect it - let url = parse_location(resp.headers())? - // no location means invalid redirect response - .ok_or( - Error::new( - ErrorKind::Unexpected, - &format!("no location header in redirect response."), - ).with_operation(Operation::Read) - )?; - - // first the url in location should be valid - let redirected_url = Url::parse(url).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("redirected url({url}) is not valid."), - ).with_operation(Operation::Read).set_source(e) - })?; - - let original_url = Url::parse(&self.endpoint).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("original url({}) is not valid.", &self.endpoint), - ).with_operation(Operation::Read).set_source(e) - })?; - // then the redirected url should have the same origin with original url - // this is basic security check - if original_url != redirected_url { - return Err(Error::new( - ErrorKind::Unexpected, - &format!("origin of original url({}) doesn't match with redirect url({}).", &self.endpoint, url), - ).with_operation(Operation::Read)); - } - // if original_url - return self.read(redirected_url.path(), args).await; - } - _ => Err(parse_error(resp).await?), - } + self.read(path, args, true).await } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -486,6 +441,7 @@ impl WebdavBackend { &self, path: &str, range: BytesRange, + send_auth: bool, ) -> Result> { let p = build_rooted_abs_path(&self.root, path); @@ -493,8 +449,9 @@ impl WebdavBackend { let mut req = Request::get(&url); - if let Some(auth) = &self.authorization { - req = req.header(header::AUTHORIZATION, auth.clone()) + match &self.authorization { + Some(auth) if send_auth => req = req.header(header::AUTHORIZATION, auth.clone()), + _ => () } if !range.is_full() { @@ -508,6 +465,57 @@ impl WebdavBackend { self.client.send(req).await } + #[async_recursion] + // read will send http request for dav access. + // sometimes we need to control whether we should add auth token in Authorization header + // that is why send_auth exists here. + async fn read(&self, path: &str, args: OpRead, send_auth: bool) -> Result<(RpRead, IncomingAsyncBody)> { + let resp = self.webdav_get(path, args.range(), send_auth).await?; + + let status = resp.status(); + + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let meta = parse_into_metadata(path, resp.headers())?; + Ok((RpRead::with_metadata(meta), resp.into_body())) + } + StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { + // if server returns redirect HTTP status, then redirect it + let url = parse_location(resp.headers())? + // no location means invalid redirect response + .ok_or( + Error::new( + ErrorKind::Unexpected, + &format!("no location header in redirect response."), + ).with_operation(Operation::Read) + )?; + + // first the url in location should be valid + let redirected_url = Url::parse(url).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("redirected url({url}) is not valid."), + ).with_operation(Operation::Read).set_source(e) + })?; + + let original_url = Url::parse(&self.endpoint).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("original url({}) is not valid.", &self.endpoint), + ).with_operation(Operation::Read).set_source(e) + })?; + // basic security check, the redirected url should have the same origin with original url + // if not, it will not send request with auth + return self.read( + redirected_url.path(), + args, + send_auth && (original_url == redirected_url), + ).await; + } + _ => Err(parse_error(resp).await?), + } + } + pub async fn webdav_put( &self, abs_path: &str, From 1e951d2dfb38e94b71dd681219d1aa9caefb591a Mon Sep 17 00:00:00 2001 From: yansong Date: Thu, 11 May 2023 22:25:16 +0800 Subject: [PATCH 03/41] fix: compare Origin instead of whole url --- core/src/services/webdav/backend.rs | 40 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 19b97dffddcc..74a46db86b3e 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -301,7 +301,7 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - self.read(path, args, true).await + self.read(path, args, None).await } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -441,11 +441,16 @@ impl WebdavBackend { &self, path: &str, range: BytesRange, - send_auth: bool, + override_endpoint: Option, ) -> Result> { let p = build_rooted_abs_path(&self.root, path); - - let url: String = format!("{}{}", self.endpoint, percent_encode_path(&p)); + // user can give one new endpoint to override default endpoint + // this case happens when receive redirect response from server + let endpoint = override_endpoint.unwrap_or(self.endpoint.clone()); + // if the override endpoint differs from original endpoint + // we will not send auth to server due to security issue. + let send_auth = endpoint.eq(&self.endpoint); + let url: String = format!("{}{}", endpoint, percent_encode_path(&p)); let mut req = Request::get(&url); @@ -465,12 +470,12 @@ impl WebdavBackend { self.client.send(req).await } - #[async_recursion] // read will send http request for dav access. - // sometimes we need to control whether we should add auth token in Authorization header - // that is why send_auth exists here. - async fn read(&self, path: &str, args: OpRead, send_auth: bool) -> Result<(RpRead, IncomingAsyncBody)> { - let resp = self.webdav_get(path, args.range(), send_auth).await?; + // sometimes we need to send request to different endpoint when server side + // indicate we need to redirect to another url. + #[async_recursion] + async fn read(&self, path: &str, args: OpRead, override_endpoint: Option) -> Result<(RpRead, IncomingAsyncBody)> { + let resp = self.webdav_get(path, args.range(), override_endpoint).await?; let status = resp.status(); @@ -481,7 +486,7 @@ impl WebdavBackend { } StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { // if server returns redirect HTTP status, then redirect it - let url = parse_location(resp.headers())? + let redirected_url = parse_location(resp.headers())? // no location means invalid redirect response .ok_or( Error::new( @@ -491,25 +496,18 @@ impl WebdavBackend { )?; // first the url in location should be valid - let redirected_url = Url::parse(url).map_err(|e| { + let redirected_url = Url::parse(redirected_url).map_err(|e| { Error::new( ErrorKind::Unexpected, - &format!("redirected url({url}) is not valid."), + &format!("redirected url({redirected_url}) is not valid."), ).with_operation(Operation::Read).set_source(e) })?; - let original_url = Url::parse(&self.endpoint).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("original url({}) is not valid.", &self.endpoint), - ).with_operation(Operation::Read).set_source(e) - })?; // basic security check, the redirected url should have the same origin with original url // if not, it will not send request with auth return self.read( - redirected_url.path(), - args, - send_auth && (original_url == redirected_url), + redirected_url.path(), args, + Some(redirected_url.origin().unicode_serialization()), ).await; } _ => Err(parse_error(resp).await?), From 3da6269ad08b853f7e7f463c8f8f65a60bf7a0a1 Mon Sep 17 00:00:00 2001 From: yansong Date: Thu, 11 May 2023 22:46:15 +0800 Subject: [PATCH 04/41] fix: path beginning with / --- core/src/services/webdav/backend.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 74a46db86b3e..72dafab3b762 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -505,8 +505,10 @@ impl WebdavBackend { // basic security check, the redirected url should have the same origin with original url // if not, it will not send request with auth + let path = redirected_url.path(); return self.read( - redirected_url.path(), args, + path.strip_prefix("/").unwrap_or(path), + args, Some(redirected_url.origin().unicode_serialization()), ).await; } From 660025794167b2edd112dd38273525ddea5351f6 Mon Sep 17 00:00:00 2001 From: yansong Date: Thu, 11 May 2023 23:15:04 +0800 Subject: [PATCH 05/41] fix: remove the root from redirect location --- core/src/services/webdav/backend.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 72dafab3b762..34119d8beb67 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -475,6 +475,7 @@ impl WebdavBackend { // indicate we need to redirect to another url. #[async_recursion] async fn read(&self, path: &str, args: OpRead, override_endpoint: Option) -> Result<(RpRead, IncomingAsyncBody)> { + debug!("will read: path={}, override_endpoint={:?}", &path, &override_endpoint); let resp = self.webdav_get(path, args.range(), override_endpoint).await?; let status = resp.status(); @@ -494,6 +495,7 @@ impl WebdavBackend { &format!("no location header in redirect response."), ).with_operation(Operation::Read) )?; + debug!("received statue code 302/307, will redirect request, url: {}", redirected_url); // first the url in location should be valid let redirected_url = Url::parse(redirected_url).map_err(|e| { @@ -507,7 +509,9 @@ impl WebdavBackend { // if not, it will not send request with auth let path = redirected_url.path(); return self.read( - path.strip_prefix("/").unwrap_or(path), + // if root is the prefix of path, then remove it + // this is for the case that redirect only change the origin of url + path.strip_prefix(&self.root).unwrap_or(path), args, Some(redirected_url.origin().unicode_serialization()), ).await; From 91079195f3fad0404ac5af6dd1bc417e263bcd80 Mon Sep 17 00:00:00 2001 From: yansong Date: Thu, 11 May 2023 23:31:24 +0800 Subject: [PATCH 06/41] fix: add url escape decode logic for path --- core/src/services/webdav/backend.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 34119d8beb67..165760319bfa 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; @@ -29,6 +30,7 @@ use http::StatusCode; use log::debug; use reqwest::Url; use async_recursion::async_recursion; +use percent_encoding::{percent_decode_str}; use super::error::parse_error; use super::list_response::Multistatus; @@ -508,10 +510,14 @@ impl WebdavBackend { // basic security check, the redirected url should have the same origin with original url // if not, it will not send request with auth let path = redirected_url.path(); + // url escape decode to avoid special case + let path = percent_decode_str(path) + .decode_utf8().unwrap_or(Cow::from(path)) + .into_owned(); return self.read( // if root is the prefix of path, then remove it // this is for the case that redirect only change the origin of url - path.strip_prefix(&self.root).unwrap_or(path), + path.strip_prefix(&self.root).unwrap_or(&path), args, Some(redirected_url.origin().unicode_serialization()), ).await; From fe10893b93bef20faa3a6188eaf9fa13ae322139 Mon Sep 17 00:00:00 2001 From: yansong Date: Thu, 11 May 2023 23:32:31 +0800 Subject: [PATCH 07/41] chores: format --- core/src/services/webdav/backend.rs | 53 ++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 165760319bfa..00ae04f86a00 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -20,6 +20,7 @@ use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; +use async_recursion::async_recursion; use async_trait::async_trait; use bytes::Buf; use http::header; @@ -28,9 +29,8 @@ use http::Request; use http::Response; use http::StatusCode; use log::debug; +use percent_encoding::percent_decode_str; use reqwest::Url; -use async_recursion::async_recursion; -use percent_encoding::{percent_decode_str}; use super::error::parse_error; use super::list_response::Multistatus; @@ -458,7 +458,7 @@ impl WebdavBackend { match &self.authorization { Some(auth) if send_auth => req = req.header(header::AUTHORIZATION, auth.clone()), - _ => () + _ => (), } if !range.is_full() { @@ -476,9 +476,19 @@ impl WebdavBackend { // sometimes we need to send request to different endpoint when server side // indicate we need to redirect to another url. #[async_recursion] - async fn read(&self, path: &str, args: OpRead, override_endpoint: Option) -> Result<(RpRead, IncomingAsyncBody)> { - debug!("will read: path={}, override_endpoint={:?}", &path, &override_endpoint); - let resp = self.webdav_get(path, args.range(), override_endpoint).await?; + async fn read( + &self, + path: &str, + args: OpRead, + override_endpoint: Option, + ) -> Result<(RpRead, IncomingAsyncBody)> { + debug!( + "will read: path={}, override_endpoint={:?}", + &path, &override_endpoint + ); + let resp = self + .webdav_get(path, args.range(), override_endpoint) + .await?; let status = resp.status(); @@ -495,16 +505,22 @@ impl WebdavBackend { Error::new( ErrorKind::Unexpected, &format!("no location header in redirect response."), - ).with_operation(Operation::Read) + ) + .with_operation(Operation::Read), )?; - debug!("received statue code 302/307, will redirect request, url: {}", redirected_url); + debug!( + "received statue code 302/307, will redirect request, url: {}", + redirected_url + ); // first the url in location should be valid let redirected_url = Url::parse(redirected_url).map_err(|e| { Error::new( ErrorKind::Unexpected, &format!("redirected url({redirected_url}) is not valid."), - ).with_operation(Operation::Read).set_source(e) + ) + .with_operation(Operation::Read) + .set_source(e) })?; // basic security check, the redirected url should have the same origin with original url @@ -512,15 +528,18 @@ impl WebdavBackend { let path = redirected_url.path(); // url escape decode to avoid special case let path = percent_decode_str(path) - .decode_utf8().unwrap_or(Cow::from(path)) + .decode_utf8() + .unwrap_or(Cow::from(path)) .into_owned(); - return self.read( - // if root is the prefix of path, then remove it - // this is for the case that redirect only change the origin of url - path.strip_prefix(&self.root).unwrap_or(&path), - args, - Some(redirected_url.origin().unicode_serialization()), - ).await; + return self + .read( + // if root is the prefix of path, then remove it + // this is for the case that redirect only change the origin of url + path.strip_prefix(&self.root).unwrap_or(&path), + args, + Some(redirected_url.origin().unicode_serialization()), + ) + .await; } _ => Err(parse_error(resp).await?), } From e14820de9dcd1ec6ffe414a78727f52440a5fc28 Mon Sep 17 00:00:00 2001 From: yansong Date: Fri, 12 May 2023 00:19:56 +0800 Subject: [PATCH 08/41] chores: add integration test for redirect --- .github/workflows/service_test_webdav.yml | 10 ++++++++++ core/src/services/webdav/fixtures/nginx.conf | 17 ++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index a67d2c7d161f..c22f425be435 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -68,6 +68,16 @@ jobs: OPENDAL_WEBDAV_TEST: on OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8080 + - name: Test with redirect + shell: bash + working-directory: core + run: cargo test webdav -- --show-output + env: + RUST_BACKTRACE: full + RUST_LOG: debug + OPENDAL_WEBDAV_TEST: on + OPENDAL_WEBDAV_ENDPOINT: http://localhost:8081 + nginx_with_password: runs-on: ${{ matrix.os }} strategy: diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index 456fdf59af36..b776c7509369 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -8,8 +8,23 @@ events { } http { + # the following configuration is for redirect test server { - listen 127.0.0.1:8080; + listen 8081; + server_name localhost; + + location / { + if ($request_method = GET) { + return 302 http://$server_name:8080$request_uri; + } + client_max_body_size 1024M; + # forward all other requests to port 8081 + proxy_pass http://localhost:8080; + } + } + + server { + listen 8080; server_name localhost; access_log /tmp/access.log; root /tmp/static; From b77233f98f4e653cdda3c36596a841d2aae29f0f Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Fri, 12 May 2023 17:47:01 +0800 Subject: [PATCH 09/41] fix: nginx config log file for new virtual server --- core/src/services/webdav/fixtures/nginx.conf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index b776c7509369..8537765f64da 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -10,8 +10,10 @@ events { http { # the following configuration is for redirect test server { - listen 8081; + listen 8081; server_name localhost; + access_log /tmp/forward-access.log; + error_log /tmp/forward-error.log; location / { if ($request_method = GET) { From 12b213f65f9d1f69fe3f5ba7f9ee96fb7694e076 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Fri, 12 May 2023 18:02:08 +0800 Subject: [PATCH 10/41] fix: use loopback nic to listen on --- .github/workflows/service_test_webdav.yml | 2 +- core/src/services/webdav/fixtures/nginx.conf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index c22f425be435..980ed043e8e8 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -76,7 +76,7 @@ jobs: RUST_BACKTRACE: full RUST_LOG: debug OPENDAL_WEBDAV_TEST: on - OPENDAL_WEBDAV_ENDPOINT: http://localhost:8081 + OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8081 nginx_with_password: runs-on: ${{ matrix.os }} diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index 8537765f64da..8df9c407f6c1 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -10,7 +10,7 @@ events { http { # the following configuration is for redirect test server { - listen 8081; + listen 127.0.0.1:8081; server_name localhost; access_log /tmp/forward-access.log; error_log /tmp/forward-error.log; @@ -26,7 +26,7 @@ http { } server { - listen 8080; + listen 127.0.0.1:8080; server_name localhost; access_log /tmp/access.log; root /tmp/static; From 182e7a12b09f846a7d68377b297bafce080f093a Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Fri, 12 May 2023 18:33:57 +0800 Subject: [PATCH 11/41] fix: proxy forward address --- core/src/services/webdav/fixtures/nginx.conf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index 8df9c407f6c1..0a44adc19ab7 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -1,4 +1,3 @@ -load_module /usr/lib/nginx/modules/ngx_http_dav_ext_module.so; error_log /tmp/error.log; pid /tmp/nginx.pid; @@ -21,7 +20,7 @@ http { } client_max_body_size 1024M; # forward all other requests to port 8081 - proxy_pass http://localhost:8080; + proxy_pass http://127.0.0.1:8080; } } From f00018a1d7fd010c814891a29b348d01261fef39 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Fri, 12 May 2023 18:37:50 +0800 Subject: [PATCH 12/41] fix: add module of webdav of nginx --- core/src/services/webdav/fixtures/nginx.conf | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index 0a44adc19ab7..b3524334f373 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -1,3 +1,4 @@ +load_module /usr/lib/nginx/modules/ngx_http_dav_ext_module.so; error_log /tmp/error.log; pid /tmp/nginx.pid; From 2ff9a3693fa3d98a49dd78109322daeb85c7aa30 Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 00:39:15 +0800 Subject: [PATCH 13/41] fix: permission error during test --- .github/workflows/service_test_webdav.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 980ed043e8e8..d28fe8b8a205 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -56,6 +56,7 @@ jobs: working-directory: core run: | mkdir -p /tmp/static + chmod a+rw static/ # make nginx worker has permission to operate it nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test From cf03cee4ff3e60a935b891abafde787aa04b969b Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 00:40:43 +0800 Subject: [PATCH 14/41] fix: permission error during test --- .github/workflows/service_test_webdav.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index d28fe8b8a205..ea2d7a99fba3 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -56,7 +56,7 @@ jobs: working-directory: core run: | mkdir -p /tmp/static - chmod a+rw static/ # make nginx worker has permission to operate it + chmod a+rw /tmp/static/ # make nginx worker has permission to operate it nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test From 60e1579b916fd1b33675c22dd330882f583a7aee Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 01:00:47 +0800 Subject: [PATCH 15/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index ea2d7a99fba3..954814cdf391 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -79,6 +79,11 @@ jobs: OPENDAL_WEBDAV_TEST: on OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8081 + - name: print error log + shell: bash + working-directory: core + run: cat /tmp/error.log + nginx_with_password: runs-on: ${{ matrix.os }} strategy: From 39229fc2f65e2d8b6dc63ef785a706534eb2eb3c Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 01:14:41 +0800 Subject: [PATCH 16/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 954814cdf391..34d0a155faae 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -72,18 +72,13 @@ jobs: - name: Test with redirect shell: bash working-directory: core - run: cargo test webdav -- --show-output + run: cargo test webdav -- --show-output || true; cat /tmp/error.log env: RUST_BACKTRACE: full RUST_LOG: debug OPENDAL_WEBDAV_TEST: on OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8081 - - name: print error log - shell: bash - working-directory: core - run: cat /tmp/error.log - nginx_with_password: runs-on: ${{ matrix.os }} strategy: From d2e781258162c2744713f4b6a3b4f22957d3ce6a Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 01:28:02 +0800 Subject: [PATCH 17/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 34d0a155faae..a2f49c2bbefe 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -72,7 +72,10 @@ jobs: - name: Test with redirect shell: bash working-directory: core - run: cargo test webdav -- --show-output || true; cat /tmp/error.log + run: | + rm -rf /tmp/static + chmod a+rw /tmp/static/ + cargo test webdav -- --show-output || true; cat /tmp/error.log env: RUST_BACKTRACE: full RUST_LOG: debug From 154e2814c25ecab038886615fc370bd8c70d251c Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 01:28:22 +0800 Subject: [PATCH 18/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index a2f49c2bbefe..659450f14f1a 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -74,6 +74,7 @@ jobs: working-directory: core run: | rm -rf /tmp/static + mkdir -p /tmp/static chmod a+rw /tmp/static/ cargo test webdav -- --show-output || true; cat /tmp/error.log env: From c71abc11a65f17da9953a1b9425689725ddc5c40 Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 01:44:19 +0800 Subject: [PATCH 19/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 659450f14f1a..d4a3c07f851a 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -76,7 +76,7 @@ jobs: rm -rf /tmp/static mkdir -p /tmp/static chmod a+rw /tmp/static/ - cargo test webdav -- --show-output || true; cat /tmp/error.log + cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log env: RUST_BACKTRACE: full RUST_LOG: debug From 54ab48db1af50f1d107682628c8b19c6adca2516 Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 02:05:07 +0800 Subject: [PATCH 20/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 47 +++++++++++++++-------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index d4a3c07f851a..2968e99cafbf 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -56,7 +56,6 @@ jobs: working-directory: core run: | mkdir -p /tmp/static - chmod a+rw /tmp/static/ # make nginx worker has permission to operate it nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test @@ -69,20 +68,6 @@ jobs: OPENDAL_WEBDAV_TEST: on OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8080 - - name: Test with redirect - shell: bash - working-directory: core - run: | - rm -rf /tmp/static - mkdir -p /tmp/static - chmod a+rw /tmp/static/ - cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log - env: - RUST_BACKTRACE: full - RUST_LOG: debug - OPENDAL_WEBDAV_TEST: on - OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8081 - nginx_with_password: runs-on: ${{ matrix.os }} strategy: @@ -127,3 +112,35 @@ jobs: OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8080 OPENDAL_WEBDAV_USERNAME: bar OPENDAL_WEBDAV_PASSWORD: bar + + nginx_with_redirect: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Setup Rust toolchain + uses: ./.github/actions/setup + + - name: Install nginx full for dav_ext modules + run: sudo apt install nginx-full + + - name: Start nginx + shell: bash + working-directory: core + run: | + mkdir -p /tmp/static + chmod a+rw /tmp/static/ # make nginx worker has permission to operate it + nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf + + - name: Test with redirect + shell: bash + working-directory: core + run: cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log + env: + RUST_BACKTRACE: full + RUST_LOG: debug + OPENDAL_WEBDAV_TEST: on + OPENDAL_WEBDAV_ENDPOINT: http://127.0.0.1:8081 From 96e34477eab51f4c57f2ed3348ff3f2dd4b8fc4d Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 22:06:12 +0800 Subject: [PATCH 21/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 2968e99cafbf..0674b21ca221 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -138,7 +138,12 @@ jobs: - name: Test with redirect shell: bash working-directory: core - run: cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log + run: | + cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log + echo "following is forward-access.log" + cat /tmp/forward-access.log + echo "following is forward-error.log" + cat /tmp/forward-error.log; env: RUST_BACKTRACE: full RUST_LOG: debug From e4c1c804aecf4207e9d046cae8c8bfa8ef777a14 Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 22:25:33 +0800 Subject: [PATCH 22/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 0674b21ca221..889637cf1fd3 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -133,6 +133,7 @@ jobs: run: | mkdir -p /tmp/static chmod a+rw /tmp/static/ # make nginx worker has permission to operate it + chmod a+rw /var/lib/nginx/body/ nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect From b3bb57d8fd4150ddeaf19e4123491a1c9df48e1e Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 22:35:30 +0800 Subject: [PATCH 23/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 1 - core/src/services/webdav/fixtures/nginx.conf | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 889637cf1fd3..0674b21ca221 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -133,7 +133,6 @@ jobs: run: | mkdir -p /tmp/static chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - chmod a+rw /var/lib/nginx/body/ nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index b3524334f373..5efd6927080e 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -1,5 +1,9 @@ load_module /usr/lib/nginx/modules/ngx_http_dav_ext_module.so; +# use root to avoid some unexpected issues +user root; +worker_processes auto; + error_log /tmp/error.log; pid /tmp/nginx.pid; From 24cb37cc01ae9fc9159e896eaa16514dfc1eb4da Mon Sep 17 00:00:00 2001 From: yansong Date: Sat, 13 May 2023 22:56:21 +0800 Subject: [PATCH 24/41] tmp: add log for nginx --- .github/workflows/service_test_webdav.yml | 1 + core/src/services/webdav/fixtures/nginx.conf | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 0674b21ca221..1334b14fc488 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -133,6 +133,7 @@ jobs: run: | mkdir -p /tmp/static chmod a+rw /tmp/static/ # make nginx worker has permission to operate it + sudo chmod a+rw /var/lib/ nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index 5efd6927080e..b3524334f373 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -1,9 +1,5 @@ load_module /usr/lib/nginx/modules/ngx_http_dav_ext_module.so; -# use root to avoid some unexpected issues -user root; -worker_processes auto; - error_log /tmp/error.log; pid /tmp/nginx.pid; From 6c303a33bbc80a4ab3f2c75188b314cff3f07cc0 Mon Sep 17 00:00:00 2001 From: yansong Date: Sun, 14 May 2023 03:10:59 +0800 Subject: [PATCH 25/41] fix: permission --- .github/workflows/service_test_webdav.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 1334b14fc488..657a59224433 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -132,8 +132,9 @@ jobs: working-directory: core run: | mkdir -p /tmp/static + mkdir -p /var/lib/nginx chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - sudo chmod a+rw /var/lib/ + sudo chmod a+rw /var/lib/nginx nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect From f9fa9031f5bfa846eed049e237117c66945c7387 Mon Sep 17 00:00:00 2001 From: yansong Date: Sun, 14 May 2023 22:31:31 +0800 Subject: [PATCH 26/41] debug: check permission staff --- .github/workflows/service_test_webdav.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 657a59224433..a602a0f62812 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -134,7 +134,7 @@ jobs: mkdir -p /tmp/static mkdir -p /var/lib/nginx chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - sudo chmod a+rw /var/lib/nginx + sudo chmod a+rw /var/lib/nginx/body nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect @@ -146,6 +146,10 @@ jobs: cat /tmp/forward-access.log echo "following is forward-error.log" cat /tmp/forward-error.log; + echo "following is checking perm of file" + ls -ld /var/lib/nginx/body + echo "check storage" + df -h /var/lib/nginx/ env: RUST_BACKTRACE: full RUST_LOG: debug From 6507ed6bc6fa3669488ca1fd25c0fde90d9b98aa Mon Sep 17 00:00:00 2001 From: yansong Date: Sun, 14 May 2023 22:40:54 +0800 Subject: [PATCH 27/41] debug: make /var/lib/nginx/body executable --- .github/workflows/service_test_webdav.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index a602a0f62812..44312bfdfa9f 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -134,7 +134,7 @@ jobs: mkdir -p /tmp/static mkdir -p /var/lib/nginx chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - sudo chmod a+rw /var/lib/nginx/body + sudo chmod 777 /var/lib/nginx/body nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect From 17486f6fcccb2ed1f5bc77dc697f71cf60086b0c Mon Sep 17 00:00:00 2001 From: yansongsongsong Date: Sun, 14 May 2023 22:47:29 +0800 Subject: [PATCH 28/41] fix: nginx redriect test --- .github/workflows/service_test_webdav.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index 44312bfdfa9f..cf2bd7596c7b 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -134,22 +134,14 @@ jobs: mkdir -p /tmp/static mkdir -p /var/lib/nginx chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - sudo chmod 777 /var/lib/nginx/body + sudo chmod 777 /var/lib/nginx/body # make nginx worker has permission to operate it nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect shell: bash working-directory: core run: | - cargo test services_webdav::write_test_read_full -- --show-output || true; cat /tmp/error.log - echo "following is forward-access.log" - cat /tmp/forward-access.log - echo "following is forward-error.log" - cat /tmp/forward-error.log; - echo "following is checking perm of file" - ls -ld /var/lib/nginx/body - echo "check storage" - df -h /var/lib/nginx/ + cargo test webdav -- --show-output env: RUST_BACKTRACE: full RUST_LOG: debug From f369bafacc502eb4e2bc70af38ef6781aca2155d Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 14 May 2023 23:38:55 +0800 Subject: [PATCH 29/41] refactor: refactor with maximum retry times --- core/src/services/webdav/backend.rs | 149 ++++++++++++++-------------- 1 file changed, 74 insertions(+), 75 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 00ae04f86a00..ae0fa729f121 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -20,7 +20,6 @@ use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; -use async_recursion::async_recursion; use async_trait::async_trait; use bytes::Buf; use http::header; @@ -303,7 +302,80 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - self.read(path, args, None).await + let mut remaining_retry_times = 3; + // if response indicates that we should redirect + // then modify this variable + let mut override_endpoint: Option = None; + // sometimes path will also changed according to redirect + // response + let mut override_path: String = path.to_string(); + + debug!("will read with maximum {} times to retry: path={}", &path, remaining_retry_times); + while remaining_retry_times > 0 { + debug!("remaining retry times: {}", remaining_retry_times); + let resp = self + .webdav_get(&override_path, args.range(), override_endpoint.clone()) + .await?; + let status = resp.status(); + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let meta = parse_into_metadata(path, resp.headers())?; + return Ok((RpRead::with_metadata(meta), resp.into_body())); + } + StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { + // if server returns redirect HTTP status, then redirect it + let redirected_url = parse_location(resp.headers())? + // no location means invalid redirect response + .ok_or( + Error::new( + ErrorKind::Unexpected, + &format!("no location header in redirect response."), + ) + .with_operation(Operation::Read), + )?; + debug!( + "received statue code 302/307, will redirect request, url: {}", + redirected_url + ); + + // first the url in location should be valid + let redirected_url = Url::parse(redirected_url).map_err(|e| { + Error::new( + ErrorKind::Unexpected, + &format!("redirected url({redirected_url}) is not valid."), + ) + .with_operation(Operation::Read) + .set_source(e) + })?; + + // basic security check, the redirected url should have the same origin with original url + // if not, it will not send request with auth + let path = redirected_url.path(); + // url escape decode to avoid special case + let path = percent_decode_str(path) + .decode_utf8() + .unwrap_or(Cow::from(path)) + .into_owned(); + // if root is the prefix of path, then remove it + // this is for the case that redirect only change the origin of url + override_path = path.strip_prefix(&self.root).unwrap_or(&path).to_string(); + override_endpoint = Some(redirected_url.origin().unicode_serialization()) + } + _ => return Err(parse_error(resp).await?), + } + + remaining_retry_times -= remaining_retry_times + } + return Err( + Error::new( + ErrorKind::Unexpected, + &format!( + "reach maximum retry times for requesting redirected endpoint({:?}), path({})", + override_endpoint, + override_path, + ), + ).with_operation(Operation::Read) + ); } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -472,79 +544,6 @@ impl WebdavBackend { self.client.send(req).await } - // read will send http request for dav access. - // sometimes we need to send request to different endpoint when server side - // indicate we need to redirect to another url. - #[async_recursion] - async fn read( - &self, - path: &str, - args: OpRead, - override_endpoint: Option, - ) -> Result<(RpRead, IncomingAsyncBody)> { - debug!( - "will read: path={}, override_endpoint={:?}", - &path, &override_endpoint - ); - let resp = self - .webdav_get(path, args.range(), override_endpoint) - .await?; - - let status = resp.status(); - - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - Ok((RpRead::with_metadata(meta), resp.into_body())) - } - StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { - // if server returns redirect HTTP status, then redirect it - let redirected_url = parse_location(resp.headers())? - // no location means invalid redirect response - .ok_or( - Error::new( - ErrorKind::Unexpected, - &format!("no location header in redirect response."), - ) - .with_operation(Operation::Read), - )?; - debug!( - "received statue code 302/307, will redirect request, url: {}", - redirected_url - ); - - // first the url in location should be valid - let redirected_url = Url::parse(redirected_url).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("redirected url({redirected_url}) is not valid."), - ) - .with_operation(Operation::Read) - .set_source(e) - })?; - - // basic security check, the redirected url should have the same origin with original url - // if not, it will not send request with auth - let path = redirected_url.path(); - // url escape decode to avoid special case - let path = percent_decode_str(path) - .decode_utf8() - .unwrap_or(Cow::from(path)) - .into_owned(); - return self - .read( - // if root is the prefix of path, then remove it - // this is for the case that redirect only change the origin of url - path.strip_prefix(&self.root).unwrap_or(&path), - args, - Some(redirected_url.origin().unicode_serialization()), - ) - .await; - } - _ => Err(parse_error(resp).await?), - } - } - pub async fn webdav_put( &self, abs_path: &str, From c465a450e8fea7fb9383857b9c146cd0dbb85254 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 14 May 2023 23:39:25 +0800 Subject: [PATCH 30/41] refactor: remove async recursive dep --- core/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index a1a4b12509c1..28be62b86e42 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -214,7 +214,6 @@ suppaftp = { version = "4.5", default-features = false, features = [ tokio = "1.27" tracing = { version = "0.1", optional = true } uuid = { version = "1", features = ["serde", "v4"] } -async-recursion = "1.0.4" [dev-dependencies] criterion = { version = "0.4", features = ["async", "async_tokio"] } From ecdf0fefda24a43f8701f244f2a8df9ec5363e4d Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 14 May 2023 23:44:22 +0800 Subject: [PATCH 31/41] chore: clippy fix --- Cargo.lock | 12 ------------ core/src/services/webdav/backend.rs | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff3768c2055c..8a986e386195 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -217,17 +217,6 @@ dependencies = [ "event-listener", ] -[[package]] -name = "async-recursion" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.12", -] - [[package]] name = "async-std" version = "1.12.0" @@ -2632,7 +2621,6 @@ version = "0.34.0" dependencies = [ "anyhow", "async-compat", - "async-recursion", "async-tls", "async-trait", "backon", diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index ae0fa729f121..24b9f51894f3 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -329,7 +329,7 @@ impl Accessor for WebdavBackend { .ok_or( Error::new( ErrorKind::Unexpected, - &format!("no location header in redirect response."), + "no location header in redirect response.", ) .with_operation(Operation::Read), )?; From ce7f9cf422789b536a1d7abcaab31b86366ed9bc Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 14 May 2023 23:45:35 +0800 Subject: [PATCH 32/41] chore: cargo fmt fix --- core/src/services/webdav/backend.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 24b9f51894f3..7aa651458cde 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -310,7 +310,10 @@ impl Accessor for WebdavBackend { // response let mut override_path: String = path.to_string(); - debug!("will read with maximum {} times to retry: path={}", &path, remaining_retry_times); + debug!( + "will read with maximum {} times to retry: path={}", + &path, remaining_retry_times + ); while remaining_retry_times > 0 { debug!("remaining retry times: {}", remaining_retry_times); let resp = self @@ -331,7 +334,7 @@ impl Accessor for WebdavBackend { ErrorKind::Unexpected, "no location header in redirect response.", ) - .with_operation(Operation::Read), + .with_operation(Operation::Read), )?; debug!( "received statue code 302/307, will redirect request, url: {}", @@ -344,8 +347,8 @@ impl Accessor for WebdavBackend { ErrorKind::Unexpected, &format!("redirected url({redirected_url}) is not valid."), ) - .with_operation(Operation::Read) - .set_source(e) + .with_operation(Operation::Read) + .set_source(e) })?; // basic security check, the redirected url should have the same origin with original url @@ -366,16 +369,14 @@ impl Accessor for WebdavBackend { remaining_retry_times -= remaining_retry_times } - return Err( - Error::new( - ErrorKind::Unexpected, - &format!( - "reach maximum retry times for requesting redirected endpoint({:?}), path({})", - override_endpoint, - override_path, - ), - ).with_operation(Operation::Read) - ); + return Err(Error::new( + ErrorKind::Unexpected, + &format!( + "reach maximum retry times for requesting redirected endpoint({:?}), path({})", + override_endpoint, override_path, + ), + ) + .with_operation(Operation::Read)); } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { From cfac25b13d213757ea489b11f1a994a23eb1ed91 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Mon, 15 May 2023 00:13:52 +0800 Subject: [PATCH 33/41] fix: retry step fix --- core/src/services/webdav/backend.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 7aa651458cde..293e878e1681 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -27,7 +27,7 @@ use http::HeaderMap; use http::Request; use http::Response; use http::StatusCode; -use log::debug; +use log::{debug, error}; use percent_encoding::percent_decode_str; use reqwest::Url; @@ -302,7 +302,7 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut remaining_retry_times = 3; + let mut remaining_retry_times = 10; // if response indicates that we should redirect // then modify this variable let mut override_endpoint: Option = None; @@ -367,7 +367,7 @@ impl Accessor for WebdavBackend { _ => return Err(parse_error(resp).await?), } - remaining_retry_times -= remaining_retry_times + remaining_retry_times -= 1 } return Err(Error::new( ErrorKind::Unexpected, From 2f2a96b16c3809b101270b628f613a12b9abbec3 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Mon, 15 May 2023 00:14:46 +0800 Subject: [PATCH 34/41] fix: remove useless dep --- core/src/services/webdav/backend.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 293e878e1681..27df29d2f35f 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -27,7 +27,7 @@ use http::HeaderMap; use http::Request; use http::Response; use http::StatusCode; -use log::{debug, error}; +use log::debug; use percent_encoding::percent_decode_str; use reqwest::Url; From 6687fc1e30e89ac6ad42077fed1d99acc5cf61f3 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Mon, 15 May 2023 00:24:26 +0800 Subject: [PATCH 35/41] fix: clippy fix and typos --- core/src/services/webdav/backend.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index 27df29d2f35f..fd00c0cd83ec 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -302,7 +302,7 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut remaining_retry_times = 10; + let mut remaining_retry_times = 3; // if response indicates that we should redirect // then modify this variable let mut override_endpoint: Option = None; @@ -329,15 +329,15 @@ impl Accessor for WebdavBackend { // if server returns redirect HTTP status, then redirect it let redirected_url = parse_location(resp.headers())? // no location means invalid redirect response - .ok_or( + .ok_or_else(|| { Error::new( ErrorKind::Unexpected, "no location header in redirect response.", ) - .with_operation(Operation::Read), - )?; + .with_operation(Operation::Read) + })?; debug!( - "received statue code 302/307, will redirect request, url: {}", + "received status code 302/307, will redirect request, url: {}", redirected_url ); @@ -357,7 +357,7 @@ impl Accessor for WebdavBackend { // url escape decode to avoid special case let path = percent_decode_str(path) .decode_utf8() - .unwrap_or(Cow::from(path)) + .unwrap_or_else(|_| Cow::from(path)) .into_owned(); // if root is the prefix of path, then remove it // this is for the case that redirect only change the origin of url @@ -521,7 +521,7 @@ impl WebdavBackend { let p = build_rooted_abs_path(&self.root, path); // user can give one new endpoint to override default endpoint // this case happens when receive redirect response from server - let endpoint = override_endpoint.unwrap_or(self.endpoint.clone()); + let endpoint = override_endpoint.unwrap_or_else(|| self.endpoint.clone()); // if the override endpoint differs from original endpoint // we will not send auth to server due to security issue. let send_auth = endpoint.eq(&self.endpoint); From 4cd0335c38a594cd8249d517694070e0ced3f74a Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Mon, 15 May 2023 11:16:18 +0800 Subject: [PATCH 36/41] fix: comment fixes --- core/src/services/webdav/fixtures/nginx.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/webdav/fixtures/nginx.conf b/core/src/services/webdav/fixtures/nginx.conf index b3524334f373..1ba753cfe12c 100644 --- a/core/src/services/webdav/fixtures/nginx.conf +++ b/core/src/services/webdav/fixtures/nginx.conf @@ -20,7 +20,7 @@ http { return 302 http://$server_name:8080$request_uri; } client_max_body_size 1024M; - # forward all other requests to port 8081 + # forward all other requests to port 8080 proxy_pass http://127.0.0.1:8080; } } From 0009b969da9fe72c02465669e305d8486868da68 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Thu, 18 May 2023 23:46:33 +0800 Subject: [PATCH 37/41] chores: comment style change --- .github/workflows/service_test_webdav.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/service_test_webdav.yml b/.github/workflows/service_test_webdav.yml index cf2bd7596c7b..384c086c83aa 100644 --- a/.github/workflows/service_test_webdav.yml +++ b/.github/workflows/service_test_webdav.yml @@ -133,8 +133,10 @@ jobs: run: | mkdir -p /tmp/static mkdir -p /var/lib/nginx - chmod a+rw /tmp/static/ # make nginx worker has permission to operate it - sudo chmod 777 /var/lib/nginx/body # make nginx worker has permission to operate it + # make nginx worker has permission to operate it + chmod a+rw /tmp/static/ + # make nginx worker has permission to operate it + sudo chmod 777 /var/lib/nginx/body nginx -c `pwd`/src/services/webdav/fixtures/nginx.conf - name: Test with redirect From 6fb6e1d83cd72e37fae582d6be0f4d596313172d Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Mon, 22 May 2023 21:43:54 +0800 Subject: [PATCH 38/41] refactor: revert redirect logic in webdav --- core/src/services/webdav/backend.rs | 94 +++-------------------------- 1 file changed, 10 insertions(+), 84 deletions(-) diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index fd00c0cd83ec..d2da001ae55c 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -302,81 +302,15 @@ impl Accessor for WebdavBackend { } async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let mut remaining_retry_times = 3; - // if response indicates that we should redirect - // then modify this variable - let mut override_endpoint: Option = None; - // sometimes path will also changed according to redirect - // response - let mut override_path: String = path.to_string(); - - debug!( - "will read with maximum {} times to retry: path={}", - &path, remaining_retry_times - ); - while remaining_retry_times > 0 { - debug!("remaining retry times: {}", remaining_retry_times); - let resp = self - .webdav_get(&override_path, args.range(), override_endpoint.clone()) - .await?; - let status = resp.status(); - match status { - StatusCode::OK | StatusCode::PARTIAL_CONTENT => { - let meta = parse_into_metadata(path, resp.headers())?; - return Ok((RpRead::with_metadata(meta), resp.into_body())); - } - StatusCode::FOUND | StatusCode::TEMPORARY_REDIRECT => { - // if server returns redirect HTTP status, then redirect it - let redirected_url = parse_location(resp.headers())? - // no location means invalid redirect response - .ok_or_else(|| { - Error::new( - ErrorKind::Unexpected, - "no location header in redirect response.", - ) - .with_operation(Operation::Read) - })?; - debug!( - "received status code 302/307, will redirect request, url: {}", - redirected_url - ); - - // first the url in location should be valid - let redirected_url = Url::parse(redirected_url).map_err(|e| { - Error::new( - ErrorKind::Unexpected, - &format!("redirected url({redirected_url}) is not valid."), - ) - .with_operation(Operation::Read) - .set_source(e) - })?; - - // basic security check, the redirected url should have the same origin with original url - // if not, it will not send request with auth - let path = redirected_url.path(); - // url escape decode to avoid special case - let path = percent_decode_str(path) - .decode_utf8() - .unwrap_or_else(|_| Cow::from(path)) - .into_owned(); - // if root is the prefix of path, then remove it - // this is for the case that redirect only change the origin of url - override_path = path.strip_prefix(&self.root).unwrap_or(&path).to_string(); - override_endpoint = Some(redirected_url.origin().unicode_serialization()) - } - _ => return Err(parse_error(resp).await?), + let resp = self.webdav_get(path, args.range()).await?; + let status = resp.status(); + match status { + StatusCode::OK | StatusCode::PARTIAL_CONTENT => { + let meta = parse_into_metadata(path, resp.headers())?; + Ok((RpRead::with_metadata(meta), resp.into_body())) } - - remaining_retry_times -= 1 + _ => Err(parse_error(resp).await?), } - return Err(Error::new( - ErrorKind::Unexpected, - &format!( - "reach maximum retry times for requesting redirected endpoint({:?}), path({})", - override_endpoint, override_path, - ), - ) - .with_operation(Operation::Read)); } async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { @@ -516,22 +450,14 @@ impl WebdavBackend { &self, path: &str, range: BytesRange, - override_endpoint: Option, ) -> Result> { let p = build_rooted_abs_path(&self.root, path); - // user can give one new endpoint to override default endpoint - // this case happens when receive redirect response from server - let endpoint = override_endpoint.unwrap_or_else(|| self.endpoint.clone()); - // if the override endpoint differs from original endpoint - // we will not send auth to server due to security issue. - let send_auth = endpoint.eq(&self.endpoint); - let url: String = format!("{}{}", endpoint, percent_encode_path(&p)); + let url: String = format!("{}{}", self.endpoint, percent_encode_path(&p)); let mut req = Request::get(&url); - match &self.authorization { - Some(auth) if send_auth => req = req.header(header::AUTHORIZATION, auth.clone()), - _ => (), + if let Some(auth) = &self.authorization { + req = req.header(header::AUTHORIZATION, auth.clone()) } if !range.is_full() { From e1c7e50a3dc2cb484bc484cadd8e97a65f40ba3f Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 28 May 2023 23:28:42 +0800 Subject: [PATCH 39/41] refactor: add redirect handling for http client --- Cargo.lock | 1 + core/Cargo.toml | 1 + core/src/raw/http_util/client.rs | 162 +++++++++++++++++++++++++++- core/src/services/webdav/backend.rs | 5 +- 4 files changed, 162 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a986e386195..24496e1e77b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2672,6 +2672,7 @@ dependencies = [ "tracing", "tracing-opentelemetry", "tracing-subscriber", + "url", "uuid", "wiremock", ] diff --git a/core/Cargo.toml b/core/Cargo.toml index 28be62b86e42..f75fbfe902f0 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -203,6 +203,7 @@ reqsign = { version = "0.10.1", default-features = false, optional = true } reqwest = { version = "0.11.13", features = [ "stream", ], default-features = false } +url = { version = "2.2" } # version should follow reqwest rocksdb = { version = "0.20.1", default-features = false, optional = true } serde = { version = "1", features = ["derive"] } serde_json = "1" diff --git a/core/src/raw/http_util/client.rs b/core/src/raw/http_util/client.rs index c6cb38a468a4..af2cb225480c 100644 --- a/core/src/raw/http_util/client.rs +++ b/core/src/raw/http_util/client.rs @@ -21,16 +21,18 @@ use std::mem; use std::str::FromStr; use futures::TryStreamExt; -use http::Request; +use http::{Request, StatusCode, header}; use http::Response; use reqwest::redirect::Policy; -use reqwest::Url; +use url::{Url, ParseError}; +use log::debug; use super::body::IncomingAsyncBody; use super::parse_content_length; use super::AsyncBody; use crate::Error; use crate::ErrorKind; +use crate::raw::parse_location; use crate::Result; /// HttpClient that used across opendal. @@ -64,7 +66,7 @@ impl HttpClient { builder = builder.redirect(Policy::none()); #[cfg(feature = "trust-dns")] - let builder = builder.trust_dns(true); + let builder = builder.trust_dns(true); Ok(Self { client: builder.build().map_err(|err| { @@ -153,4 +155,158 @@ impl HttpClient { Ok(resp) } + + /// Send a request in async way with handling redirection logic. + /// Now we only support redirect GET request. + /// # Arguments + /// * `times` - how many times do you want to send request when we need to handle redirection + pub async fn send_with_redirect(&self, req: Request, times: usize) -> Result> { + if req.method() != http::Method::GET { + // for now we only handle redirection for GET request + // and please note that we don't support stream request either. + return Err( + Error::new(ErrorKind::Unsupported, "redirect for unsupported HTTP method") + .with_operation("http_util::Client::send_with_redirect_async") + .with_context("method", req.method().as_str()) + ); + } + + let mut prev_req = self.clone_request(&req); + let mut prev_resp = self.send(req).await?; + let mut retries = 0; + + let resp = loop { + let status = prev_resp.status(); + // for now we only handle 302/308 for 3xx status + // notice that our redirect logic may not follow the HTTP standard + let should_redirect = match status { + StatusCode::FOUND => { + // theoretically we need to handle following status also: + // - StatusCode::MOVED_PERMANENTLY + // - StatusCode::SEE_OTHER + let mut new_req = self.clone_request(&prev_req); + for header in &[ + header::TRANSFER_ENCODING, + header::CONTENT_ENCODING, + header::CONTENT_TYPE, + header::CONTENT_LENGTH, + ] { + new_req.headers_mut().remove(header); + } + // see https://www.rfc-editor.org/rfc/rfc9110.html#section-15.4.2 + // theoretically for 301, 302 and 303 should change + // original http method to GET except HEAD + // even though we only support GET request for now, + // just in case we support other HTTP method in the future + // add method modification logic here + match new_req.method() { + &http::Method::GET | &http::Method::HEAD => {} + _ => *new_req.method_mut() = http::Method::GET + } + Some(new_req) + } + // theoretically we need to handle following status also: + // - StatusCode::PERMANENT_REDIRECT + StatusCode::TEMPORARY_REDIRECT => Some(self.clone_request(&prev_req)), + _ => None + }; + + retries += 1; + if retries > times || should_redirect.is_none() { + // exceeds maximum retry times or no need to redirect request + // just return last response + debug!("no need to redirect or reach the maximum retry times"); + break prev_resp; + } + debug!("it is the {} time for http client to retry. maximum times: {}", retries, times); + + if let Some(mut redirect_req) = should_redirect { + let prev_url_str = redirect_req.uri().to_string(); + let prev_url = Url::parse(&prev_url_str).map_err(|e| { + Error::new(ErrorKind::Unexpected, "url is not valid") + .with_context("url", &prev_url_str) + .set_source(e) + })?; + + let loc = parse_location(prev_resp.headers())? + // no location means invalid redirect response + .ok_or_else(|| { + debug!("no location headers in response, url: {}, headers: {:?}", + &prev_url_str, &prev_resp.headers()); + Error::new( + ErrorKind::Unexpected, + "no location header in redirect response") + .with_context("method", redirect_req.method().as_str()) + .with_context("url", &prev_url_str) + })?; + + // one url with origin and path + let loc_url = Url::parse(loc).or_else(|err| { + match err { + ParseError::RelativeUrlWithoutBase => { + debug!("redirected location is relative url, will join it to original base url. loc: {}", loc); + let url = prev_url.clone().join(loc).map_err(|err| { + Error::new(ErrorKind::Unexpected, "invalid redirect base url and path") + .with_context("base", &prev_url_str) + .with_context("path", loc) + .set_source(err) + })?; + Ok(url) + } + err => { + Err( + Error::new(ErrorKind::Unexpected, "invalid location header") + .with_context("location", loc) + .set_source(err) + ) + } + } + })?; + + debug!("redirecting '{}' to '{}'", &prev_url_str, loc_url.as_str()); + self.remove_sensitive_headers(&mut redirect_req, &loc_url, &prev_url); + // change the request uri + *redirect_req.uri_mut() = loc_url.as_str().parse().map_err(|err| { + Error::new( + ErrorKind::Unexpected, + "new redirect url is invalid") + .with_context("loc", loc_url.as_str()) + .set_source(err) + })?; + prev_req = self.clone_request(&redirect_req); + prev_resp = self.send(redirect_req).await?; + } + }; + return Ok(resp); + } } + +impl HttpClient { + fn clone_request(&self, req: &Request) -> Request { + let (mut parts, body) = Request::new(match req.body() { + AsyncBody::Empty => AsyncBody::Empty, + AsyncBody::Bytes(bytes) => AsyncBody::Bytes(bytes.clone()) + }).into_parts(); + + // we just ignore extensions of request, because we won't use it + parts.method = req.method().clone(); + parts.uri = req.uri().clone(); + parts.version = req.version().clone(); + parts.headers = req.headers().clone(); + + Request::from_parts(parts, body) + } + + fn remove_sensitive_headers(&self, req: &mut Request, next: &Url, previous: &Url) { + let cross_host = next.host_str() != previous.host_str() + || next.port_or_known_default() != previous.port_or_known_default(); + if cross_host { + let headers = req.headers_mut(); + headers.remove(header::AUTHORIZATION); + headers.remove(header::COOKIE); + headers.remove("cookie2"); + headers.remove(header::PROXY_AUTHORIZATION); + headers.remove(header::WWW_AUTHENTICATE); + } + } +} \ No newline at end of file diff --git a/core/src/services/webdav/backend.rs b/core/src/services/webdav/backend.rs index d2da001ae55c..8e0040d5fee0 100644 --- a/core/src/services/webdav/backend.rs +++ b/core/src/services/webdav/backend.rs @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Formatter; @@ -28,8 +27,6 @@ use http::Request; use http::Response; use http::StatusCode; use log::debug; -use percent_encoding::percent_decode_str; -use reqwest::Url; use super::error::parse_error; use super::list_response::Multistatus; @@ -468,7 +465,7 @@ impl WebdavBackend { .body(AsyncBody::Empty) .map_err(new_request_build_error)?; - self.client.send(req).await + self.client.send_with_redirect(req, 5).await } pub async fn webdav_put( From 76385299863583183dc630568d7a1c69697c1839 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 28 May 2023 23:31:50 +0800 Subject: [PATCH 40/41] fix: cargo clippy --- core/src/raw/http_util/client.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/raw/http_util/client.rs b/core/src/raw/http_util/client.rs index af2cb225480c..8d6b51c712bd 100644 --- a/core/src/raw/http_util/client.rs +++ b/core/src/raw/http_util/client.rs @@ -277,7 +277,7 @@ impl HttpClient { prev_resp = self.send(redirect_req).await?; } }; - return Ok(resp); + Ok(resp) } } @@ -291,7 +291,7 @@ impl HttpClient { // we just ignore extensions of request, because we won't use it parts.method = req.method().clone(); parts.uri = req.uri().clone(); - parts.version = req.version().clone(); + parts.version = req.version(); parts.headers = req.headers().clone(); Request::from_parts(parts, body) From e8614435ebf34d54164e9616ed7b845f9ae36a70 Mon Sep 17 00:00:00 2001 From: Yansongsongsong Date: Sun, 28 May 2023 23:33:29 +0800 Subject: [PATCH 41/41] chore: fix format --- core/src/raw/http_util/client.rs | 61 +++++++++++++++++++------------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/core/src/raw/http_util/client.rs b/core/src/raw/http_util/client.rs index 8d6b51c712bd..80a998d02863 100644 --- a/core/src/raw/http_util/client.rs +++ b/core/src/raw/http_util/client.rs @@ -21,18 +21,18 @@ use std::mem; use std::str::FromStr; use futures::TryStreamExt; -use http::{Request, StatusCode, header}; use http::Response; -use reqwest::redirect::Policy; -use url::{Url, ParseError}; +use http::{header, Request, StatusCode}; use log::debug; +use reqwest::redirect::Policy; +use url::{ParseError, Url}; use super::body::IncomingAsyncBody; use super::parse_content_length; use super::AsyncBody; +use crate::raw::parse_location; use crate::Error; use crate::ErrorKind; -use crate::raw::parse_location; use crate::Result; /// HttpClient that used across opendal. @@ -66,7 +66,7 @@ impl HttpClient { builder = builder.redirect(Policy::none()); #[cfg(feature = "trust-dns")] - let builder = builder.trust_dns(true); + let builder = builder.trust_dns(true); Ok(Self { client: builder.build().map_err(|err| { @@ -160,15 +160,20 @@ impl HttpClient { /// Now we only support redirect GET request. /// # Arguments /// * `times` - how many times do you want to send request when we need to handle redirection - pub async fn send_with_redirect(&self, req: Request, times: usize) -> Result> { + pub async fn send_with_redirect( + &self, + req: Request, + times: usize, + ) -> Result> { if req.method() != http::Method::GET { // for now we only handle redirection for GET request // and please note that we don't support stream request either. - return Err( - Error::new(ErrorKind::Unsupported, "redirect for unsupported HTTP method") - .with_operation("http_util::Client::send_with_redirect_async") - .with_context("method", req.method().as_str()) - ); + return Err(Error::new( + ErrorKind::Unsupported, + "redirect for unsupported HTTP method", + ) + .with_operation("http_util::Client::send_with_redirect_async") + .with_context("method", req.method().as_str())); } let mut prev_req = self.clone_request(&req); @@ -201,14 +206,14 @@ impl HttpClient { // add method modification logic here match new_req.method() { &http::Method::GET | &http::Method::HEAD => {} - _ => *new_req.method_mut() = http::Method::GET + _ => *new_req.method_mut() = http::Method::GET, } Some(new_req) } // theoretically we need to handle following status also: // - StatusCode::PERMANENT_REDIRECT StatusCode::TEMPORARY_REDIRECT => Some(self.clone_request(&prev_req)), - _ => None + _ => None, }; retries += 1; @@ -218,7 +223,10 @@ impl HttpClient { debug!("no need to redirect or reach the maximum retry times"); break prev_resp; } - debug!("it is the {} time for http client to retry. maximum times: {}", retries, times); + debug!( + "it is the {} time for http client to retry. maximum times: {}", + retries, times + ); if let Some(mut redirect_req) = should_redirect { let prev_url_str = redirect_req.uri().to_string(); @@ -231,13 +239,17 @@ impl HttpClient { let loc = parse_location(prev_resp.headers())? // no location means invalid redirect response .ok_or_else(|| { - debug!("no location headers in response, url: {}, headers: {:?}", - &prev_url_str, &prev_resp.headers()); + debug!( + "no location headers in response, url: {}, headers: {:?}", + &prev_url_str, + &prev_resp.headers() + ); Error::new( ErrorKind::Unexpected, - "no location header in redirect response") - .with_context("method", redirect_req.method().as_str()) - .with_context("url", &prev_url_str) + "no location header in redirect response", + ) + .with_context("method", redirect_req.method().as_str()) + .with_context("url", &prev_url_str) })?; // one url with origin and path @@ -267,9 +279,7 @@ impl HttpClient { self.remove_sensitive_headers(&mut redirect_req, &loc_url, &prev_url); // change the request uri *redirect_req.uri_mut() = loc_url.as_str().parse().map_err(|err| { - Error::new( - ErrorKind::Unexpected, - "new redirect url is invalid") + Error::new(ErrorKind::Unexpected, "new redirect url is invalid") .with_context("loc", loc_url.as_str()) .set_source(err) })?; @@ -285,8 +295,9 @@ impl HttpClient { fn clone_request(&self, req: &Request) -> Request { let (mut parts, body) = Request::new(match req.body() { AsyncBody::Empty => AsyncBody::Empty, - AsyncBody::Bytes(bytes) => AsyncBody::Bytes(bytes.clone()) - }).into_parts(); + AsyncBody::Bytes(bytes) => AsyncBody::Bytes(bytes.clone()), + }) + .into_parts(); // we just ignore extensions of request, because we won't use it parts.method = req.method().clone(); @@ -309,4 +320,4 @@ impl HttpClient { headers.remove(header::WWW_AUTHENTICATE); } } -} \ No newline at end of file +}