From 12a0a7a3389a7718963e84dc8eaaf891cd01fc1a Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Sat, 18 Apr 2020 09:29:52 +0300 Subject: [PATCH 01/16] remote: add support for WebDAV Fixes #1153 --- dvc/remote/__init__.py | 4 ++++ dvc/remote/base.py | 8 +++---- dvc/remote/webdav.py | 53 ++++++++++++++++++++++++++++++++++++++++++ dvc/remote/webdavs.py | 6 +++++ dvc/scheme.py | 2 ++ 5 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 dvc/remote/webdav.py create mode 100644 dvc/remote/webdavs.py diff --git a/dvc/remote/__init__.py b/dvc/remote/__init__.py index 6c2de0a057..76c0047272 100644 --- a/dvc/remote/__init__.py +++ b/dvc/remote/__init__.py @@ -11,6 +11,8 @@ from dvc.remote.oss import RemoteOSS from dvc.remote.s3 import RemoteS3 from dvc.remote.ssh import RemoteSSH +from dvc.remote.webdav import RemoteWEBDAV +from dvc.remote.webdavs import RemoteWEBDAVS REMOTES = [ @@ -23,6 +25,8 @@ RemoteS3, RemoteSSH, RemoteOSS, + RemoteWEBDAV, + RemoteWEBDAVS, # NOTE: RemoteLOCAL is the default ] diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 93e22cdc2e..0384268354 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -552,8 +552,8 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False): if not hasattr(self, "_upload"): raise RemoteActionNotImplemented("upload", self.scheme) - if to_info.scheme != self.scheme: - raise NotImplementedError + # if to_info.scheme != self.scheme: + # raise NotImplementedError if from_info.scheme != "local": raise NotImplementedError @@ -588,8 +588,8 @@ def download( if not hasattr(self, "_download"): raise RemoteActionNotImplemented("download", self.scheme) - if from_info.scheme != self.scheme: - raise NotImplementedError + # if from_info.scheme != self.scheme: + # raise NotImplementedError if to_info.scheme == self.scheme != "local": self.copy(from_info, to_info) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py new file mode 100644 index 0000000000..54618c1285 --- /dev/null +++ b/dvc/remote/webdav.py @@ -0,0 +1,53 @@ +from .http import RemoteHTTP, ask_password +from dvc.scheme import Schemes + +import os.path + +from dvc.progress import Tqdm + + +class RemoteWEBDAV(RemoteHTTP): + scheme = Schemes.WEBDAV + + def __init__(self, repo, config): + super().__init__(repo, config) + + url = config.get("url") + if url: + self.path_info = self.path_cls(url) + self.path_info.scheme = self.path_info.scheme.replace( + "webdav", "http") + user = config.get("user", None) + if user: + self.path_info.user = user + else: + self.path_info = None + + self.auth = config.get("auth", None) + self.custom_auth_header = config.get("custom_auth_header", None) + self.password = config.get("password", None) + self.ask_password = config.get("ask_password", False) + self.headers = {} + + def _upload(self, from_file, to_info, name=None, no_progress_bar=False): + def chunks(): + with open(from_file, "rb") as fd: + with Tqdm.wrapattr( + fd, + "read", + total=None + if no_progress_bar + else os.path.getsize(from_file), + leave=False, + desc=to_info.url if name is None else name, + disable=no_progress_bar, + ) as fd_wrapped: + while True: + chunk = fd_wrapped.read(self.CHUNK_SIZE) + if not chunk: + break + yield chunk + + response = self._request("PUT", to_info.url, data=chunks()) + if response.status_code not in (200, 201): + raise HTTPError(response.status_code, response.reason) diff --git a/dvc/remote/webdavs.py b/dvc/remote/webdavs.py new file mode 100644 index 0000000000..ec4d934d5c --- /dev/null +++ b/dvc/remote/webdavs.py @@ -0,0 +1,6 @@ +from .webdav import RemoteWEBDAV +from dvc.scheme import Schemes + + +class RemoteWEBDAVS(RemoteWEBDAV): + scheme = Schemes.WEBDAVS diff --git a/dvc/scheme.py b/dvc/scheme.py index e64e24f5ac..76c6d7a497 100644 --- a/dvc/scheme.py +++ b/dvc/scheme.py @@ -9,3 +9,5 @@ class Schemes: GDRIVE = "gdrive" LOCAL = "local" OSS = "oss" + WEBDAV = "webdav" + WEBDAVS = "webdavs" From 9e8a97be73e2e89f9da0a02f4b879544f948aa51 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Sat, 18 Apr 2020 09:55:19 +0300 Subject: [PATCH 02/16] remote: WebDav Fix deepsource error --- dvc/remote/webdav.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 54618c1285..24162551dd 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,9 +1,10 @@ -from .http import RemoteHTTP, ask_password +from .http import RemoteHTTP from dvc.scheme import Schemes import os.path from dvc.progress import Tqdm +from dvc.exceptions import HTTPError class RemoteWEBDAV(RemoteHTTP): From 5dc4dd802cb60f123e46af976b7569bf2ee8183f Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 11:29:35 +0300 Subject: [PATCH 03/16] remote: WebDav Fix: base to the original state. Add WebdavURLInfo. Creating intermediate collections. --- dvc/remote/base.py | 8 ++--- dvc/remote/webdav.py | 74 ++++++++++++++++++++++++++++++-------------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 0384268354..93e22cdc2e 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -552,8 +552,8 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False): if not hasattr(self, "_upload"): raise RemoteActionNotImplemented("upload", self.scheme) - # if to_info.scheme != self.scheme: - # raise NotImplementedError + if to_info.scheme != self.scheme: + raise NotImplementedError if from_info.scheme != "local": raise NotImplementedError @@ -588,8 +588,8 @@ def download( if not hasattr(self, "_download"): raise RemoteActionNotImplemented("download", self.scheme) - # if from_info.scheme != self.scheme: - # raise NotImplementedError + if from_info.scheme != self.scheme: + raise NotImplementedError if to_info.scheme == self.scheme != "local": self.copy(from_info, to_info) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 24162551dd..41135246ed 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,34 +1,49 @@ -from .http import RemoteHTTP -from dvc.scheme import Schemes - +import copy import os.path +from funcy import cached_property + +from .http import RemoteHTTP +from dvc.scheme import Schemes +from dvc.path_info import HTTPURLInfo from dvc.progress import Tqdm from dvc.exceptions import HTTPError +class WebdavURLInfo(HTTPURLInfo): + def __init__(self, url): + super().__init__(url) + + @cached_property + def url(self): + return "{}://{}{}{}{}{}".format( + self.scheme.replace("webdav", "http"), + self.netloc, + self._spath, + (";" + self.params) if self.params else "", + ("?" + self.query) if self.query else "", + ("#" + self.fragment) if self.fragment else "", + ) + + def get_collections(self) -> list: + def pcol(path): + return "{}://{}{}".format( + self.scheme.replace("webdav", "http"), + self.netloc, + path, + ) + p = self.path.split("/")[1:-1] + if not p: + return [] + r = [] + for i in range(len(p)): + r.append(pcol("/{}/".format("/".join(p[:i + 1])))) + return r + + class RemoteWEBDAV(RemoteHTTP): scheme = Schemes.WEBDAV - - def __init__(self, repo, config): - super().__init__(repo, config) - - url = config.get("url") - if url: - self.path_info = self.path_cls(url) - self.path_info.scheme = self.path_info.scheme.replace( - "webdav", "http") - user = config.get("user", None) - if user: - self.path_info.user = user - else: - self.path_info = None - - self.auth = config.get("auth", None) - self.custom_auth_header = config.get("custom_auth_header", None) - self.password = config.get("password", None) - self.ask_password = config.get("ask_password", False) - self.headers = {} + path_cls = WebdavURLInfo def _upload(self, from_file, to_info, name=None, no_progress_bar=False): def chunks(): @@ -49,6 +64,19 @@ def chunks(): break yield chunk + self._create_collections(to_info) response = self._request("PUT", to_info.url, data=chunks()) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) + + def _create_collections(self, to_info): + url_cols = to_info.get_collections() + i = len(url_cols) + while i > 0: + if bool(self._request("HEAD", url_cols[i - 1])): + break + i -= 1 + for i in range(i, len(url_cols)): + response = self._request("MKCOL", url_cols[i]) + if response.status_code not in (200, 201): + raise HTTPError(response.status_code, response.reason) From 4d0d7bb7f49870b4c003b89cef29b6949f971345 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 11:57:33 +0300 Subject: [PATCH 04/16] remote: WebDav Fix build error --- dvc/remote/webdav.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 41135246ed..e21c56f421 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,4 +1,3 @@ -import copy import os.path from funcy import cached_property @@ -71,11 +70,9 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() - i = len(url_cols) - while i > 0: + for i in reversed(range(len(url_cols) + 1)): if bool(self._request("HEAD", url_cols[i - 1])): break - i -= 1 for i in range(i, len(url_cols)): response = self._request("MKCOL", url_cols[i]) if response.status_code not in (200, 201): From 441e9f4dc28f7d1e8063b8fa85338eb0483b7214 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 20:46:57 +0300 Subject: [PATCH 05/16] remote: WebDav Fix deepsource error --- dvc/remote/webdav.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index e21c56f421..4e87c256d4 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -70,10 +70,21 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() + from_i = 0 for i in reversed(range(len(url_cols) + 1)): + from_i = i if bool(self._request("HEAD", url_cols[i - 1])): break - for i in range(i, len(url_cols)): + for i in range(from_i, len(url_cols)): response = self._request("MKCOL", url_cols[i]) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) + + def gc(self): + raise NotImplementedError + + def list_cache_paths(self, prefix=None, progress_callback=None): + raise NotImplementedError + + def walk_files(self, path_info): + raise NotImplementedError From 74e8f53f79dddb7db24448bfab889603e6409ae0 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 20:49:02 +0300 Subject: [PATCH 06/16] remote: WebDav Fix deepseource error (webdavs) --- dvc/remote/webdavs.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/dvc/remote/webdavs.py b/dvc/remote/webdavs.py index ec4d934d5c..1302123340 100644 --- a/dvc/remote/webdavs.py +++ b/dvc/remote/webdavs.py @@ -4,3 +4,12 @@ class RemoteWEBDAVS(RemoteWEBDAV): scheme = Schemes.WEBDAVS + + def gc(self): + raise NotImplementedError + + def list_cache_paths(self, prefix=None, progress_callback=None): + raise NotImplementedError + + def walk_files(self, path_info): + raise NotImplementedError From a997811347baf17d004fa714ed749b5a41083ae7 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 21:06:53 +0300 Subject: [PATCH 07/16] remote: WebDav Fix codeclimat --- dvc/remote/webdav.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 4e87c256d4..e57d9015c3 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -70,13 +70,13 @@ def chunks(): def _create_collections(self, to_info): url_cols = to_info.get_collections() - from_i = 0 - for i in reversed(range(len(url_cols) + 1)): - from_i = i - if bool(self._request("HEAD", url_cols[i - 1])): + from_idx = 0 + for idx in reversed(range(len(url_cols) + 1)): + from_idx = idx + if bool(self._request("HEAD", url_cols[idx - 1])): break - for i in range(from_i, len(url_cols)): - response = self._request("MKCOL", url_cols[i]) + for idx in range(from_idx, len(url_cols)): + response = self._request("MKCOL", url_cols[idx]) if response.status_code not in (200, 201): raise HTTPError(response.status_code, response.reason) From a057c1a71940ddbebb788399c77530bba95b090b Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Mon, 20 Apr 2020 22:05:57 +0300 Subject: [PATCH 08/16] remote: WebDav Fix black code style --- dvc/remote/webdav.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index e57d9015c3..7348befea2 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -27,16 +27,15 @@ def url(self): def get_collections(self) -> list: def pcol(path): return "{}://{}{}".format( - self.scheme.replace("webdav", "http"), - self.netloc, - path, + self.scheme.replace("webdav", "http"), self.netloc, path, ) + p = self.path.split("/")[1:-1] if not p: return [] r = [] for i in range(len(p)): - r.append(pcol("/{}/".format("/".join(p[:i + 1])))) + r.append(pcol("/{}/".format("/".join(p[: i + 1])))) return r From 87b2d233812e2e1c7bb30863a871a05a3fba7047 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Tue, 21 Apr 2020 23:03:37 +0300 Subject: [PATCH 09/16] remote: WebDav Add tests --- dvc/path_info.py | 30 +++++++++++++++++++++++++++++ dvc/remote/webdav.py | 32 +------------------------------ tests/unit/remote/test_webdav.py | 19 ++++++++++++++++++ tests/unit/test_path_info.py | 33 ++++++++++++++++++++++++++++++++ tests/utils/httpd.py | 23 ++++++++++++++++++++++ 5 files changed, 106 insertions(+), 31 deletions(-) create mode 100644 tests/unit/remote/test_webdav.py diff --git a/dvc/path_info.py b/dvc/path_info.py index e502130aa8..ffdedfefe8 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -312,3 +312,33 @@ def __eq__(self, other): and self._path == other._path and self._extra_parts == other._extra_parts ) + + +class WebdavURLInfo(HTTPURLInfo): + def __init__(self, url): + super().__init__(url) + + @cached_property + def url(self): + return "{}://{}{}{}{}{}".format( + self.scheme.replace("webdav", "http"), + self.netloc, + self._spath, + (";" + self.params) if self.params else "", + ("?" + self.query) if self.query else "", + ("#" + self.fragment) if self.fragment else "", + ) + + def get_collections(self) -> list: + def pcol(path): + return "{}://{}{}".format( + self.scheme.replace("webdav", "http"), self.netloc, path, + ) + + p = self.path.split("/")[1:-1] + if not p: + return [] + r = [] + for i in range(len(p)): + r.append(pcol("/{}/".format("/".join(p[: i + 1])))) + return r diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 7348befea2..b1709f925b 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -4,39 +4,9 @@ from .http import RemoteHTTP from dvc.scheme import Schemes -from dvc.path_info import HTTPURLInfo from dvc.progress import Tqdm from dvc.exceptions import HTTPError - - -class WebdavURLInfo(HTTPURLInfo): - def __init__(self, url): - super().__init__(url) - - @cached_property - def url(self): - return "{}://{}{}{}{}{}".format( - self.scheme.replace("webdav", "http"), - self.netloc, - self._spath, - (";" + self.params) if self.params else "", - ("?" + self.query) if self.query else "", - ("#" + self.fragment) if self.fragment else "", - ) - - def get_collections(self) -> list: - def pcol(path): - return "{}://{}{}".format( - self.scheme.replace("webdav", "http"), self.netloc, path, - ) - - p = self.path.split("/")[1:-1] - if not p: - return [] - r = [] - for i in range(len(p)): - r.append(pcol("/{}/".format("/".join(p[: i + 1])))) - return r +from dvc.path_info import WebdavURLInfo class RemoteWEBDAV(RemoteHTTP): diff --git a/tests/unit/remote/test_webdav.py b/tests/unit/remote/test_webdav.py new file mode 100644 index 0000000000..bfbbdfb44e --- /dev/null +++ b/tests/unit/remote/test_webdav.py @@ -0,0 +1,19 @@ +import pytest + +from dvc.exceptions import HTTPError +from dvc.path_info import WebdavURLInfo +from dvc.remote.webdav import RemoteWEBDAV +from tests.utils.httpd import StaticFileServer, WebDavSimpleHandler + + +def test_create_collections(dvc): + with StaticFileServer(handler_class=WebDavSimpleHandler) as httpd: + url = "webdav://localhost:{}/a/b/file.txt".format(httpd.server_port) + config = {"url": url} + + remote = RemoteWEBDAV(dvc, config) + + remote._create_collections(WebdavURLInfo(url)) + + with pytest.raises(HTTPError): + remote._create_collections(WebdavURLInfo(url + "/check")) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index 0b202fa124..f6c8be2056 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -7,6 +7,7 @@ from dvc.path_info import HTTPURLInfo from dvc.path_info import PathInfo from dvc.path_info import URLInfo +from dvc.path_info import WebdavURLInfo TEST_DEPTH = len(pathlib.Path(__file__).parents) + 1 @@ -89,3 +90,35 @@ def test_https_url_info_str(): def test_path_info_as_posix(mocker, path, as_posix, osname): mocker.patch("os.name", osname) assert PathInfo(path).as_posix() == as_posix + + +@pytest.mark.parametrize("cls", [WebdavURLInfo]) +def test_webdav_url_info_str(cls): + u1 = cls("webdav://test.com/t1") + u2 = cls("webdavs://test.com/t1") + assert u1.url == "http://test.com/t1" + assert u2.url == "https://test.com/t1" + + +@pytest.mark.parametrize("cls", [WebdavURLInfo]) +def test_webdav_collections_path(cls): + u = cls("webdav://test.com/t1") + assert u.get_collections() == [] + + u = cls("webdav://test.com/") + assert u.get_collections() == [] + + u = cls("webdav://test.com") + assert u.get_collections() == [] + + u = cls("webdav://test.com/t1/") + assert u.get_collections() == ["http://test.com/t1/"] + + u = cls("webdav://test.com/t1/check") + assert u.get_collections() == ["http://test.com/t1/"] + + u = cls("webdav://test.com/t1/t2/check") + assert u.get_collections() == [ + "http://test.com/t1/", + "http://test.com/t1/t2/", + ] diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py index 378bb75b3f..7b548228d5 100644 --- a/tests/utils/httpd.py +++ b/tests/utils/httpd.py @@ -65,6 +65,29 @@ def do_POST(self): self.end_headers() +class WebDavSimpleHandler(SimpleHTTPRequestHandler): + """ + paths: + /a/ - exists + other - not exists + created path: "/a/b/" + """ + + def do_HEAD(self): + if self.path in ["/a/"]: + self.send_response(HTTPStatus.OK) + else: + self.send_response(HTTPStatus.BAD_REQUEST) + self.end_headers() + + def do_MKCOL(self): + if self.path in ["/a/b/"]: + self.send_response(HTTPStatus.CREATED) + else: + self.send_response(HTTPStatus.BAD_REQUEST) + self.end_headers() + + class StaticFileServer: _lock = threading.Lock() From 0908005154f8aea7ceacda0e31b898ac4ca7f4bc Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Tue, 21 Apr 2020 23:07:09 +0300 Subject: [PATCH 10/16] remote: WebDav Fix deepsource --- dvc/remote/webdav.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index b1709f925b..02199d911c 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -1,7 +1,5 @@ import os.path -from funcy import cached_property - from .http import RemoteHTTP from dvc.scheme import Schemes from dvc.progress import Tqdm From df3dd7bfcb343c9fa8e6908d37adbf587d42f758 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Wed, 22 Apr 2020 08:31:19 +0300 Subject: [PATCH 11/16] remote: WebDav Fix tests - delete parametrization --- tests/unit/test_path_info.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index f6c8be2056..95e4a9fb70 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -92,32 +92,30 @@ def test_path_info_as_posix(mocker, path, as_posix, osname): assert PathInfo(path).as_posix() == as_posix -@pytest.mark.parametrize("cls", [WebdavURLInfo]) -def test_webdav_url_info_str(cls): - u1 = cls("webdav://test.com/t1") - u2 = cls("webdavs://test.com/t1") +def test_webdav_url_info_str(): + u1 = WebdavURLInfo("webdav://test.com/t1") + u2 = WebdavURLInfo("webdavs://test.com/t1") assert u1.url == "http://test.com/t1" assert u2.url == "https://test.com/t1" -@pytest.mark.parametrize("cls", [WebdavURLInfo]) -def test_webdav_collections_path(cls): - u = cls("webdav://test.com/t1") +def test_webdav_collections_path(): + u = WebdavURLInfo("webdav://test.com/t1") assert u.get_collections() == [] - u = cls("webdav://test.com/") + u = WebdavURLInfo("webdav://test.com/") assert u.get_collections() == [] - u = cls("webdav://test.com") + u = WebdavURLInfo("webdav://test.com") assert u.get_collections() == [] - u = cls("webdav://test.com/t1/") + u = WebdavURLInfo("webdav://test.com/t1/") assert u.get_collections() == ["http://test.com/t1/"] - u = cls("webdav://test.com/t1/check") + u = WebdavURLInfo("webdav://test.com/t1/check") assert u.get_collections() == ["http://test.com/t1/"] - u = cls("webdav://test.com/t1/t2/check") + u = WebdavURLInfo("webdav://test.com/t1/t2/check") assert u.get_collections() == [ "http://test.com/t1/", "http://test.com/t1/t2/", From 4110f0ef43bae02935d46890b454786c0fb4d33c Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Thu, 23 Apr 2020 17:17:12 +0300 Subject: [PATCH 12/16] remote: WebDav miss, add scheme in config file --- dvc/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dvc/config.py b/dvc/config.py index 1fc63acbb3..9cc7e80f71 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -184,6 +184,8 @@ class RelPath(str): }, "http": {**HTTP_COMMON, **REMOTE_COMMON}, "https": {**HTTP_COMMON, **REMOTE_COMMON}, + "webdav": {**HTTP_COMMON, **REMOTE_COMMON}, + "webdavs": {**HTTP_COMMON, **REMOTE_COMMON}, "remote": {str: object}, # Any of the above options are valid } ) From 52e8d5af1a23f1bd450fe798cc91794988cf9c12 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Thu, 23 Apr 2020 23:36:15 +0300 Subject: [PATCH 13/16] remote: WebDav If MKCOL returned an error and the path already exists - continue --- dvc/remote/webdav.py | 3 +++ tests/unit/remote/test_webdav.py | 9 +++++---- tests/utils/httpd.py | 17 ++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 02199d911c..33f397d07b 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -10,6 +10,7 @@ class RemoteWEBDAV(RemoteHTTP): scheme = Schemes.WEBDAV path_cls = WebdavURLInfo + REQUEST_TIMEOUT = 20 def _upload(self, from_file, to_info, name=None, no_progress_bar=False): def chunks(): @@ -45,6 +46,8 @@ def _create_collections(self, to_info): for idx in range(from_idx, len(url_cols)): response = self._request("MKCOL", url_cols[idx]) if response.status_code not in (200, 201): + if bool(self._request("HEAD", url_cols[idx])): + continue raise HTTPError(response.status_code, response.reason) def gc(self): diff --git a/tests/unit/remote/test_webdav.py b/tests/unit/remote/test_webdav.py index bfbbdfb44e..539ae7b406 100644 --- a/tests/unit/remote/test_webdav.py +++ b/tests/unit/remote/test_webdav.py @@ -8,12 +8,13 @@ def test_create_collections(dvc): with StaticFileServer(handler_class=WebDavSimpleHandler) as httpd: - url = "webdav://localhost:{}/a/b/file.txt".format(httpd.server_port) - config = {"url": url} + url0 = "webdav://localhost:{}/a/b/file.txt".format(httpd.server_port) + url1 = "webdav://localhost:{}/a/c/file.txt".format(httpd.server_port) + config = {"url": url0} remote = RemoteWEBDAV(dvc, config) - remote._create_collections(WebdavURLInfo(url)) + remote._create_collections(WebdavURLInfo(url0)) with pytest.raises(HTTPError): - remote._create_collections(WebdavURLInfo(url + "/check")) + remote._create_collections(WebdavURLInfo(url1)) diff --git a/tests/utils/httpd.py b/tests/utils/httpd.py index 7b548228d5..f0a48b6f13 100644 --- a/tests/utils/httpd.py +++ b/tests/utils/httpd.py @@ -66,23 +66,22 @@ def do_POST(self): class WebDavSimpleHandler(SimpleHTTPRequestHandler): - """ - paths: - /a/ - exists - other - not exists - created path: "/a/b/" - """ - def do_HEAD(self): - if self.path in ["/a/"]: + if self.path == "/a/": + self.send_response(HTTPStatus.OK) + elif self.path == "/a/b/": self.send_response(HTTPStatus.OK) + elif self.path == "/a/c/": + self.send_response(HTTPStatus.BAD_REQUEST) else: self.send_response(HTTPStatus.BAD_REQUEST) self.end_headers() def do_MKCOL(self): - if self.path in ["/a/b/"]: + if self.path == "/a/b/": self.send_response(HTTPStatus.CREATED) + elif self.path == "/a/c/": + self.send_response(HTTPStatus.BAD_REQUEST) else: self.send_response(HTTPStatus.BAD_REQUEST) self.end_headers() From 3411f345f0b5923cc6c4aa0ae99198e3dbe94d41 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Sun, 26 Apr 2020 18:34:24 +0300 Subject: [PATCH 14/16] remote WebDav: add to DEFAULT_PORTS --- dvc/path_info.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index ffdedfefe8..9d906c0302 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -104,7 +104,14 @@ def __repr__(self): class URLInfo(_BasePath): - DEFAULT_PORTS = {"http": 80, "https": 443, "ssh": 22, "hdfs": 0} + DEFAULT_PORTS = { + "http": 80, + "https": 443, + "ssh": 22, + "hdfs": 0, + "webdav": 80, + "webdavs": 443, + } def __init__(self, url): p = urlparse(url) From 0d3424e223e5b432a737d710480d51d2eb31c872 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Wed, 29 Apr 2020 19:00:44 +0300 Subject: [PATCH 15/16] remote WebDav: add gc --- dvc/remote/webdav.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index 33f397d07b..ba14e52508 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -50,8 +50,13 @@ def _create_collections(self, to_info): continue raise HTTPError(response.status_code, response.reason) + def remove(self, path_info): + response = self._request("DELETE", path_info.url) + if response.status_code not in (200, 201, 204): + raise HTTPError(response.status_code, response.reason) + def gc(self): - raise NotImplementedError + return super(RemoteHTTP, self).gc() def list_cache_paths(self, prefix=None, progress_callback=None): raise NotImplementedError From 64b51294bb3039ff8a2f1b4e817ac7bedfa46120 Mon Sep 17 00:00:00 2001 From: Alexey Matveev <> Date: Tue, 12 May 2020 21:51:16 +0300 Subject: [PATCH 16/16] remote WebDav: Drop method get_collection in favor parents --- dvc/path_info.py | 14 -------------- dvc/remote/webdav.py | 12 ++++++------ tests/unit/test_path_info.py | 23 ----------------------- 3 files changed, 6 insertions(+), 43 deletions(-) diff --git a/dvc/path_info.py b/dvc/path_info.py index 9d906c0302..d9c0d04f17 100644 --- a/dvc/path_info.py +++ b/dvc/path_info.py @@ -335,17 +335,3 @@ def url(self): ("?" + self.query) if self.query else "", ("#" + self.fragment) if self.fragment else "", ) - - def get_collections(self) -> list: - def pcol(path): - return "{}://{}{}".format( - self.scheme.replace("webdav", "http"), self.netloc, path, - ) - - p = self.path.split("/")[1:-1] - if not p: - return [] - r = [] - for i in range(len(p)): - r.append(pcol("/{}/".format("/".join(p[: i + 1])))) - return r diff --git a/dvc/remote/webdav.py b/dvc/remote/webdav.py index ba14e52508..15cc3a1ecc 100644 --- a/dvc/remote/webdav.py +++ b/dvc/remote/webdav.py @@ -37,16 +37,16 @@ def chunks(): raise HTTPError(response.status_code, response.reason) def _create_collections(self, to_info): - url_cols = to_info.get_collections() + url_cols = [x.url + "/" for x in to_info.parents][:-1] from_idx = 0 - for idx in reversed(range(len(url_cols) + 1)): + for idx, url in enumerate(url_cols): from_idx = idx - if bool(self._request("HEAD", url_cols[idx - 1])): + if bool(self._request("HEAD", url)): break - for idx in range(from_idx, len(url_cols)): - response = self._request("MKCOL", url_cols[idx]) + for url in reversed(url_cols[:from_idx]): + response = self._request("MKCOL", url) if response.status_code not in (200, 201): - if bool(self._request("HEAD", url_cols[idx])): + if bool(self._request("HEAD", url)): continue raise HTTPError(response.status_code, response.reason) diff --git a/tests/unit/test_path_info.py b/tests/unit/test_path_info.py index 95e4a9fb70..075bb823d3 100644 --- a/tests/unit/test_path_info.py +++ b/tests/unit/test_path_info.py @@ -97,26 +97,3 @@ def test_webdav_url_info_str(): u2 = WebdavURLInfo("webdavs://test.com/t1") assert u1.url == "http://test.com/t1" assert u2.url == "https://test.com/t1" - - -def test_webdav_collections_path(): - u = WebdavURLInfo("webdav://test.com/t1") - assert u.get_collections() == [] - - u = WebdavURLInfo("webdav://test.com/") - assert u.get_collections() == [] - - u = WebdavURLInfo("webdav://test.com") - assert u.get_collections() == [] - - u = WebdavURLInfo("webdav://test.com/t1/") - assert u.get_collections() == ["http://test.com/t1/"] - - u = WebdavURLInfo("webdav://test.com/t1/check") - assert u.get_collections() == ["http://test.com/t1/"] - - u = WebdavURLInfo("webdav://test.com/t1/t2/check") - assert u.get_collections() == [ - "http://test.com/t1/", - "http://test.com/t1/t2/", - ]