# NOTE: the same patch also edits dvc/remote/__init__.py: it imports
# RemoteWEBDAV (from dvc.remote.webdav) and RemoteWEBDAVS (from
# dvc.remote.webdavs) and lists both in REMOTES directly after RemoteOSS.

# ---- dvc/remote/webdav.py (new file) ----
import os.path

from funcy import cached_property

from .http import RemoteHTTP
from dvc.scheme import Schemes
from dvc.path_info import HTTPURLInfo
from dvc.progress import Tqdm
from dvc.exceptions import HTTPError


class WebdavURLInfo(HTTPURLInfo):
    """URL info for ``webdav://`` and ``webdavs://`` addresses.

    The WebDAV schemes are rewritten to ``http`` / ``https`` whenever a
    concrete URL is produced.
    """

    @property
    def _http_scheme(self):
        """Return the scheme with ``webdav`` replaced by ``http``.

        ``webdav`` becomes ``http`` and ``webdavs`` becomes ``https``.
        """
        return self.scheme.replace("webdav", "http")

    @cached_property
    def url(self):
        """Return the full HTTP(S) URL, including params, query and fragment."""
        return "{}://{}{}{}{}{}".format(
            self._http_scheme,
            self.netloc,
            self._spath,
            (";" + self.params) if self.params else "",
            ("?" + self.query) if self.query else "",
            ("#" + self.fragment) if self.fragment else "",
        )

    def get_collections(self) -> list:
        """Return the URLs of every ancestor collection of this path.

        The list is ordered from the shallowest collection to the deepest
        and every URL ends with ``/``. The last path component (the resource
        itself) is not included, so a resource directly under the root
        yields an empty list.
        """
        # Drop the empty string before the leading "/" and the final
        # component, which names the resource rather than a collection.
        segments = self.path.split("/")[1:-1]
        base = "{}://{}".format(self._http_scheme, self.netloc)
        return [
            "{}/{}/".format(base, "/".join(segments[:depth]))
            for depth in range(1, len(segments) + 1)
        ]


class RemoteWEBDAV(RemoteHTTP):
    """HTTP remote that uploads through WebDAV (``PUT`` plus ``MKCOL``)."""

    scheme = Schemes.WEBDAV
    path_cls = WebdavURLInfo

    # A PUT that replaces an existing resource may legitimately be answered
    # with 204 No Content (RFC 7231, section 4.3.4), so treat it as success.
    _PUT_SUCCESS = (200, 201, 204)
    _MKCOL_SUCCESS = (200, 201)

    def _upload(self, from_file, to_info, name=None, no_progress_bar=False):
        """Upload ``from_file`` to ``to_info`` with a streamed ``PUT``.

        Missing parent collections are created before the upload starts.

        Args:
            from_file: path of the local file to send.
            to_info: destination; must provide ``url`` and
                ``get_collections()`` (see ``WebdavURLInfo``).
            name: progress-bar description; defaults to ``to_info.url``.
            no_progress_bar: disable the progress bar when true.

        Raises:
            HTTPError: if the server answers the ``PUT`` with a status
                outside ``_PUT_SUCCESS``.
        """

        def chunks():
            # Lazily yields the file in self.CHUNK_SIZE pieces; CHUNK_SIZE is
            # not defined in this file (presumably inherited from RemoteHTTP).
            with open(from_file, "rb") as fd:
                total = (
                    None if no_progress_bar else os.path.getsize(from_file)
                )
                with Tqdm.wrapattr(
                    fd,
                    "read",
                    total=total,
                    leave=False,
                    desc=to_info.url if name is None else name,
                    disable=no_progress_bar,
                ) as fd_wrapped:
                    while True:
                        chunk = fd_wrapped.read(self.CHUNK_SIZE)
                        if not chunk:
                            break
                        yield chunk

        self._create_collections(to_info)
        response = self._request("PUT", to_info.url, data=chunks())
        if response.status_code not in self._PUT_SUCCESS:
            raise HTTPError(response.status_code, response.reason)

    def _create_collections(self, to_info):
        """Create every ancestor collection of ``to_info`` that is missing.

        Ancestors are probed with ``HEAD`` from the deepest upwards; as soon
        as one is found, everything below it is created with ``MKCOL``.

        Raises:
            HTTPError: if a ``MKCOL`` request returns a status outside
                ``_MKCOL_SUCCESS``.
        """
        collections = to_info.get_collections()

        # Index of the shallowest collection that still has to be created.
        # It stays 0 when nothing exists yet, and also when there are no
        # ancestors at all (previously that case indexed an empty list).
        first_missing = 0
        for idx in range(len(collections), 0, -1):
            # Truthiness of the response decides existence; for a
            # requests.Response that is `.ok` -- presumably what `_request`
            # returns, confirm against RemoteHTTP.
            if self._request("HEAD", collections[idx - 1]):
                first_missing = idx
                break

        for url in collections[first_missing:]:
            response = self._request("MKCOL", url)
            # NOTE(review): a concurrent upload may create the same
            # collection between HEAD and MKCOL; the server would then
            # likely answer 405, which is still reported as an error here.
            if response.status_code not in self._MKCOL_SUCCESS:
                raise HTTPError(response.status_code, response.reason)

    def gc(self):
        """Not supported for WebDAV remotes."""
        raise NotImplementedError

    def list_cache_paths(self, prefix=None, progress_callback=None):
        """Not supported for WebDAV remotes."""
        raise NotImplementedError

    def walk_files(self, path_info):
        """Not supported for WebDAV remotes."""
        raise NotImplementedError


# ---- dvc/remote/webdavs.py (new file) ----
from .webdav import RemoteWEBDAV
from dvc.scheme import Schemes


class RemoteWEBDAVS(RemoteWEBDAV):
    """WebDAV remote registered under the ``webdavs`` scheme.

    Everything except ``scheme`` is inherited from ``RemoteWEBDAV``,
    including the ``gc``, ``list_cache_paths`` and ``walk_files`` methods
    that raise ``NotImplementedError``; they are therefore not repeated here.
    """

    scheme = Schemes.WEBDAVS


# NOTE: the same patch also edits dvc/scheme.py: class Schemes gains two
# constants after OSS:
#     WEBDAV = "webdav"
#     WEBDAVS = "webdavs"