From 7214d541147d98a89f58ae46c41a69bdb7c4b0f6 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 29 Apr 2021 20:42:04 +0300 Subject: [PATCH 1/3] fs: don't use repo for jobs/hash_jobs --- dvc/fs/__init__.py | 11 +++++++++++ dvc/fs/base.py | 12 ++---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dvc/fs/__init__.py b/dvc/fs/__init__.py index bc543fee46..12e47e7987 100644 --- a/dvc/fs/__init__.py +++ b/dvc/fs/__init__.py @@ -87,4 +87,15 @@ def get_cloud_fs(repo, **kwargs): remote_conf = SCHEMA["remote"][str](remote_conf) except Invalid as exc: raise ConfigError(str(exc)) from None + + if "jobs" not in remote_conf: + jobs = repo.config["core"].get("jobs") + if jobs: + remote_conf["jobs"] = jobs + + if "checksum_jobs" not in remote_conf: + checksum_jobs = repo.config["core"].get("checksum_jobs") + if checksum_jobs: + remote_conf["checksum_jobs"] = checksum_jobs + return get_fs_cls(remote_conf)(repo, remote_conf) diff --git a/dvc/fs/base.py b/dvc/fs/base.py index a7c59aba4c..fe0500e2bb 100644 --- a/dvc/fs/base.py +++ b/dvc/fs/base.py @@ -65,19 +65,11 @@ def __init__(self, repo, config): @cached_property def jobs(self): - return ( - self.config.get("jobs") - or (self.repo and self.repo.config["core"].get("jobs")) - or self._JOBS - ) + return self.config.get("jobs") or self._JOBS @cached_property def hash_jobs(self): - return ( - self.config.get("checksum_jobs") - or (self.repo and self.repo.config["core"].get("checksum_jobs")) - or self.HASH_JOBS - ) + return self.config.get("checksum_jobs") or self.HASH_JOBS @classmethod def get_missing_deps(cls): From f39b8ba15f5b7df16993b7ebe6485064bdeb2659 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Thu, 29 Apr 2021 21:06:23 +0300 Subject: [PATCH 2/3] fs: local: exists: use fsspec's LocalFileSystem --- dvc/fs/local.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dvc/fs/local.py b/dvc/fs/local.py index 61eaafdae5..c0ec707fd2 100644 --- a/dvc/fs/local.py +++ b/dvc/fs/local.py @@ -21,7 +21,10 @@ class LocalFileSystem(BaseFileSystem): TRAVERSE_PREFIX_LEN = 2 def __init__(self, repo, config): + from fsspec.implementations.local import LocalFileSystem as LocalFS + super().__init__(repo, config) + self.fs = LocalFS() url = config.get("url") self.path_info = self.PATH_CLS(url) if url else None @@ -35,11 +38,7 @@ def open(path_info, mode="r", encoding=None, **kwargs): def exists(self, path_info) -> bool: assert isinstance(path_info, str) or path_info.scheme == "local" - if self.repo: - ret = os.path.lexists(path_info) - else: - ret = os.path.exists(path_info) - return ret + return self.fs.exists(path_info): def isfile(self, path_info) -> bool: return os.path.isfile(path_info) From 5ea6276fe6ba8fa3eb0c9cf43895f2ded2770a54 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 30 Apr 2021 00:35:38 +0300 Subject: [PATCH 3/3] fs: make constructor comply with fsspec --- dvc/checkout.py | 27 +++++++++-- dvc/config.py | 2 +- dvc/dependency/__init__.py | 3 +- dvc/dependency/repo.py | 9 +++- dvc/fs/__init__.py | 7 ++- dvc/fs/azure.py | 6 +-- dvc/fs/base.py | 24 +++------- dvc/fs/dvc.py | 5 +- dvc/fs/fsspec_wrapper.py | 10 ++-- dvc/fs/gdrive.py | 16 ++++--- dvc/fs/git.py | 2 +- dvc/fs/gs.py | 6 +-- dvc/fs/hdfs.py | 4 +- dvc/fs/http.py | 4 +- dvc/fs/local.py | 11 ++--- dvc/fs/memory.py | 4 +- dvc/fs/oss.py | 4 +- dvc/fs/repo.py | 17 +++---- dvc/fs/s3.py | 6 +-- dvc/fs/ssh/__init__.py | 4 +- dvc/fs/webdav.py | 4 +- dvc/fs/webhdfs.py | 4 +- dvc/objects/db/__init__.py | 14 +++--- dvc/objects/db/base.py | 16 +++---- dvc/objects/db/local.py | 8 ++-- dvc/objects/file.py | 2 +- dvc/objects/stage.py | 2 +- dvc/objects/tree.py | 2 +- dvc/output/__init__.py | 3 +- dvc/output/base.py | 7 ++- dvc/remote/__init__.py | 7 +-- dvc/remote/base.py | 12 ++--- dvc/remote/index.py | 4 +- dvc/remote/slow_link_detection.py | 4 +- dvc/repo/__init__.py | 8 ++-- dvc/repo/brancher.py | 2 +- dvc/state.py | 2 +- tests/func/test_fs.py | 26 ++++++---- tests/func/test_utils.py | 2 +- tests/remotes/gdrive.py | 4 +- tests/unit/fs/test_azure.py | 6 +-- tests/unit/fs/test_base.py | 2 +- tests/unit/fs/test_dvc.py | 32 ++++++------- tests/unit/fs/test_repo.py | 48 +++++++++---------- tests/unit/fs/test_s3.py | 33 ++++++------- tests/unit/fs/test_tree.py | 13 +++-- tests/unit/objects/db/test_local.py | 2 +- tests/unit/remote/ssh/test_ssh.py | 20 ++++---- tests/unit/remote/test_base.py | 10 ++-- tests/unit/remote/test_gdrive.py | 12 +++-- tests/unit/remote/test_http.py | 20 ++++---- tests/unit/remote/test_index.py | 2 +- tests/unit/remote/test_oss.py | 2 +- tests/unit/remote/test_remote.py | 14 ++++-- tests/unit/remote/test_remote_tree.py | 22 +++++---- tests/unit/remote/test_slow_link_detection.py | 5 +- tests/unit/remote/test_webdav.py | 8 ++-- tests/unit/remote/test_webhdfs.py | 2 +- tests/unit/test_context.py | 4 +- tests/unit/utils/test_fs.py | 4 +- tests/unit/utils/test_stream.py | 4 +- tests/unit/utils/test_utils.py | 2 +- 62 files changed, 307 insertions(+), 264 deletions(-) diff --git a/dvc/checkout.py b/dvc/checkout.py index 0af6de1a6c..96247c20ae 100644 --- a/dvc/checkout.py +++ b/dvc/checkout.py @@ -150,7 +150,14 @@ def _cache_is_copy(cache, path_info): def _checkout_file( - path_info, fs, obj, cache, force, progress_callback=None, relink=False, + path_info, + fs, + obj, + cache, + force, + progress_callback=None, + relink=False, + state=None, ): """The file is changed we need to checkout a new copy""" modified = False @@ -170,7 +177,9 @@ def _checkout_file( _link(cache, cache_info, path_info) modified = True - fs.repo.state.save(path_info, fs, obj.hash_info) + if state: + state.save(path_info, fs, obj.hash_info) + if progress_callback: progress_callback(str(path_info)) @@ -202,6 +211,7 @@ def _checkout_dir( progress_callback=None, relink=False, dvcignore: Optional[DvcIgnoreFilter] = None, + state=None, ): modified = False # Create dir separately so that dir is created @@ -221,6 +231,7 @@ def _checkout_dir( force, progress_callback, relink, + state=None, ) if entry_modified: modified = True @@ -232,7 +243,8 @@ def _checkout_dir( or modified ) - fs.repo.state.save(path_info, fs, obj.hash_info) + if state: + state.save(path_info, fs, obj.hash_info) # relink is not modified, assume it as nochange return modified and not relink @@ -247,10 +259,11 @@ def _checkout( progress_callback=None, relink=False, dvcignore: Optional[DvcIgnoreFilter] = None, + state=None, ): if not obj.hash_info.isdir: ret = _checkout_file( - path_info, fs, obj, cache, force, progress_callback, relink + path_info, fs, obj, cache, force, progress_callback, relink, state, ) else: ret = _checkout_dir( @@ -262,9 +275,11 @@ def _checkout( progress_callback, relink, dvcignore=dvcignore, + state=state, ) - fs.repo.state.save_link(path_info, fs) + if state: + state.save_link(path_info, fs) return ret @@ -279,6 +294,7 @@ def checkout( relink=False, quiet=False, dvcignore: Optional[DvcIgnoreFilter] = None, + state=None, ): if path_info.scheme not in ["local", cache.fs.scheme]: raise NotImplementedError @@ -330,4 +346,5 @@ def checkout( progress_callback, relink, dvcignore=dvcignore, + state=state, ) diff --git a/dvc/config.py b/dvc/config.py index 0ed1a44122..13a746b415 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -79,7 +79,7 @@ def __init__( else: self.dvc_dir = os.path.abspath(os.path.realpath(dvc_dir)) - self.wfs = LocalFileSystem(None, {"url": self.dvc_dir}) + self.wfs = LocalFileSystem(url=self.dvc_dir) self.fs = fs or self.wfs self.load(validate=validate, config=config) diff --git a/dvc/dependency/__init__.py b/dvc/dependency/__init__.py index 76c9573552..c03cd2d5ec 100644 --- a/dvc/dependency/__init__.py +++ b/dvc/dependency/__init__.py @@ -50,7 +50,8 @@ def _get(stage, p, info): parsed = urlparse(p) if p else None if parsed and parsed.scheme == "remote": - fs = get_cloud_fs(stage.repo, name=parsed.netloc) + cls, config = get_cloud_fs(stage.repo, name=parsed.netloc) + fs = cls(**config) return DEP_MAP[fs.scheme](stage, p, info, fs=fs) if info and info.get(RepoDependency.PARAM_REPO): diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index bdab8e6413..af88653275 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -98,7 +98,14 @@ def download(self, to, jobs=None): ) save(odb, obj, jobs=jobs) - checkout(to.path_info, to.fs, obj, odb, dvcignore=None) + checkout( + to.path_info, + to.fs, + obj, + odb, + dvcignore=None, + state=self.repo.state, + ) def update(self, rev=None): if rev: diff --git a/dvc/fs/__init__.py b/dvc/fs/__init__.py index 12e47e7987..97b606de9e 100644 --- a/dvc/fs/__init__.py +++ b/dvc/fs/__init__.py @@ -98,4 +98,9 @@ def get_cloud_fs(repo, **kwargs): if checksum_jobs: remote_conf["checksum_jobs"] = checksum_jobs - return get_fs_cls(remote_conf)(repo, remote_conf) + cls = get_fs_cls(remote_conf) + + if isinstance(cls, GDriveFileSystem): + remote_conf["gdrive_credentials_tmp_dir"] = repo.tmp_dir + + return cls, remote_conf diff --git a/dvc/fs/azure.py b/dvc/fs/azure.py index d9904e5073..c3ddb1078f 100644 --- a/dvc/fs/azure.py +++ b/dvc/fs/azure.py @@ -78,8 +78,8 @@ class AzureFileSystem(FSSpecWrapper): # pylint:disable=abstract-method "azure-identity": "azure.identity", } - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url") self.path_info = self.PATH_CLS(url) @@ -92,7 +92,7 @@ def __init__(self, repo, config): self.path_info = self.PATH_CLS(url) self.bucket = self.path_info.bucket - def _prepare_credentials(self, config): + def _prepare_credentials(self, **config): from azure.identity.aio import DefaultAzureCredential # Disable spam from failed cred types for DefaultAzureCredential diff --git a/dvc/fs/base.py b/dvc/fs/base.py index fe0500e2bb..632404725d 100644 --- a/dvc/fs/base.py +++ b/dvc/fs/base.py @@ -4,12 +4,9 @@ from multiprocessing import cpu_count from typing import Any, ClassVar, Dict, FrozenSet, Optional -from funcy import cached_property - from dvc.exceptions import DvcException from dvc.path_info import URLInfo from dvc.progress import Tqdm -from dvc.scheme import Schemes from dvc.utils import tmp_fname from dvc.utils.fs import makedirs, move from dvc.utils.http import open_url @@ -55,21 +52,13 @@ class BaseFileSystem: PARAM_CHECKSUM: ClassVar[Optional[str]] = None DETAIL_FIELDS: FrozenSet[str] = frozenset() - def __init__(self, repo, config): - self.repo = repo - self.config = config - - self._check_requires() + def __init__(self, **kwargs): + self._check_requires(**kwargs) self.path_info = None - @cached_property - def jobs(self): - return self.config.get("jobs") or self._JOBS - - @cached_property - def hash_jobs(self): - return self.config.get("checksum_jobs") or self.HASH_JOBS + self.jobs = kwargs.get("jobs") or self._JOBS + self.hash_jobs = kwargs.get("checksum_jobs") or self.HASH_JOBS @classmethod def get_missing_deps(cls): @@ -84,7 +73,8 @@ def get_missing_deps(cls): return missing - def _check_requires(self): + def _check_requires(self, **kwargs): + from ..scheme import Schemes from ..utils import format_link from ..utils.pkg import PKG @@ -92,7 +82,7 @@ def _check_requires(self): if not missing: return - url = self.config.get("url", f"{self.scheme}://") + url = kwargs.get("url", f"{self.scheme}://") scheme = self.scheme if scheme == Schemes.WEBDAVS: diff --git a/dvc/fs/dvc.py b/dvc/fs/dvc.py index a49701cc7f..b3b5b6a15a 100644 --- a/dvc/fs/dvc.py +++ b/dvc/fs/dvc.py @@ -26,8 +26,9 @@ class DvcFileSystem(BaseFileSystem): # pylint:disable=abstract-method scheme = "local" PARAM_CHECKSUM = "md5" - def __init__(self, repo): - super().__init__(repo, {"url": repo.root_dir}) + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.repo = kwargs["repo"] def _find_outs(self, path, *args, **kwargs): outs = self.repo.find_outs_by_path(path, *args, **kwargs) diff --git a/dvc/fs/fsspec_wrapper.py b/dvc/fs/fsspec_wrapper.py index 08f6f6be89..6e13c9c3fe 100644 --- a/dvc/fs/fsspec_wrapper.py +++ b/dvc/fs/fsspec_wrapper.py @@ -11,10 +11,10 @@ # pylint: disable=no-member class FSSpecWrapper(BaseFileSystem): - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **kwargs): + super().__init__(**kwargs) self.fs_args = {"skip_instance_cache": True} - self.fs_args.update(self._prepare_credentials(config)) + self.fs_args.update(self._prepare_credentials(**kwargs)) @cached_property def fs(self): @@ -50,7 +50,9 @@ def _entry_hook(self, entry): entries within info() and ls(detail=True) calls""" return entry - def _prepare_credentials(self, config): # pylint: disable=unused-argument + def _prepare_credentials( + self, **config + ): # pylint: disable=unused-argument """Prepare the arguments for authentication to the host filesystem""" return {} diff --git a/dvc/fs/gdrive.py b/dvc/fs/gdrive.py index cee3982269..dd7844c96d 100644 --- a/dvc/fs/gdrive.py +++ b/dvc/fs/gdrive.py @@ -100,8 +100,8 @@ class GDriveFileSystem(BaseFileSystem): # pylint:disable=abstract-method DEFAULT_GDRIVE_CLIENT_ID = "710796635688-iivsgbgsb6uv1fap6635dhvuei09o66c.apps.googleusercontent.com" # noqa: E501 DEFAULT_GDRIVE_CLIENT_SECRET = "a1Fz59uTpVNeG_VGuSKDLJXv" - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) self.path_info = self.PATH_CLS(config["url"]) @@ -126,17 +126,19 @@ def __init__(self, repo, config): self._client_id = config.get("gdrive_client_id") self._client_secret = config.get("gdrive_client_secret") self._validate_config() + + tmp_dir = config["gdrive_credentials_tmp_dir"] + assert tmp_dir + self._gdrive_service_credentials_path = tmp_fname( - os.path.join(self.repo.tmp_dir, "") + os.path.join(tmp_dir, "") ) self._gdrive_user_credentials_path = ( - tmp_fname(os.path.join(self.repo.tmp_dir, "")) + tmp_fname(os.path.join(tmp_dir, "")) if os.getenv(GDriveFileSystem.GDRIVE_CREDENTIALS_DATA) else config.get( "gdrive_user_credentials_file", - os.path.join( - self.repo.tmp_dir, self.DEFAULT_USER_CREDENTIALS_FILE, - ), + os.path.join(tmp_dir, self.DEFAULT_USER_CREDENTIALS_FILE,), ) ) diff --git a/dvc/fs/git.py b/dvc/fs/git.py index 24276fbce6..b816fcf26b 100644 --- a/dvc/fs/git.py +++ b/dvc/fs/git.py @@ -12,7 +12,7 @@ class GitFileSystem(BaseFileSystem): # pylint:disable=abstract-method scheme = "local" def __init__(self, root_dir, trie): - super().__init__(None, {}) + super().__init__() self._fs_root = root_dir self.trie = trie diff --git a/dvc/fs/gs.py b/dvc/fs/gs.py index d68156aa84..81961089db 100644 --- a/dvc/fs/gs.py +++ b/dvc/fs/gs.py @@ -16,13 +16,13 @@ class GSFileSystem(FSSpecWrapper): # pylint:disable=abstract-method PARAM_CHECKSUM = "etag" DETAIL_FIELDS = frozenset(("etag", "size")) - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url", "gs://") self.path_info = self.PATH_CLS(url) - def _prepare_credentials(self, config): + def _prepare_credentials(self, **config): login_info = {"consistency": None} login_info["project"] = config.get("projectname") login_info["token"] = config.get("credentialpath") diff --git a/dvc/fs/hdfs.py b/dvc/fs/hdfs.py index 5ee98a312a..635cf3e381 100644 --- a/dvc/fs/hdfs.py +++ b/dvc/fs/hdfs.py @@ -65,8 +65,8 @@ class HDFSFileSystem(BaseFileSystem): PARAM_CHECKSUM = "checksum" TRAVERSE_PREFIX_LEN = 2 - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) self.path_info = None url = config.get("url") diff --git a/dvc/fs/http.py b/dvc/fs/http.py index a7a5b4ffe2..a0f3d5dd30 100644 --- a/dvc/fs/http.py +++ b/dvc/fs/http.py @@ -36,8 +36,8 @@ class HTTPFileSystem(BaseFileSystem): # pylint:disable=abstract-method REQUEST_TIMEOUT = 60 CHUNK_SIZE = 2 ** 16 - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url") if url: diff --git a/dvc/fs/local.py b/dvc/fs/local.py index c0ec707fd2..c387910fe1 100644 --- a/dvc/fs/local.py +++ b/dvc/fs/local.py @@ -20,17 +20,14 @@ class LocalFileSystem(BaseFileSystem): PARAM_PATH = "path" TRAVERSE_PREFIX_LEN = 2 - def __init__(self, repo, config): + def __init__(self, **config): from fsspec.implementations.local import LocalFileSystem as LocalFS - super().__init__(repo, config) + super().__init__(**config) self.fs = LocalFS() url = config.get("url") self.path_info = self.PATH_CLS(url) if url else None - - @property - def fs_root(self): - return self.config.get("url") + self.fs_root = url @staticmethod def open(path_info, mode="r", encoding=None, **kwargs): @@ -38,7 +35,7 @@ def open(path_info, mode="r", encoding=None, **kwargs): def exists(self, path_info) -> bool: assert isinstance(path_info, str) or path_info.scheme == "local" - return self.fs.exists(path_info): + return self.fs.exists(path_info) def isfile(self, path_info) -> bool: return os.path.isfile(path_info) diff --git a/dvc/fs/memory.py b/dvc/fs/memory.py index 6bf5fa4d34..1175e9a0cf 100644 --- a/dvc/fs/memory.py +++ b/dvc/fs/memory.py @@ -5,10 +5,10 @@ class MemoryFileSystem(BaseFileSystem): scheme = "local" PARAM_CHECKSUM = "md5" - def __init__(self, repo, config): + def __init__(self, **kwargs): from fsspec.implementations.memory import MemoryFileSystem as MemFS - super().__init__(repo, config) + super().__init__(**kwargs) self.fs = MemFS() diff --git a/dvc/fs/oss.py b/dvc/fs/oss.py index 6cd217720d..20f4fde928 100644 --- a/dvc/fs/oss.py +++ b/dvc/fs/oss.py @@ -39,8 +39,8 @@ class OSSFileSystem(BaseFileSystem): # pylint:disable=abstract-method COPY_POLL_SECONDS = 5 LIST_OBJECT_PAGE_SIZE = 100 - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url") self.path_info = self.PATH_CLS(url) if url else None diff --git a/dvc/fs/repo.py b/dvc/fs/repo.py index 75821a2316..bc59c0e699 100644 --- a/dvc/fs/repo.py +++ b/dvc/fs/repo.py @@ -35,9 +35,9 @@ class RepoFileSystem(BaseFileSystem): # pylint:disable=abstract-method PARAM_CHECKSUM = "md5" def __init__( - self, repo, subrepos=False, repo_factory: RepoFactory = None, + self, repo=None, subrepos=False, repo_factory: RepoFactory = None, ): - super().__init__(repo, {"url": repo.root_dir}) + super().__init__() from dvc.utils.collections import PathStringTrie @@ -49,6 +49,7 @@ def __init__( self.repo_factory = repo_factory self._main_repo = repo + self.hash_jobs = repo.fs.hash_jobs self.root_dir = repo.root_dir self._traverse_subrepos = subrepos @@ -61,7 +62,7 @@ def __init__( """Keep a dvcfs instance of each repo.""" if hasattr(repo, "dvc_dir"): - self._dvcfss[repo.root_dir] = DvcFileSystem(repo) + self._dvcfss[repo.root_dir] = DvcFileSystem(repo=repo) def _get_repo(self, path: str) -> Optional["Repo"]: """Returns repo that the path falls in, using prefix. @@ -93,11 +94,11 @@ def _update(self, dirs, starting_repo): if self._is_dvc_repo(d): repo = self.repo_factory( d, - scm=self.repo.scm, - rev=self.repo.get_rev(), + scm=self._main_repo.scm, + rev=self._main_repo.get_rev(), repo_factory=self.repo_factory, ) - self._dvcfss[repo.root_dir] = DvcFileSystem(repo) + self._dvcfss[repo.root_dir] = DvcFileSystem(repo=repo) self._subrepos_trie[d] = repo def _is_dvc_repo(self, dir_path): @@ -382,10 +383,6 @@ def _download( with self.open(from_info, "rb", **kwargs) as from_fobj: shutil.copyfileobj(from_fobj, wrapped) - @property - def hash_jobs(self): # pylint: disable=invalid-overridden-method - return self._main_repo.fs.hash_jobs - def metadata(self, path): abspath = os.path.abspath(path) path_info = PathInfo(abspath) diff --git a/dvc/fs/s3.py b/dvc/fs/s3.py index b8ce9a323e..0a6b6165cd 100644 --- a/dvc/fs/s3.py +++ b/dvc/fs/s3.py @@ -28,8 +28,8 @@ class BaseS3FileSystem(FSSpecWrapper): # pylint:disable=abstract-method "grant_write_acp": "GrantWriteACP", } - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url", "s3://") self.path_info = self.PATH_CLS(url) @@ -83,7 +83,7 @@ def _load_aws_config_file(self, profile): s3_config = profile_config.get("s3", {}) return self._split_s3_config(s3_config) - def _prepare_credentials(self, config): + def _prepare_credentials(self, **config): from dvc.config import ConfigError from dvc.utils.flatten import flatten, unflatten diff --git a/dvc/fs/ssh/__init__.py b/dvc/fs/ssh/__init__.py index 6753d038eb..17fd7e041a 100644 --- a/dvc/fs/ssh/__init__.py +++ b/dvc/fs/ssh/__init__.py @@ -46,8 +46,8 @@ class SSHFileSystem(BaseFileSystem): # pylint:disable=abstract-method DEFAULT_PORT = 22 TIMEOUT = 1800 - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) url = config.get("url") if url: parsed = urlparse(url) diff --git a/dvc/fs/webdav.py b/dvc/fs/webdav.py index c5b82cca52..eca6c51296 100644 --- a/dvc/fs/webdav.py +++ b/dvc/fs/webdav.py @@ -46,9 +46,9 @@ class WebDAVFileSystem(BaseFileSystem): # pylint:disable=abstract-method DETAIL_FIELDS = frozenset(("etag", "size")) # Constructor - def __init__(self, repo, config): + def __init__(self, **config): # Call BaseFileSystem constructor - super().__init__(repo, config) + super().__init__(**config) # Get username from configuration self.user = config.get("user", None) diff --git a/dvc/fs/webhdfs.py b/dvc/fs/webhdfs.py index 74249c017b..1dc8b3c346 100644 --- a/dvc/fs/webhdfs.py +++ b/dvc/fs/webhdfs.py @@ -37,8 +37,8 @@ class WebHDFSFileSystem(BaseFileSystem): # pylint:disable=abstract-method PARAM_CHECKSUM = "checksum" TRAVERSE_PREFIX_LEN = 2 - def __init__(self, repo, config): - super().__init__(repo, config) + def __init__(self, **config): + super().__init__(**config) self.path_info = None url = config.get("url") diff --git a/dvc/objects/db/__init__.py b/dvc/objects/db/__init__.py index dd75d2fd6b..41f45f0cfd 100644 --- a/dvc/objects/db/__init__.py +++ b/dvc/objects/db/__init__.py @@ -3,22 +3,22 @@ from dvc.scheme import Schemes -def get_odb(fs): +def get_odb(fs, **config): from .base import ObjectDB from .gdrive import GDriveObjectDB from .local import LocalObjectDB from .ssh import SSHObjectDB if fs.scheme == Schemes.LOCAL: - return LocalObjectDB(fs) + return LocalObjectDB(fs, **config) if fs.scheme == Schemes.SSH: - return SSHObjectDB(fs) + return SSHObjectDB(fs, **config) if fs.scheme == Schemes.GDRIVE: - return GDriveObjectDB(fs) + return GDriveObjectDB(fs, **config) - return ObjectDB(fs) + return ObjectDB(fs, **config) def _get_odb(repo, settings): @@ -27,8 +27,8 @@ def _get_odb(repo, settings): if not settings: return None - fs = get_cloud_fs(repo, **settings) - return get_odb(fs) + cls, config = get_cloud_fs(repo, **settings) + return get_odb(cls(**config), state=repo.state, **config) class ODBManager: diff --git a/dvc/objects/db/base.py b/dvc/objects/db/base.py index 8fce7a8f7a..4e637a805e 100644 --- a/dvc/objects/db/base.py +++ b/dvc/objects/db/base.py @@ -18,15 +18,15 @@ class ObjectDB: DEFAULT_CACHE_TYPES = ["copy"] CACHE_MODE: Optional[int] = None - def __init__(self, fs): - self.fs = fs - self.repo = fs.repo + def __init__(self, fs, **config): + from dvc.state import StateNoop - self.verify = fs.config.get("verify", self.DEFAULT_VERIFY) - self.cache_types = fs.config.get("type") or copy( - self.DEFAULT_CACHE_TYPES - ) + self.fs = fs + self.state = config.get("state", StateNoop()) + self.verify = config.get("verify", self.DEFAULT_VERIFY) + self.cache_types = config.get("type") or copy(self.DEFAULT_CACHE_TYPES) self.cache_type_confirmed = False + self.slow_link_warning = config.get("slow_link_warning", True) def move(self, from_info, to_info): self.fs.move(from_info, to_info) @@ -80,7 +80,7 @@ def add(self, path_info, fs, hash_info, move=True, **kwargs): raise self.protect(cache_info) - self.fs.repo.state.save(cache_info, self.fs, hash_info) + self.state.save(cache_info, self.fs, hash_info) callback = kwargs.get("download_callback") if callback: diff --git a/dvc/objects/db/local.py b/dvc/objects/db/local.py index 5a87084a9a..65c1019906 100644 --- a/dvc/objects/db/local.py +++ b/dvc/objects/db/local.py @@ -22,11 +22,11 @@ class LocalObjectDB(ObjectDB): CACHE_MODE = 0o444 UNPACKED_DIR_SUFFIX = ".unpacked" - def __init__(self, fs): - super().__init__(fs) - self.cache_dir = fs.config.get("url") + def __init__(self, fs, **config): + super().__init__(fs, **config) + self.cache_dir = config.get("url") - shared = fs.config.get("shared") + shared = config.get("shared") if shared: self._file_mode = 0o664 self._dir_mode = 0o2775 diff --git a/dvc/objects/file.py b/dvc/objects/file.py index a1b74f6128..a9545d2904 100644 --- a/dvc/objects/file.py +++ b/dvc/objects/file.py @@ -49,7 +49,7 @@ def check(self, odb, check_hash=True): return None actual = get_file_hash( - self.path_info, self.fs, self.hash_info.name, odb.repo.state + self.path_info, self.fs, self.hash_info.name, odb.state ) logger.trace( diff --git a/dvc/objects/stage.py b/dvc/objects/stage.py index f6e228bdf5..e069340f36 100644 --- a/dvc/objects/stage.py +++ b/dvc/objects/stage.py @@ -186,7 +186,7 @@ def stage(odb, path_info, fs, name, upload=False, **kwargs): errno.ENOENT, os.strerror(errno.ENOENT), path_info ) - state = odb.repo.state + state = odb.state # pylint: disable=assignment-from-none hash_info = state.get(path_info, fs) diff --git a/dvc/objects/tree.py b/dvc/objects/tree.py index c146a98800..77db7e8f35 100644 --- a/dvc/objects/tree.py +++ b/dvc/objects/tree.py @@ -40,7 +40,7 @@ def digest(self): from dvc.path_info import PathInfo from dvc.utils import tmp_fname - memfs = MemoryFileSystem(None, {}) + memfs = MemoryFileSystem() path_info = PathInfo(tmp_fname("")) with memfs.open(path_info, "wb") as fobj: fobj.write(self.as_bytes()) diff --git a/dvc/output/__init__.py b/dvc/output/__init__.py index 55c322d84a..f292d23a00 100644 --- a/dvc/output/__init__.py +++ b/dvc/output/__init__.py @@ -77,7 +77,8 @@ def _get( parsed = urlparse(p) if parsed.scheme == "remote": - fs = get_cloud_fs(stage.repo, name=parsed.netloc) + cls, config = get_cloud_fs(stage.repo, name=parsed.netloc) + fs = cls(**config) return OUTS_MAP[fs.scheme]( stage, p, diff --git a/dvc/output/base.py b/dvc/output/base.py index dfec17eca9..80aefd2b09 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -122,7 +122,7 @@ def __init__( if fs: self.fs = fs else: - self.fs = self.FS_CLS(self.repo, {}) + self.fs = self.FS_CLS() self._validate_output_path(path, stage) # This output (and dependency) objects have too many paths/urls # here is a list and comments: @@ -365,6 +365,7 @@ def commit(self, filter_info=None): self.odb, relink=True, dvcignore=self.dvcignore, + state=self.repo.state, ) self.set_exec() @@ -469,6 +470,7 @@ def checkout( force=force, progress_callback=progress_callback, relink=relink, + state=self.repo.state, **kwargs, ) except CheckoutError: @@ -508,7 +510,8 @@ def transfer( if odb is None: odb = self.odb - from_fs = get_cloud_fs(self.repo, url=source) + cls, config = get_cloud_fs(self.repo, url=source) + from_fs = cls(**config) from_info = from_fs.path_info # When running import-url --to-remote / add --to-remote/-o ... we diff --git a/dvc/remote/__init__.py b/dvc/remote/__init__.py index aad726221b..4f82cfdc23 100644 --- a/dvc/remote/__init__.py +++ b/dvc/remote/__init__.py @@ -4,7 +4,8 @@ def get_remote(repo, **kwargs): - fs = get_cloud_fs(repo, **kwargs) + cls, config = get_cloud_fs(repo, **kwargs) + fs = cls(**config) if fs.scheme == "local": - return LocalRemote(fs) - return Remote(fs) + return LocalRemote(fs, repo.tmp_dir, **config) + return Remote(fs, repo.tmp_dir, **config) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 67036ce610..8d2003b48d 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -67,19 +67,17 @@ class Remote: INDEX_CLS = RemoteIndex - def __init__(self, fs): + def __init__(self, fs, tmp_dir, **config): from dvc.objects.db import get_odb self.fs = fs - self.repo = fs.repo - self.odb = get_odb(self.fs) + self.odb = get_odb(self.fs, **config) - config = fs.config url = config.get("url") if url: index_name = hashlib.sha256(url.encode("utf-8")).hexdigest() self.index = self.INDEX_CLS( - self.repo, index_name, dir_suffix=self.fs.CHECKSUM_DIR_SUFFIX + tmp_dir, index_name, dir_suffix=self.fs.CHECKSUM_DIR_SUFFIX ) else: self.index = RemoteIndexNoop() @@ -476,7 +474,7 @@ def push(self, cache, named_cache, jobs=None, show_checksums=False): cache_file = self.odb.hash_to_path_info(checksum) if self.fs.exists(cache_file): hash_info = HashInfo(self.fs.PARAM_CHECKSUM, checksum) - self.fs.repo.state.save(cache_file, self.fs, hash_info) + self.odb.state.save(cache_file, self.fs, hash_info) self.odb.protect(cache_file) return ret @@ -500,7 +498,7 @@ def pull(self, cache, named_cache, jobs=None, show_checksums=False): # during download will not be moved from tmp_file # (see `BaseFileSystem.download()`) hash_info = HashInfo(cache.fs.PARAM_CHECKSUM, checksum) - cache.fs.repo.state.save(cache_file, cache.fs, hash_info) + cache.state.save(cache_file, cache.fs, hash_info) cache.protect(cache_file) return ret diff --git a/dvc/remote/index.py b/dvc/remote/index.py index 6df3aeff3d..3f070afa57 100644 --- a/dvc/remote/index.py +++ b/dvc/remote/index.py @@ -70,9 +70,9 @@ class RemoteIndex: INDEX_TABLE_LAYOUT = "checksum TEXT PRIMARY KEY, dir INTEGER NOT NULL" INDEX_DIR = "index" - def __init__(self, repo, name, dir_suffix=".dir"): + def __init__(self, tmp_dir, name, dir_suffix=".dir"): self.path = os.path.join( - repo.tmp_dir, self.INDEX_DIR, f"{name}{self.INDEX_SUFFIX}" + tmp_dir, self.INDEX_DIR, f"{name}{self.INDEX_SUFFIX}" ) self.dir_suffix = dir_suffix diff --git a/dvc/remote/slow_link_detection.py b/dvc/remote/slow_link_detection.py index 6b6481cb21..51208265db 100644 --- a/dvc/remote/slow_link_detection.py +++ b/dvc/remote/slow_link_detection.py @@ -29,9 +29,7 @@ def wrapper(remote, *args, **kwargs): if this.already_displayed: return f(remote, *args, **kwargs) - cache_conf = remote.repo.config["cache"] - slow_link_warning = cache_conf.get("slow_link_warning", True) - if not slow_link_warning or cache_conf.get("type"): + if not remote.slow_link_warning or remote.cache_types: return f(remote, *args, **kwargs) start = time.time() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 832b640ba7..43195419b7 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -161,7 +161,7 @@ def __init__( if scm: self._fs = scm.get_fs(rev) else: - self._fs = LocalFileSystem(self, {"url": self.root_dir}) + self._fs = LocalFileSystem(url=self.root_dir) self.config = Config(self.dvc_dir, fs=self.fs, config=config) self._uninitialized = uninitialized @@ -170,13 +170,13 @@ def __init__( # used by RepoFileSystem to determine if it should traverse subrepos self.subrepos = subrepos - self.odb = ODBManager(self) self.cloud = DataCloud(self) self.stage = StageLoad(self) if scm or not self.dvc_dir: self.lock = LockNoop() self.state = StateNoop() + self.odb = ODBManager(self) else: self.lock = make_lock( os.path.join(self.tmp_dir, "lock"), @@ -189,6 +189,8 @@ def __init__( # avoid any possible state corruption in 'shared cache dir' # scenario. self.state = State(self.root_dir, self.tmp_dir, self.dvcignore) + self.odb = ODBManager(self) + self.stage_cache = StageCache(self) self._ignore() @@ -472,7 +474,7 @@ def is_dvc_internal(self, path): def dvcfs(self): from dvc.fs.dvc import DvcFileSystem - return DvcFileSystem(self) + return DvcFileSystem(repo=self) @cached_property def repo_fs(self): diff --git a/dvc/repo/brancher.py b/dvc/repo/brancher.py index 8c2420f7be..c88f2abf0d 100644 --- a/dvc/repo/brancher.py +++ b/dvc/repo/brancher.py @@ -36,7 +36,7 @@ def brancher( # noqa: E302 scm = self.scm - self.fs = LocalFileSystem(self, {"url": self.root_dir}) + self.fs = LocalFileSystem(url=self.root_dir) yield "workspace" if revs and "workspace" in revs: diff --git a/dvc/state.py b/dvc/state.py index 040e84c58b..10ab64d891 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -72,7 +72,7 @@ def __init__(self, root_dir=None, tmp_dir=None, dvcignore=None): self.tmp_dir = tmp_dir self.root_dir = root_dir self.dvcignore = dvcignore - self.fs = LocalFileSystem(None, {"url": self.root_dir}) + self.fs = LocalFileSystem() if not tmp_dir: return diff --git a/tests/func/test_fs.py b/tests/func/test_fs.py index a96ab147f5..5403ff6964 100644 --- a/tests/func/test_fs.py +++ b/tests/func/test_fs.py @@ -15,7 +15,7 @@ class TestLocalFileSystem(TestDir): def setUp(self): super().setUp() - self.fs = LocalFileSystem(None, {}) + self.fs = LocalFileSystem() def test_open(self): with self.fs.open(self.FOO) as fd: @@ -116,7 +116,7 @@ def convert_to_sets(walk_results): class TestWalkInNoSCM(AssertWalkEqualMixin, TestDir): def test(self): - fs = LocalFileSystem(None, {"url": self._root_dir}) + fs = LocalFileSystem() self.assertWalkEqual( fs.walk(self._root_dir), [ @@ -135,7 +135,7 @@ def test(self): ) def test_subdir(self): - fs = LocalFileSystem(None, {"url": self._root_dir}) + fs = LocalFileSystem() self.assertWalkEqual( fs.walk(join("data_dir", "data_sub_dir")), [(join("data_dir", "data_sub_dir"), [], ["data_sub"])], @@ -144,7 +144,7 @@ def test_subdir(self): class TestWalkInGit(AssertWalkEqualMixin, TestGit): def test_nobranch(self): - fs = LocalFileSystem(None, {"url": self._root_dir}) + fs = LocalFileSystem(url=self._root_dir) walk_result = [] for root, dirs, files in fs.walk("."): dirs[:] = [i for i in dirs if i != ".git"] @@ -244,7 +244,8 @@ def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc): ) def test_fs_getsize(dvc, cloud): cloud.gen({"data": {"foo": "foo"}, "baz": "baz baz"}) - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) path_info = fs.path_info assert fs.getsize(path_info / "baz") == 7 @@ -269,7 +270,8 @@ def test_fs_getsize(dvc, cloud): ) def test_fs_upload_fobj(dvc, tmp_dir, cloud): tmp_dir.gen("foo", "foo") - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) from_info = tmp_dir / "foo" to_info = fs.path_info / "foo" @@ -295,7 +297,8 @@ def test_fs_ls(dvc, cloud): } } ) - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) path_info = cloud / "directory" assert {os.path.basename(file_key) for file_key in fs.ls(path_info)} == { @@ -324,7 +327,8 @@ def test_fs_ls(dvc, cloud): ) def test_fs_find_recursive(dvc, cloud): cloud.gen({"data": {"foo": "foo", "bar": {"baz": "baz"}, "quux": "quux"}}) - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) path_info = fs.path_info assert { @@ -345,7 +349,8 @@ def test_fs_find_recursive(dvc, cloud): ) def test_fs_find_with_etag(dvc, cloud): cloud.gen({"data": {"foo": "foo", "bar": {"baz": "baz"}, "quux": "quux"}}) - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) path_info = fs.path_info for details in fs.find(path_info / "data", detail=True): @@ -366,7 +371,8 @@ def test_fs_find_with_etag(dvc, cloud): ) def test_fs_fsspec_path_management(dvc, cloud): cloud.gen({"foo": "foo", "data": {"bar": "bar", "baz": {"foo": "foo"}}}) - fs = get_cloud_fs(dvc, **cloud.config) + cls, config = get_cloud_fs(dvc, **cloud.config) + fs = cls(**config) root = cloud.parents[len(cloud.parents) - 1] bucket_details = fs.info(root) diff --git a/tests/func/test_utils.py b/tests/func/test_utils.py index 4110abeda7..026357afd8 100644 --- a/tests/func/test_utils.py +++ b/tests/func/test_utils.py @@ -3,7 +3,7 @@ def test_file_md5_crlf(tmp_dir): - fs = LocalFileSystem(None, {}) + fs = LocalFileSystem() tmp_dir.gen("cr", b"a\nb\nc") tmp_dir.gen("crlf", b"a\r\nb\r\nc") assert utils.file_md5("cr", fs) == utils.file_md5("crlf", fs) diff --git a/tests/remotes/gdrive.py b/tests/remotes/gdrive.py index edab218db0..17dff1acb6 100644 --- a/tests/remotes/gdrive.py +++ b/tests/remotes/gdrive.py @@ -124,6 +124,8 @@ def gdrive(test_config, make_tmp_dir): tmp_dir = make_tmp_dir("gdrive", dvc=True) ret = GDrive(GDrive.get_url()) - fs = GDriveFileSystem(tmp_dir.dvc, ret.config) + fs = GDriveFileSystem( + gdrive_credentials_tmp_dir=tmp_dir.dvc.tmp_dir, **ret.config + ) fs._gdrive_create_dir("root", fs.path_info.path) yield ret diff --git a/tests/unit/fs/test_azure.py b/tests/unit/fs/test_azure.py index 764d426139..cd79c898f3 100644 --- a/tests/unit/fs/test_azure.py +++ b/tests/unit/fs/test_azure.py @@ -20,7 +20,7 @@ def test_init_env_var(monkeypatch, dvc): monkeypatch.setenv("AZURE_STORAGE_CONNECTION_STRING", connection_string) config = {"url": "azure://"} - fs = AzureFileSystem(dvc, config) + fs = AzureFileSystem(**config) assert fs.path_info == "azure://" + container_name @@ -28,14 +28,14 @@ def test_init(dvc): prefix = "some/prefix" url = f"azure://{container_name}/{prefix}" config = {"url": url, "connection_string": connection_string} - fs = AzureFileSystem(dvc, config) + fs = AzureFileSystem(**config) assert fs.path_info == url def test_info(tmp_dir, azure): tmp_dir.gen("foo", "foo") - fs = AzureFileSystem(None, azure.config) + fs = AzureFileSystem(**azure.config) to_info = azure fs.upload(PathInfo("foo"), to_info) assert fs.exists(to_info) diff --git a/tests/unit/fs/test_base.py b/tests/unit/fs/test_base.py index 986e86f0d8..42016b32bc 100644 --- a/tests/unit/fs/test_base.py +++ b/tests/unit/fs/test_base.py @@ -16,4 +16,4 @@ def test_missing_deps(pkg, msg, mocker): mocker.patch.object(BaseFileSystem, "REQUIRES", requires) mocker.patch("dvc.utils.pkg.PKG", pkg) with pytest.raises(RemoteMissingDepsError, match=msg): - BaseFileSystem(None, {}) + BaseFileSystem() diff --git a/tests/unit/fs/test_dvc.py b/tests/unit/fs/test_dvc.py index d192102688..752018b608 100644 --- a/tests/unit/fs/test_dvc.py +++ b/tests/unit/fs/test_dvc.py @@ -14,7 +14,7 @@ def test_exists(tmp_dir, dvc): dvc.add("foo") (tmp_dir / "foo").unlink() - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) assert fs.exists("foo") @@ -23,7 +23,7 @@ def test_open(tmp_dir, dvc): dvc.add("foo") (tmp_dir / "foo").unlink() - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "foo", "r") as fobj: assert fobj.read() == "foo" @@ -32,7 +32,7 @@ def test_open_dirty_hash(tmp_dir, dvc): tmp_dir.dvc_gen("file", "file") (tmp_dir / "file").write_text("something") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "file", "r") as fobj: # NOTE: Unlike RepoFileSystem, DvcFileSystem should not # be affected by a dirty workspace. @@ -43,7 +43,7 @@ def test_open_dirty_no_hash(tmp_dir, dvc): tmp_dir.gen("file", "file") (tmp_dir / "file.dvc").write_text("outs:\n- path: file\n") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) # NOTE: Unlike RepoFileSystem, DvcFileSystem should not # be affected by a dirty workspace. with pytest.raises(FileNotFoundError): @@ -66,7 +66,7 @@ def test_open_in_history(tmp_dir, scm, dvc): if rev == "workspace": continue - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "foo", "r") as fobj: assert fobj.read() == "foo" @@ -74,7 +74,7 @@ def test_open_in_history(tmp_dir, scm, dvc): def test_isdir_isfile(tmp_dir, dvc): tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) assert not fs.isdir("datadir") assert not fs.isfile("datadir") assert not fs.isdir("datafile") @@ -95,7 +95,7 @@ def test_isdir_mixed(tmp_dir, dvc): dvc.add(str(tmp_dir / "dir" / "foo")) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) assert fs.isdir("dir") assert not fs.isfile("dir") @@ -113,7 +113,7 @@ def test_walk(tmp_dir, dvc): ) dvc.add("dir", recursive=True) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) expected = [ str(tmp_dir / "dir" / "subdir1"), @@ -147,7 +147,7 @@ def test_walk_dir(tmp_dir, dvc): ) dvc.add("dir") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) expected = [ str(tmp_dir / "dir" / "subdir1"), @@ -173,7 +173,7 @@ def onerror(exc): raise exc tmp_dir.dvc_gen("foo", "foo") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) # path does not exist for _ in fs.walk("dir"): @@ -193,14 +193,14 @@ def onerror(exc): def test_isdvc(tmp_dir, dvc): tmp_dir.gen({"foo": "foo", "bar": "bar"}) dvc.add("foo") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) assert fs.isdvc("foo") assert not fs.isdvc("bar") def test_get_hash_file(tmp_dir, dvc): tmp_dir.dvc_gen({"foo": "foo"}) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) assert ( fs.info(PathInfo(tmp_dir) / "foo")["md5"] == "acbd18db4cc2f85cedef654fccc4a4d8" @@ -213,7 +213,7 @@ def test_get_hash_dir(tmp_dir, dvc, mocker): tmp_dir.dvc_gen( {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}} ) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) get_file_hash_spy = mocker.spy(dvc_module.objects.stage, "get_file_hash") assert ( fs.info(PathInfo(tmp_dir) / "dir")["md5"] @@ -226,7 +226,7 @@ def test_get_hash_granular(tmp_dir, dvc): tmp_dir.dvc_gen( {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}} ) - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) subdir = PathInfo(tmp_dir) / "dir" / "subdir" assert fs.info(subdir).get("md5") is None assert stage(dvc.odb.local, subdir, fs, "md5").hash_info == HashInfo( @@ -244,7 +244,7 @@ def test_get_hash_dirty_file(tmp_dir, dvc): tmp_dir.dvc_gen("file", "file") (tmp_dir / "file").write_text("something") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) expected = "8c7dd922ad47494fc02c388e12c00eac" assert fs.info(PathInfo(tmp_dir) / "file").get("md5") == expected assert stage( @@ -256,7 +256,7 @@ def test_get_hash_dirty_dir(tmp_dir, dvc): tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}}) (tmp_dir / "dir" / "baz").write_text("baz") - fs = DvcFileSystem(dvc) + fs = DvcFileSystem(repo=dvc) expected = "5ea40360f5b4ec688df672a4db9c17d1.dir" assert fs.info(PathInfo(tmp_dir) / "dir").get("md5") == expected assert stage( diff --git a/tests/unit/fs/test_repo.py b/tests/unit/fs/test_repo.py index aaf6acc8e3..bc80196d19 100644 --- a/tests/unit/fs/test_repo.py +++ b/tests/unit/fs/test_repo.py @@ -15,7 +15,7 @@ def test_exists(tmp_dir, dvc): dvc.add("foo") (tmp_dir / "foo").unlink() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) assert fs.exists("foo") @@ -24,7 +24,7 @@ def test_open(tmp_dir, dvc): dvc.add("foo") (tmp_dir / "foo").unlink() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "foo", "r") as fobj: assert fobj.read() == "foo" @@ -33,7 +33,7 @@ def test_open_dirty_hash(tmp_dir, dvc): tmp_dir.dvc_gen("file", "file") (tmp_dir / "file").write_text("something") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "file", "r") as fobj: assert fobj.read() == "something" @@ -42,7 +42,7 @@ def test_open_dirty_no_hash(tmp_dir, dvc): tmp_dir.gen("file", "file") (tmp_dir / "file.dvc").write_text("outs:\n- path: file\n") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "file", "r") as fobj: assert fobj.read() == "file" @@ -62,7 +62,7 @@ def test_open_in_history(tmp_dir, scm, dvc): if rev == "workspace": continue - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) with fs.open(PathInfo(tmp_dir) / "foo", "r") as fobj: assert fobj.read() == "foo" @@ -70,7 +70,7 @@ def test_open_in_history(tmp_dir, scm, dvc): def test_isdir_isfile(tmp_dir, dvc): tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) assert fs.isdir("datadir") assert not fs.isfile("datadir") assert not fs.isdvc("datadir") @@ -95,7 +95,7 @@ def test_exists_isdir_isfile_dirty(tmp_dir, dvc): {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}} ) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) shutil.rmtree(tmp_dir / "datadir") (tmp_dir / "datafile").unlink() @@ -131,7 +131,7 @@ def test_isdir_mixed(tmp_dir, dvc): dvc.add(str(tmp_dir / "dir" / "foo")) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) assert fs.isdir("dir") assert not fs.isfile("dir") @@ -161,7 +161,7 @@ def test_walk(tmp_dir, dvc, dvcfiles, extra_expected): ) dvc.add(str(tmp_dir / "dir"), recursive=True) tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = [ PathInfo("dir") / "subdir1", @@ -196,7 +196,7 @@ def test_walk_dirty(tmp_dir, dvc): tmp_dir.gen({"dir": {"bar": "bar", "subdir3": {"foo3": "foo3"}}}) (tmp_dir / "dir" / "foo").unlink() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = [ PathInfo("dir") / "subdir1", PathInfo("dir") / "subdir2", @@ -224,7 +224,7 @@ def test_walk_dirty_cached_dir(tmp_dir, scm, dvc): ) (tmp_dir / "data" / "foo").unlink() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) data = PathInfo(tmp_dir) / "data" @@ -248,7 +248,7 @@ def test_walk_mixed_dir(tmp_dir, scm, dvc): ) tmp_dir.scm.commit("add dir") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = [ str(PathInfo("dir") / "foo"), @@ -269,7 +269,7 @@ def onerror(exc): raise exc tmp_dir.dvc_gen("foo", "foo") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) # path does not exist for _ in fs.walk("dir"): @@ -290,7 +290,7 @@ def test_isdvc(tmp_dir, dvc): tmp_dir.gen({"foo": "foo", "bar": "bar", "dir": {"baz": "baz"}}) dvc.add("foo") dvc.add("dir") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) assert fs.isdvc("foo") assert not fs.isdvc("bar") assert fs.isdvc("dir") @@ -325,7 +325,7 @@ def test_subrepos(tmp_dir, scm, dvc): ) dvc._reset() - fs = RepoFileSystem(dvc, subrepos=True) + fs = RepoFileSystem(repo=dvc, subrepos=True) def assert_fs_belongs_to_repo(ret_val): method = fs._get_repo @@ -403,7 +403,7 @@ def test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected): # using fs that does not have dvcignore dvc._reset() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = [ PathInfo("dir") / "repo", PathInfo("dir") / "repo.txt", @@ -448,7 +448,7 @@ def test_repo_fs_no_subrepos(tmp_dir, dvc, scm): # using fs that does not have dvcignore dvc._reset() - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = [ tmp_dir / ".dvcignore", tmp_dir / ".gitignore", @@ -481,7 +481,7 @@ def test_repo_fs_no_subrepos(tmp_dir, dvc, scm): def test_get_hash_cached_file(tmp_dir, dvc, mocker): tmp_dir.dvc_gen({"foo": "foo"}) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = "acbd18db4cc2f85cedef654fccc4a4d8" assert fs.info(PathInfo(tmp_dir) / "foo").get("md5") is None assert stage( @@ -495,7 +495,7 @@ def test_get_hash_cached_dir(tmp_dir, dvc, mocker): tmp_dir.dvc_gen( {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}} ) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) expected = "8761c4e9acad696bee718615e23e22db.dir" assert fs.info(PathInfo(tmp_dir) / "dir").get("md5") is None assert stage( @@ -513,7 +513,7 @@ def test_get_hash_cached_granular(tmp_dir, dvc, mocker): tmp_dir.dvc_gen( {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}} ) - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) subdir = PathInfo(tmp_dir) / "dir" / "subdir" assert fs.info(subdir).get("md5") is None assert stage(dvc.odb.local, subdir, fs, "md5").hash_info == HashInfo( @@ -541,7 +541,7 @@ def test_get_hash_mixed_dir(tmp_dir, scm, dvc): ) tmp_dir.scm.commit("add dir") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) actual = stage( dvc.odb.local, PathInfo(tmp_dir) / "dir", fs, "md5" ).hash_info @@ -553,7 +553,7 @@ def test_get_hash_dirty_file(tmp_dir, dvc): tmp_dir.dvc_gen("file", "file") (tmp_dir / "file").write_text("something") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) assert fs.info(PathInfo(tmp_dir) / "file").get("md5") is None actual = stage( dvc.odb.local, PathInfo(tmp_dir) / "file", fs, "md5" @@ -577,7 +577,7 @@ def test_get_hash_dirty_dir(tmp_dir, dvc): tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}}) (tmp_dir / "dir" / "baz").write_text("baz") - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) actual = stage( dvc.odb.local, PathInfo(tmp_dir) / "dir", fs, "md5" ).hash_info @@ -631,7 +631,7 @@ def dvc_structure(suffix): expected[str(tmp_dir / "subrepo1")].add("subrepo3") actual = {} - fs = RepoFileSystem(dvc) + fs = RepoFileSystem(repo=dvc) for root, dirs, files in fs.walk( str(tmp_dir), ignore_subrepos=not traverse_subrepos ): diff --git a/tests/unit/fs/test_s3.py b/tests/unit/fs/test_s3.py index ad9c11ef6b..ec323753bf 100644 --- a/tests/unit/fs/test_s3.py +++ b/tests/unit/fs/test_s3.py @@ -28,7 +28,7 @@ def grants(): def test_init(dvc): config = {"url": url} - fs = S3FileSystem(dvc, config) + fs = S3FileSystem(**config) assert fs.path_info == url @@ -37,7 +37,7 @@ def test_verify_ssl_default_param(dvc): config = { "url": url, } - fs = S3FileSystem(dvc, config) + fs = S3FileSystem(**config) assert fs.fs_args["client_kwargs"]["verify"] @@ -47,12 +47,12 @@ def test_s3_config_credentialpath(dvc, monkeypatch): monkeypatch.setattr(os, "environ", environment) config = {"url": url, "credentialpath": "somewhere"} - S3FileSystem(dvc, config) + S3FileSystem(**config) assert environment["AWS_SHARED_CREDENTIALS_FILE"] == "somewhere" environment.clear() config = {"url": url, "configpath": "somewhere"} - S3FileSystem(dvc, config) + S3FileSystem(**config) assert environment["AWS_CONFIG_FILE"] == "somewhere" environment.clear() @@ -61,7 +61,7 @@ def test_s3_config_credentialpath(dvc, monkeypatch): "credentialpath": "somewhere", "configpath": "elsewhere", } - S3FileSystem(dvc, config) + S3FileSystem(**config) assert environment["AWS_SHARED_CREDENTIALS_FILE"] == "somewhere" assert environment["AWS_CONFIG_FILE"] == "elsewhere" environment.clear() @@ -69,7 +69,7 @@ def test_s3_config_credentialpath(dvc, monkeypatch): def test_ssl_verify_bool_param(dvc): config = {"url": url, "ssl_verify": False} - fs = S3FileSystem(dvc, config) + fs = S3FileSystem(**config) assert fs.fs_args["client_kwargs"]["verify"] == config["ssl_verify"] @@ -82,7 +82,7 @@ def test_grants(dvc): "grant_write_acp": "id=write-acp-permission-id", "grant_full_control": "id=full-control-permission-id", } - fs = S3FileSystem(dvc, config) + fs = S3FileSystem(**config) extra_args = fs.fs_args["s3_additional_kwargs"] assert ( @@ -99,23 +99,20 @@ def test_grants_mutually_exclusive_acl_error(dvc, grants): config = {"url": url, "acl": "public-read", grant_option: grant_value} with pytest.raises(ConfigError): - S3FileSystem(dvc, config) + S3FileSystem(**config) def test_sse_kms_key_id(dvc): - fs = S3FileSystem(dvc, {"url": url, "sse_kms_key_id": "key"}) + fs = S3FileSystem(url=url, sse_kms_key_id="key") assert fs.fs_args["s3_additional_kwargs"]["SSEKMSKeyId"] == "key" def test_key_id_and_secret(dvc): fs = S3FileSystem( - dvc, - { - "url": url, - "access_key_id": key_id, - "secret_access_key": key_secret, - "session_token": session_token, - }, + url=url, + access_key_id=key_id, + secret_access_key=key_secret, + session_token=session_token, ) assert fs.fs_args["key"] == key_id assert fs.fs_args["secret"] == key_secret @@ -154,7 +151,7 @@ def test_s3_aws_config(tmp_dir, dvc, s3, monkeypatch): m.setenv(var, str(tmp_dir)) # Fresh import to see the effects of changing HOME variable s3_mod = importlib.reload(sys.modules[S3FileSystem.__module__]) - fs = s3_mod.S3FileSystem(dvc, s3.config) + fs = s3_mod.S3FileSystem(**s3.config) importlib.reload(sys.modules[S3FileSystem.__module__]) @@ -190,7 +187,7 @@ def test_s3_aws_config_different_profile(tmp_dir, dvc, s3, monkeypatch): ) monkeypatch.setenv("AWS_CONFIG_FILE", config_file) - fs = S3FileSystem(dvc, {**s3.config, "profile": "dev"}) + fs = S3FileSystem(profile="dev", **s3.config) s3_config = fs.fs_args["config_kwargs"]["s3"] assert s3_config["addressing_style"] == "virtual" diff --git a/tests/unit/fs/test_tree.py b/tests/unit/fs/test_tree.py index 1613367ebf..14fcfd0c4e 100644 --- a/tests/unit/fs/test_tree.py +++ b/tests/unit/fs/test_tree.py @@ -16,9 +16,12 @@ def test_get_cloud_fs(tmp_dir, dvc): first = base / "first" second = first / "second" - assert get_cloud_fs(dvc, name="base").path_info == base - assert get_cloud_fs(dvc, name="first").path_info == first - assert get_cloud_fs(dvc, name="second").path_info == second + cls, config = get_cloud_fs(dvc, name="base") + assert cls(**config).path_info == base + cls, config = get_cloud_fs(dvc, name="first") + assert cls(**config).path_info == first + cls, config = get_cloud_fs(dvc, name="second") + assert cls(**config).path_info == second def test_get_cloud_fs_validate(tmp_dir, dvc): @@ -36,10 +39,10 @@ def test_get_cloud_fs_validate(tmp_dir, dvc): default=False, ) - assert get_cloud_fs(dvc, name="base").config == { + assert get_cloud_fs(dvc, name="base")[1] == { "url": "ssh://example.com/path" } - assert get_cloud_fs(dvc, name="first").config == { + assert get_cloud_fs(dvc, name="first")[1] == { "url": "ssh://example.com/path/first", "type": ["symlink"], } diff --git a/tests/unit/objects/db/test_local.py b/tests/unit/objects/db/test_local.py index 20badab1bc..bde91601ec 100644 --- a/tests/unit/objects/db/test_local.py +++ b/tests/unit/objects/db/test_local.py @@ -15,7 +15,7 @@ def test_status_download_optimization(mocker, dvc): And the desired files to fetch are already on the local cache, Don't check the existence of the desired files on the remote cache """ - odb = LocalObjectDB(LocalFileSystem(dvc, {})) + odb = LocalObjectDB(LocalFileSystem()) infos = NamedCache() infos.add("local", "acbd18db4cc2f85cedef654fccc4a4d8", "foo") diff --git a/tests/unit/remote/ssh/test_ssh.py b/tests/unit/remote/ssh/test_ssh.py index fe865833c7..f853986ea0 100644 --- a/tests/unit/remote/ssh/test_ssh.py +++ b/tests/unit/remote/ssh/test_ssh.py @@ -19,20 +19,20 @@ def test_url(dvc): url = f"ssh://{user}@{host}:{port}{path}" config = {"url": url} - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) assert fs.path_info == url # SCP-like URL ssh://[user@]host.xz:/absolute/path url = f"ssh://{user}@{host}:{path}" config = {"url": url} - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) assert fs.path_info == url def test_no_path(dvc): config = {"url": "ssh://127.0.0.1"} - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) assert fs.path_info.path == "" @@ -66,7 +66,7 @@ def test_no_path(dvc): def test_ssh_host_override_from_config( mock_file, mock_exists, dvc, config, expected_host ): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -94,7 +94,7 @@ def test_ssh_host_override_from_config( read_data=mock_ssh_config, ) def test_ssh_user(mock_file, mock_exists, dvc, config, expected_user): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -119,7 +119,7 @@ def test_ssh_user(mock_file, mock_exists, dvc, config, expected_user): read_data=mock_ssh_config, ) def test_ssh_port(mock_file, mock_exists, dvc, config, expected_port): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -154,7 +154,7 @@ def test_ssh_port(mock_file, mock_exists, dvc, config, expected_port): read_data=mock_ssh_config, ) def test_ssh_keyfile(mock_file, mock_exists, dvc, config, expected_keyfile): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -176,7 +176,7 @@ def test_ssh_keyfile(mock_file, mock_exists, dvc, config, expected_keyfile): read_data=mock_ssh_config, ) def test_ssh_gss_auth(mock_file, mock_exists, dvc, config, expected_gss_auth): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -201,7 +201,7 @@ def test_ssh_gss_auth(mock_file, mock_exists, dvc, config, expected_gss_auth): def test_ssh_allow_agent( mock_file, mock_exists, dvc, config, expected_allow_agent ): - fs = SSHFileSystem(dvc, config) + fs = SSHFileSystem(**config) mock_exists.assert_called_with(SSHFileSystem.ssh_config_filename()) mock_file.assert_called_with(SSHFileSystem.ssh_config_filename()) @@ -209,7 +209,7 @@ def test_ssh_allow_agent( def test_hardlink_optimization(dvc, tmp_dir, ssh): - fs = SSHFileSystem(dvc, ssh.config) + fs = SSHFileSystem(**ssh.config) from_info = fs.path_info / "empty" to_info = fs.path_info / "link" diff --git a/tests/unit/remote/test_base.py b/tests/unit/remote/test_base.py index eb5a833ee9..d9ca539bc2 100644 --- a/tests/unit/remote/test_base.py +++ b/tests/unit/remote/test_base.py @@ -31,13 +31,13 @@ def test_cmd_error(dvc): side_effect=RemoteCmdError("base", cmd, ret, err), ): with pytest.raises(RemoteCmdError): - BaseFileSystem(dvc, config).remove("file") + BaseFileSystem(**config).remove("file") @mock.patch.object(ObjectDB, "list_hashes_traverse") @mock.patch.object(ObjectDB, "list_hashes_exists") def test_hashes_exist(object_exists, traverse, dvc): - odb = ObjectDB(BaseFileSystem(dvc, {})) + odb = ObjectDB(BaseFileSystem()) # remote does not support traverse odb.fs.CAN_TRAVERSE = False @@ -90,7 +90,7 @@ def test_hashes_exist(object_exists, traverse, dvc): ObjectDB, "_path_to_hash", side_effect=lambda x: x, ) def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc): - odb = ObjectDB(BaseFileSystem(dvc, {})) + odb = ObjectDB(BaseFileSystem()) odb.fs.path_info = PathInfo("foo") # parallel traverse @@ -115,7 +115,7 @@ def test_list_hashes_traverse(_path_to_hash, list_hashes, dvc): def test_list_hashes(dvc): - odb = ObjectDB(BaseFileSystem(dvc, {})) + odb = ObjectDB(BaseFileSystem()) odb.fs.path_info = PathInfo("foo") with mock.patch.object( @@ -126,7 +126,7 @@ def test_list_hashes(dvc): def test_list_paths(dvc): - odb = ObjectDB(BaseFileSystem(dvc, {})) + odb = ObjectDB(BaseFileSystem()) odb.fs.path_info = PathInfo("foo") with mock.patch.object(odb.fs, "walk_files", return_value=[]) as walk_mock: diff --git a/tests/unit/remote/test_gdrive.py b/tests/unit/remote/test_gdrive.py index b5c5a69335..4fe02eb8da 100644 --- a/tests/unit/remote/test_gdrive.py +++ b/tests/unit/remote/test_gdrive.py @@ -15,11 +15,15 @@ class TestRemoteGDrive: } def test_init(self, dvc): - fs = GDriveFileSystem(dvc, self.CONFIG) + fs = GDriveFileSystem( + gdrive_credentials_tmp_dir=dvc.tmp_dir, **self.CONFIG + ) assert str(fs.path_info) == self.CONFIG["url"] def test_drive(self, dvc, monkeypatch): - fs = GDriveFileSystem(dvc, self.CONFIG) + fs = GDriveFileSystem( + gdrive_credentials_tmp_dir=dvc.tmp_dir, **self.CONFIG + ) monkeypatch.setenv( GDriveFileSystem.GDRIVE_CREDENTIALS_DATA, USER_CREDS_TOKEN_REFRESH_ERROR, @@ -28,7 +32,9 @@ def test_drive(self, dvc, monkeypatch): assert fs._drive monkeypatch.setenv(GDriveFileSystem.GDRIVE_CREDENTIALS_DATA, "") - fs = GDriveFileSystem(dvc, self.CONFIG) + fs = GDriveFileSystem( + gdrive_credentials_tmp_dir=dvc.tmp_dir, **self.CONFIG + ) monkeypatch.setenv( GDriveFileSystem.GDRIVE_CREDENTIALS_DATA, USER_CREDS_MISSED_KEY_ERROR, diff --git a/tests/unit/remote/test_http.py b/tests/unit/remote/test_http.py index 7b76284fd9..c9f5d19eb6 100644 --- a/tests/unit/remote/test_http.py +++ b/tests/unit/remote/test_http.py @@ -9,7 +9,7 @@ def test_download_fails_on_error_code(dvc, http): - fs = HTTPFileSystem(dvc, http.config) + fs = HTTPFileSystem(**http.config) with pytest.raises(HTTPError): fs._download(http / "missing.txt", "missing.txt") @@ -23,7 +23,7 @@ def test_public_auth_method(dvc): "password": "", } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._auth_method() is None @@ -42,7 +42,7 @@ def test_basic_auth_method(dvc): "password": password, } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._auth_method() == auth assert isinstance(fs._auth_method(), HTTPBasicAuth) @@ -62,7 +62,7 @@ def test_digest_auth_method(dvc): "password": password, } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._auth_method() == auth assert isinstance(fs._auth_method(), HTTPDigestAuth) @@ -79,7 +79,7 @@ def test_custom_auth_method(dvc): "password": password, } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._auth_method() is None assert header in fs.headers @@ -92,7 +92,7 @@ def test_ssl_verify_is_enabled_by_default(dvc): "path_info": "file.html", } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._session.verify is True @@ -104,7 +104,7 @@ def test_ssl_verify_disable(dvc): "ssl_verify": False, } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._session.verify is False @@ -124,7 +124,7 @@ def test_http_method(dvc): "method": "PUT", } - fs = HTTPFileSystem(dvc, config) + fs = HTTPFileSystem(**config) assert fs._auth_method() == auth assert fs.method == "PUT" @@ -137,7 +137,7 @@ def test_exists(mocker): # on HEAD request failure. res.raw = io.StringIO("foo") - fs = HTTPFileSystem(None, {}) + fs = HTTPFileSystem() mocker.patch.object(fs, "request", return_value=res) url = URLInfo("https://example.org/file.txt") @@ -161,7 +161,7 @@ def test_content_length(mocker, headers, expected_size): res.headers.update(headers) res.status_code = 200 - fs = HTTPFileSystem(None, {}) + fs = HTTPFileSystem() mocker.patch.object(fs, "request", return_value=res) url = URLInfo("https://example.org/file.txt") diff --git a/tests/unit/remote/test_index.py b/tests/unit/remote/test_index.py index a76bc79375..0dfed0193c 100644 --- a/tests/unit/remote/test_index.py +++ b/tests/unit/remote/test_index.py @@ -8,7 +8,7 @@ @pytest.fixture(scope="function") def index(dvc): - idx = RemoteIndex(dvc, "foo") + idx = RemoteIndex(dvc.tmp_dir, "foo") idx.load() yield idx idx.dump() diff --git a/tests/unit/remote/test_oss.py b/tests/unit/remote/test_oss.py index e9fc9ba4c1..558fab00f2 100644 --- a/tests/unit/remote/test_oss.py +++ b/tests/unit/remote/test_oss.py @@ -15,7 +15,7 @@ def test_init(dvc): "oss_key_secret": key_secret, "oss_endpoint": endpoint, } - fs = OSSFileSystem(dvc, config) + fs = OSSFileSystem(**config) assert fs.path_info == url assert fs.endpoint == endpoint assert fs.key_id == key_id diff --git a/tests/unit/remote/test_remote.py b/tests/unit/remote/test_remote.py index c775115353..881aca6d84 100644 --- a/tests/unit/remote/test_remote.py +++ b/tests/unit/remote/test_remote.py @@ -12,7 +12,8 @@ def test_remote_with_hash_jobs(dvc): } dvc.config["core"]["checksum_jobs"] = 200 - fs = get_cloud_fs(dvc, name="with_hash_jobs") + cls, config = get_cloud_fs(dvc, name="with_hash_jobs") + fs = cls(**config) assert fs.hash_jobs == 100 @@ -23,7 +24,8 @@ def test_remote_with_jobs(dvc): } dvc.config["core"]["jobs"] = 200 - fs = get_cloud_fs(dvc, name="with_jobs") + cls, config = get_cloud_fs(dvc, name="with_jobs") + fs = cls(**config) assert fs.jobs == 100 @@ -31,21 +33,23 @@ def test_remote_without_hash_jobs(dvc): dvc.config["remote"]["without_hash_jobs"] = {"url": "s3://bucket/name"} dvc.config["core"]["checksum_jobs"] = 200 - fs = get_cloud_fs(dvc, name="without_hash_jobs") + cls, config = get_cloud_fs(dvc, name="without_hash_jobs") + fs = cls(**config) assert fs.hash_jobs == 200 def test_remote_without_hash_jobs_default(dvc): dvc.config["remote"]["without_hash_jobs"] = {"url": "s3://bucket/name"} - fs = get_cloud_fs(dvc, name="without_hash_jobs") + cls, config = get_cloud_fs(dvc, name="without_hash_jobs") + fs = cls(**config) assert fs.hash_jobs == fs.HASH_JOBS @pytest.mark.parametrize("fs_cls", [GSFileSystem, S3FileSystem]) def test_makedirs_not_create_for_top_level_path(fs_cls, dvc, mocker): url = f"{fs_cls.scheme}://bucket/" - fs = fs_cls(dvc, {"url": url}) + fs = fs_cls(url=url) mocked_client = mocker.PropertyMock() mocker.patch.object(fs_cls, "fs", mocked_client) diff --git a/tests/unit/remote/test_remote_tree.py b/tests/unit/remote/test_remote_tree.py index e68f9b1127..502594f88e 100644 --- a/tests/unit/remote/test_remote_tree.py +++ b/tests/unit/remote/test_remote_tree.py @@ -87,21 +87,25 @@ def test_walk_files(remote): assert list(remote.fs.walk_files(remote.fs.path_info / "data")) == files -@pytest.mark.parametrize("remote", [pytest.lazy_fixture("s3")], indirect=True) -def test_copy_preserve_etag_across_buckets(remote, dvc): - s3 = remote.fs +@pytest.mark.parametrize("cloud", [pytest.lazy_fixture("s3")]) +def test_copy_preserve_etag_across_buckets(cloud, dvc): + cloud.gen(FILE_WITH_CONTENTS) + rem = get_remote(dvc, **cloud.config) + s3 = rem.fs s3.fs.mkdir("another/") - another = S3FileSystem( - dvc, {**remote.fs.config, "url": "s3://another", "region": "us-east-1"} - ) + config = cloud.config.copy() + config["url"] = "s3://another" + config["region"] = "us-east-1" + + another = S3FileSystem(**config) - from_info = remote.fs.path_info / "foo" + from_info = rem.fs.path_info / "foo" to_info = another.path_info / "foo" - remote.fs.copy(from_info, to_info) + rem.fs.copy(from_info, to_info) - from_hash = remote.fs.info(from_info)["etag"] + from_hash = rem.fs.info(from_info)["etag"] to_hash = another.info(to_info)["etag"] assert from_hash == to_hash diff --git a/tests/unit/remote/test_slow_link_detection.py b/tests/unit/remote/test_slow_link_detection.py index 01491d9700..49343a8825 100644 --- a/tests/unit/remote/test_slow_link_detection.py +++ b/tests/unit/remote/test_slow_link_detection.py @@ -14,9 +14,8 @@ def timeout_immediately(monkeypatch): def make_remote(): def _make_remote(cache_type=None, should_warn=True): remote = mock.Mock() - remote.repo.config = { - "cache": {"type": cache_type, "slow_link_warning": should_warn} - } + remote.cache_types = cache_type + remote.slow_link_warning = should_warn return remote return _make_remote diff --git a/tests/unit/remote/test_webdav.py b/tests/unit/remote/test_webdav.py index 6584ed2240..bf9d3c4004 100644 --- a/tests/unit/remote/test_webdav.py +++ b/tests/unit/remote/test_webdav.py @@ -12,7 +12,7 @@ # Test minimum requiered configuration (url) def test_init(dvc): config = {"url": url} - fs = WebDAVFileSystem(dvc, config) + fs = WebDAVFileSystem(**config) assert fs.path_info == url @@ -22,7 +22,7 @@ def test_init(dvc): "config", [{"url": url, "user": user}, {"url": userurl}] ) def test_user(dvc, config): - fs = WebDAVFileSystem(dvc, config) + fs = WebDAVFileSystem(**config) assert fs.user == user @@ -30,7 +30,7 @@ def test_user(dvc, config): # Test username extraction from url def test_userurl(dvc): config = {"url": userurl} - fs = WebDAVFileSystem(dvc, config) + fs = WebDAVFileSystem(**config) assert fs.path_info == userurl assert fs.user == user @@ -40,6 +40,6 @@ def test_userurl(dvc): # test password from config def test_password(dvc): config = {"url": url, "user": user, "password": password} - fs = WebDAVFileSystem(dvc, config) + fs = WebDAVFileSystem(**config) assert fs.password == password diff --git a/tests/unit/remote/test_webhdfs.py b/tests/unit/remote/test_webhdfs.py index 3b9c2b7ae9..bef48f6098 100644 --- a/tests/unit/remote/test_webhdfs.py +++ b/tests/unit/remote/test_webhdfs.py @@ -16,7 +16,7 @@ def test_init(dvc): "user": user, } - fs = WebHDFSFileSystem(dvc, config) + fs = WebHDFSFileSystem(**config) assert fs.path_info == url assert fs.token == webhdfs_token assert fs.alias == webhdfs_alias diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index 8b61453870..ea8957504d 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -288,7 +288,7 @@ def test_track(tmp_dir): ], "dct": {"foo": "foo", "bar": "bar", "baz": "baz"}, } - fs = LocalFileSystem(None, config={}) + fs = LocalFileSystem() path = tmp_dir / "params.yaml" dump_yaml(path, d, fs) @@ -323,7 +323,7 @@ def test_track_from_multiple_files(tmp_dir): d1 = {"Train": {"us": {"lr": 10}}} d2 = {"Train": {"us": {"layers": 100}}} - fs = LocalFileSystem(None, config={}) + fs = LocalFileSystem() path1 = tmp_dir / "params.yaml" path2 = tmp_dir / "params2.yaml" dump_yaml(path1, d1, fs) diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 9653d0e347..4865f159f1 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -28,7 +28,7 @@ class TestMtimeAndSize(TestDir): def test(self): - fs = LocalFileSystem(None, {"url": self.root_dir}) + fs = LocalFileSystem(url=self.root_dir) file_time, file_size = get_mtime_and_size(self.DATA, fs) dir_time, dir_size = get_mtime_and_size(self.DATA_DIR, fs) @@ -129,7 +129,7 @@ def test_path_object_and_str_are_valid_types_get_mtime_and_size(tmp_dir): tmp_dir.gen( {"dir": {"dir_file": "dir file content"}, "file": "file_content"} ) - fs = LocalFileSystem(None, {"url": os.fspath(tmp_dir)}) + fs = LocalFileSystem(url=os.fspath(tmp_dir)) time, size = get_mtime_and_size("dir", fs) object_time, object_size = get_mtime_and_size(PathInfo("dir"), fs) diff --git a/tests/unit/utils/test_stream.py b/tests/unit/utils/test_stream.py index 1a0d6d0638..439dcd7ff0 100644 --- a/tests/unit/utils/test_stream.py +++ b/tests/unit/utils/test_stream.py @@ -19,7 +19,7 @@ def test_hashed_stream_reader(tmp_dir): assert stream_reader.read(1) == b"o" assert stream_reader.tell() == 3 - hex_digest = file_md5(foo, LocalFileSystem(None, {})) + hex_digest = file_md5(foo, LocalFileSystem()) assert stream_reader.is_text_file assert hex_digest == stream_reader.hash_info.value @@ -43,6 +43,6 @@ def test_hashed_stream_reader_as_chunks(tmp_dir): assert stream_reader.tell() == actual_size == total_read - hex_digest = file_md5(foo, LocalFileSystem(None, {})) + hex_digest = file_md5(foo, LocalFileSystem()) assert not stream_reader.is_text_file assert hex_digest == stream_reader.hash_info.value diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 558b6f5035..b86c9044d0 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -87,7 +87,7 @@ def test_fix_env_pyenv(path, orig): def test_file_md5(tmp_dir): tmp_dir.gen("foo", "foo content") - fs = LocalFileSystem(None, {}) + fs = LocalFileSystem() assert file_md5("foo", fs) == file_md5(PathInfo("foo"), fs)