Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions dvc/repo/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
from dvc.exceptions import OutputNotFoundError
from dvc.path_info import PathInfo
from dvc.remote.base import RemoteActionNotImplemented
from dvc.scm.tree import BaseTree
from dvc.tree.base import BaseRemoteTree
from dvc.utils import file_md5
from dvc.utils.fs import copy_fobj_to_file, makedirs

logger = logging.getLogger(__name__)


class DvcTree(BaseTree): # pylint:disable=abstract-method
class DvcTree(BaseRemoteTree): # pylint:disable=abstract-method
"""DVC repo tree.

Args:
Expand All @@ -27,7 +27,7 @@ class DvcTree(BaseTree): # pylint:disable=abstract-method
"""

def __init__(self, repo, fetch=False, stream=False):
self.repo = repo
super().__init__(repo, {"url": repo.root_dir})
self.fetch = fetch
self.stream = stream

Expand Down Expand Up @@ -101,14 +101,14 @@ def open(
cache_path = out.cache_path
return open(cache_path, mode=mode, encoding=encoding)

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
try:
self._find_outs(path, strict=False, recursive=True)
return True
except OutputNotFoundError:
return False

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
if not self.exists(path):
return False

Expand All @@ -134,7 +134,7 @@ def isdir(self, path):
except FileNotFoundError:
return True

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
if not self.exists(path):
return False

Expand Down Expand Up @@ -237,7 +237,7 @@ def get_file_hash(self, path_info):
return out.checksum


class RepoTree(BaseTree): # pylint:disable=abstract-method
class RepoTree(BaseRemoteTree): # pylint:disable=abstract-method
"""DVC + git-tracked files tree.

Args:
Expand All @@ -247,7 +247,7 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method
"""

def __init__(self, repo, **kwargs):
self.repo = repo
super().__init__(repo, {"url": repo.root_dir})
if hasattr(repo, "dvc_dir"):
self.dvctree = DvcTree(repo, **kwargs)
else:
Expand All @@ -266,7 +266,9 @@ def stream(self):
return self.dvctree.stream
return False

def open(self, path, mode="r", encoding="utf-8", **kwargs):
def open(
self, path, mode="r", encoding="utf-8", **kwargs
): # pylint: disable=arguments-differ
if "b" in mode:
encoding = None

Expand All @@ -276,20 +278,20 @@ def open(self, path, mode="r", encoding="utf-8", **kwargs):
)
return self.repo.tree.open(path, mode=mode, encoding=encoding)

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
return self.repo.tree.exists(path) or (
self.dvctree and self.dvctree.exists(path)
)

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
return self.repo.tree.isdir(path) or (
self.dvctree and self.dvctree.isdir(path)
)

def isdvc(self, path, **kwargs):
return self.dvctree is not None and self.dvctree.isdvc(path, **kwargs)

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
return self.repo.tree.isfile(path) or (
self.dvctree and self.dvctree.isfile(path)
)
Expand Down Expand Up @@ -393,7 +395,7 @@ def walk(
repo_walk = self.repo.tree.walk(top, topdown=topdown)
yield from self._walk(dvc_walk, repo_walk, dvcfiles=dvcfiles)

def walk_files(self, top, **kwargs):
def walk_files(self, top, **kwargs): # pylint: disable=arguments-differ
for root, _, files in self.walk(top, **kwargs):
for fname in files:
yield PathInfo(root) / fname
Expand Down Expand Up @@ -437,5 +439,5 @@ def copytree(self, top, dest):
copy_fobj_to_file(fobj, dest_dir / fname)

@property
def hash_jobs(self):
def hash_jobs(self): # pylint: disable=invalid-overridden-method
return self.repo.tree.hash_jobs
23 changes: 16 additions & 7 deletions dvc/scm/git/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from funcy import cached_property

from dvc.exceptions import DvcException
from dvc.scm.tree import BaseTree
from dvc.tree.base import BaseRemoteTree
from dvc.utils import relpath

# see git-fast-import(1)
Expand All @@ -21,7 +21,7 @@ def _item_basename(item):
return os.path.basename(item.path)


class GitTree(BaseTree): # pylint:disable=abstract-method
class GitTree(BaseRemoteTree): # pylint:disable=abstract-method
"""Proxies the repo file access methods to Git objects"""

def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None):
Expand All @@ -31,6 +31,7 @@ def __init__(self, git, rev, use_dvcignore=False, dvcignore_root=None):
git (dvc.scm.Git):
branch:
"""
super().__init__(None, {})
self.git = git
self.rev = rev
self.use_dvcignore = use_dvcignore
Expand All @@ -52,7 +53,9 @@ def dvcignore(self):
self.use_dvcignore = True
return ret

def open(self, path, mode="r", encoding="utf-8"):
def open(
self, path, mode="r", encoding="utf-8"
): # pylint: disable=arguments-differ
Comment on lines +56 to +58
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These `arguments-differ` suppressions will be addressed a bit later. We also need to stop setting `mode` and `encoding` like that, and instead do the same thing pathlib's `Path` does (we've already started doing this in tests/remotes/).

assert mode in {"r", "rb"}

relative_path = relpath(path, self.git.working_dir)
Expand All @@ -73,23 +76,23 @@ def open(self, path, mode="r", encoding="utf-8"):
return io.BytesIO(data)
return io.StringIO(data.decode(encoding))

def exists(self, path):
def exists(self, path): # pylint: disable=arguments-differ
if self._git_object_by_path(path) is None:
return False

return not self.dvcignore.is_ignored_file(
path
) and not self.dvcignore.is_ignored_dir(path)

def isdir(self, path):
def isdir(self, path): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
if obj is None:
return False
if obj.mode != GIT_MODE_DIR:
return False
return not self.dvcignore.is_ignored_dir(path)

def isfile(self, path):
def isfile(self, path): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
if obj is None:
return False
Expand Down Expand Up @@ -214,7 +217,13 @@ def to_ctime(git_time):
)

@property
def hash_jobs(self):
def hash_jobs(self): # pylint: disable=invalid-overridden-method
# NOTE: gitpython is not threadsafe. See
# https://github.com/iterative/dvc/issues/4079
return 1

def walk_files(self, top): # pylint: disable=arguments-differ
for root, _, files in self.walk(top):
for file in files:
# NOTE: os.path.join is ~5.5 times slower
yield f"{root}{os.sep}{file}"
39 changes: 0 additions & 39 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,3 @@
import os


class BaseTree:
"""Abstract class to represent access to files"""

@property
def tree_root(self):
pass

def open(self, path, mode="r", encoding="utf-8"):
"""Open file and return a stream."""

def exists(self, path):
"""Test whether a path exists."""

def isdir(self, path):
"""Return true if the pathname refers to an existing directory."""

def isfile(self, path):
"""Test whether a path is a regular file"""

def walk(self, top, topdown=True, onerror=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
- no support for symlinks
"""

def walk_files(self, top):
for root, _, files in self.walk(top):
for file in files:
# NOTE: os.path.join is ~5.5 times slower
yield f"{root}{os.sep}{file}"

def makedirs(self, path, mode=0o777, exist_ok=True):
raise NotImplementedError


def is_working_tree(tree):
from dvc.tree.local import LocalRemoteTree

Expand Down