Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

from pathspec.patterns import GitWildMatchPattern
from pathspec.util import normalize_file
from pygtrie import StringTrie

from dvc.path_info import PathInfo
from dvc.pathspec_math import PatternInfo, merge_patterns
from dvc.system import System
from dvc.utils import relpath
from dvc.utils.collections import PathStringTrie

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -156,7 +156,7 @@ class DvcIgnoreFilterNoop:
def __init__(self, tree, root_dir):
pass

def __call__(self, root, dirs, files):
def __call__(self, root, dirs, files, **kwargs):
return dirs, files

def is_ignored_dir(self, _):
Expand All @@ -183,10 +183,11 @@ def __init__(self, tree, root_dir):

self.tree = tree
self.root_dir = root_dir
self.ignores_trie_tree = StringTrie(separator=os.sep)
self.ignores_trie_tree = PathStringTrie()
self.ignores_trie_tree[root_dir] = DvcIgnorePatterns(
default_ignore_patterns, root_dir
)
self._ignored_subrepos = PathStringTrie()
self._update(self.root_dir)

def _update(self, dirname):
Expand Down Expand Up @@ -222,7 +223,10 @@ def _update(self, dirname):
def _update_sub_repo(self, root, dirs):
for d in dirs:
if self._is_dvc_repo(root, d):
Comment thread
skshetry marked this conversation as resolved.
Outdated
new_pattern = DvcIgnorePatterns(["/{}/".format(d)], root)
self._ignored_subrepos[root] = self._ignored_subrepos.get(
root, set()
) | {d}
new_pattern = DvcIgnorePatterns([f"/{d}/"], root)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that f-strings are better than format(), but there wasn't really a reason to change it in this PR 🙂

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to change that earlier while I was experimenting with it. Also, an f-string is easy to read and understand (speed does not matter here, but it could have been another reason), so I kept it.

old_pattern = self.ignores_trie_tree.longest_prefix(root).value
if old_pattern:
self.ignores_trie_tree[root] = DvcIgnorePatterns(
Expand All @@ -236,12 +240,13 @@ def _update_sub_repo(self, root, dirs):
else:
self.ignores_trie_tree[root] = new_pattern

def __call__(self, root, dirs, files):
def __call__(self, root, dirs, files, ignore_subrepos=True):
ignore_pattern = self._get_trie_pattern(root)
if ignore_pattern:
return ignore_pattern(root, dirs, files)
else:
return dirs, files
dirs, files = ignore_pattern(root, dirs, files)
if not ignore_subrepos:
dirs.extend(self._ignored_subrepos.get(root, []))
return dirs, files

def _get_trie_pattern(self, dirname):
ignore_pattern = self.ignores_trie_tree.get(dirname)
Expand Down Expand Up @@ -277,8 +282,14 @@ def _is_ignored(self, path, is_dir=False):
else:
return False

def is_ignored_dir(self, path):
def _is_subrepo(self, path):
dirname, basename = os.path.split(os.path.normpath(path))
return basename in self._ignored_subrepos.get(dirname, set())

def is_ignored_dir(self, path, ignore_subrepos=True):
path = os.path.abspath(path)
if not ignore_subrepos:
return not self._is_subrepo(path)
if path == self.root_dir:
return False

Expand Down
20 changes: 16 additions & 4 deletions dvc/tree/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ def exists(
path
) and not self.dvcignore.is_ignored_dir(path)

def isdir(self, path): # pylint: disable=arguments-differ
def isdir(
self, path, use_dvcignore=True
): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
if obj is None:
return False
if obj.mode != GIT_MODE_DIR:
return False
return not self.dvcignore.is_ignored_dir(path)
return not (use_dvcignore and self.dvcignore.is_ignored_dir(path))

def isfile(self, path): # pylint: disable=arguments-differ
obj = self._git_object_by_path(path)
Expand Down Expand Up @@ -156,7 +158,14 @@ def _walk(self, tree, topdown=True):
if not topdown:
yield os.path.normpath(tree.abspath), dirs, nondirs

def walk(self, top, topdown=True, onerror=None, use_dvcignore=True):
def walk(
self,
top,
topdown=True,
onerror=None,
use_dvcignore=True,
ignore_subrepos=True,
):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand All @@ -176,7 +185,10 @@ def walk(self, top, topdown=True, onerror=None, use_dvcignore=True):
for root, dirs, files in self._walk(tree, topdown=topdown):
if use_dvcignore:
dirs[:], files[:] = self.dvcignore(
os.path.abspath(root), dirs, files
os.path.abspath(root),
dirs,
files,
ignore_subrepos=ignore_subrepos,
)
yield root, dirs, files

Expand Down
21 changes: 16 additions & 5 deletions dvc/tree/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,26 @@ def isfile(self, path_info):

return not self.dvcignore.is_ignored_file(path_info)

def isdir(self, path_info):
def isdir(
self, path_info, use_dvcignore=True
): # pylint: disable=arguments-differ
if not os.path.isdir(path_info):
return False

return not self.dvcignore.is_ignored_dir(path_info)
return not (use_dvcignore and self.dvcignore.is_ignored_dir(path_info))

def iscopy(self, path_info):
return not (
System.is_symlink(path_info) or System.is_hardlink(path_info)
)

def walk(self, top, topdown=True, onerror=None, use_dvcignore=True):
def walk(
self,
top,
topdown=True,
onerror=None,
use_dvcignore=True,
ignore_subrepos=True,
):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand All @@ -108,7 +116,10 @@ def walk(self, top, topdown=True, onerror=None, use_dvcignore=True):
):
if use_dvcignore:
dirs[:], files[:] = self.dvcignore(
os.path.abspath(root), dirs, files
os.path.abspath(root),
dirs,
files,
ignore_subrepos=ignore_subrepos,
)

yield os.path.normpath(root), dirs, files
Expand Down
11 changes: 11 additions & 0 deletions dvc/utils/collections.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
import os
from collections.abc import Mapping

from pygtrie import StringTrie as _StringTrie


class PathStringTrie(_StringTrie):
    """String trie whose keys are split on the platform path separator.

    Whatever ``separator`` the caller passes is replaced with ``os.sep``
    before the arguments are forwarded to the parent constructor.
    """

    def __init__(self, *args, **kwargs):
        options = {**kwargs, "separator": os.sep}
        super().__init__(*args, **options)


def apply_diff(src, dest):
"""Recursively apply changes from src to dest.
Expand Down
18 changes: 12 additions & 6 deletions tests/dir_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@


class TmpDir(pathlib.Path):
scheme = "local"

def __new__(cls, *args, **kwargs):
if cls is TmpDir:
cls = ( # pylint: disable=self-cls-assignment
Expand All @@ -87,7 +89,7 @@ def __new__(cls, *args, **kwargs):
self._init()
return self

def init(self, *, scm=False, dvc=False):
def init(self, *, scm=False, dvc=False, subdir=False):
from dvc.repo import Repo
from dvc.scm.git import Git

Expand All @@ -100,7 +102,9 @@ def init(self, *, scm=False, dvc=False):
git_init(str_path)
if dvc:
self.dvc = Repo.init(
str_path, no_scm=not scm and not hasattr(self, "scm")
str_path,
no_scm=not scm and not hasattr(self, "scm"),
subdir=subdir,
)
if scm:
self.scm = self.dvc.scm if hasattr(self, "dvc") else Git(str_path)
Expand All @@ -123,10 +127,10 @@ def gen(self, struct, text=""):
if isinstance(struct, (str, bytes, pathlib.PurePath)):
struct = {struct: text}

self._gen(struct)
return struct.keys()
return self._gen(struct)

def _gen(self, struct, prefix=None):
paths = []
for name, contents in struct.items():
path = (prefix or self) / name

Expand All @@ -141,6 +145,8 @@ def _gen(self, struct, prefix=None):
path.write_bytes(contents)
else:
path.write_text(contents, encoding="utf-8")
paths.append(path)
return paths

def dvc_gen(self, struct, text="", commit=None):
paths = self.gen(struct, text)
Expand Down Expand Up @@ -249,10 +255,10 @@ class PosixTmpDir(TmpDir, pathlib.PurePosixPath):

@pytest.fixture(scope="session")
def make_tmp_dir(tmp_path_factory, request):
def make(name, *, scm=False, dvc=False):
def make(name, *, scm=False, dvc=False, **kwargs):
path = tmp_path_factory.mktemp(name) if isinstance(name, str) else name
new_dir = TmpDir(path)
new_dir.init(scm=scm, dvc=dvc)
new_dir.init(scm=scm, dvc=dvc, **kwargs)
request.addfinalizer(new_dir.close)
return new_dir

Expand Down
22 changes: 22 additions & 0 deletions tests/func/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,28 @@ def test_ignore_subrepo(tmp_dir, scm, dvc):
assert subrepo.tree.exists(PathInfo(subrepo_dir / "foo"))


def test_ignore_resurface_subrepo(tmp_dir, scm, dvc):
    """Subrepo dirs are filtered by default and returned on request."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")
    subrepo_dir = tmp_dir / "subdir"
    subrepo_dir.mkdir()
    with subrepo_dir.chdir():
        Repo.init(subdir=True)

    # Presumably ``dvcignore`` is cached on the tree instance; dropping the
    # entry forces it to be rebuilt now that the subrepo exists.
    dvc.tree.__dict__.pop("dvcignore", None)

    root_path = os.fspath(tmp_dir)
    subrepo_path = os.fspath(subrepo_dir)
    dirs = ["subdir"]
    files = ["foo"]

    filtered = dvc.tree.dvcignore(root_path, dirs, files)
    assert filtered == ([], files)

    resurfaced = dvc.tree.dvcignore(
        root_path, dirs, files, ignore_subrepos=False
    )
    assert resurfaced == (dirs, files)

    ignored_by_default = dvc.tree.dvcignore.is_ignored_dir(subrepo_path)
    assert ignored_by_default

    ignored_when_resurfaced = dvc.tree.dvcignore.is_ignored_dir(
        subrepo_path, ignore_subrepos=False
    )
    assert not ignored_when_resurfaced


def test_ignore_blank_line(tmp_dir, dvc):
tmp_dir.gen({"dir": {"ignored": "text", "other": "text2"}})
tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "foo\n\ndir/ignored")
Expand Down
23 changes: 23 additions & 0 deletions tests/func/test_tree.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from operator import itemgetter
from os.path import join

from dvc.path_info import PathInfo
Expand Down Expand Up @@ -245,3 +246,25 @@ def test_cleantree_subrepo(tmp_dir, dvc, scm, monkeypatch):
assert subrepo.tree.isfile(path / "foo")
assert subrepo.tree.exists(path / "dir")
assert subrepo.tree.isdir(path / "dir")


def test_walk_dont_ignore_subrepos(tmp_dir, scm, dvc):
    """``walk`` hides subrepo dirs unless ``ignore_subrepos=False``."""
    tmp_dir.dvc_gen({"foo": "foo"}, commit="add foo")
    subrepo_dir = tmp_dir / "subdir"
    subrepo_dir.mkdir()
    with subrepo_dir.chdir():
        Repo.init(subdir=True)
    scm.add(["subdir"])
    scm.commit("Add subrepo")

    # Working-tree view first, then the committed (HEAD) view.
    trees = (dvc.tree, scm.get_tree("HEAD", use_dvcignore=True))
    top = os.fspath(tmp_dir)

    def top_level_dirs(tree, **walk_kwargs):
        # Only the first entry yielded by ``walk`` is inspected.
        first_entry = next(tree.walk(top, **walk_kwargs))
        return first_entry[1]

    for tree in trees:
        assert top_level_dirs(tree) == []

    for tree in trees:
        assert top_level_dirs(tree, ignore_subrepos=False) == ["subdir"]