Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions dvc/ignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import os

from dulwich.ignore import match_pattern, read_ignore_patterns
from dvc.utils.compat import cast_bytes
from dvc.utils.fs import get_parent_dirs_up_to


class DvcIgnoreFileHandler(object):
    """Read ignore files through a tree object and split their patterns.

    The tree must provide ``open(path, binary=True)`` returning a
    context-managed binary stream, and a ``tree_root`` attribute.
    """

    def __init__(self, tree):
        self.tree = tree

    def read_patterns(self, path):
        """Open ``path`` through the tree in binary mode and parse it.

        Returns:
            The ``(negate_patterns, patterns)`` pair built by
            ``_read_patterns``.
        """
        with self.tree.open(path, binary=True) as fobj:
            return self._read_patterns(fobj)

    def get_repo_root(self):
        """Return the ``tree_root`` of the underlying tree."""
        return self.tree.tree_root

    def _read_patterns(self, binary_stream):
        """Partition the patterns read from ``binary_stream``.

        A pattern whose first non-whitespace byte is ``!`` is collected as a
        negate pattern (the ``!`` prefix is kept); every other pattern is a
        regular one. Input order is preserved within each group.

        Returns:
            A ``(negate_patterns, patterns)`` tuple of lists.
        """
        negated, regular = [], []
        for line in read_ignore_patterns(binary_stream):
            is_negation = line.lstrip().startswith(b"!")
            bucket = negated if is_negation else regular
            bucket.append(line)
        return negated, regular


class DvcIgnore(object):
    """Interface for ignore rules applied to the entries of one directory.

    A rule is a callable taking ``(root, dirs, files)``; the concrete rules
    in this module return the filtered ``(dirs, files)`` pair.
    """

    # Name of the per-directory ignore file.
    DVCIGNORE_FILE = ".dvcignore"

    def __call__(self, root, dirs, files):
        """Filter ``dirs`` and ``files`` found in ``root``; must be overridden."""
        raise NotImplementedError()


class DvcIgnoreFromFile(DvcIgnore):
    """Ignore rule backed by the patterns of a single ignore file.

    Patterns are matched against paths expressed relative to the directory
    that contains the ignore file. Paths outside that directory are never
    matched, so an ignore file only affects its own subtree.
    """

    def __init__(self, ignore_file_path, ignore_handler):
        """Load the patterns of ``ignore_file_path``.

        Args:
            ignore_file_path: path of the ignore file to read.
            ignore_handler: object whose ``read_patterns(path)`` returns a
                ``(negate_patterns, patterns)`` pair.
        """
        self.ignore_file_path = ignore_file_path
        self.dirname = os.path.normpath(os.path.dirname(ignore_file_path))
        self.negate_patterns, self.patterns = ignore_handler.read_patterns(
            ignore_file_path
        )

    def __call__(self, root, dirs, files):
        """Return ``(dirs, files)`` without the entries of ``root`` that match."""
        files = [f for f in files if not self.matches(root, f)]
        dirs = [d for d in dirs if not self.matches(root, d)]

        return dirs, files

    def get_match(self, abs_path):
        """Find the pattern that causes ``abs_path`` to be ignored.

        Returns:
            ``(abs_path, pattern, ignore_file_path)`` for the first regular
            pattern that matches, provided no negate pattern matches the
            path; ``None`` otherwise, including when ``abs_path`` lies
            outside the directory of the ignore file.
        """
        rel_path = os.path.relpath(abs_path, self.dirname)

        # A relative path that climbs out of ``self.dirname`` belongs to a
        # different subtree. Without this guard a basename pattern would
        # still match paths such as "../sibling/name".
        if rel_path == os.pardir or rel_path.startswith(os.pardir + os.sep):
            return None

        if os.name == "nt":
            # Patterns use forward slashes.
            rel_path = rel_path.replace("\\", "/")
        rel_path = cast_bytes(rel_path, "utf-8")

        for pattern in self.patterns:
            if match_pattern(rel_path, pattern):
                if self._no_negate_pattern_matches(rel_path):
                    return (abs_path, pattern, self.ignore_file_path)
                # The negate check does not depend on ``pattern``, so no
                # later pattern can produce a match either.
                return None
        return None

    def matches(self, dirname, basename):
        """Tell whether the entry ``basename`` of ``dirname`` is ignored."""
        return self.get_match(os.path.join(dirname, basename)) is not None

    def _no_negate_pattern_matches(self, path):
        """Return True when none of the negate patterns matches ``path``."""
        return not any(match_pattern(path, p) for p in self.negate_patterns)

    def __hash__(self):
        return hash(self.ignore_file_path)


class DvcIgnoreConstant(DvcIgnore):
    """Base for ignore rules that are configured with one fixed entry name."""

    def __init__(self, basename):
        # Entry name stored for the rule to compare against.
        self.basename = basename
Comment thread
efiop marked this conversation as resolved.
Outdated


class DvcIgnoreDir(DvcIgnoreConstant):
    """Ignore rule that drops the directory named exactly ``self.basename``."""

    def __call__(self, root, dirs, files):
        """Return ``(dirs, files)`` with the matching directory name removed.

        ``files`` is passed through untouched and ``root`` is not consulted.
        """
        target = self.basename
        kept = [name for name in dirs if name != target]
        return kept, files


class DvcIgnoreFile(DvcIgnoreConstant):
    """Ignore rule that drops the file named exactly ``self.basename``."""

    def __call__(self, root, dirs, files):
        """Return ``(dirs, files)`` with the matching file name removed.

        ``dirs`` is passed through untouched and ``root`` is not consulted.
        """
        target = self.basename
        kept = [name for name in files if name != target]
        return dirs, kept


class DvcIgnoreFilter(object):
    """Callable that chains ignore rules over the entries of a directory.

    The filter starts with fixed rules for ``.git``, ``.hg``, ``.dvc`` and
    ``.dvcignore``. When an ignore file handler is supplied, rules read from
    ignore files are appended as directories are processed. Rules are never
    removed, so each call applies every rule collected so far.
    """

    def __init__(self, wdir, ignore_file_handler=None):
        """
        Args:
            wdir: directory the walk starts from.
            ignore_file_handler: optional handler used to read ignore files
                and to obtain the repo root; without it only the fixed
                rules are applied.
        """
        default_dirs = [DvcIgnoreDir(name) for name in (".git", ".hg", ".dvc")]
        self.ignores = default_dirs + [DvcIgnoreFile(".dvcignore")]

        self.ignore_file_handler = ignore_file_handler
        self._process_ignores_in_parent_dirs(wdir)

    def _process_ignores_in_parent_dirs(self, wdir):
        """Load ignore files from the directories between ``wdir`` and the repo root.

        Does nothing when no handler was supplied.
        """
        handler = self.ignore_file_handler
        if not handler:
            return

        start = os.path.normpath(os.path.abspath(wdir))
        stop = handler.get_repo_root()
        for directory in get_parent_dirs_up_to(start, stop):
            self.update(directory)

    def update(self, wdir):
        """Append a rule for the ignore file of ``wdir`` if that file exists."""
        candidate = os.path.join(wdir, DvcIgnore.DVCIGNORE_FILE)
        # NOTE(review): existence is checked on the local filesystem, while
        # the file itself is read through the handler - confirm this is the
        # intended behaviour for trees other than the working tree.
        if not os.path.exists(candidate):
            return

        self.ignores.append(
            DvcIgnoreFromFile(
                candidate, ignore_handler=self.ignore_file_handler
            )
        )

    def __call__(self, root, dirs, files):
        """Apply every collected rule to the entries of ``root``.

        With a handler present, the ignore file of ``root`` (if any) is
        loaded first.

        Returns:
            The filtered ``(dirs, files)`` pair.
        """
        if self.ignore_file_handler:
            self.update(root)

        for rule in self.ignores:
            dirs, files = rule(root, dirs, files)

        return dirs, files
10 changes: 7 additions & 3 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
OutputNotFoundError,
TargetNotDirectoryError,
)

from dvc.ignore import DvcIgnoreFileHandler

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -58,7 +58,7 @@ def __init__(self, root_dir=None):

self.config = Config(self.dvc_dir)

self.tree = WorkingTree()
self.tree = WorkingTree(self.root_dir)

self.scm = SCM(self.root_dir, repo=self)
self.lock = Lock(self.dvc_dir)
Expand Down Expand Up @@ -390,7 +390,11 @@ def stages(self, from_directory=None, check_dag=True):

stages = []
outs = []
for root, dirs, files in self.tree.walk(from_directory):

ignore_file_handler = DvcIgnoreFileHandler(self.tree)
for root, dirs, files in self.tree.walk(
from_directory, ignore_file_handler=ignore_file_handler
):
for fname in files:
path = os.path.join(root, fname)
if not Stage.is_valid_filename(path):
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/brancher.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def brancher( # noqa: E302
if self.scm.is_dirty():
from dvc.scm.tree import WorkingTree

self.tree = WorkingTree()
self.tree = WorkingTree(self.root_dir)
yield "Working Tree"

if all_branches:
Expand Down
39 changes: 32 additions & 7 deletions dvc/scm/git/tree.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import errno
import os

from dvc.utils.compat import StringIO
from dvc.ignore import DvcIgnoreFilter
from dvc.utils.compat import StringIO, BytesIO

from dvc.scm.tree import BaseTree

Expand All @@ -24,7 +25,11 @@ def __init__(self, git, rev):
self.git = git
self.rev = rev

def open(self, path):
@property
def tree_root(self):
return self.git.working_dir

def open(self, path, binary=False):

relpath = os.path.relpath(path, self.git.working_dir)

Expand All @@ -39,7 +44,10 @@ def open(self, path):
# read it immediately, also it needs to be to decoded if we follow
# the `open()` behavior (since data_stream.read() returns bytes,
# and `open` with default "r" mode returns str)
return StringIO(obj.data_stream.read().decode("utf-8"))
data = obj.data_stream.read()
if binary:
return BytesIO(data)
return StringIO(data.decode("utf-8"))

def exists(self, path):
return self.git_object_by_path(path) is not None
Expand Down Expand Up @@ -81,8 +89,13 @@ def git_object_by_path(self, path):
tree = tree[i]
return tree

def _walk(self, tree, topdown=True):

def _walk(
self,
tree,
topdown=True,
ignore_file_handler=None,
dvc_ignore_filter=None,
):
dirs, nondirs = [], []
for i in tree:
if i.mode == GIT_MODE_DIR:
Expand All @@ -91,14 +104,26 @@ def _walk(self, tree, topdown=True):
nondirs.append(i.name)

if topdown:
if not dvc_ignore_filter:
dvc_ignore_filter = DvcIgnoreFilter(
tree.abspath, ignore_file_handler=ignore_file_handler
)
dirs, nondirs = dvc_ignore_filter(tree.path, dirs, nondirs)
yield os.path.normpath(tree.path), dirs, nondirs

for i in dirs:
for x in self._walk(tree[i], topdown=True):
for x in self._walk(
tree[i],
topdown=True,
ignore_file_handler=ignore_file_handler,
dvc_ignore_filter=dvc_ignore_filter,
):
yield x

if not topdown:
yield os.path.normpath(tree.path), dirs, nondirs

def walk(self, top, topdown=True):
def walk(self, top, topdown=True, ignore_file_handler=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand Down
31 changes: 23 additions & 8 deletions dvc/scm/tree.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import os

from dvc.utils import dvc_walk
from dvc.utils.compat import open


class BaseTree(object):
"""Abstract class to represent access to files"""

def open(self, path):
@property
def tree_root(self):
pass

def open(self, path, binary=False):
"""Open file and return a stream."""

def exists(self, path):
Expand All @@ -18,7 +23,7 @@ def isdir(self, path):
def isfile(self, path):
"""Test whether a path is a regular file"""

def walk(self, top, topdown=True):
def walk(self, top, topdown=True, ignore_file_handler=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand All @@ -30,8 +35,17 @@ def walk(self, top, topdown=True):
class WorkingTree(BaseTree):
"""Proxies the repo file access methods to working tree files"""

def open(self, path):
def __init__(self, repo_root=os.getcwd()):
self.repo_root = repo_root

@property
def tree_root(self):
return self.repo_root

def open(self, path, binary=False):
"""Open file and return a stream."""
if binary:
return open(path, "rb")
return open(path, encoding="utf-8")

def exists(self, path):
Expand All @@ -46,7 +60,7 @@ def isfile(self, path):
"""Test whether a path is a regular file"""
return os.path.isfile(path)

def walk(self, top, topdown=True):
def walk(self, top, topdown=True, ignore_file_handler=None):
"""Directory tree generator.

See `os.walk` for the docs. Differences:
Expand All @@ -57,9 +71,10 @@ def walk(self, top, topdown=True):
def onerror(e):
raise e

for root, dirs, files in os.walk(
top, topdown=topdown, onerror=onerror
for root, dirs, files in dvc_walk(
Comment thread
pared marked this conversation as resolved.
Outdated
top,
topdown=topdown,
onerror=onerror,
ignore_file_handler=ignore_file_handler,
):
if topdown:
dirs[:] = [i for i in dirs if i not in (".git", ".hg", ".dvc")]
yield os.path.normpath(root), dirs, files
35 changes: 33 additions & 2 deletions dvc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,39 @@ def load_stage_file_fobj(fobj, path):
raise StageFileCorruptedError(path)


def walk_files(directory):
for root, _, files in os.walk(str(directory)):
def dvc_walk(
    top,
    topdown=True,
    onerror=None,
    followlinks=False,
    ignore_file_handler=None,
):
    """
    Walk ``top`` like `os.walk`, filtering entries with DvcIgnoreFilter.

    Filtering only happens for top-down walks: the directory and file lists
    are rewritten in place before being yielded, which also stops `os.walk`
    from descending into removed directories. Bottom-up walks are yielded
    unfiltered.
    """
    if topdown:
        # Imported lazily, inside the function, as in the original code.
        from dvc.ignore import DvcIgnoreFilter

        prune = DvcIgnoreFilter(top, ignore_file_handler=ignore_file_handler)
    else:
        prune = None

    walker = os.walk(
        top, topdown=topdown, onerror=onerror, followlinks=followlinks
    )
    for root, dirs, files in walker:
        if prune:
            dirs[:], files[:] = prune(root, dirs, files)

        yield root, dirs, files


def walk_files(directory, ignore_file_handler=None):
    """Yield the path of every file that `dvc_walk` reports under ``directory``.

    ``directory`` is converted with ``str()`` before walking; each yielded
    path is the walk root joined with the file name.
    """
    walker = dvc_walk(str(directory), ignore_file_handler=ignore_file_handler)
    for dirpath, _dirs, filenames in walker:
        for name in filenames:
            yield os.path.join(dirpath, name)

Expand Down
3 changes: 2 additions & 1 deletion dvc/utils/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def _makedirs(name, mode=0o777, exist_ok=False):
if is_py2:
from urlparse import urlparse, urljoin # noqa: F401
from StringIO import StringIO # noqa: F401
from io import BytesIO # noqa: F401
from BaseHTTPServer import HTTPServer # noqa: F401
from SimpleHTTPServer import SimpleHTTPRequestHandler # noqa: F401
import ConfigParser # noqa: F401
Expand All @@ -111,7 +112,7 @@ def _makedirs(name, mode=0o777, exist_ok=False):
elif is_py3:
from os import makedirs # noqa: F401
from urllib.parse import urlparse, urljoin # noqa: F401
from io import StringIO # noqa: F401
from io import StringIO, BytesIO # noqa: F401
from http.server import ( # noqa: F401
HTTPServer, # noqa: F401
SimpleHTTPRequestHandler, # noqa: F401
Expand Down
Loading