Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
4539448
Decoupling dvcignore and fs
Apr 14, 2021
37d835b
Solve all of the argument errors.
Apr 15, 2021
e903ad6
add dvcignore to dvc list
Apr 15, 2021
ddabe4b
Solve some test_ignore cases
karajan1001 Apr 17, 2021
08f2af6
add dvcignore to checkout.
karajan1001 Apr 19, 2021
579df8b
Solve two tests
karajan1001 Apr 20, 2021
0a14199
Get rid of ignore_filter
karajan1001 Apr 21, 2021
5da4476
Add ignore examine to download
karajan1001 Apr 23, 2021
96f5c60
Solve a test issue
karajan1001 Apr 23, 2021
fb2baab
Solve dvcignore and subrepo problems
karajan1001 Apr 26, 2021
948e8f2
Solve sub repo error
karajan1001 Apr 26, 2021
9291261
Revert some changes
karajan1001 Apr 27, 2021
bed503d
pass all tests
karajan1001 Apr 27, 2021
ff3a647
Remove debug codes
karajan1001 Apr 27, 2021
ecb3def
Some final exams
karajan1001 Apr 28, 2021
3335f44
dvcignore trie tree only update when needed
karajan1001 Apr 30, 2021
0228218
reuse of the dir result in path walk.
karajan1001 Apr 30, 2021
efd422f
Some change after code review.
karajan1001 May 3, 2021
049576a
Recover something from false `--force` push
karajan1001 May 3, 2021
87fa570
add abspath trans to dvcignore
karajan1001 May 3, 2021
50c41d9
pass pylint
karajan1001 May 3, 2021
dcb6060
pylint related
karajan1001 May 3, 2021
550ed53
Update dvc/fs/repo.py
karajan1001 May 4, 2021
d2a39f6
Changes related to reviews.
karajan1001 May 4, 2021
e2c63b1
dvcignore call api change
karajan1001 May 6, 2021
0235086
Get rid of PathInfo in test_ignore.py
karajan1001 May 6, 2021
7b1813a
fs.walk follow the arguments of os.walk
karajan1001 May 6, 2021
264f791
Dvcignore as parameters
karajan1001 May 6, 2021
f8e0e59
pass dvcignore as arguments to checkouts
karajan1001 May 6, 2021
09fe320
Decoupling state and repo
karajan1001 May 6, 2021
d54edee
Update dvc/dvcfile.py
karajan1001 May 7, 2021
7a38d12
Update dvc/objects/stage.py
karajan1001 May 7, 2021
a55517a
Solve some small problems
karajan1001 May 10, 2021
6dab04b
Add tests to verify that dependencies are now not ignored
karajan1001 May 10, 2021
2cfc1a6
Update dvc/ignore.py
efiop May 10, 2021
583b366
Update dvc/fs/git.py
efiop May 10, 2021
2971123
Update dvc/repo/__init__.py
efiop May 10, 2021
17bb445
Some small improvements
karajan1001 May 11, 2021
7d394d4
pass black
karajan1001 May 11, 2021
d569ba1
Refactor is_ignored
karajan1001 May 11, 2021
adc3e87
Update dvc/output/base.py
efiop May 11, 2021
b2086ef
repofs: use dvcignore in exists/isdir/isfile
May 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions dvc/checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
ConfirmRemoveError,
DvcException,
)
from dvc.ignore import DvcIgnoreFilter
from dvc.objects import check, load
from dvc.objects.errors import ObjectFormatError
from dvc.objects.stage import stage
from dvc.remote.slow_link_detection import ( # type: ignore[attr-defined]
slow_link_guard,
)
from dvc.types import Optional

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -175,8 +177,13 @@ def _checkout_file(
return modified


def _remove_redundant_files(path_info, fs, obj, cache, force):
existing_files = set(fs.walk_files(path_info))
def _remove_redundant_files(
path_info, fs, obj, cache, force, dvcignore: Optional[DvcIgnoreFilter],
):
if dvcignore:
existing_files = set(dvcignore.walk_files(fs, path_info))
else:
existing_files = set(fs.walk_files(path_info))

needed_files = {path_info.joinpath(*key) for key, _ in obj}
redundant_files = existing_files - needed_files
Expand All @@ -187,9 +194,16 @@ def _remove_redundant_files(path_info, fs, obj, cache, force):


def _checkout_dir(
path_info, fs, obj, cache, force, progress_callback=None, relink=False,
path_info,
fs,
obj,
cache,
force,
progress_callback=None,
relink=False,
dvcignore: Optional[DvcIgnoreFilter] = None,
):
modified = False, False
modified = False
# Create dir separately so that dir is created
# even if there are no files in it
if not fs.exists(path_info):
Expand All @@ -212,7 +226,10 @@ def _checkout_dir(
modified = True

modified = (
_remove_redundant_files(path_info, fs, obj, cache, force) or modified
_remove_redundant_files(
path_info, fs, obj, cache, force, dvcignore=dvcignore
)
or modified
)

fs.repo.state.save(path_info, fs, obj.hash_info)
Expand All @@ -229,14 +246,22 @@ def _checkout(
force=False,
progress_callback=None,
relink=False,
dvcignore: Optional[DvcIgnoreFilter] = None,
):
if not obj.hash_info.isdir:
ret = _checkout_file(
path_info, fs, obj, cache, force, progress_callback, relink
)
else:
ret = _checkout_dir(
path_info, fs, obj, cache, force, progress_callback, relink,
path_info,
fs,
obj,
cache,
force,
progress_callback,
relink,
dvcignore=dvcignore,
)

fs.repo.state.save_link(path_info, fs)
Expand All @@ -253,6 +278,7 @@ def checkout(
progress_callback=None,
relink=False,
quiet=False,
dvcignore: Optional[DvcIgnoreFilter] = None,
):
if path_info.scheme not in ["local", cache.fs.scheme]:
raise NotImplementedError
Expand All @@ -269,7 +295,7 @@ def checkout(
failed = path_info

elif not relink and not _changed(path_info, fs, obj, cache):
logger.trace("Data '%s' didn't change.", path_info)
logger.trace("Data '%s' didn't change.", path_info) # type: ignore
Comment thread
efiop marked this conversation as resolved.
Outdated
skip = True
else:
try:
Expand All @@ -296,5 +322,12 @@ def checkout(
logger.debug("Checking out '%s' with cache '%s'.", path_info, obj)

return _checkout(
path_info, fs, obj, cache, force, progress_callback, relink,
path_info,
fs,
obj,
cache,
force,
progress_callback,
relink,
dvcignore=dvcignore,
)
2 changes: 1 addition & 1 deletion dvc/command/check_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
class CmdCheckIgnore(CmdBase):
def __init__(self, args):
super().__init__(args)
self.ignore_filter = self.repo.fs.dvcignore
self.ignore_filter = self.repo.dvcignore

def _show_results(self, result):
if not result.match and not self.args.non_matching:
Expand Down
2 changes: 1 addition & 1 deletion dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _load_config(self, level):
filename = self.files[level]
fs = self._get_fs(level)

if fs.exists(filename, use_dvcignore=False):
if fs.exists(filename):
with fs.open(filename) as fobj:
conf_obj = ConfigObj(fobj)
else:
Expand Down
10 changes: 2 additions & 8 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def _get_hash(self, locked=True):
path_info,
repo.repo_fs,
self.repo.odb.local.fs.PARAM_CHECKSUM,
follow_subrepos=False,
).hash_info

def workspace_status(self):
Expand Down Expand Up @@ -95,16 +94,11 @@ def download(self, to, jobs=None):
except (NoOutputOrStageError, NoRemoteError):
pass
obj = stage(
odb,
path_info,
repo.repo_fs,
odb.fs.PARAM_CHECKSUM,
jobs=jobs,
follow_subrepos=False,
odb, path_info, repo.repo_fs, odb.fs.PARAM_CHECKSUM, jobs=jobs,
)
save(odb, obj, jobs=jobs)

checkout(to.path_info, to.fs, obj, odb)
checkout(to.path_info, to.fs, obj, odb, dvcignore=None)

def update(self, rev=None):
if rev:
Expand Down
9 changes: 6 additions & 3 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def relpath(self):
return relpath(self.path)

def exists(self):
return self.repo.fs.exists(self.path)
is_ignored = self.repo.dvcignore.is_ignored_file(self.path)
return self.repo.fs.exists(self.path) and not is_ignored
Comment on lines +127 to +128
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love how we now use is_ignored_file instead of relying on the implicit is_ignored (which includes is_ignored_dir, which we don't care about and which can be harmful) inside fs ❤️


def _is_git_ignored(self):
return is_git_ignored(self.repo, self.path)
Expand All @@ -144,8 +145,10 @@ def _load(self):
# 3. path doesn't represent a regular file
# 4. when the file is git ignored
if not self.exists():
is_ignored = self.repo.fs.exists(self.path, use_dvcignore=False)
raise StageFileDoesNotExistError(self.path, dvc_ignored=is_ignored)
dvc_ignored = self.repo.dvcignore.is_ignored_file(self.path)
raise StageFileDoesNotExistError(
self.path, dvc_ignored=dvc_ignored
)

self._verify_filename()
if not self.repo.fs.isfile(self.path):
Expand Down
2 changes: 1 addition & 1 deletion dvc/fs/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class AzureAuthError(DvcException):
pass


class AzureFileSystem(FSSpecWrapper):
class AzureFileSystem(FSSpecWrapper): # pylint:disable=abstract-method
Copy link
Copy Markdown
Contributor

@efiop efiop May 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a less dangerous way: you could add walk to FSSpecWrapper.

EDIT: on the other hand, we've been using it in other FileSystems for a while 🙁

scheme = Schemes.AZURE
PATH_CLS = CloudURLInfo
PARAM_CHECKSUM = "etag"
Expand Down
9 changes: 7 additions & 2 deletions dvc/fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dvc.exceptions import DvcException
from dvc.path_info import URLInfo
from dvc.progress import Tqdm
from dvc.scheme import Schemes
from dvc.utils import tmp_fname
from dvc.utils.fs import makedirs, move
from dvc.utils.http import open_url
Expand Down Expand Up @@ -92,7 +93,6 @@ def get_missing_deps(cls):
return missing

def _check_requires(self):
from ..scheme import Schemes
from ..utils import format_link
from ..utils.pkg import PKG

Expand Down Expand Up @@ -139,7 +139,7 @@ def open(self, path_info, mode: str = "r", encoding: str = None, **kwargs):

raise RemoteActionNotImplemented("open", self.scheme)

def exists(self, path_info, use_dvcignore=True) -> bool:
def exists(self, path_info) -> bool:
raise NotImplementedError

# pylint: disable=unused-argument
Expand All @@ -166,6 +166,11 @@ def iscopy(self, path_info):
"""Check if this file is an independent copy."""
return False # We can't be sure by default

def walk(self, top, topdown=True, onerror=None, **kwargs):
"""Return a generator with (root, dirs, files).
"""
raise NotImplementedError

def walk_files(self, path_info, **kwargs):
"""Return a generator with `PathInfo`s to all the files.

Expand Down
5 changes: 4 additions & 1 deletion dvc/fs/fsspec_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import shutil
from functools import lru_cache

from funcy import cached_property

from dvc.progress import Tqdm

from .base import BaseFileSystem
Expand All @@ -14,6 +16,7 @@ def __init__(self, repo, config):
self.fs_args = {"skip_instance_cache": True}
self.fs_args.update(self._prepare_credentials(config))

@cached_property
def fs(self):
raise NotImplementedError

Expand Down Expand Up @@ -88,7 +91,7 @@ def open(
def copy(self, from_info, to_info):
self.fs.copy(self._with_bucket(from_info), self._with_bucket(to_info))

def exists(self, path_info, use_dvcignore=False):
def exists(self, path_info) -> bool:
return self.fs.exists(self._with_bucket(path_info))

def ls(self, path_info, detail=False):
Expand Down
4 changes: 2 additions & 2 deletions dvc/fs/gdrive.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, url):
self._spath = re.sub("/{2,}", "/", self._spath.rstrip("/"))


class GDriveFileSystem(BaseFileSystem):
class GDriveFileSystem(BaseFileSystem): # pylint:disable=abstract-method
scheme = Schemes.GDRIVE
PATH_CLS = GDriveURLInfo
PARAM_CHECKSUM = "checksum"
Expand Down Expand Up @@ -517,7 +517,7 @@ def _get_item_id(self, path_info, create=False, use_cache=True, hint=None):
assert not create
raise FileMissingError(path_info, hint)

def exists(self, path_info, use_dvcignore=True):
def exists(self, path_info) -> bool:
try:
self._get_item_id(path_info)
except FileMissingError:
Expand Down
Loading