diff --git a/dvc/config_schema.py b/dvc/config_schema.py index 13590cf97f..e87eee784d 100644 --- a/dvc/config_schema.py +++ b/dvc/config_schema.py @@ -123,6 +123,7 @@ class RelPath(str): Optional("experiments"): Bool, # obsoleted Optional("check_update", default=True): Bool, "machine": Lower, + "excludesfile": str, }, "cache": { "local": str, diff --git a/dvc/ignore.py b/dvc/ignore.py index b26d62ff12..2b4f05ca92 100644 --- a/dvc/ignore.py +++ b/dvc/ignore.py @@ -8,6 +8,7 @@ from pathspec.util import normalize_file from pygtrie import Trie +from dvc.config import Config from dvc.fs import AnyFSPath, FileSystem, Schemes, localfs from dvc.pathspec_math import PatternInfo, merge_patterns from dvc.types import List, Optional @@ -17,6 +18,7 @@ class DvcIgnore: DVCIGNORE_FILE = ".dvcignore" + GLOBAL_IGNORE_FILE = "ignore" def __call__(self, root, dirs, files): raise NotImplementedError @@ -25,10 +27,12 @@ def __call__(self, root, dirs, files): class DvcIgnorePatterns(DvcIgnore): def __init__(self, pattern_list, dirname, sep): if pattern_list: - if isinstance(pattern_list[0], str): - pattern_list = [ - PatternInfo(pattern, "") for pattern in pattern_list - ] + pattern_list = [ + PatternInfo(pattern, "") + if isinstance(pattern, str) + else pattern + for pattern in pattern_list + ] self.sep = sep self.pattern_list = pattern_list @@ -172,14 +176,19 @@ def __init__(self, fs, root_dir): self.root_dir = root_dir self.ignores_trie_fs = Trie() self._ignores_trie_subrepos = Trie() + self.config = Config() key = self._get_key(root_dir) + + default_ignore_patterns.extend(self._get_global_ignore_patterns()) + self.ignores_trie_fs[key] = DvcIgnorePatterns( default_ignore_patterns, root_dir, fs.sep, ) self._ignores_trie_subrepos[key] = self.ignores_trie_fs[key] + self._update( self.root_dir, self._ignores_trie_subrepos, @@ -193,6 +202,32 @@ def __init__(self, fs, root_dir): ignore_subrepos=True, ) + def _get_global_ignore_file(self): + core_config = self.config.get("core", {}) + 
config_ignore_file = core_config.get("excludesfile", None) + + if config_ignore_file: + return config_ignore_file + + for level in ["global", "system"]: + ignore_file_path_at_level = self.fs.path.join( + Config.get_dir(level), DvcIgnore.GLOBAL_IGNORE_FILE + ) + if self.fs.exists(ignore_file_path_at_level): + return ignore_file_path_at_level + + return None + + def _get_global_ignore_patterns(self): + global_ignore_file = self._get_global_ignore_file() + + if global_ignore_file and self.fs.exists(global_ignore_file): + return DvcIgnorePatterns.from_file( + global_ignore_file, self.fs, global_ignore_file + ).pattern_list + + return [] + def _get_key(self, path): parts = self.fs.path.relparts(path, self.root_dir) if parts == (".",): diff --git a/tests/conftest.py b/tests/conftest.py index 4c46ba448c..7b28562288 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -181,10 +181,15 @@ def mocked_webbrowser_open(mocker): @pytest.fixture(autouse=True) -def isolate(tmp_path_factory, monkeypatch) -> None: +def isolate(tmp_path_factory, monkeypatch, mocker) -> None: path = tmp_path_factory.mktemp("mock") home_dir = path / "home" home_dir.mkdir() + root_dir = path / "root" + root_dir.mkdir() + + def config_dir_generator(dirname): + return lambda x, y: str(dirname / x / (y or "")) if sys.platform == "win32": home_drive, home_path = os.path.splitdrive(home_dir) @@ -199,8 +204,37 @@ def isolate(tmp_path_factory, monkeypatch) -> None: path = home_dir / "AppData" / sub_path path.mkdir(parents=True) monkeypatch.setenv(env_var, os.fspath(path)) + + # NOTE(meshde): The env vars set above don't seem to affect the + # response of appdirs.site_config_dir or appdirs.user_config_dir + # on Windows and these continue to return the actual respective + # config dirs on Windows machines + # + # Hence, resorting to mocking these functions entirely + mocker.patch( + "appdirs.site_config_dir", + side_effect=config_dir_generator(root_dir), + ) + mocker.patch( + "appdirs.user_config_dir", + 
side_effect=config_dir_generator(home_dir), + ) + elif sys.platform == "darwin": + monkeypatch.setenv("HOME", str(home_dir)) + + # NOTE(meshde): appdirs.site_config_dir statically returns + # /Library/Application Support/ on macos leaving us no way to + # manipulate the response of this function using env variables + # + # Hence, resorting to mocking this function entirely + mocker.patch( + "appdirs.site_config_dir", + side_effect=config_dir_generator(root_dir), + ) else: monkeypatch.setenv("HOME", str(home_dir)) + monkeypatch.setenv("XDG_CONFIG_HOME", str(home_dir)) + monkeypatch.setenv("XDG_CONFIG_DIRS", str(root_dir)) monkeypatch.setenv("GIT_CONFIG_NOSYSTEM", "1") contents = b""" diff --git a/tests/func/test_check_ignore.py b/tests/func/test_check_ignore.py index 6e377af494..2cdf814596 100644 --- a/tests/func/test_check_ignore.py +++ b/tests/func/test_check_ignore.py @@ -1,8 +1,10 @@ import os +from pathlib import Path import pytest from dvc.cli import main +from dvc.config import Config from dvc.ignore import DvcIgnore @@ -98,6 +100,101 @@ def test_check_ignore_sub_repo(tmp_dir, dvc, capsys): ) +def test_check_excludesfile(tmp_dir, dvc, capsys): + excludesfile = Path.home() / DvcIgnore.DVCIGNORE_FILE + + with dvc.config.edit() as conf: + conf["core"]["excludesfile"] = str(excludesfile) + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_excludesfile": "ignored_in_excludesfile", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + (excludesfile).write_text("ignored_in_excludesfile", encoding="utf-8") + + assert main(["check-ignore", "-d", "ignored_in_repo_root"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{DvcIgnore.DVCIGNORE_FILE}:1:ignored_in_repo_root\t" + + "ignored_in_repo_root\n" + ) + + assert main(["check-ignore", "-d", "ignored_in_excludesfile"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{excludesfile}:1:ignored_in_excludesfile" + + 
"\tignored_in_excludesfile\n" + ) + + +def test_check_global_dvcignore(tmp_path, tmp_dir, dvc, capsys): + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_global": "ignored_in_global", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + global_path = Path(Config.get_dir("global")) + global_path.mkdir(parents=True, exist_ok=True) + global_dvcignore = global_path / DvcIgnore.GLOBAL_IGNORE_FILE + global_dvcignore.write_text("ignored_in_global", encoding="utf-8") + + assert main(["check-ignore", "-d", "ignored_in_repo_root"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{DvcIgnore.DVCIGNORE_FILE}:1:ignored_in_repo_root\t" + + "ignored_in_repo_root\n" + ) + + assert main(["check-ignore", "-d", "ignored_in_global"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{global_dvcignore}:1:ignored_in_global\tignored_in_global\n" + ) + + +def test_check_system_dvcignore(tmp_path, tmp_dir, dvc, capsys): + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_system": "ignored_in_system", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + system_path = Path(Config.get_dir("system")) + system_dvcignore = system_path / DvcIgnore.GLOBAL_IGNORE_FILE + system_path.mkdir(parents=True, exist_ok=True) + system_dvcignore.write_text("ignored_in_system", encoding="utf-8") + + assert main(["check-ignore", "-d", "ignored_in_repo_root"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{DvcIgnore.DVCIGNORE_FILE}:1:ignored_in_repo_root\t" + + "ignored_in_repo_root\n" + ) + + assert main(["check-ignore", "-d", "ignored_in_system"]) == 0 + output, _ = capsys.readouterr() + assert ( + output + == f"{system_dvcignore}:1:ignored_in_system\tignored_in_system\n" + ) + + def test_check_sub_dir_ignore_file(tmp_dir, dvc, capsys): tmp_dir.gen( { diff --git a/tests/func/test_ignore.py b/tests/func/test_ignore.py index 
adfb450779..26004efac6 100644 --- a/tests/func/test_ignore.py +++ b/tests/func/test_ignore.py @@ -4,6 +4,7 @@ import pytest +from dvc.config import Config from dvc.ignore import DvcIgnore, DvcIgnorePatterns from dvc.output import OutputIsIgnoredError from dvc.pathspec_math import PatternInfo, merge_patterns @@ -22,6 +23,22 @@ def walk_files(dvc, *args): yield from dvc.dvcignore.find(*args) +@pytest.fixture +def global_dvcignore(): + global_path = Path(Config.get_dir("global")) + global_path.mkdir(parents=True, exist_ok=True) + global_dvcignore_path = global_path / DvcIgnore.GLOBAL_IGNORE_FILE + global_dvcignore_path.write_text("ignored_in_global", encoding="utf-8") + + +@pytest.fixture +def system_dvcignore(): + system_path = Path(Config.get_dir("system")) + system_path.mkdir(parents=True, exist_ok=True) + system_dvcignore_path = system_path / DvcIgnore.GLOBAL_IGNORE_FILE + system_dvcignore_path.write_text("ignored_in_system", encoding="utf-8") + + @pytest.mark.parametrize("filename", ["ignored", "тест"]) def test_ignore(tmp_dir, dvc, filename): tmp_dir.gen({"dir": {filename: filename, "other": "text2"}}) @@ -36,6 +53,98 @@ def test_ignore(tmp_dir, dvc, filename): } +@pytest.mark.parametrize("file_exists", [True, False]) +@pytest.mark.usefixtures("global_dvcignore", "system_dvcignore") +def test_ignore_from_excludesfile(tmp_path, tmp_dir, dvc, file_exists): + # NOTE(meshde): if core.excludesfile is defined in the config + # then the ignore patterns from the global or system .dvcignore + # should not be considered irrespective of whether or not the path + # given to core.excludesfile exists in the file system + excludesfile = Path.home() / (DvcIgnore.DVCIGNORE_FILE + "_custom") + + with dvc.config.edit() as conf: + conf["core"]["excludesfile"] = str(excludesfile) + + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_excludesfile": "ignored_in_excludesfile", + "ignored_in_global": "ignored_in_global", + 
"ignored_in_system": "ignored_in_system", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + if file_exists: + (excludesfile).write_text("ignored_in_excludesfile", encoding="utf-8") + + dvc._reset() + + result = set(walk_files(dvc, dvc.fs, tmp_dir)) + + files_to_be_ignored = ["ignored_in_repo_root"] + files_not_to_be_ignored = ["ignored_in_global", "ignored_in_system"] + + if file_exists: + files_to_be_ignored.append("ignored_in_excludesfile") + else: + files_not_to_be_ignored.append("ignored_in_excludesfile") + + for ignored_file in files_to_be_ignored: + assert (tmp_dir / "dir" / ignored_file).fs_path not in result + + for file in files_not_to_be_ignored: + assert (tmp_dir / "dir" / file).fs_path in result + + +@pytest.mark.usefixtures("global_dvcignore", "system_dvcignore") +def test_ignore_from_global_dvcignore(tmp_path, tmp_dir, dvc): + # NOTE(meshde): if core.excludesfile is not defined in the config + # and global .dvcignore exists then consider read patterns from this file + # and do not consider patterns from system .dvcignore + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_global": "ignored_in_global", + "ignored_in_system": "ignored_in_system", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + dvc._reset() + + result = set(walk_files(dvc, dvc.fs, tmp_dir)) + + for ignored_file in ["ignored_in_repo_root", "ignored_in_global"]: + assert (tmp_dir / "dir" / ignored_file).fs_path not in result + + assert (tmp_dir / "dir" / "ignored_in_system").fs_path in result + + +@pytest.mark.usefixtures("system_dvcignore") +def test_ignore_from_system_dvcignore(tmp_path, tmp_dir, dvc): + # NOTE(meshde): if core.excludesfile is not defined in the config and + # global .dvcignore does not exist but system .dvcignore exists then + # consider ignore patterns from this file + tmp_dir.gen( + { + "dir": { + "ignored_in_repo_root": "ignored_in_repo_root", + "ignored_in_system": 
"ignored_in_system", + } + } + ) + tmp_dir.gen(DvcIgnore.DVCIGNORE_FILE, "ignored_in_repo_root") + dvc._reset() + + result = set(walk_files(dvc, dvc.fs, tmp_dir)) + + for ignored_file in ["ignored_in_repo_root", "ignored_in_system"]: + assert (tmp_dir / "dir" / ignored_file).fs_path not in result + + def test_rename_ignored_file(tmp_dir, dvc): tmp_dir.gen({"dir": {"ignored": "...", "other": "text"}}) diff --git a/tests/unit/test_ignore.py b/tests/unit/test_ignore.py index c5f4b5a3da..c3663f8d01 100644 --- a/tests/unit/test_ignore.py +++ b/tests/unit/test_ignore.py @@ -4,6 +4,7 @@ import pytest from dvc.ignore import DvcIgnorePatterns +from dvc.pathspec_math import PatternInfo def mock_dvcignore(dvcignore_path, patterns): @@ -213,3 +214,35 @@ def test_should_ignore_dir(omit_dir, sub_dir): assert set(new_dirs) == {"dir1", "dir2"} assert set(new_files) == {"file1", "file2", omit_dir} + + +def test_allow_string_pattern_info_mix_input(): + pattern1 = DvcIgnorePatterns( + [ + "pattern1_string", + PatternInfo("pattern1_info", "pattern1_info_pattern_info"), + ], + "root", + os.sep, + ) + + assert PatternInfo("pattern1_string", "") in pattern1.pattern_list + assert ( + PatternInfo("pattern1_info", "pattern1_info_pattern_info") + in pattern1.pattern_list + ) + + pattern2 = DvcIgnorePatterns( + [ + PatternInfo("pattern2_info", "pattern2_info_pattern_info"), + "pattern2_string", + ], + "root", + os.sep, + ) + + assert PatternInfo("pattern2_string", "") in pattern2.pattern_list + assert ( + PatternInfo("pattern2_info", "pattern2_info_pattern_info") + in pattern2.pattern_list + )