From 68c7366ef04400c4559a069d1c70341ecc0e53af Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Tue, 19 Nov 2019 05:34:19 +0545 Subject: [PATCH 1/4] utils: implement `path_isin` to compare child and parent paths --- dvc/utils/fs.py | 11 +++++++++ tests/unit/utils/test_fs.py | 48 ++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index c73a23563e..cb15088eaa 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -134,3 +134,14 @@ def remove(path): except OSError as exc: if exc.errno != errno.ENOENT: raise + + +def path_isin(child, parent): + """Check if given `child` path is inside `parent`.""" + + def normalize_path(path): + return os.path.normpath(fspath(path)) + + parent = os.path.join(normalize_path(parent), "") + child = normalize_path(child) + return child != parent and child.startswith(parent) diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index a8faabb0c1..7b136df7b5 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -16,7 +16,7 @@ from dvc.utils.fs import get_inode from dvc.utils.fs import get_mtime_and_size from dvc.utils.fs import move -from dvc.utils.fs import remove +from dvc.utils.fs import path_isin, remove from tests.basic_env import TestDir from tests.utils import spy @@ -164,3 +164,49 @@ def test_remove(repo_dir): remove(path_info) assert not os.path.isfile(path_info.fspath) + + +@pytest.mark.parametrize( + "parent", + [ + (os.path.join("path", "to", "")), + (os.path.join("path", "to")), + (os.path.join("path", "")), + (os.path.join("path")), + ], +) +def test_path_isin_positive(parent): + child = os.path.join("path", "to", "folder") + assert path_isin(child, parent) + + +def test_path_isin_on_same_path(): + path = os.path.join("path", "to", "folder") + path2 = os.path.join(path, "") + + assert not path_isin(path, path) + assert not path_isin(path, path2) + assert not path_isin(path2, path) + assert not path_isin(path2, path2) + + +def test_path_isin_on_common_substring_path(): + path1 = os.path.join("path", "to", "folder1") + path2 = os.path.join("path", "to", "folder") + + assert not path_isin(path1, path2) + + +def test_path_isin_accepts_pathinfo(): + child = os.path.join("path", "to", "folder") + parent = PathInfo(child) / ".." + + assert path_isin(child, parent) + assert not path_isin(parent, child) + + +def test_path_isin_with_absolute_path(): + parent = os.path.abspath("path") + child = os.path.join(parent, "to", "folder") + + assert path_isin(child, parent) From f07a70a052e89ce201ce4822833d31602b6326f2 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Tue, 19 Nov 2019 05:36:11 +0545 Subject: [PATCH 2/4] get/import: display path relative to temporary repo --- dvc/output/local.py | 11 ++++++++++- tests/unit/output/test_local.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/dvc/output/local.py b/dvc/output/local.py index ecdeafdae3..f698a123a9 100644 --- a/dvc/output/local.py +++ b/dvc/output/local.py @@ -7,9 +7,11 @@ from dvc.istextfile import istextfile from dvc.output.base import OutputBase from dvc.remote.local import RemoteLOCAL +from dvc.utils import relpath from dvc.utils.compat import fspath_py35 from dvc.utils.compat import str from dvc.utils.compat import urlparse +from dvc.utils.fs import path_isin logger = logging.getLogger(__name__) @@ -38,7 +40,14 @@ def _parse_path(self, remote, path): return self.REMOTE.path_cls(abs_p) def __str__(self): - return str(self.path_info) + if not self.is_in_repo: + return str(self.def_path) + + cur_dir = os.getcwd() + if path_isin(cur_dir, self.repo.root_dir): + return relpath(self.path_info, cur_dir) + + return relpath(self.path_info, self.repo.root_dir) @property def fspath(self): diff --git a/tests/unit/output/test_local.py b/tests/unit/output/test_local.py index 0999472c6f..82d671cf73 100644 --- a/tests/unit/output/test_local.py +++ b/tests/unit/output/test_local.py @@ -1,8 +1,10 @@ -from mock import patch +import os +from mock import patch from dvc.output import OutputLOCAL from dvc.remote.local import RemoteLOCAL from dvc.stage import Stage +from dvc.utils import relpath from tests.basic_env import TestDvc @@ -21,6 +23,34 @@ def test_save_missing(self): o.save() +def test_str_workdir_outside_repo(erepo): + stage = Stage(erepo.dvc) + output = OutputLOCAL(stage, "path", cache=False) + + assert relpath("path", erepo.dvc.root_dir) == str(output) + + +def test_str_workdir_inside_repo(dvc_repo): + stage = Stage(dvc_repo) + output = OutputLOCAL(stage, "path", cache=False) + + assert "path" == str(output) + + stage = Stage(dvc_repo, wdir="some_folder") + output = OutputLOCAL(stage, "path", cache=False) + + assert os.path.join("some_folder", "path") == str(output) + + +def test_str_on_absolute_path(dvc_repo): + stage = Stage(dvc_repo) + + path = os.path.abspath(os.path.join("path", "to", "file")) + output = OutputLOCAL(stage, path, cache=False) + + assert path == str(output) + + class TestGetFilesNumber(TestDvc): def _get_output(self): stage = Stage(self.dvc) From 58624e0230866a7b55833c2ad66bcb29da28685a Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Tue, 19 Nov 2019 21:52:33 +0545 Subject: [PATCH 3/4] utils/fs: use fspath_py35 instead of fspath --- dvc/utils/fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/utils/fs.py b/dvc/utils/fs.py index cb15088eaa..1b51576164 100644 --- a/dvc/utils/fs.py +++ b/dvc/utils/fs.py @@ -140,7 +140,7 @@ def path_isin(child, parent): """Check if given `child` path is inside `parent`.""" def normalize_path(path): - return os.path.normpath(fspath(path)) + return os.path.normpath(fspath_py35(path)) parent = os.path.join(normalize_path(parent), "") child = normalize_path(child) From 83c8ab5c26a2564998e0ded8283596f264f1e3f7 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Tue, 19 Nov 2019 21:54:23 +0545 Subject: [PATCH 4/4] test: Refactor tests --- tests/unit/utils/test_fs.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/tests/unit/utils/test_fs.py b/tests/unit/utils/test_fs.py index 7b136df7b5..8092129076 100644 --- a/tests/unit/utils/test_fs.py +++ b/tests/unit/utils/test_fs.py @@ -166,28 +166,23 @@ def test_remove(repo_dir): assert not os.path.isfile(path_info.fspath) -@pytest.mark.parametrize( - "parent", - [ - (os.path.join("path", "to", "")), - (os.path.join("path", "to")), - (os.path.join("path", "")), - (os.path.join("path")), - ], -) -def test_path_isin_positive(parent): +def test_path_isin_positive(): child = os.path.join("path", "to", "folder") - assert path_isin(child, parent) + + assert path_isin(child, os.path.join("path", "to", "")) + assert path_isin(child, os.path.join("path", "to")) + assert path_isin(child, os.path.join("path", "")) + assert path_isin(child, os.path.join("path")) def test_path_isin_on_same_path(): path = os.path.join("path", "to", "folder") - path2 = os.path.join(path, "") + path_with_sep = os.path.join(path, "") assert not path_isin(path, path) - assert not path_isin(path, path2) - assert not path_isin(path2, path) - assert not path_isin(path2, path2) + assert not path_isin(path, path_with_sep) + assert not path_isin(path_with_sep, path) + assert not path_isin(path_with_sep, path_with_sep) def test_path_isin_on_common_substring_path():