diff --git a/dvc/repo/diff.py b/dvc/repo/diff.py index 8b474ef9cc..9c54e7bc1c 100644 --- a/dvc/repo/diff.py +++ b/dvc/repo/diff.py @@ -22,29 +22,25 @@ def _paths_checksums(): A dictionary of checksums addressed by relpaths collected from the current tree outputs. - Unpack directories to include their entries - To help distinguish between a directory and a file output, the former one will come with a trailing slash in the path: directory: "data/" file: "data" """ - result = {} - - for stage in self.stages: - for output in stage.outs: - if not output.is_dir_checksum: - result.update({str(output): output.checksum}) - continue - - result.update({os.path.join(str(output), ""): output.checksum}) - - for entry in output.dir_cache: - path = str(output.path_info / entry["relpath"]) - result.update({path: entry["md5"]}) - return result + def _to_path(output): + return ( + str(output) + if not output.is_dir_checksum + else os.path.join(str(output), "") + ) + + return { + _to_path(output): output.checksum + for stage in self.stages + for output in stage.outs + } working_tree = self.tree a_tree = self.scm.get_tree(a_rev) diff --git a/tests/func/test_diff.py b/tests/func/test_diff.py index 827039fb13..668fa5ebfa 100644 --- a/tests/func/test_diff.py +++ b/tests/func/test_diff.py @@ -3,6 +3,8 @@ import pytest import shutil +from funcy import first + from dvc.compat import fspath from dvc.exceptions import DvcException @@ -40,11 +42,7 @@ def test_no_cache_entry(tmp_dir, scm, dvc): dir_checksum = "5fb6b29836c388e093ca0715c872fe2a.dir" assert dvc.diff() == { - "added": [ - {"path": os.path.join("dir", ""), "hash": dir_checksum}, - {"path": os.path.join("dir", "1"), "hash": digest("1")}, - {"path": os.path.join("dir", "2"), "hash": digest("2")}, - ], + "added": [{"path": os.path.join("dir", ""), "hash": dir_checksum}], "deleted": [], "modified": [ { @@ -126,15 +124,13 @@ def test_directories(tmp_dir, scm, dvc): "path": os.path.join("dir", ""), "hash": "5fb6b29836c388e093ca0715c872fe2a.dir", }, - {"path": os.path.join("dir", "1"), "hash": digest("1")}, - {"path": os.path.join("dir", "2"), "hash": digest("2")}, ], "deleted": [], "modified": [], } assert dvc.diff(":/directory", ":/modify") == { - "added": [{"path": os.path.join("dir", "3"), "hash": digest("3")}], + "added": [], "deleted": [], "modified": [ { @@ -144,16 +140,12 @@ def test_directories(tmp_dir, scm, dvc): "new": "9b5faf37366b3370fd98e3e60ca439c1.dir", }, }, - { - "path": os.path.join("dir", "2"), - "hash": {"old": digest("2"), "new": digest("two")}, - }, ], } assert dvc.diff(":/modify", ":/delete") == { "added": [], - "deleted": [{"path": os.path.join("dir", "2"), "hash": digest("two")}], + "deleted": [], "modified": [ { "path": os.path.join("dir", ""), @@ -164,3 +156,24 @@ def test_directories(tmp_dir, scm, dvc): } ], } + + +def test_diff_no_cache(tmp_dir, scm, dvc): + (stage,) = tmp_dir.dvc_gen( + {"dir": {"file": "file content"}}, commit="first" + ) + scm.tag("v1") + tmp_dir.dvc_gen( + {"dir": {"file": "modified file content"}}, commit="second" + ) + + os.remove(first(stage.outs).cache_path) + shutil.rmtree("dir") + + # invalidate_dir_info to force cache loading + dvc.cache.local._dir_info = {} + + diff = dvc.diff("v1") + assert diff["added"] == [] + assert diff["deleted"] == [] + assert first(diff["modified"])["path"] == os.path.join("dir", "")