Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions dvc/command/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ def _digest(checksum):
return "{}..{}".format(checksum["old"][0:8], checksum["new"][0:8])


def _show_md(diff, show_hash=False):
def _show_md(diff, show_hash=False, hide_missing=False):
from dvc.utils.diff import table

header = ["Status", "Hash", "Path"] if show_hash else ["Status", "Path"]
rows = []
for status in ["added", "deleted", "modified"]:
statuses = ["added", "deleted", "modified"]
if not hide_missing:
statuses.append("not in cache")
for status in statuses:
entries = diff.get(status, [])
if not entries:
continue
Expand All @@ -39,7 +42,7 @@ def _show_md(diff, show_hash=False):

class CmdDiff(CmdBase):
@staticmethod
def _format(diff):
def _format(diff, hide_missing=False):
"""
Given a diff structure, generate a string of paths separated
by new lines and grouped together by their state.
Expand Down Expand Up @@ -69,12 +72,16 @@ def _format(diff):
"added": colorama.Fore.GREEN,
"modified": colorama.Fore.YELLOW,
"deleted": colorama.Fore.RED,
"not in cache": colorama.Fore.YELLOW,
}

summary = {}
groups = []

for state in ["added", "deleted", "modified"]:
states = ["added", "deleted", "modified"]
if not hide_missing:
states.append("not in cache")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of uncached? I don't like that, yes, it's ugly; but it fits with others?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think uncached or missing would be cleaner as well. I went with not in cache here since that's what we return from output.status(), but I'm not opposed to changing it.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing seems ambiguous, uncached looks ugly. missing cache is the same as not in cache. πŸ˜•
I'll leave that up-to-you.

for state in states:
summary[state] = 0
entries = diff[state]

Expand Down Expand Up @@ -105,17 +112,26 @@ def _format(diff):
)
)

groups.append(
if not sum(summary.values()):
return None

fmt = (
"files summary: {added} added, {deleted} deleted,"
" {modified} modified".format_map(summary)
" {modified} modified"
)
if not hide_missing:
fmt += ", {not in cache} not in cache"
groups.append(fmt.format_map(summary))

return "\n\n".join(groups)

def run(self):
try:
diff = self.repo.diff(self.args.a_rev, self.args.b_rev)
show_hash = self.args.show_hash
hide_missing = self.args.b_rev or self.args.hide_missing
if hide_missing:
del diff["not in cache"]

for key, entries in diff.items():
entries = sorted(entries, key=lambda entry: entry["path"])
Expand All @@ -127,9 +143,11 @@ def run(self):
if self.args.show_json:
logger.info(json.dumps(diff))
elif self.args.show_md:
logger.info(_show_md(diff, show_hash))
logger.info(_show_md(diff, show_hash, hide_missing))
elif diff:
logger.info(self._format(diff))
output = self._format(diff, hide_missing)
if output:
logger.info(output)

except DvcException:
logger.exception("failed to get diff")
Expand Down Expand Up @@ -178,4 +196,9 @@ def add_parser(subparsers, parent_parser):
action="store_true",
default=False,
)
diff_parser.add_argument(
"--hide-missing",
help="Hide missing cache file status.",
action="store_true",
)
diff_parser.set_defaults(func=CmdDiff)
22 changes: 21 additions & 1 deletion dvc/repo/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,14 @@ def diff(self, a_rev="HEAD", b_rev=None):
# Compare paths between the old and new tree.
# set() efficiently converts dict keys to a set
added = sorted(set(new) - set(old))
deleted = sorted(set(old) - set(new))
deleted_or_missing = set(old) - set(new)
if b_rev == "workspace":
# missing status is only applicable when diffing local workspace
# against a commit
missing = sorted(_filter_missing(self, deleted_or_missing))
else:
missing = []
deleted = sorted(deleted_or_missing - set(missing))
modified = sorted(set(old) & set(new))

ret = {
Expand All @@ -47,6 +54,9 @@ def diff(self, a_rev="HEAD", b_rev=None):
for path in modified
if old[path] != new[path]
],
"not in cache": [
{"path": path, "hash": old[path]} for path in missing
],
}

return ret if any(ret.values()) else {}
Expand Down Expand Up @@ -104,3 +114,13 @@ def _dir_output_paths(repo_tree, output):
yield str(fname), repo_tree.get_hash(fname).value
except NoRemoteError:
logger.warning("dir cache entry for '%s' is missing", output)


def _filter_missing(repo, paths):
repo_tree = RepoTree(repo, stream=True)
for path in paths:
metadata = repo_tree.metadata(path)
if metadata.is_dvc:
out = metadata.outs[0]
if out.status()[str(out)] == "not in cache":
yield path
24 changes: 24 additions & 0 deletions tests/func/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_added(tmp_dir, scm, dvc):
"added": [{"path": "file", "hash": digest("text")}],
"deleted": [],
"modified": [],
"not in cache": [],
}


Expand Down Expand Up @@ -55,6 +56,7 @@ def test_no_cache_entry(tmp_dir, scm, dvc):
"hash": {"old": digest("first"), "new": digest("second")},
}
],
"not in cache": [],
}


Expand All @@ -66,6 +68,7 @@ def test_deleted(tmp_dir, scm, dvc):
"added": [],
"deleted": [{"path": "file", "hash": digest("text")}],
"modified": [],
"not in cache": [],
}


Expand All @@ -82,6 +85,7 @@ def test_modified(tmp_dir, scm, dvc):
"hash": {"old": digest("first"), "new": digest("second")},
}
],
"not in cache": [],
}


Expand All @@ -98,12 +102,14 @@ def test_refs(tmp_dir, scm, dvc):
"added": [],
"deleted": [],
"modified": [{"path": "file", "hash": {"old": HEAD_1, "new": HEAD}}],
"not in cache": [],
}

assert dvc.diff("HEAD~2", "HEAD~1") == {
"added": [],
"deleted": [],
"modified": [{"path": "file", "hash": {"old": HEAD_2, "new": HEAD_1}}],
"not in cache": [],
}

with pytest.raises(DvcException, match=r"unknown Git revision 'missing'"):
Expand Down Expand Up @@ -134,6 +140,7 @@ def test_directories(tmp_dir, scm, dvc):
],
"deleted": [],
"modified": [],
"not in cache": [],
}

assert dvc.diff(":/directory", ":/modify") == {
Expand All @@ -152,6 +159,7 @@ def test_directories(tmp_dir, scm, dvc):
"hash": {"old": digest("2"), "new": digest("two")},
},
],
"not in cache": [],
}

assert dvc.diff(":/modify", ":/delete") == {
Expand All @@ -168,6 +176,7 @@ def test_directories(tmp_dir, scm, dvc):
},
}
],
"not in cache": [],
}


Expand All @@ -189,6 +198,20 @@ def test_diff_no_cache(tmp_dir, scm, dvc):
assert diff["added"] == []
assert diff["deleted"] == []
assert first(diff["modified"])["path"] == os.path.join("dir", "")
assert diff["not in cache"] == []

(tmp_dir / "dir" / "file").unlink()
remove(str(tmp_dir / "dir"))
diff = dvc.diff()
assert diff["added"] == []
assert diff["deleted"] == []
assert diff["modified"] == []
assert diff["not in cache"] == [
{
"path": os.path.join("dir", ""),
"hash": "f0f7a307d223921557c929f944bf5303.dir",
},
]


def test_diff_dirty(tmp_dir, scm, dvc):
Expand Down Expand Up @@ -223,6 +246,7 @@ def test_diff_dirty(tmp_dir, scm, dvc):
"path": os.path.join("dir", ""),
}
],
"not in cache": [],
}


Expand Down
Loading