From 7b7f4e8c8a7218ade717991128ec519041a3d2be Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 17:23:38 +0545 Subject: [PATCH 1/5] pull/fetch/push: add stats --- dvc/command/checkout.py | 32 ++++------------ dvc/command/data_sync.py | 28 +++++++++----- dvc/repo/pull.py | 2 +- dvc/utils/humanize.py | 27 ++++++++++++++ tests/func/test_checkout.py | 2 +- tests/func/test_data_cloud.py | 58 ++++++++++++++++++++++++++--- tests/unit/command/test_checkout.py | 29 +-------------- tests/unit/utils/test_humanize.py | 40 ++++++++++++++++++++ 8 files changed, 148 insertions(+), 70 deletions(-) create mode 100644 dvc/utils/humanize.py create mode 100644 tests/unit/utils/test_humanize.py diff --git a/dvc/command/checkout.py b/dvc/command/checkout.py index 1c06b3ffe7..34c31897d6 100644 --- a/dvc/command/checkout.py +++ b/dvc/command/checkout.py @@ -1,39 +1,17 @@ import argparse import logging +import operator import colorama from dvc.command.base import append_doc_link from dvc.command.base import CmdBase from dvc.exceptions import CheckoutError +from dvc.utils.humanize import get_summary logger = logging.getLogger(__name__) -def _human_join(words): - words = list(words) - if not words: - return "" - - return ( - "{before} and {after}".format( - before=", ".join(words[:-1]), after=words[-1], - ) - if len(words) > 1 - else words[0] - ) - - -def log_summary(stats): - states = ("added", "deleted", "modified") - summary = ( - "{} {}".format(len(stats[state]), state) - for state in states - if stats.get(state) - ) - logger.info(_human_join(summary) or "No changes.") - - def log_changes(stats): colors = [ ("modified", colorama.Fore.YELLOW,), @@ -75,7 +53,11 @@ def run(self): stats = exc.stats if self.args.summary: - log_summary(stats) + default_message = "No changes." + msg = get_summary( + sorted(stats.items(), key=operator.itemgetter(0)) + ) + logging.info(msg or default_message) else: log_changes(stats) diff --git a/dvc/command/data_sync.py b/dvc/command/data_sync.py index 3091fc915a..0e0a0aaf5f 100644 --- a/dvc/command/data_sync.py +++ b/dvc/command/data_sync.py @@ -3,7 +3,7 @@ from dvc.command.base import append_doc_link from dvc.command.base import CmdBase -from dvc.command.checkout import log_summary +from dvc.utils.humanize import get_summary from dvc.exceptions import DvcException, CheckoutError @@ -11,13 +11,22 @@ class CmdDataBase(CmdBase): - @classmethod - def check_up_to_date(cls, processed_files_count): - if processed_files_count == 0: - logger.info("Everything is up to date.") + def log_summary(self, stats): + default_msg = "Everything is up to date." + logger.info(get_summary(stats) or default_msg) class CmdDataPull(CmdDataBase): + def log_summary(self, stats): + # keeping `pull` stats ordered with `fetch` being always at the start + return super().log_summary( + [ + (key, stats[key]) + for key in ("fetched", "added", "deleted", "modified") + if key in stats + ] + ) + def run(self): try: stats = self.repo.pull( @@ -31,13 +40,12 @@ def run(self): force=self.args.force, recursive=self.args.recursive, ) - log_summary(stats) + self.log_summary(stats) except (CheckoutError, DvcException) as exc: - log_summary(getattr(exc, "stats", {})) + self.log_summary(getattr(exc, "stats", {})) logger.exception("failed to pull data from the cloud") return 1 - self.check_up_to_date(stats["downloaded"]) return 0 @@ -54,10 +62,10 @@ def run(self): with_deps=self.args.with_deps, recursive=self.args.recursive, ) + self.log_summary([("pushed", processed_files_count)]) except DvcException: logger.exception("failed to push data to the cloud") return 1 - self.check_up_to_date(processed_files_count) return 0 @@ -74,10 +82,10 @@ def run(self): with_deps=self.args.with_deps, recursive=self.args.recursive, ) + self.log_summary([("fetched", processed_files_count)]) except DvcException: logger.exception("failed to fetch data from the cloud") return 1 - self.check_up_to_date(processed_files_count) return 0 diff --git a/dvc/repo/pull.py b/dvc/repo/pull.py index 4623f6a4ec..444ac34412 100644 --- a/dvc/repo/pull.py +++ b/dvc/repo/pull.py @@ -33,5 +33,5 @@ def pull( targets=targets, with_deps=with_deps, force=force, recursive=recursive ) - stats["downloaded"] = processed_files_count + stats["fetched"] = processed_files_count return stats diff --git a/dvc/utils/humanize.py b/dvc/utils/humanize.py new file mode 100644 index 0000000000..163e60dcfe --- /dev/null +++ b/dvc/utils/humanize.py @@ -0,0 +1,27 @@ +from funcy import is_seq + + +def join(words): + words = list(words) + if not words: + return "" + + return ( + "{before} and {after}".format( + before=", ".join(words[:-1]), after=words[-1], + ) + if len(words) > 1 + else words[0] + ) + + +def get_summary(stats): + status = ( + (state, len(data) if is_seq(data) else data) + for state, data in stats + if data + ) + return join( + "{} file{} {}".format(num, "s" if num > 1 else "", state) + for state, num in status + ) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 56c58f23d4..6f743fefe3 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -686,7 +686,7 @@ def test_stats_does_not_show_changes_by_default(tmp_dir, dvc, scm, caplog): with caplog.at_level(logging.INFO, logger="dvc"): caplog.clear() assert main(["checkout", "--summary"]) == 0 - assert "2 deleted" in caplog.text + assert "2 files deleted" in caplog.text assert "dir" not in caplog.text assert "other" not in caplog.text diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index a9336edee3..ae3cfa5b48 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -724,14 +724,14 @@ def test_pull_git_imports(request, tmp_dir, dvc, scm, erepo): dvc.imp(fspath(erepo), "foo") dvc.imp(fspath(erepo), "dir", out="new_dir", rev="HEAD~") - assert dvc.pull()["downloaded"] == 0 + assert dvc.pull()["fetched"] == 0 for item in ["foo", "new_dir", dvc.cache.local.cache_dir]: remove(item) os.makedirs(dvc.cache.local.cache_dir, exist_ok=True) clean_repos() - assert dvc.pull(force=True)["downloaded"] == 2 + assert dvc.pull(force=True)["fetched"] == 2 assert (tmp_dir / "foo").exists() assert (tmp_dir / "foo").read_text() == "foo" @@ -751,11 +751,11 @@ def test_pull_external_dvc_imports(tmp_dir, dvc, scm, erepo_dir): dvc.imp(fspath(erepo_dir), "foo") dvc.imp(fspath(erepo_dir), "dir", out="new_dir", rev="HEAD~") - assert dvc.pull()["downloaded"] == 0 + assert dvc.pull()["fetched"] == 0 clean(["foo", "new_dir"], dvc) - assert dvc.pull(force=True)["downloaded"] == 2 + assert dvc.pull(force=True)["fetched"] == 2 assert (tmp_dir / "foo").exists() assert (tmp_dir / "foo").read_text() == "foo" @@ -796,7 +796,7 @@ def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy): for target in [stage.addressing, out]: clean(outs, dvc) stats = dvc.pull([target]) - assert stats["downloaded"] == 1 + assert stats["fetched"] == 1 assert stats["added"] == [out] assert os.path.exists(out) assert not any(os.path.exists(out) for out in set(outs) - {out}) @@ -847,3 +847,51 @@ def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy): with pytest.raises(StageNotFound): dvc.pull(["dvc.yaml:StageThatDoesNotExist"]) + + +@pytest.mark.parametrize( + "fs, msg", + [ + ({"foo": "foo", "bar": "bar"}, "2 files pushed"), + ({"foo": "foo"}, "1 file pushed"), + ({}, "Everything is up to date"), + ], +) +def test_push_stats(tmp_dir, dvc, fs, msg, local_remote, caplog): + tmp_dir.dvc_gen(fs) + caplog.clear() + with caplog.at_level(level=logging.INFO, logger="dvc"): + main(["push"]) + assert msg in caplog.text + + +@pytest.mark.parametrize( + "fs, msg", + [ + ({"foo": "foo", "bar": "bar"}, "2 files fetched"), + ({"foo": "foo"}, "1 file fetched"), + ({}, "Everything is up to date."), + ], +) +def test_fetch_stats(tmp_dir, dvc, fs, msg, local_remote, caplog): + tmp_dir.dvc_gen(fs) + dvc.push() + clean(list(fs.keys()), dvc) + caplog.clear() + with caplog.at_level(level=logging.INFO, logger="dvc"): + main(["fetch"]) + assert msg in caplog.text + + +def test_pull_stats(tmp_dir, dvc, local_remote, caplog): + tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"}) + dvc.push() + clean(["foo", "bar"], dvc) + (tmp_dir / "bar").write_text("foobar") + caplog.clear() + with caplog.at_level(level=logging.INFO, logger="dvc"): + main(["pull", "--force"]) + assert "2 files fetched, 1 file added and 1 file modified" in caplog.text + with caplog.at_level(level=logging.INFO, logger="dvc"): + main(["pull"]) + assert "Everything is up to date." in caplog.text diff --git a/tests/unit/command/test_checkout.py b/tests/unit/command/test_checkout.py index 0090f93558..e2315a290d 100644 --- a/tests/unit/command/test_checkout.py +++ b/tests/unit/command/test_checkout.py @@ -1,7 +1,7 @@ import logging from dvc.cli import parse_args -from dvc.command.checkout import CmdCheckout, log_summary, log_changes +from dvc.command.checkout import CmdCheckout, log_changes def test_checkout(tmp_dir, dvc, mocker): @@ -23,33 +23,6 @@ def test_checkout(tmp_dir, dvc, mocker): ) -def test_log_summary(caplog): - stats = { - "added": ["file1", "file2", "file3"], - "deleted": ["file4", "file5"], - "modified": ["file6", "file7"], - } - - def _assert_output(stats, expected_text): - with caplog.at_level(logging.INFO, logger="dvc"): - caplog.clear() - log_summary(stats) - assert expected_text in caplog.text - - _assert_output(stats, "3 added, 2 deleted and 2 modified") - - del stats["deleted"][1] - _assert_output(stats, "3 added, 1 deleted and 2 modified") - - del stats["deleted"][0] - _assert_output(stats, "3 added and 2 modified") - - del stats["modified"] - _assert_output(stats, "3 added") - - _assert_output({}, "No changes.") - - def test_log_changes(caplog): stats = { "added": ["file1", "dir1/"], diff --git a/tests/unit/utils/test_humanize.py b/tests/unit/utils/test_humanize.py new file mode 100644 index 0000000000..19d4b8b8f8 --- /dev/null +++ b/tests/unit/utils/test_humanize.py @@ -0,0 +1,40 @@ +from collections import OrderedDict + +from dvc.utils.humanize import get_summary + + +def test_get_summary(): + # dict, so that we could delete from it easily + stats = OrderedDict( + [ + ("fetched", 3), + ("added", ["file1", "file2", "file3"]), + ("deleted", ["file4", "file5"]), + ("modified", ["file6", "file7"]), + ] + ) + + assert get_summary(stats.items()) == ( + "3 files fetched, " + "3 files added, " + "2 files deleted " + "and " + "2 files modified" + ) + + del stats["fetched"] + del stats["deleted"][1] + assert ( + get_summary(stats.items()) + == "3 files added, 1 file deleted and 2 files modified" + ) + + del stats["deleted"][0] + assert get_summary(stats.items()) == "3 files added and 2 files modified" + + del stats["modified"] + assert get_summary(stats.items()) == "3 files added" + + assert get_summary([]) == "" + assert get_summary([("x", 0), ("y", [])]) == "" + assert get_summary([("x", 1), ("y", [])]) == "1 file x" From 60407633af457beac4c9edef51da44991679cb68 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 18:23:25 +0545 Subject: [PATCH 2/5] fix typo: s/logging/logger --- dvc/command/checkout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/command/checkout.py b/dvc/command/checkout.py index 34c31897d6..d390ef6994 100644 --- a/dvc/command/checkout.py +++ b/dvc/command/checkout.py @@ -57,7 +57,7 @@ def run(self): msg = get_summary( sorted(stats.items(), key=operator.itemgetter(0)) ) - logging.info(msg or default_message) + logger.info(msg or default_message) else: log_changes(stats) From 9da72d7b24dc5698470bdb4e4abe07794b4e5202 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 18:47:34 +0545 Subject: [PATCH 3/5] pull: make stats detailed, with summary at the bottom --- dvc/command/data_sync.py | 17 ++++++----------- tests/func/test_data_cloud.py | 6 +++++- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/dvc/command/data_sync.py b/dvc/command/data_sync.py index 0e0a0aaf5f..ec181c73da 100644 --- a/dvc/command/data_sync.py +++ b/dvc/command/data_sync.py @@ -3,6 +3,7 @@ from dvc.command.base import append_doc_link from dvc.command.base import CmdBase +from dvc.command.checkout import log_changes from dvc.utils.humanize import get_summary from dvc.exceptions import DvcException, CheckoutError @@ -13,19 +14,13 @@ class CmdDataBase(CmdBase): def log_summary(self, stats): default_msg = "Everything is up to date." - logger.info(get_summary(stats) or default_msg) + logger.info(get_summary(stats.items()) or default_msg) class CmdDataPull(CmdDataBase): def log_summary(self, stats): - # keeping `pull` stats ordered with `fetch` being always at the start - return super().log_summary( - [ - (key, stats[key]) - for key in ("fetched", "added", "deleted", "modified") - if key in stats - ] - ) + log_changes(stats) + super().log_summary(stats) def run(self): try: @@ -62,7 +57,7 @@ def run(self): with_deps=self.args.with_deps, recursive=self.args.recursive, ) - self.log_summary([("pushed", processed_files_count)]) + self.log_summary({"pushed": processed_files_count}) except DvcException: logger.exception("failed to push data to the cloud") return 1 @@ -82,7 +77,7 @@ def run(self): with_deps=self.args.with_deps, recursive=self.args.recursive, ) - self.log_summary([("fetched", processed_files_count)]) + self.log_summary({"fetched": processed_files_count}) except DvcException: logger.exception("failed to fetch data from the cloud") return 1 diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index ae3cfa5b48..305a4e6797 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -891,7 +891,11 @@ def test_pull_stats(tmp_dir, dvc, local_remote, caplog): caplog.clear() with caplog.at_level(level=logging.INFO, logger="dvc"): main(["pull", "--force"]) - assert "2 files fetched, 1 file added and 1 file modified" in caplog.text + assert "M\tbar" in caplog.text + assert "A\tfoo" in caplog.text + assert "2 files fetched" in caplog.text + assert "1 file added" in caplog.text + assert "1 file modified" in caplog.text with caplog.at_level(level=logging.INFO, logger="dvc"): main(["pull"]) assert "Everything is up to date." in caplog.text From 1396707be80af4e0cd76b1244dd35211f7ed9a35 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 20:12:19 +0545 Subject: [PATCH 4/5] tests: mock properly, set return_value --- tests/unit/command/test_data_sync.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/unit/command/test_data_sync.py b/tests/unit/command/test_data_sync.py index 41d464dbd9..74133b8338 100644 --- a/tests/unit/command/test_data_sync.py +++ b/tests/unit/command/test_data_sync.py @@ -22,7 +22,7 @@ def test_fetch(mocker): assert cli_args.func == CmdDataFetch cmd = cli_args.func(cli_args) - m = mocker.patch.object(cmd.repo, "fetch", autospec=True) + m = mocker.patch.object(cmd.repo, "fetch", autospec=True, return_value=0) assert cmd.run() == 0 @@ -96,8 +96,7 @@ def test_push(mocker): assert cli_args.func == CmdDataPush cmd = cli_args.func(cli_args) - m = mocker.patch.object(cmd.repo, "push", autospec=True) - + m = mocker.patch.object(cmd.repo, "push", autospec=True, return_value=0) assert cmd.run() == 0 m.assert_called_once_with( From 48bf9c8d98894a5a492738e38fddb7a240a65198 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 4 May 2020 20:13:14 +0545 Subject: [PATCH 5/5] add a line space --- tests/unit/command/test_data_sync.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/command/test_data_sync.py b/tests/unit/command/test_data_sync.py index 74133b8338..dab1379eef 100644 --- a/tests/unit/command/test_data_sync.py +++ b/tests/unit/command/test_data_sync.py @@ -97,6 +97,7 @@ def test_push(mocker): cmd = cli_args.func(cli_args) m = mocker.patch.object(cmd.repo, "push", autospec=True, return_value=0) + assert cmd.run() == 0 m.assert_called_once_with(