From a5c4070807f87fc15324b440501d31024d8dfef6 Mon Sep 17 00:00:00 2001 From: Charles Baynham Date: Fri, 17 Apr 2020 20:11:45 +0100 Subject: [PATCH 1/6] Only reproduce steps once --- tests/unit/repo/test_reproduce.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/repo/test_reproduce.py b/tests/unit/repo/test_reproduce.py index 2b49273ed3..946653da3c 100644 --- a/tests/unit/repo/test_reproduce.py +++ b/tests/unit/repo/test_reproduce.py @@ -1,4 +1,5 @@ from dvc.repo.reproduce import _get_active_graph +import mock def test_get_active_graph(tmp_dir, dvc): @@ -23,3 +24,15 @@ def test_get_active_graph(tmp_dir, dvc): active_graph = _get_active_graph(graph) assert set(active_graph.nodes) == {bar_stage, baz_stage} assert not active_graph.edges + + +@patch("dvc.repo.reproduce._reproduce_stage") +def test_number_reproduces(tmp_dir, dvc): + tmp_dir.dvc_gen({"pre-foo": "pre-foo"}) + + foo_stage = dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo") + bar_stage = dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar") + baz_stage = dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz") + baz_stage = dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop") + + \ No newline at end of file From d916fc700d77cf0d9ae2638ffe337a9206a5781a Mon Sep 17 00:00:00 2001 From: Charles Baynham Date: Fri, 17 Apr 2020 20:30:56 +0100 Subject: [PATCH 2/6] Add test --- tests/unit/repo/test_reproduce.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/unit/repo/test_reproduce.py b/tests/unit/repo/test_reproduce.py index 946653da3c..4f302269fe 100644 --- a/tests/unit/repo/test_reproduce.py +++ b/tests/unit/repo/test_reproduce.py @@ -1,5 +1,7 @@ -from dvc.repo.reproduce import _get_active_graph import mock +import pytest + +from dvc.repo.reproduce import _get_active_graph, _reproduce_stage def test_get_active_graph(tmp_dir, dvc): @@ -26,8 +28,8 @@ def test_get_active_graph(tmp_dir, dvc): assert not active_graph.edges -@patch("dvc.repo.reproduce._reproduce_stage") -def test_number_reproduces(tmp_dir, dvc): +@mock.patch("dvc.repo.reproduce._reproduce_stage", returns=[]) +def test_number_reproduces(tmp_dir, dvc, reproduce_stage_mock): tmp_dir.dvc_gen({"pre-foo": "pre-foo"}) foo_stage = dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo") @@ -35,4 +37,8 @@ def test_number_reproduces(tmp_dir, dvc): baz_stage = dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz") baz_stage = dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop") - \ No newline at end of file + reproduce_stage_mock.reset_mock() + + dvc.reproduce(all_pipelines=True) + + assert reproduce_stage_mock.call_count == 5 From 3b167b55deb2e61859f7bad3bbadd58b22a06038 Mon Sep 17 00:00:00 2001 From: Charles Baynham Date: Fri, 17 Apr 2020 20:36:48 +0100 Subject: [PATCH 3/6] Fix linter nasties --- tests/unit/repo/test_reproduce.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/unit/repo/test_reproduce.py b/tests/unit/repo/test_reproduce.py index 4f302269fe..94e0b09131 100644 --- a/tests/unit/repo/test_reproduce.py +++ b/tests/unit/repo/test_reproduce.py @@ -1,7 +1,6 @@ import mock -import pytest -from dvc.repo.reproduce import _get_active_graph, _reproduce_stage +from dvc.repo.reproduce import _get_active_graph def test_get_active_graph(tmp_dir, dvc): @@ -32,10 +31,10 @@ def test_get_active_graph(tmp_dir, dvc): def test_number_reproduces(tmp_dir, dvc, reproduce_stage_mock): tmp_dir.dvc_gen({"pre-foo": "pre-foo"}) - foo_stage = dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo") - bar_stage = dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar") - baz_stage = dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz") - baz_stage = dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop") + dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo") + dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar") + dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz") + dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop") reproduce_stage_mock.reset_mock() From 5933c2f58686aaa3c795f577b0f47d2935bdca1e Mon Sep 17 00:00:00 2001 From: Charles Baynham Date: Fri, 17 Apr 2020 21:07:04 +0100 Subject: [PATCH 4/6] Got the order wrong --- tests/unit/repo/test_reproduce.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/repo/test_reproduce.py b/tests/unit/repo/test_reproduce.py index 94e0b09131..7df9603f36 100644 --- a/tests/unit/repo/test_reproduce.py +++ b/tests/unit/repo/test_reproduce.py @@ -28,7 +28,7 @@ def test_get_active_graph(tmp_dir, dvc): @mock.patch("dvc.repo.reproduce._reproduce_stage", returns=[]) -def test_number_reproduces(tmp_dir, dvc, reproduce_stage_mock): +def test_number_reproduces(reproduce_stage_mock, tmp_dir, dvc): tmp_dir.dvc_gen({"pre-foo": "pre-foo"}) dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo") From e4136e4ba83e30cc41e5ed86d0439cdd64066947 Mon Sep 17 00:00:00 2001 From: Charles Baynham Date: Sat, 18 Apr 2020 00:41:38 +0100 Subject: [PATCH 5/6] Check for previous run --- dvc/repo/reproduce.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index bb31368c02..f74024b077 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -91,9 +91,11 @@ def reproduce( targets = self.collect(target, recursive=recursive, graph=active_graph) ret = [] + checked_stages = set() for target in targets: - stages = _reproduce_stages(active_graph, target, **kwargs) + stages, these_checked_stages = _reproduce_stages(active_graph, target, checked_stages, **kwargs) ret.extend(stages) + checked_stages.update(these_checked_stages) return ret @@ -101,6 +103,7 @@ def reproduce( def _reproduce_stages( G, stage, + checked_stages, downstream=False, ignore_build_cache=False, single_item=False, @@ -157,19 +160,22 @@ def _reproduce_stages( pipeline = nx.dfs_postorder_nodes(G, stage) result = [] + these_checked_stages = [] for st in pipeline: - try: - ret = _reproduce_stage(st, **kwargs) - - if len(ret) != 0 and ignore_build_cache: - # NOTE: we are walking our pipeline from the top to the - # bottom. If one stage is changed, it will be reproduced, - # which tells us that we should force reproducing all of - # the other stages down below, even if their direct - # dependencies didn't change. - kwargs["force"] = True - - result.extend(ret) - except Exception as exc: - raise ReproductionError(st.relpath) from exc - return result + if st not in checked_stages: + try: + ret = _reproduce_stage(st, **kwargs) + these_checked_stages.append(st) + + if len(ret) != 0 and ignore_build_cache: + # NOTE: we are walking our pipeline from the top to the + # bottom. If one stage is changed, it will be reproduced, + # which tells us that we should force reproducing all of + # the other stages down below, even if their direct + # dependencies didn't change. + kwargs["force"] = True + + result.extend(ret) + except Exception as exc: + raise ReproductionError(st.relpath) from exc + return result, these_checked_stages From 0c36b42b4f9efa2a9508025341b8e80c4a49658e Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Fri, 17 Apr 2020 23:43:37 +0000 Subject: [PATCH 6/6] Restyled by black --- dvc/repo/reproduce.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 3c01cf2c84..9f28f1218e 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -97,7 +97,9 @@ def reproduce( ret = [] checked_stages = set() for target in targets: - stages, these_checked_stages = _reproduce_stages(active_graph, target, checked_stages, **kwargs) + stages, these_checked_stages = _reproduce_stages( + active_graph, target, checked_stages, **kwargs + ) ret.extend(stages) checked_stages.update(these_checked_stages)