diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py
index 2e9697c886..9f28f1218e 100644
--- a/dvc/repo/reproduce.py
+++ b/dvc/repo/reproduce.py
@@ -95,9 +95,13 @@ def reproduce(
         )
 
     ret = []
+    checked_stages = set()
     for target in targets:
-        stages = _reproduce_stages(active_graph, target, **kwargs)
+        stages, these_checked_stages = _reproduce_stages(
+            active_graph, target, checked_stages, **kwargs
+        )
         ret.extend(stages)
+        checked_stages.update(these_checked_stages)
 
     return ret
 
@@ -105,6 +109,7 @@ def reproduce(
 def _reproduce_stages(
     G,
     stage,
+    checked_stages,
     downstream=False,
     ignore_build_cache=False,
     single_item=False,
@@ -161,19 +166,22 @@ def _reproduce_stages(
         pipeline = nx.dfs_postorder_nodes(G, stage)
 
     result = []
+    these_checked_stages = []
     for st in pipeline:
-        try:
-            ret = _reproduce_stage(st, **kwargs)
-
-            if len(ret) != 0 and ignore_build_cache:
-                # NOTE: we are walking our pipeline from the top to the
-                # bottom. If one stage is changed, it will be reproduced,
-                # which tells us that we should force reproducing all of
-                # the other stages down below, even if their direct
-                # dependencies didn't change.
-                kwargs["force"] = True
-
-            result.extend(ret)
-        except Exception as exc:
-            raise ReproductionError(st.relpath) from exc
-    return result
+        if st not in checked_stages:
+            try:
+                ret = _reproduce_stage(st, **kwargs)
+                these_checked_stages.append(st)
+
+                if len(ret) != 0 and ignore_build_cache:
+                    # NOTE: we are walking our pipeline from the top to the
+                    # bottom. If one stage is changed, it will be reproduced,
+                    # which tells us that we should force reproducing all of
+                    # the other stages down below, even if their direct
+                    # dependencies didn't change.
+                    kwargs["force"] = True
+
+                result.extend(ret)
+            except Exception as exc:
+                raise ReproductionError(st.relpath) from exc
+    return result, these_checked_stages
diff --git a/tests/unit/repo/test_reproduce.py b/tests/unit/repo/test_reproduce.py
index 2b49273ed3..7df9603f36 100644
--- a/tests/unit/repo/test_reproduce.py
+++ b/tests/unit/repo/test_reproduce.py
@@ -1,3 +1,5 @@
+import mock
+
 from dvc.repo.reproduce import _get_active_graph
 
 
@@ -23,3 +25,19 @@ def test_get_active_graph(tmp_dir, dvc):
     active_graph = _get_active_graph(graph)
     assert set(active_graph.nodes) == {bar_stage, baz_stage}
     assert not active_graph.edges
+
+
+@mock.patch("dvc.repo.reproduce._reproduce_stage", return_value=[])
+def test_number_reproduces(reproduce_stage_mock, tmp_dir, dvc):
+    tmp_dir.dvc_gen({"pre-foo": "pre-foo"})
+
+    dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo")
+    dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar")
+    dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz")
+    dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop")
+
+    reproduce_stage_mock.reset_mock()
+
+    dvc.reproduce(all_pipelines=True)
+
+    assert reproduce_stage_mock.call_count == 5