Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@ def __init__(self, output, stages):

assert isinstance(output, str)
assert all(hasattr(stage, "relpath") for stage in stages)
msg = ""
stage_names = [s.addressing for s in stages]
stages_str = " ".join(stage_names)
if len(stages) == 1:
msg = "output '{}' is already specified in {}.".format(
output, first(stages)
)
stage_name = first(stages)
msg = f"output '{output}' is already specified in {stage_name}."
else:
msg = "output '{}' is already specified in stages:\n{}".format(
output, "\n".join(f"\t- {s.addressing}" for s in stages)
output, "\n".join(f"\t- {s}" for s in stage_names)
)
msg += (
f"\nUse `dvc remove {stages_str}` to stop tracking the overlapping output."
)
super().__init__(msg)
self.stages = stages
self.output = output
Expand Down
13 changes: 11 additions & 2 deletions dvc/repo/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
from functools import wraps
from typing import Iterable, List, NamedTuple, Optional, Set, Tuple, Union

from dvc.exceptions import NoOutputOrStageError, OutputNotFoundError
from dvc.exceptions import (
NoOutputOrStageError,
OutputDuplicationError,
OutputNotFoundError,
)
from dvc.repo import lock_repo
from dvc.ui import ui
from dvc.utils import as_posix, parse_target
Expand Down Expand Up @@ -176,7 +180,12 @@ def create(

check_stage_exists(self.repo, stage, stage.path)

self.repo.check_graph(stages={stage})
try:
self.repo.check_graph(stages={stage})
except OutputDuplicationError as exc:
# Don't include the stage currently being added.
exc.stages.remove(stage)
raise OutputDuplicationError(exc.output, exc.stages) from None
Comment on lines +183 to +188
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be nice to add a test for this

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any suggestions for where to add it?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably in tests/fun/test_add, something like: https://github.com/iterative/dvc/blob/121d56b7391f5325e5eb6ff41448980f0ede00df/tests/func/test_add.py#L749-L753 or even just update that one and use match with the expected exception message

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the message vary if the order is different from the one in the test (i.e. you run dvc add first)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it does. This PR doesn't change the error for dvc add, only for dvc stage add, so not sure if it makes sense to put it there. Had trouble finding any tests about dvc stage add beyond testing the cmd args.

Copy link
Copy Markdown
Contributor

@daavoo daavoo Mar 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. I understand.

From a quick look, I think I would personally add it to tests/func/test_stage.py then. Like:

def test_stage_add_duplicated_output(tmp_dir, dvc):
    tmp_dir.dvc_gen("foo", "foo") 
    dvc.add("foo")

    with pytest.raises(
        OutputDuplicationError,
        match="Use `dvc remove foo.dvc` to stop tracking the overlapping output."
    ):
        dvc.stage.add(name="duplicated", cmd="echo bar > foo", outs=["foo"])

But I am also confused about where the logic for StageLoad.create is/should be tested cc @skshetry

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the test you suggested @daavoo. It makes sense to me.


restore_fields(stage)
return stage
Expand Down
12 changes: 12 additions & 0 deletions tests/func/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from dvc.annotations import Annotation
from dvc.dvcfile import SingleStageFile
from dvc.exceptions import OutputDuplicationError
from dvc.fs import LocalFileSystem
from dvc.output import Output
from dvc.repo import Repo, lock_repo
Expand Down Expand Up @@ -333,3 +334,14 @@ def test_stage_run_checkpoint(tmp_dir, dvc, mocker, checkpoint):
mock_cmd_run.assert_called_with(
stage, checkpoint_func=callback, dry=False, run_env=None
)


def test_stage_add_duplicated_output(tmp_dir, dvc):
tmp_dir.dvc_gen("foo", "foo")
dvc.add("foo")

with pytest.raises(
OutputDuplicationError,
match="Use `dvc remove foo.dvc` to stop tracking the overlapping output.",
):
dvc.stage.add(name="duplicated", cmd="echo bar > foo", outs=["foo"])