From 78fd15ad40442762d07f63d4aefb7f4411a7adeb Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Thu, 21 May 2020 18:08:44 +0545 Subject: [PATCH 1/9] addressing: allow stage name addressing without colon --- dvc/command/pipeline.py | 6 +-- dvc/repo/__init__.py | 57 ++++++++++++++++++++++------- dvc/repo/lock.py | 6 +-- dvc/repo/remove.py | 9 ++--- dvc/repo/reproduce.py | 4 +- dvc/stage/__init__.py | 26 ++++--------- dvc/utils/__init__.py | 18 ++++++--- tests/func/test_pipeline.py | 16 ++++---- tests/func/test_repro_multistage.py | 6 +-- tests/func/test_stage.py | 2 +- tests/func/test_status.py | 6 +-- tests/unit/utils/test_utils.py | 3 ++ 12 files changed, 89 insertions(+), 70 deletions(-) diff --git a/dvc/command/pipeline.py b/dvc/command/pipeline.py index a7c82dadda..ab70a77ea4 100644 --- a/dvc/command/pipeline.py +++ b/dvc/command/pipeline.py @@ -10,11 +10,10 @@ class CmdPipelineShow(CmdBase): def _show(self, target, commands, outs, locked): import networkx - from dvc import dvcfile from dvc.utils import parse_target path, name = parse_target(target) - stage = dvcfile.Dvcfile(self.repo, path).stages[name] + stage = self.repo.get_stage(path, name) G = self.repo.graph stages = networkx.dfs_postorder_nodes(G, stage) if locked: @@ -58,12 +57,11 @@ def _build_output_graph(G, target_stage): def _build_graph(self, target, commands=False, outs=False): import networkx - from dvc import dvcfile from dvc.repo.graph import get_pipeline from dvc.utils import parse_target path, name = parse_target(target) - target_stage = dvcfile.Dvcfile(self.repo, path).stages[name] + target_stage = self.repo.get_stage(path, name) G = get_pipeline(self.repo.pipelines, target_stage) nodes = set() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 145144189c..66c1bc0441 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -1,3 +1,4 @@ +import logging import os from contextlib import contextmanager from functools import wraps @@ -5,6 +6,7 @@ from funcy import cached_property, cat, first from dvc.config import Config +from dvc.dvcfile import PIPELINE_FILE, Dvcfile from dvc.exceptions import FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError from dvc.exceptions import NotDvcRepoError, OutputNotFoundError @@ -16,6 +18,8 @@ from ..utils import parse_target from .graph import check_acyclic, get_pipeline, get_pipelines +logger = logging.getLogger(__name__) + def locked(f): @wraps(f) @@ -181,6 +185,25 @@ def _ignore(self): self.scm.ignore_list(flist) + def get_stage(self, path=None, name=None): + if not path: + path = PIPELINE_FILE + logger.debug("Assuming '%s' to be a stage inside '%s'", name, path) + + dvcfile = Dvcfile(self, path) + return dvcfile.stages[name] + + def get_stages(self, path=None, name=None): + if not path: + path = PIPELINE_FILE + logger.debug("Assuming '%s' to be a stage inside '%s'", name, path) + + if name: + return [self.get_stage(path, name)] + + dvcfile = Dvcfile(self, path) + return list(dvcfile.stages.values()) + def check_modified_graph(self, new_stages): """Generate graph including the new stage to check for errors""" # Building graph might be costly for the ones with many DVC-files, @@ -206,7 +229,6 @@ def _collect_inside(self, path, graph): def collect(self, target, with_deps=False, recursive=False, graph=None): import networkx as nx - from ..dvcfile import Dvcfile if not target: return list(graph) if graph else self.stages @@ -217,10 +239,9 @@ def collect(self, target, with_deps=False, recursive=False, graph=None): ) path, name = parse_target(target) - dvcfile = Dvcfile(self, path) - stages = list(dvcfile.stages.filter(name).values()) + stages = self.get_stages(path, name) if not with_deps: - return stages + return list(stages) res = set() for stage in stages: @@ -229,20 +250,29 @@ def collect(self, target, with_deps=False, recursive=False, graph=None): return res def collect_granular(self, target, *args, **kwargs): - from ..dvcfile import Dvcfile, is_valid_filename + from ..dvcfile import Dvcfile, is_valid_filename, PIPELINE_FILE if not target: return [(stage, None) for stage in self.stages] file, name = parse_target(target) - if is_valid_filename(file) and not kwargs.get("with_deps"): - # Optimization: do not collect the graph for a specific .dvc target - stages = Dvcfile(self, file).stages.filter(name) - return [(stage, None) for stage in stages.values()] + # Optimization: do not collect the graph for a specific target + if not kwargs.get("with_deps") and not file: + # parsing is ambiguous when it does not have a colon + # or if it's not a dvcfile, as it can be a stage name + # in `dvc.yaml` or, an output in a stage. + logger.debug( + "Checking if stage '%s' is in '%s'", target, PIPELINE_FILE + ) + dvcfile = Dvcfile(self, PIPELINE_FILE) + if dvcfile.exists() and name in dvcfile.stages: + return [(self.get_stage(PIPELINE_FILE, name), None)] + elif not kwargs.get("with_deps") and is_valid_filename(file): + return [(stage, None) for stage in self.get_stages(file, name)] try: - (out,) = self.find_outs_by_path(file, strict=False) - filter_info = PathInfo(os.path.abspath(file)) + (out,) = self.find_outs_by_path(target, strict=False) + filter_info = PathInfo(os.path.abspath(target)) return [(out.stage, filter_info)] except OutputNotFoundError: stages = self.collect(target, *args, **kwargs) @@ -443,7 +473,7 @@ def plot_templates(self): return PlotTemplates(self.dvc_dir) def _collect_stages(self): - from dvc.dvcfile import Dvcfile, is_valid_filename + from dvc.dvcfile import is_valid_filename stages = [] outs = set() @@ -451,8 +481,7 @@ def _collect_stages(self): for root, dirs, files in self.tree.walk(self.root_dir): for file_name in filter(is_valid_filename, files): path = os.path.join(root, file_name) - stage_loader = Dvcfile(self, path).stages - stages.extend(stage_loader.values()) + stages.extend(self.get_stages(path)) outs.update( out.fspath for stage in stages diff --git a/dvc/repo/lock.py b/dvc/repo/lock.py index b3d006950a..9b1f592683 100644 --- a/dvc/repo/lock.py +++ b/dvc/repo/lock.py @@ -3,13 +3,11 @@ @locked def lock(self, target, unlock=False): - from .. import dvcfile from dvc.utils import parse_target path, name = parse_target(target) - dvcfile = dvcfile.Dvcfile(self, path) - stage = dvcfile.stages[name] + stage = self.get_stage(path, name) stage.locked = False if unlock else True - dvcfile.dump(stage, update_pipeline=True) + stage.dvcfile.dump(stage, update_pipeline=True) return stage diff --git a/dvc/repo/remove.py b/dvc/repo/remove.py index 5c1f1023cf..d00f330099 100644 --- a/dvc/repo/remove.py +++ b/dvc/repo/remove.py @@ -8,15 +8,14 @@ @locked def remove(self, target, dvc_only=False): - from ..dvcfile import Dvcfile + from ..dvcfile import Dvcfile, is_valid_filename path, name = parse_target(target) - dvcfile = Dvcfile(self, path) - stages = list(dvcfile.stages.filter(name).values()) + stages = self.get_stages(path, name) for stage in stages: stage.remove_outs(force=True) - if not dvc_only: - dvcfile.remove() + if path and is_valid_filename(path) and not dvc_only: + Dvcfile(self, path).remove() return stages diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index de36f93c8d..e6cd902fae 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -61,7 +61,6 @@ def reproduce( all_pipelines=False, **kwargs ): - from ..dvcfile import Dvcfile from dvc.utils import parse_target if not target and not all_pipelines: @@ -81,8 +80,7 @@ def reproduce( if all_pipelines: pipelines = active_pipelines else: - dvcfile = Dvcfile(self, path) - stage = dvcfile.stages[name] + stage = self.get_stage(path, name) pipelines = [get_pipeline(active_pipelines, stage)] targets = [] diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 8a20381401..129fc4818f 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -144,14 +144,10 @@ def dvcfile(self, dvcfile): self._dvcfile = dvcfile def __repr__(self): - return "Stage: '{path}'".format( - path=self.path_in_repo if self.path else "No path" - ) + return f"Stage: '{self.addressing}'" def __str__(self): - return "stage: '{path}'".format( - path=self.relpath if self.path else "No path" - ) + return f"stage: '{self.addressing}'" @property def addressing(self): @@ -159,7 +155,7 @@ def addressing(self): Useful for alternative presentations where we don't need `Stage:` prefix. """ - return self.relpath + return self.relpath if self.path else "No path" def __hash__(self): return hash(self.path_in_repo) @@ -530,19 +526,13 @@ def __eq__(self, other): def __hash__(self): return hash((self.path_in_repo, self.name)) - def __repr__(self): - return "Stage: '{path}:{name}'".format( - path=self.relpath if self.path else "No path", name=self.name - ) - - def __str__(self): - return "stage: '{path}:{name}'".format( - path=self.relpath if self.path else "No path", name=self.name - ) - @property def addressing(self): - return super().addressing + ":" + self.name + from dvc.dvcfile import PIPELINE_FILE + + if self.path and self.relpath == PIPELINE_FILE: + return self.name + return f"{super().addressing}:{self.name}" def reload(self): return self.dvcfile.stages[self.name] diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index b614d6cbe6..95e1278768 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -395,7 +395,7 @@ def format_link(link): def parse_target(target, default=None): - from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK + from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK, is_valid_filename from dvc.exceptions import DvcException if not target: @@ -409,13 +409,19 @@ def parse_target(target, default=None): match.group("path"), match.group("name"), ) + if path: + if os.path.basename(path) == PIPELINE_LOCK: + raise DvcException( + "Did you mean: `{}`?".format( + target.replace(".lock", ".yaml", 1) + ) + ) + if not name: + ret = (target, None) + return ret if is_valid_filename(target) else ret[::-1] + if not path: path = default or PIPELINE_FILE logger.debug("Assuming file to be '%s'", path) - if os.path.basename(path) == PIPELINE_LOCK: - raise DvcException( - "Did you mean: `{}`?".format(target.replace(".lock", ".yaml", 1)) - ) - return path, name diff --git a/tests/func/test_pipeline.py b/tests/func/test_pipeline.py index 765b45dfac..d52c8852b5 100644 --- a/tests/func/test_pipeline.py +++ b/tests/func/test_pipeline.py @@ -296,20 +296,20 @@ def test_pipeline_list_show_multistage(tmp_dir, dvc, run_copy, caplog): with caplog.at_level(logging.INFO, "dvc"): command._show("foobar.dvc", False, False, False) output = caplog.text.splitlines() - assert "dvc.yaml:copy-foo-bar" in output[0] + assert "copy-foo-bar" in output[0] assert "foobar.dvc" in output[1] caplog.clear() with caplog.at_level(logging.INFO, "dvc"): - command._show("dvc.yaml:copy-foo-bar", False, False, False) - assert "dvc.yaml:copy-foo-bar" in caplog.text + command._show("copy-foo-bar", False, False, False) + assert "copy-foo-bar" in caplog.text assert "foobar.dvc" not in caplog.text command = CmdPipelineList([]) caplog.clear() with caplog.at_level(logging.INFO, "dvc"): command.run() - assert "dvc.yaml:copy-foo-bar" in caplog.text + assert "copy-foo-bar" in caplog.text assert "foobar.dvc" in caplog.text assert "1 pipelines in total" @@ -320,13 +320,13 @@ def test_pipeline_ascii_multistage(tmp_dir, dvc, run_copy): run_copy("bar", "foobar", single_stage=True) command = CmdPipelineShow([]) nodes, edges, _ = command._build_graph("foobar.dvc") - assert set(nodes) == {"dvc.yaml:copy-foo-bar", "foobar.dvc"} + assert set(nodes) == {"copy-foo-bar", "foobar.dvc"} assert set(edges) == { - ("foobar.dvc", "dvc.yaml:copy-foo-bar"), + ("foobar.dvc", "copy-foo-bar"), } - nodes, *_ = command._build_graph("dvc.yaml:copy-foo-bar") - assert set(nodes) == {"dvc.yaml:copy-foo-bar"} + nodes, *_ = command._build_graph("copy-foo-bar") + assert set(nodes) == {"copy-foo-bar"} def test_pipeline_multi_outputs_stages(dvc): diff --git a/tests/func/test_repro_multistage.py b/tests/func/test_repro_multistage.py index 63b3b87557..2ae0023356 100644 --- a/tests/func/test_repro_multistage.py +++ b/tests/func/test_repro_multistage.py @@ -312,15 +312,13 @@ def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy): tmp_dir.gen("foo", "foo") stage = run_copy("foo", "bar", name="copy-file") - target = ":copy-file" + target = "copy-file" assert not dvc.reproduce(target) stage.cmd = " ".join(stage.cmd.split()) # change cmd spacing by two PipelineFile(dvc, PIPELINE_FILE)._dump_pipeline_file(stage) - assert dvc.status([target]) == { - PIPELINE_FILE + target: ["changed command"] - } + assert dvc.status([target]) == {target: ["changed command"]} assert dvc.reproduce(target)[0] == stage diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index 284fe3b48c..83c6a5cc8e 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -202,7 +202,7 @@ def test_stage_addressing(tmp_dir, dvc, run_copy): assert stage1.addressing == "bar.dvc" stage2 = run_copy("bar", "baz", name="copy-bar-baz") - assert stage2.addressing == "dvc.yaml:copy-bar-baz" + assert stage2.addressing == "copy-bar-baz" folder = tmp_dir / "dir" folder.mkdir() diff --git a/tests/func/test_status.py b/tests/func/test_status.py index e94934f194..48aff22920 100644 --- a/tests/func/test_status.py +++ b/tests/func/test_status.py @@ -73,12 +73,12 @@ def test_status_on_pipeline_stages(tmp_dir, dvc, run_copy): stage.cmd = " ".join(stage.cmd.split()) stage.dvcfile._dump_pipeline_file(stage) - assert dvc.status() == {"dvc.yaml:copy-foo-bar": ["changed command"]} + assert dvc.status() == {"copy-foo-bar": ["changed command"]} # delete outputs (tmp_dir / "bar").unlink() assert dvc.status() == { - "dvc.yaml:copy-foo-bar": [ + "copy-foo-bar": [ {"changed outs": {"bar": "deleted"}}, "changed command", ] @@ -86,7 +86,7 @@ def test_status_on_pipeline_stages(tmp_dir, dvc, run_copy): (tmp_dir / "foo").unlink() assert dvc.status() == { "foo.dvc": [{"changed outs": {"foo": "deleted"}}], - "dvc.yaml:copy-foo-bar": [ + "copy-foo-bar": [ {"changed deps": {"foo": "deleted"}}, {"changed outs": {"bar": "deleted"}}, "changed command", diff --git a/tests/unit/utils/test_utils.py b/tests/unit/utils/test_utils.py index 08b754df8a..1b8aa9c470 100644 --- a/tests/unit/utils/test_utils.py +++ b/tests/unit/utils/test_utils.py @@ -146,6 +146,9 @@ def test_resolve_output(inp, out, is_dir, expected, mocker): ["../models/stage.dvc", ("../models/stage.dvc", None), "def"], [":name", ("default", "name"), "default"], [":name", ("default", "name"), "default"], + ["something.dvc:name", ("something.dvc", "name"), None], + ["../something.dvc:name", ("../something.dvc", "name"), None], + ["file", (None, "file"), None], ], ) def test_parse_target(inp, out, default): From f47a3f336317c360efa16505db98f3cf7339a5d6 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Fri, 22 May 2020 19:05:18 +0545 Subject: [PATCH 2/9] repo: reorganize imports --- dvc/repo/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 66c1bc0441..9f855dbf39 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -6,7 +6,7 @@ from funcy import cached_property, cat, first from dvc.config import Config -from dvc.dvcfile import PIPELINE_FILE, Dvcfile +from dvc.dvcfile import PIPELINE_FILE, Dvcfile, is_valid_filename from dvc.exceptions import FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError from dvc.exceptions import NotDvcRepoError, OutputNotFoundError @@ -250,8 +250,6 @@ def collect(self, target, with_deps=False, recursive=False, graph=None): return res def collect_granular(self, target, *args, **kwargs): - from ..dvcfile import Dvcfile, is_valid_filename, PIPELINE_FILE - if not target: return [(stage, None) for stage in self.stages] @@ -473,8 +471,6 @@ def plot_templates(self): return PlotTemplates(self.dvc_dir) def _collect_stages(self): - from dvc.dvcfile import is_valid_filename - stages = [] outs = set() From 1ea6b4999f6140e3192f3f26b9f6b8881b00e47a Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 19:10:20 +0545 Subject: [PATCH 3/9] refactor/reorganize collect_granular --- dvc/exceptions.py | 6 +++ dvc/repo/__init__.py | 88 ++++++++++++++++++++++++++++++----------- dvc/repo/checkout.py | 22 +++++++---- dvc/repo/commit.py | 1 + dvc/stage/exceptions.py | 15 +++---- dvc/stage/loader.py | 13 ------ 6 files changed, 92 insertions(+), 53 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 0a62f9437b..acda76bdc5 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -322,3 +322,9 @@ def __init__(self, path_info): class IsADirectoryError(DvcException): """Raised when a file operation is requested on a directory.""" + + +class NoOutputOrStage(DvcException): + """ + Raised when the target is neither an output nor a stage name in dvc.yaml + """ diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 9f855dbf39..266de1ebef 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -9,12 +9,17 @@ from dvc.dvcfile import PIPELINE_FILE, Dvcfile, is_valid_filename from dvc.exceptions import FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError -from dvc.exceptions import NotDvcRepoError, OutputNotFoundError +from dvc.exceptions import ( + NoOutputOrStage, + NotDvcRepoError, + OutputNotFoundError, +) from dvc.ignore import CleanTree from dvc.path_info import PathInfo from dvc.repo.tree import RepoTree from dvc.utils.fs import path_isin +from ..stage.exceptions import StageFileDoesNotExistError, StageNotFound from ..utils import parse_target from .graph import check_acyclic, get_pipeline, get_pipelines @@ -227,9 +232,9 @@ def _collect_inside(self, path, graph): stages = nx.dfs_postorder_nodes(graph) return [stage for stage in stages if path_isin(stage.path, path)] - def collect(self, target, with_deps=False, recursive=False, graph=None): - import networkx as nx - + def collect( + self, target=None, with_deps=False, recursive=False, graph=None + ): if not target: return list(graph) if graph else self.stages @@ -241,40 +246,79 @@ def collect(self, target, with_deps=False, recursive=False, graph=None): path, name = parse_target(target) stages = self.get_stages(path, name) if not with_deps: - return list(stages) + return stages res = set() for stage in stages: - pipeline = get_pipeline(get_pipelines(graph or self.graph), stage) - res.update(nx.dfs_postorder_nodes(pipeline, stage)) + res.update(self._collect_pipeline(stage, graph=graph)) return res - def collect_granular(self, target, *args, **kwargs): + def _collect_pipeline(self, stage, graph=None): + import networkx as nx + + pipeline = get_pipeline(get_pipelines(graph or self.graph), stage) + return nx.dfs_postorder_nodes(pipeline, stage) + + def _collect_from_default_dvcfile(self, target): + dvcfile = Dvcfile(self, PIPELINE_FILE) + if dvcfile.exists(): + return dvcfile.stages.get(target) + + def collect_granular( + self, target=None, with_deps=False, recursive=False, graph=None + ): + """ + Priority is in the order of following in case of ambiguity: + - .dvc file or .yaml file + - dir if recursive and directory exists + - stage_name + - output file + """ if not target: return [(stage, None) for stage in self.stages] file, name = parse_target(target) + stages = [] + # Optimization: do not collect the graph for a specific target - if not kwargs.get("with_deps") and not file: + if not file: # parsing is ambiguous when it does not have a colon # or if it's not a dvcfile, as it can be a stage name # in `dvc.yaml` or, an output in a stage. logger.debug( "Checking if stage '%s' is in '%s'", target, PIPELINE_FILE ) - dvcfile = Dvcfile(self, PIPELINE_FILE) - if dvcfile.exists() and name in dvcfile.stages: - return [(self.get_stage(PIPELINE_FILE, name), None)] - elif not kwargs.get("with_deps") and is_valid_filename(file): - return [(stage, None) for stage in self.get_stages(file, name)] - - try: - (out,) = self.find_outs_by_path(target, strict=False) - filter_info = PathInfo(os.path.abspath(target)) - return [(out.stage, filter_info)] - except OutputNotFoundError: - stages = self.collect(target, *args, **kwargs) - return [(stage, None) for stage in stages] + if not (recursive and os.path.isdir(target)): + stage = self._collect_from_default_dvcfile(target) + if stage: + stages = ( + self._collect_pipeline(stage) if with_deps else [stage] + ) + elif not with_deps and is_valid_filename(file): + stages = self.get_stages(file, name) + + if not stages: + if not (recursive and os.path.isdir(target)): + try: + (out,) = self.find_outs_by_path(target, strict=False) + filter_info = PathInfo(os.path.abspath(target)) + return [(out.stage, filter_info)] + except OutputNotFoundError: + pass + + try: + stages = self.collect(target, with_deps, recursive, graph) + except StageFileDoesNotExistError as exc: + # collect() might try to use `target` as a stage name + # and throw error that dvc.yaml does not exist, whereas it + # should say that both stage name and file does not exist. + if file and is_valid_filename(file): + raise + raise NoOutputOrStage(target, exc.file) from exc + except StageNotFound as exc: + raise NoOutputOrStage(target, exc.file) from exc + + return [(stage, None) for stage in stages] def used_cache( self, diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 9adda0e047..0eacd53455 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -1,7 +1,11 @@ import logging import os -from dvc.exceptions import CheckoutError, CheckoutErrorSuggestGit +from dvc.exceptions import ( + CheckoutError, + CheckoutErrorSuggestGit, + NoOutputOrStage, +) from dvc.progress import Tqdm from dvc.utils import relpath @@ -18,11 +22,11 @@ def _get_unused_links(repo): return repo.state.get_unused_links(used) -def _fspath_dir(path, root): +def _fspath_dir(path): if not os.path.exists(str(path)): return str(path) - path = relpath(path, root) + path = relpath(path) return os.path.join(path, "") if os.path.isdir(path) else path @@ -56,7 +60,7 @@ def _checkout( targets = [None] unused = _get_unused_links(self) - stats["deleted"] = [_fspath_dir(u, self.root_dir) for u in unused] + stats["deleted"] = [_fspath_dir(u) for u in unused] self.state.remove_links(unused) if isinstance(targets, str): @@ -70,7 +74,11 @@ def _checkout( target, with_deps=with_deps, recursive=recursive ) ) - except (StageFileDoesNotExistError, StageFileBadNameError) as exc: + except ( + StageFileDoesNotExistError, + StageFileBadNameError, + NoOutputOrStage, + ) as exc: if not target: raise raise CheckoutErrorSuggestGit(target) from exc @@ -87,9 +95,7 @@ def _checkout( filter_info=filter_info, ) for key, items in result.items(): - stats[key].extend( - _fspath_dir(path, self.root_dir) for path in items - ) + stats[key].extend(_fspath_dir(path) for path in items) if stats.get("failed"): raise CheckoutError(stats["failed"], stats) diff --git a/dvc/repo/commit.py b/dvc/repo/commit.py index 4e63298153..e63ae59a42 100644 --- a/dvc/repo/commit.py +++ b/dvc/repo/commit.py @@ -46,3 +46,4 @@ def commit(self, target, with_deps=False, recursive=False, force=False): stage.commit() Dvcfile(self, stage.path).dump(stage) + return stages diff --git a/dvc/stage/exceptions.py b/dvc/stage/exceptions.py index 4be365238f..6ad7c2f81e 100644 --- a/dvc/stage/exceptions.py +++ b/dvc/stage/exceptions.py @@ -15,15 +15,8 @@ class StageFileFormatError(DvcException): class StageFileDoesNotExistError(DvcException): def __init__(self, fname): - from dvc.dvcfile import DVC_FILE_SUFFIX, is_dvc_file - - msg = f"'{fname}' does not exist." - - sname = fname + DVC_FILE_SUFFIX - if is_dvc_file(sname): - msg += f" Do you mean '{sname}'?" - - super().__init__(msg) + self.file = fname + super().__init__(f"'{self.file}' does not exist.") class StageFileAlreadyExistsError(DvcException): @@ -87,8 +80,10 @@ def __init__(self, missing_files): class StageNotFound(KeyError, DvcException): def __init__(self, file, name): + self.file = file.relpath + self.name = name super().__init__( - f"Stage '{name}' not found inside '{file.relpath}' file" + f"Stage '{self.name}' not found inside '{self.file}' file" ) diff --git a/dvc/stage/loader.py b/dvc/stage/loader.py index d973f31a94..169d5d3039 100644 --- a/dvc/stage/loader.py +++ b/dvc/stage/loader.py @@ -30,16 +30,6 @@ def __init__(self, dvcfile, stages_data, lockfile_data=None): self.stages_data = stages_data or {} self.lockfile_data = lockfile_data or {} - def filter(self, item=None): - if not item: - return self - - if item not in self: - raise StageNotFound(self.dvcfile, item) - - data = {item: self.stages_data[item]} if item in self else {} - return self.__class__(self.dvcfile, data, self.lockfile_data) - @staticmethod def fill_from_lock(stage, lock_data): from .params import StageParams @@ -224,9 +214,6 @@ def __init__(self, dvcfile, stage_data, stage_text=None): self.stage_data = stage_data or {} self.stage_text = stage_text - def filter(self, item=None): - return self - def __getitem__(self, item): if item: logger.warning( From f99a1900fea201d65f3efa8f19a4f23e19afae5c Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 19:11:26 +0545 Subject: [PATCH 4/9] add tests for stage addressing --- tests/func/test_checkout.py | 71 ++++++-- tests/func/test_commit.py | 14 ++ tests/func/test_data_cloud.py | 21 +++ tests/func/test_dvcfile.py | 39 +---- tests/func/test_repo.py | 251 ++++++++++++++++++++++++++++ tests/func/test_repro.py | 2 +- tests/func/test_repro_multistage.py | 2 +- tests/func/test_status.py | 2 +- tests/unit/repo/test_repo.py | 15 ++ 9 files changed, 365 insertions(+), 52 deletions(-) diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 171f16c93d..dcac6050b9 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -7,22 +7,20 @@ import pytest from mock import patch -from dvc.dvcfile import DVC_FILE_SUFFIX, Dvcfile +from dvc.dvcfile import DVC_FILE_SUFFIX, PIPELINE_FILE, Dvcfile from dvc.exceptions import ( CheckoutError, CheckoutErrorSuggestGit, ConfirmRemoveError, DvcException, + NoOutputOrStage, ) from dvc.main import main from dvc.remote import S3Remote from dvc.remote.local import LocalRemote from dvc.repo import Repo as DvcRepo from dvc.stage import Stage -from dvc.stage.exceptions import ( - StageFileBadNameError, - StageFileDoesNotExistError, -) +from dvc.stage.exceptions import StageFileDoesNotExistError from dvc.system import System from dvc.utils import relpath from dvc.utils.fs import walk_files @@ -403,17 +401,26 @@ class TestCheckoutSuggestGit(TestRepro): def test(self): try: - self.dvc.checkout(targets=["gitbranch"]) + self.dvc.checkout(targets="gitbranch") except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, StageFileDoesNotExistError) + self.assertIsInstance(exc.__cause__, NoOutputOrStage) + self.assertIsInstance( + exc.__cause__.__cause__, StageFileDoesNotExistError + ) + + try: + self.dvc.checkout(targets=self.FOO) + except DvcException as exc: + self.assertIsInstance(exc, CheckoutErrorSuggestGit) + self.assertIsInstance(exc.__cause__, NoOutputOrStage) self.assertIsNone(exc.__cause__.__cause__) try: - self.dvc.checkout(targets=[self.FOO]) + self.dvc.checkout(targets="looks-like-dvcfile.dvc") except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, StageFileBadNameError) + self.assertIsInstance(exc.__cause__, StageFileDoesNotExistError) self.assertIsNone(exc.__cause__.__cause__) @@ -770,9 +777,51 @@ def test_checkout_for_external_outputs(tmp_dir, dvc): assert stats == {**empty_checkout, "modified": [str(file_path)]} -def test_checkouts_for_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): - from dvc.dvcfile import PIPELINE_FILE +def test_checkouts_with_different_addressing(tmp_dir, dvc, run_copy): + tmp_dir.gen({"foo": "foo", "lorem": "lorem"}) + run_copy("foo", "bar", name="copy-foo-bar") + run_copy("lorem", "ipsum", name="copy-lorem-ipsum") + + (tmp_dir / "bar").unlink() + (tmp_dir / "ipsum").unlink() + assert set(dvc.checkout(PIPELINE_FILE)["added"]) == {"bar", "ipsum"} + + (tmp_dir / "bar").unlink() + (tmp_dir / "ipsum").unlink() + assert set(dvc.checkout(":")["added"]) == {"bar", "ipsum"} + + (tmp_dir / "bar").unlink() + assert dvc.checkout("copy-foo-bar")["added"] == ["bar"] + (tmp_dir / "bar").unlink() + assert dvc.checkout("dvc.yaml:copy-foo-bar")["added"] == ["bar"] + + (tmp_dir / "bar").unlink() + assert dvc.checkout(":copy-foo-bar")["added"] == ["bar"] + + (tmp_dir / "bar").unlink() + (tmp_dir / "data").mkdir() + with (tmp_dir / "data").chdir(): + assert dvc.checkout(relpath(tmp_dir / "dvc.yaml") + ":copy-foo-bar")[ + "added" + ] == [relpath(tmp_dir / "bar")] + + (tmp_dir / "bar").unlink() + assert dvc.checkout("bar")["added"] == ["bar"] + + +def test_checkouts_on_same_stage_name_and_output_name(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + run_copy("foo", "bar", name="copy-foo-bar") + run_copy("foo", "copy-foo-bar", name="make_collision") + + (tmp_dir / "bar").unlink() + (tmp_dir / "copy-foo-bar").unlink() + assert dvc.checkout("copy-foo-bar")["added"] == ["bar"] + assert dvc.checkout("./copy-foo-bar")["added"] == ["copy-foo-bar"] + + +def test_checkouts_for_pipeline_tracked_outs(tmp_dir, dvc, scm, run_copy): tmp_dir.gen("foo", "foo") stage1 = run_copy("foo", "bar", name="copy-foo-bar") tmp_dir.gen("lorem", "lorem") diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py index 3b3872e941..870573e9f9 100644 --- a/tests/func/test_commit.py +++ b/tests/func/test_commit.py @@ -1,5 +1,6 @@ import pytest +from dvc.dvcfile import PIPELINE_FILE from dvc.stage.exceptions import StageCommitError from dvc.utils.stage import dump_stage_file, load_stage_file @@ -81,3 +82,16 @@ def test_commit_no_exec(tmp_dir, dvc): assert dvc.status(stage.path) dvc.commit(stage.path, force=True) assert dvc.status(stage.path) == {} + + +def test_commit_pipeline_stage(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + stage = run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") + assert dvc.status(stage.addressing) + assert dvc.commit(stage.addressing, force=True) == [stage] + assert not dvc.status(stage.addressing) + + # just to confirm different variants work + assert dvc.commit(f":{stage.addressing}") == [stage] + assert dvc.commit(f"{PIPELINE_FILE}:{stage.addressing}") == [stage] + assert dvc.commit(PIPELINE_FILE) == [stage] diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index bbb201c296..fb5da6e751 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -932,3 +932,24 @@ def test_push_pull_all(tmp_dir, scm, dvc, setup_remote, key, expected): clean(["foo", "bar", "baz"], dvc) assert dvc.pull(**{key: True})["fetched"] == expected + + +def test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, setup_remote): + remote_path = setup_remote(dvc) + tmp_dir.dvc_gen("foo", "foo") + run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") + + dvc.push("copy-foo-bar") + assert len(recurse_list_dir(remote_path)) == 1 + # pushing everything so as we can check pull/fetch only downloads + # from specified targets + dvc.push() + clean(["foo", "bar"], dvc) + + dvc.pull("copy-foo-bar") + assert (tmp_dir / "bar").exists() + assert len(recurse_list_dir(dvc.cache.local.cache_dir)) == 1 + clean(["bar"], dvc) + + dvc.fetch("copy-foo-bar") + assert len(recurse_list_dir(dvc.cache.local.cache_dir)) == 1 diff --git a/tests/func/test_dvcfile.py b/tests/func/test_dvcfile.py index 51e6e00cee..4ba82e5bd5 100644 --- a/tests/func/test_dvcfile.py +++ b/tests/func/test_dvcfile.py @@ -1,10 +1,7 @@ import pytest from dvc.dvcfile import PIPELINE_FILE, Dvcfile -from dvc.stage.exceptions import ( - StageFileDoesNotExistError, - StageNameUnspecified, -) +from dvc.stage.exceptions import StageFileDoesNotExistError from dvc.stage.loader import StageNotFound @@ -163,37 +160,3 @@ def test_stage_collection(tmp_dir, dvc): single_stage=True, ) assert {s for s in dvc.stages} == {stage1, stage3, stage2} - - -def test_stage_filter(tmp_dir, dvc, run_copy): - tmp_dir.gen("foo", "foo") - stage1 = run_copy("foo", "bar", name="copy-foo-bar") - stage2 = run_copy("bar", "bax", name="copy-bar-bax") - stage3 = run_copy("bax", "baz", name="copy-bax-baz") - stages = Dvcfile(dvc, PIPELINE_FILE).stages - assert set(stages.filter(None).values()) == { - stage1, - stage2, - stage3, - } - assert set(stages.filter("copy-bar-bax").values()) == {stage2} - assert stages.filter("copy-bar-bax").get("copy-bar-bax") == stage2 - with pytest.raises(StageNameUnspecified): - stages.get(None) - - with pytest.raises(StageNotFound): - assert stages["unknown"] - - with pytest.raises(StageNotFound): - assert stages.filter("unknown") - - -def test_stage_filter_in_singlestage_file(tmp_dir, dvc, run_copy): - tmp_dir.gen("foo", "foo") - stage = run_copy("foo", "bar", single_stage=True) - dvcfile = Dvcfile(dvc, stage.path) - assert set(dvcfile.stages.filter(None).values()) == {stage} - assert dvcfile.stages.filter(None).get(None) == stage - assert dvcfile.stages.filter("copy-bar-bax").get("copy-bar-bax") == stage - assert dvcfile.stages.filter("copy-bar-bax").get(None) == stage - assert set(dvcfile.stages.filter("unknown").values()) == {stage} diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 5fc55c1194..49ed8cfc29 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -1,8 +1,21 @@ import os +from operator import itemgetter + +import pytest from dvc.cache import Cache +from dvc.dvcfile import PIPELINE_FILE +from dvc.exceptions import NoOutputOrStage +from dvc.path_info import PathInfo from dvc.repo import Repo +from dvc.stage.exceptions import ( + StageFileDoesNotExistError, + StageNameUnspecified, + StageNotFound, +) from dvc.system import System +from dvc.utils import relpath +from dvc.utils.fs import remove def test_destroy(tmp_dir, dvc, run_copy): @@ -88,6 +101,12 @@ def collect_outs(*args, **kwargs): assert collect_outs("dvc.yaml:copy-foo-foobar", recursive=True) == { "foobar" } + assert collect_outs("copy-foo-foobar") == {"foobar"} + assert collect_outs("copy-foo-foobar", with_deps=True) == { + "foobar", + "foo", + } + assert collect_outs("copy-foo-foobar", recursive=True) == {"foobar"} run_copy("foobar", "baz", name="copy-foobar-baz") assert collect_outs("dvc.yaml") == {"foobar", "baz"} @@ -98,6 +117,26 @@ def collect_outs(*args, **kwargs): } +def test_collect_dir_recursive(tmp_dir, dvc, run_head): + tmp_dir.gen({"dir": {"foo": "foo"}}) + (stage1,) = dvc.add("dir", recursive=True) + with (tmp_dir / "dir").chdir(): + stage2 = run_head("foo", name="copy-foo-bar") + stage3 = run_head("foo-1", single_stage=True) + assert set(dvc.collect("dir", recursive=True)) == {stage1, stage2, stage3} + + +def test_collect_with_not_existing_output_or_stage_name( + tmp_dir, dvc, run_copy +): + with pytest.raises(StageFileDoesNotExistError): + dvc.collect("some_file") + tmp_dir.dvc_gen("foo", "foo") + run_copy("foo", "bar", name="copy-foo-bar") + with pytest.raises(StageNotFound): + dvc.collect("some_file") + + def test_stages(tmp_dir, dvc): def stages(): return {stage.relpath for stage in Repo(os.fspath(tmp_dir)).stages} @@ -113,3 +152,215 @@ def stages(): tmp_dir.gen(".dvcignore", "dir") assert stages() == {"file.dvc"} + + +@pytest.fixture +def stages(tmp_dir, run_copy): + stage1, stage2 = tmp_dir.dvc_gen({"foo": "foo", "lorem": "lorem"}) + return { + "foo-generate": stage1, + "lorem-generate": stage2, + "copy-foo-bar": run_copy("foo", "bar", single_stage=True), + "copy-bar-foobar": run_copy("bar", "foobar", name="copy-bar-foobar"), + "copy-lorem-ipsum": run_copy("lorem", "ipsum", name="lorem-ipsum"), + } + + +def test_collect_granular_with_no_target(tmp_dir, dvc, stages): + assert set(map(itemgetter(0), dvc.collect_granular())) == set( + stages.values() + ) + assert list(map(itemgetter(1), dvc.collect_granular())) == [None] * len( + stages + ) + + +def test_collect_granular_with_target(tmp_dir, dvc, stages): + assert dvc.collect_granular("bar.dvc") == [(stages["copy-foo-bar"], None)] + assert dvc.collect_granular(PIPELINE_FILE) == [ + (stages["copy-bar-foobar"], None), + (stages["copy-lorem-ipsum"], None), + ] + assert dvc.collect_granular(":") == [ + (stages["copy-bar-foobar"], None), + (stages["copy-lorem-ipsum"], None), + ] + assert dvc.collect_granular("copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + assert dvc.collect_granular(":copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + assert dvc.collect_granular("dvc.yaml:copy-bar-foobar") == [ + (stages["copy-bar-foobar"], None) + ] + + with (tmp_dir / dvc.DVC_DIR).chdir(): + assert dvc.collect_granular( + relpath(tmp_dir / PIPELINE_FILE) + ":copy-bar-foobar" + ) == [(stages["copy-bar-foobar"], None)] + + assert dvc.collect_granular("foobar") == [ + (stages["copy-bar-foobar"], PathInfo(tmp_dir / "foobar")) + ] + + +@pytest.mark.parametrize( + "target", + [ + "not_existing.dvc", + "not_existing.dvc:stage_name", + "not_existing/dvc.yaml", + "not_existing/dvc.yaml:stage_name", + ], +) +def test_collect_with_not_existing_dvcfile(tmp_dir, dvc, target): + with pytest.raises(StageFileDoesNotExistError): + dvc.collect_granular(target) + with pytest.raises(StageFileDoesNotExistError): + dvc.collect(target) + + +def test_collect_granular_with_not_existing_output_or_stage_name(tmp_dir, dvc): + with pytest.raises(NoOutputOrStage): + dvc.collect_granular("some_file") + with pytest.raises(NoOutputOrStage): + dvc.collect_granular("some_file", recursive=True) + + +def test_collect_granular_with_deps(tmp_dir, dvc, stages): + assert set( + map(itemgetter(0), dvc.collect_granular("bar.dvc", with_deps=True)) + ) == {stages["copy-foo-bar"], stages["foo-generate"]} + assert set( + map( + itemgetter(0), + dvc.collect_granular("copy-bar-foobar", with_deps=True), + ) + ) == { + stages["copy-bar-foobar"], + stages["copy-foo-bar"], + stages["foo-generate"], + } + assert set( + map( + itemgetter(0), dvc.collect_granular(PIPELINE_FILE, with_deps=True), + ) + ) == set(stages.values()) + + +def test_collect_granular_same_output_name_stage_name(tmp_dir, dvc, run_copy): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + (stage2,) = tmp_dir.dvc_gen("copy-foo-bar", "copy-foo-bar") + stage3 = run_copy("foo", "bar", name="copy-foo-bar") + + assert dvc.collect_granular("copy-foo-bar") == [(stage3, None)] + + coll = dvc.collect_granular("copy-foo-bar", with_deps=True) + assert set(map(itemgetter(0), coll)) == {stage3, stage1} + assert list(map(itemgetter(1), coll)) == [None] * 2 + + assert dvc.collect_granular("./copy-foo-bar") == [ + (stage2, PathInfo(tmp_dir / "copy-foo-bar")) + ] + assert dvc.collect_granular("./copy-foo-bar", with_deps=True) == [ + (stage2, PathInfo(tmp_dir / "copy-foo-bar")) + ] + + +def test_collect_granular_priority_on_collision(tmp_dir, dvc, run_copy): + tmp_dir.gen({"dir": {"foo": "foo"}, "foo": "foo"}) + (stage1,) = dvc.add("dir", recursive=True) + stage2 = run_copy("foo", "bar", name="dir") + + assert dvc.collect_granular("dir") == [(stage2, None)] + assert dvc.collect_granular("dir", recursive=True) == [(stage1, None)] + + remove(tmp_dir / "dir") + + assert dvc.collect_granular("dir") == [(stage2, None)] + assert dvc.collect_granular("dir", recursive=True) == [(stage2, None)] + + +def test_collect_granular_collision_output_dir_stage_name( + tmp_dir, dvc, run_copy +): + stage1, stage2, = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) + stage3 = run_copy("foo", "bar", name="dir") + + assert dvc.collect_granular("dir") == [(stage3, None)] + assert not dvc.collect_granular("dir", recursive=True) + assert dvc.collect_granular("./dir") == [ + (stage1, PathInfo(tmp_dir / "dir")) + ] + + +def test_collect_granular_not_existing_stage_name(tmp_dir, dvc, run_copy): + tmp_dir.dvc_gen("foo", "foo") + (stage,) = tmp_dir.dvc_gen("copy-foo-bar", "copy-foo-bar") + run_copy("foo", "bar", name="copy-foo-bar") + + assert dvc.collect_granular("copy-foo-bar.dvc:stage_name_not_needed") == [ + (stage, None) + ] + with pytest.raises(StageNotFound): + dvc.collect_granular("dvc.yaml:does-not-exist") + + +def test_get_stages(tmp_dir, dvc, run_copy): + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages() + + tmp_dir.gen("foo", "foo") + stage1 = run_copy("foo", "bar", name="copy-foo-bar") + stage2 = run_copy("bar", "foobar", name="copy-bar-foobar") + + assert set(dvc.get_stages()) == {stage1, stage2} + assert set(dvc.get_stages(path=PIPELINE_FILE)) == {stage1, stage2} + assert set(dvc.get_stages(name="copy-bar-foobar")) == {stage2} + assert set(dvc.get_stages(path=PIPELINE_FILE, name="copy-bar-foobar")) == { + stage2 + } + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages(path=relpath(tmp_dir / ".." / PIPELINE_FILE)) + + with pytest.raises(StageNotFound): + dvc.get_stages(path=PIPELINE_FILE, name="copy") + + +def test_get_stages_old_dvcfile(tmp_dir, dvc): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + assert set(dvc.get_stages("foo.dvc")) == {stage1} + assert set(dvc.get_stages("foo.dvc", name="foo-generate")) == {stage1} + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stages(path=relpath(tmp_dir / ".." / "foo.dvc")) + + +def test_get_stage(tmp_dir, dvc, run_copy): + tmp_dir.gen("foo", "foo") + stage1 = run_copy("foo", "bar", name="copy-foo-bar") + + with pytest.raises(StageNameUnspecified): + dvc.get_stage() + + with pytest.raises(StageNameUnspecified): + dvc.get_stage(path=PIPELINE_FILE) + + assert dvc.get_stage(path=PIPELINE_FILE, name="copy-foo-bar") == stage1 + assert dvc.get_stage(name="copy-foo-bar") == stage1 + + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stage(path="something.yaml", name="name") + + with pytest.raises(StageNotFound): + dvc.get_stage(name="random_name") + + +def test_get_stage_single_stage_dvcfile(tmp_dir, dvc): + (stage1,) = tmp_dir.dvc_gen("foo", "foo") + assert dvc.get_stage("foo.dvc") == stage1 + assert dvc.get_stage("foo.dvc", name="jpt") == stage1 + with pytest.raises(StageFileDoesNotExistError): + dvc.get_stage(path="bar.dvc", name="name") diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index d5e5077384..63eaf82253 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -56,7 +56,7 @@ def _run(self, **kwargs): @staticmethod def _get_stage_target(stage): - return stage.path + return stage.addressing class TestRepro(SingleStageRun, TestDvc): diff --git a/tests/func/test_repro_multistage.py b/tests/func/test_repro_multistage.py index 2ae0023356..27c3ba92fc 100644 --- a/tests/func/test_repro_multistage.py +++ b/tests/func/test_repro_multistage.py @@ -32,7 +32,7 @@ def _run(self, **kwargs): @staticmethod def _get_stage_target(stage): - return stage.path + ":" + stage.name + return stage.addressing class TestReproFailMultiStage(MultiStageRun, test_repro.TestReproFail): diff --git a/tests/func/test_status.py b/tests/func/test_status.py index 48aff22920..c79c361ada 100644 --- a/tests/func/test_status.py +++ b/tests/func/test_status.py @@ -73,7 +73,7 @@ def test_status_on_pipeline_stages(tmp_dir, dvc, run_copy): stage.cmd = " ".join(stage.cmd.split()) stage.dvcfile._dump_pipeline_file(stage) - assert dvc.status() == {"copy-foo-bar": ["changed command"]} + assert dvc.status("copy-foo-bar") == {"copy-foo-bar": ["changed command"]} # delete outputs (tmp_dir / "bar").unlink() diff --git a/tests/unit/repo/test_repo.py b/tests/unit/repo/test_repo.py index 581cb06a71..6395d47a82 100644 --- a/tests/unit/repo/test_repo.py +++ b/tests/unit/repo/test_repo.py @@ -87,6 +87,21 @@ def test_collect_optimization(tmp_dir, dvc, mocker): dvc.collect_granular(stage.path) +def test_collect_optimization_on_stage_name(tmp_dir, dvc, mocker, run_copy): + tmp_dir.dvc_gen("foo", "foo") + stage = run_copy("foo", "bar", name="copy-foo-bar") + # Forget cached stages and graph and error out on collection + dvc._reset() + mocker.patch( + "dvc.repo.Repo.stages", + property(raiser(Exception("Should not collect"))), + ) + + # Should read stage directly instead of collecting the whole graph + assert dvc.collect("copy-foo-bar") == [stage] + assert dvc.collect_granular("copy-foo-bar") == [(stage, None)] + + def test_skip_graph_checks(tmp_dir, dvc, mocker, run_copy): # See https://github.com/iterative/dvc/issues/2671 for more info mock_collect_graph = mocker.patch("dvc.repo.Repo._collect_graph") From 35c6c2fc2954ebdc01ead950cc6f2e7c6558aba2 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 19:22:06 +0545 Subject: [PATCH 5/9] fix ds and cc issues --- tests/func/test_repo.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 49ed8cfc29..9ca8fda6b3 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -4,7 +4,7 @@ import pytest from dvc.cache import Cache -from dvc.dvcfile import PIPELINE_FILE +from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK from dvc.exceptions import NoOutputOrStage from dvc.path_info import PathInfo from dvc.repo import Repo @@ -19,8 +19,6 @@ def test_destroy(tmp_dir, dvc, run_copy): - from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK - dvc.config["cache"]["type"] = ["symlink"] dvc.cache = Cache(dvc) @@ -285,7 +283,7 @@ def test_collect_granular_priority_on_collision(tmp_dir, dvc, run_copy): def test_collect_granular_collision_output_dir_stage_name( tmp_dir, dvc, run_copy ): - stage1, stage2, = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) + (stage1,) = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) stage3 = run_copy("foo", "bar", name="dir") assert dvc.collect_granular("dir") == [(stage3, None)] From 37bdffb313fc5410cf19733692ba416a7d76d677 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 19:44:44 +0545 Subject: [PATCH 6/9] fix unpack error in test --- tests/func/test_repo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 9ca8fda6b3..60aeef95b4 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -283,7 +283,7 @@ def test_collect_granular_priority_on_collision(tmp_dir, dvc, run_copy): def test_collect_granular_collision_output_dir_stage_name( tmp_dir, dvc, run_copy ): - (stage1,) = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) + stage1, *_ = tmp_dir.dvc_gen({"dir": {"foo": "foo"}, "foo": "foo"}) stage3 = run_copy("foo", "bar", name="dir") assert dvc.collect_granular("dir") == [(stage3, None)] From d13becf52e58147bf375c834b87eba8ff88feb07 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 19:55:03 +0545 Subject: [PATCH 7/9] tests: stage string representations --- tests/func/test_stage.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index 83c6a5cc8e..2d7c9ac36b 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -8,7 +8,7 @@ from dvc.output.local import LocalOutput from dvc.remote.local import LocalRemote from dvc.repo import Repo -from dvc.stage import Stage +from dvc.stage import PipelineStage, Stage from dvc.stage.exceptions import StageFileFormatError from dvc.utils.stage import dump_stage_file, load_stage_file from tests.basic_env import TestDvc @@ -196,19 +196,39 @@ def test_parent_repo_collect_stages(tmp_dir, scm, dvc): assert subrepo_stages != [] -def test_stage_addressing(tmp_dir, dvc, run_copy): +def test_stage_strings_representation(tmp_dir, dvc, run_copy): tmp_dir.dvc_gen("foo", "foo") stage1 = run_copy("foo", "bar", single_stage=True) assert stage1.addressing == "bar.dvc" + assert repr(stage1) == "Stage: 'bar.dvc'" + assert str(stage1) == "stage: 'bar.dvc'" stage2 = run_copy("bar", "baz", name="copy-bar-baz") assert stage2.addressing == "copy-bar-baz" + assert repr(stage2) == "Stage: 'copy-bar-baz'" + assert str(stage2) == "stage: 'copy-bar-baz'" folder = tmp_dir / "dir" folder.mkdir() with folder.chdir(): - assert stage1.addressing == os.path.relpath(stage1.path) - assert ( - stage2.addressing - == os.path.relpath(stage2.path) + ":" + stage2.name - ) + rel_path = os.path.relpath(stage1.path) + assert stage1.addressing == rel_path + assert repr(stage1) == f"Stage: '{rel_path}'" + assert str(stage1) == f"stage: '{rel_path}'" + + rel_path = os.path.relpath(stage2.path) + assert stage2.addressing == f"{rel_path}:{stage2.name}" + assert repr(stage2) == f"Stage: '{rel_path}:{stage2.name}'" + assert str(stage2) == f"stage: '{rel_path}:{stage2.name}'" + + +def test_stage_on_no_path_string_repr(tmp_dir, dvc): + s = Stage(dvc) + assert s.addressing == "No path" + assert repr(s) == "Stage: 'No path'" + assert str(s) == "stage: 'No path'" + + p = PipelineStage(dvc, name="stage_name") + assert p.addressing == "No path:stage_name" + assert repr(p) == "Stage: 'No path:stage_name'" + assert str(p) == "stage: 'No path:stage_name'" From 009a7a9914cbaab6c17f0b4f001a05e4554feb44 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Mon, 25 May 2020 20:15:30 +0545 Subject: [PATCH 8/9] exception: add error message Also remove `cause` when throwing NoOutputOrStage, as CheckoutErrorSuggestGit will start chaining all the messages in cause of the exception. --- dvc/exceptions.py | 6 ++++++ dvc/repo/__init__.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index acda76bdc5..4fcb6cbe0e 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -328,3 +328,9 @@ class NoOutputOrStage(DvcException): """ Raised when the target is neither an output nor a stage name in dvc.yaml """ + + def __init__(self, target, file): + super().__init__( + f"'{target}' " + f"does not exist as an output or a stage name in '{file}'" + ) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 266de1ebef..6804559059 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -314,9 +314,9 @@ def collect_granular( # should say that both stage name and file does not exist. if file and is_valid_filename(file): raise - raise NoOutputOrStage(target, exc.file) from exc + raise NoOutputOrStage(target, exc.file) except StageNotFound as exc: - raise NoOutputOrStage(target, exc.file) from exc + raise NoOutputOrStage(target, exc.file) return [(stage, None) for stage in stages] From bae4b1ea1bc4786ed9448f92f65875c2ba5d0572 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Tue, 26 May 2020 03:05:08 +0300 Subject: [PATCH 9/9] rename NoOutputOrStage NoOutputOrStageError --- dvc/exceptions.py | 2 +- dvc/repo/__init__.py | 6 +++--- dvc/repo/checkout.py | 4 ++-- tests/func/test_checkout.py | 6 +++--- tests/func/test_repo.py | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 4fcb6cbe0e..e9cf1f83a9 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -324,7 +324,7 @@ class IsADirectoryError(DvcException): """Raised when a file operation is requested on a directory.""" -class NoOutputOrStage(DvcException): +class NoOutputOrStageError(DvcException): """ Raised when the target is neither an output nor a stage name in dvc.yaml """ diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 6804559059..32030ea6a9 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -10,7 +10,7 @@ from dvc.exceptions import FileMissingError from dvc.exceptions import IsADirectoryError as DvcIsADirectoryError from dvc.exceptions import ( - NoOutputOrStage, + NoOutputOrStageError, NotDvcRepoError, OutputNotFoundError, ) @@ -314,9 +314,9 @@ def collect_granular( # should say that both stage name and file does not exist. if file and is_valid_filename(file): raise - raise NoOutputOrStage(target, exc.file) + raise NoOutputOrStageError(target, exc.file) from exc except StageNotFound as exc: - raise NoOutputOrStage(target, exc.file) + raise NoOutputOrStageError(target, exc.file) from exc return [(stage, None) for stage in stages] diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 0eacd53455..bcfe31223f 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -4,7 +4,7 @@ from dvc.exceptions import ( CheckoutError, CheckoutErrorSuggestGit, - NoOutputOrStage, + NoOutputOrStageError, ) from dvc.progress import Tqdm from dvc.utils import relpath @@ -77,7 +77,7 @@ def _checkout( except ( StageFileDoesNotExistError, StageFileBadNameError, - NoOutputOrStage, + NoOutputOrStageError, ) as exc: if not target: raise diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index dcac6050b9..d47b597391 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -13,7 +13,7 @@ CheckoutErrorSuggestGit, ConfirmRemoveError, DvcException, - NoOutputOrStage, + NoOutputOrStageError, ) from dvc.main import main from dvc.remote import S3Remote @@ -404,7 +404,7 @@ def test(self): self.dvc.checkout(targets="gitbranch") except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, NoOutputOrStage) + self.assertIsInstance(exc.__cause__, NoOutputOrStageError) self.assertIsInstance( exc.__cause__.__cause__, StageFileDoesNotExistError ) @@ -413,7 +413,7 @@ def test(self): self.dvc.checkout(targets=self.FOO) except DvcException as exc: self.assertIsInstance(exc, CheckoutErrorSuggestGit) - self.assertIsInstance(exc.__cause__, NoOutputOrStage) + self.assertIsInstance(exc.__cause__, NoOutputOrStageError) self.assertIsNone(exc.__cause__.__cause__) try: diff --git a/tests/func/test_repo.py b/tests/func/test_repo.py index 60aeef95b4..76138cf8d4 100644 --- a/tests/func/test_repo.py +++ b/tests/func/test_repo.py @@ -5,7 +5,7 @@ from dvc.cache import Cache from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK -from dvc.exceptions import NoOutputOrStage +from dvc.exceptions import NoOutputOrStageError from dvc.path_info import PathInfo from dvc.repo import Repo from dvc.stage.exceptions import ( @@ -220,9 +220,9 @@ def test_collect_with_not_existing_dvcfile(tmp_dir, dvc, target): def test_collect_granular_with_not_existing_output_or_stage_name(tmp_dir, dvc): - with pytest.raises(NoOutputOrStage): + with pytest.raises(NoOutputOrStageError): dvc.collect_granular("some_file") - with pytest.raises(NoOutputOrStage): + with pytest.raises(NoOutputOrStageError): dvc.collect_granular("some_file", recursive=True)