From 3f7b2c85c3e2bae0710b29182ccba9b240883a9c Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Sun, 12 Apr 2020 00:23:14 +0300 Subject: [PATCH] stage: resolve stage path before creating the stage Current logic is extremely complex and messy, hiding a lot of bugs inside. There are pretty much only 2 scenarios for stage creation: 1) `dvc add/import/etc` when dvcfile is created beside the output file 2) `dvc run` when dvcfile is created at the place of execution both are transparently affected by `--fname` flag. Since we've removed `--cwd` complexity already, there is no reason to generalize stage path resolution for those two scenarios. --- dvc/repo/add.py | 7 +++--- dvc/repo/imp_url.py | 18 ++++++++------ dvc/repo/run.py | 22 ++++++++++++++--- dvc/stage.py | 53 ++++++++-------------------------------- dvc/utils/__init__.py | 26 ++++++++++++++++++++ tests/unit/test_stage.py | 10 -------- 6 files changed, 68 insertions(+), 68 deletions(-) diff --git a/dvc/repo/add.py b/dvc/repo/add.py index b0a0fa586c..42a28d5ef2 100644 --- a/dvc/repo/add.py +++ b/dvc/repo/add.py @@ -12,7 +12,7 @@ from ..progress import Tqdm from ..repo.scm_context import scm_context from ..stage import Stage -from ..utils import LARGE_DIR_SIZE +from ..utils import LARGE_DIR_SIZE, resolve_paths logger = logging.getLogger(__name__) @@ -123,9 +123,8 @@ def _create_stages(repo, targets, fname, pbar=None): disable=len(targets) < LARGE_DIR_SIZE, unit="file", ): - stage = Stage.create( - repo, outs=[out], accompany_outs=True, fname=fname - ) + path, wdir, out = resolve_paths(repo, out) + stage = Stage.create(repo, fname or path, wdir=wdir, outs=[out]) repo._reset() if not stage: diff --git a/dvc/repo/imp_url.py b/dvc/repo/imp_url.py index d62f867788..23f2968740 100644 --- a/dvc/repo/imp_url.py +++ b/dvc/repo/imp_url.py @@ -1,6 +1,9 @@ +import os + from . import locked as locked_repo from dvc.repo.scm_context import scm_context -from dvc.utils import resolve_output +from dvc.utils import resolve_output, resolve_paths, relpath +from dvc.utils.fs import path_isin @locked_repo @@ -9,15 +12,14 @@ def imp_url(self, url, out=None, fname=None, erepo=None, locked=True): from dvc.stage import Stage out = resolve_output(url, out) + path, wdir, out = resolve_paths(self, out) + + # NOTE: when user is importing something from within his own repository + if os.path.exists(url) and path_isin(os.path.abspath(url), self.root_dir): + url = relpath(url, wdir) stage = Stage.create( - self, - cmd=None, - deps=[url], - outs=[out], - fname=fname, - erepo=erepo, - accompany_outs=True, + self, fname or path, wdir=wdir, deps=[url], outs=[out], erepo=erepo, ) if stage is None: diff --git a/dvc/repo/run.py b/dvc/repo/run.py index e5d62ec872..32796edf7d 100644 --- a/dvc/repo/run.py +++ b/dvc/repo/run.py @@ -1,14 +1,30 @@ +import os + from . import locked from .scm_context import scm_context @locked @scm_context -def run(self, no_exec=False, **kwargs): +def run(self, fname=None, no_exec=False, **kwargs): from dvc.stage import Stage - stage = Stage.create(self, **kwargs) - + outs = ( + kwargs.get("outs", []) + + kwargs.get("outs_no_cache", []) + + kwargs.get("metrics", []) + + kwargs.get("metrics_no_cache", []) + + kwargs.get("outs_persist", []) + + kwargs.get("outs_persist_no_cache", []) + ) + + if outs: + base = os.path.basename(os.path.normpath(outs[0])) + path = base + Stage.STAGE_FILE_SUFFIX + else: + path = Stage.STAGE_FILE + + stage = Stage.create(self, fname or path, **kwargs) if stage is None: return None diff --git a/dvc/stage.py b/dvc/stage.py index c008cd3e80..840af22411 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -7,7 +7,6 @@ import threading from functools import wraps -from itertools import chain from funcy import decorator from voluptuous import Any, Schema, MultipleInvalid @@ -21,7 +20,6 @@ from dvc.utils import relpath from dvc.utils.fs import path_isin from dvc.utils.collections import apply_diff -from dvc.utils.fs import contains_symlink_up_to from dvc.utils.stage import dump_stage_file from dvc.utils.stage import parse_stage from dvc.utils.stage import parse_stage_for_update @@ -418,23 +416,6 @@ def validate(d, fname=None): except MultipleInvalid as exc: raise StageFileFormatError(fname, exc) - @classmethod - def _stage_fname(cls, outs, add): - if not outs: - return cls.STAGE_FILE - - out = outs[0] - fname = out.path_info.name + cls.STAGE_FILE_SUFFIX - - if ( - add - and out.is_in_repo - and not contains_symlink_up_to(out.fspath, out.repo.root_dir) - ): - fname = out.path_info.with_name(fname).fspath - - return fname - @staticmethod def _check_stage_path(repo, path, is_wdir=False): assert repo is not None @@ -505,12 +486,20 @@ def is_cached(self): return True @staticmethod - def create(repo, accompany_outs=False, **kwargs): + def create(repo, path, **kwargs): wdir = kwargs.get("wdir", None) or os.curdir - fname = kwargs.get("fname", None) + + wdir = os.path.abspath(wdir) + path = os.path.abspath(path) + + Stage._check_dvc_filename(path) + + Stage._check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir")) + Stage._check_stage_path(repo, os.path.dirname(path)) stage = Stage( repo=repo, + path=path, wdir=wdir, cmd=kwargs.get("cmd", None), locked=kwargs.get("locked", False), @@ -527,28 +516,6 @@ def create(repo, accompany_outs=False, **kwargs): stage._check_circular_dependency() stage._check_duplicated_arguments() - if not fname: - fname = Stage._stage_fname(stage.outs, accompany_outs) - stage._check_dvc_filename(fname) - - # Autodetecting wdir for add, we need to create outs first to do that, - # so we start with wdir = . and remap out paths later. - if accompany_outs and kwargs.get("wdir") is None: - wdir = os.path.dirname(fname) - - for out in chain(stage.outs, stage.deps): - if out.is_in_repo: - out.def_path = relpath(out.path_info, wdir) - - wdir = os.path.abspath(wdir) - path = os.path.abspath(fname) - - Stage._check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir")) - Stage._check_stage_path(repo, os.path.dirname(path)) - - stage.wdir = wdir - stage.path = path - ignore_build_cache = kwargs.get("ignore_build_cache", False) # NOTE: remove outs before we check build cache diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index bf02ecdb24..70afdf265e 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -344,6 +344,32 @@ def resolve_output(inp, out): return out +def resolve_paths(repo, out): + from ..stage import Stage + from ..path_info import PathInfo + from .fs import contains_symlink_up_to + + abspath = os.path.abspath(out) + dirname = os.path.dirname(abspath) + base = os.path.basename(os.path.normpath(out)) + + # NOTE: `out` might not exist yet, so using `dirname`(aka `wdir`) to check + # if it is a local path. + if ( + os.path.exists(dirname) # out might not exist yet, so + and PathInfo(abspath).isin_or_eq(repo.root_dir) + and not contains_symlink_up_to(abspath, repo.root_dir) + ): + wdir = dirname + out = base + else: + wdir = os.getcwd() + + path = os.path.join(wdir, base + Stage.STAGE_FILE_SUFFIX) + + return (path, wdir, out) + + def format_link(link): return "<{blue}{link}{nc}>".format( blue=colorama.Fore.CYAN, link=link, nc=colorama.Fore.RESET diff --git a/tests/unit/test_stage.py b/tests/unit/test_stage.py index 298f3fd51e..9e5ce1f64c 100644 --- a/tests/unit/test_stage.py +++ b/tests/unit/test_stage.py @@ -7,7 +7,6 @@ import pytest from dvc.dependency.repo import DependencyREPO -from dvc.path_info import PathInfo from dvc.stage import Stage from dvc.stage import StageUpdateError @@ -61,15 +60,6 @@ def test(self): self.assertEqual(stage.dumpd()["wdir"], "../..") -@pytest.mark.parametrize("add", [True, False]) -def test_stage_fname(add): - out = mock.Mock() - out.is_in_repo = False - out.path_info = PathInfo("path/to/out.txt") - fname = Stage._stage_fname([out], add) - assert fname == "out.txt.dvc" - - def test_stage_update(mocker): dep = DependencyREPO({"url": "example.com"}, None, "dep_path") mocker.patch.object(dep, "update", return_value=None)