From b590dbf4b036929c07ca108df27919c6a850db74 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 18:48:33 +0545 Subject: [PATCH 1/7] log: streamline/make it readable --- dvc/dvcfile.py | 17 ++++++++++++++--- dvc/output/base.py | 8 +++----- dvc/repo/reproduce.py | 6 +++++- dvc/repo/run.py | 3 +-- dvc/stage/__init__.py | 29 ++++++++++++++++------------- dvc/stage/run.py | 19 +++++++++++++++---- 6 files changed, 54 insertions(+), 28 deletions(-) diff --git a/dvc/dvcfile.py b/dvc/dvcfile.py index c2722c7a74..efac9c5021 100644 --- a/dvc/dvcfile.py +++ b/dvc/dvcfile.py @@ -181,9 +181,9 @@ def dump(self, stage, update_pipeline=False, **kwargs): assert isinstance(stage, PipelineStage) check_dvc_filename(self.path) - self._dump_lockfile(stage) if update_pipeline and not stage.is_data_source: self._dump_pipeline_file(stage) + self._dump_lockfile(stage) def _dump_lockfile(self, stage): self._lockfile.dump(stage) @@ -194,6 +194,7 @@ def _dump_pipeline_file(self, stage): with open(self.path) as fd: data = parse_stage_for_update(fd.read(), self.path) else: + logger.info("'%s' does not exist, creating…", self.relpath) open(self.path, "w+").close() data["stages"] = data.get("stages", {}) @@ -204,6 +205,9 @@ def _dump_pipeline_file(self, stage): else: data["stages"].update(stage_data) + logger.info( + "Adding stage '%s' to '%s'…", stage.name, self.relpath, + ) dump_stage_file(self.path, data) self.repo.scm.track_file(relpath(self.path)) @@ -247,15 +251,22 @@ def load(self): def dump(self, stage, **kwargs): stage_data = serialize.to_lockfile(stage) if not self.exists(): + modified = True + logger.info("Generating lock file…") data = stage_data open(self.path, "w+").close() else: with self.repo.tree.open(self.path, "r") as fd: data = parse_stage_for_update(fd.read(), self.path) + modified = data.get(stage.name, {}) != stage_data.get( + stage.name, {} + ) + if modified: + logger.info("Updating lock file…") data.update(stage_data) - dump_stage_file(self.path, data) - self.repo.scm.track_file(relpath(self.path)) + if modified: + self.repo.scm.track_file(self.relpath) class Dvcfile: diff --git a/dvc/output/base.py b/dvc/output/base.py index fc9b52fdba..855f36a85c 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -241,17 +241,15 @@ def save(self): if self.metric or self.plot: self.verify_metric() if not self.IS_DEPENDENCY: - logger.info( - "Output '{}' doesn't use cache. Skipping saving.".format( - self - ) + logger.debug( + "Output '%s' doesn't use cache. Skipping saving.", self ) return assert not self.IS_DEPENDENCY if not self.changed(): - logger.info(f"Output '{self}' didn't change. Skipping saving.") + logger.debug("Output '%s' didn't change. Skipping saving.", self) return self.info = self.save_info() diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 78a302f3be..b474b6a8e6 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -164,7 +164,11 @@ def _reproduce_stages( force_downstream = kwargs.pop("force_downstream", False) result = [] - for stage in pipeline: + for idx, stage in enumerate(pipeline): + if idx != 0: + # Cosmetic newline + logger.info("") + try: ret = _reproduce_stage(stage, **kwargs) diff --git a/dvc/repo/run.py b/dvc/repo/run.py index 0b6f0354fb..ae049f204e 100644 --- a/dvc/repo/run.py +++ b/dvc/repo/run.py @@ -82,8 +82,7 @@ def run(self, fname=None, no_exec=False, single_stage=False, **kwargs): raise OutputDuplicationError(exc.output, set(exc.stages) - {stage}) if no_exec: - for out in stage.outs: - out.ignore() + stage.ignore_outs() else: stage.run( no_commit=kwargs.get("no_commit", False), diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 79800ea6c9..5d1c0c2589 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -77,7 +77,7 @@ def create_stage(cls, repo, path, **kwargs): logger.warning("Build cache is ignored when persisting outputs.") if not ignore_run_cache and stage.can_be_skipped: - logger.info("Stage is cached, skipping.") + logger.info("Stage is cached, skipping…") return None return stage @@ -209,10 +209,11 @@ def changed_deps(self): return False if self.is_callback: - logger.warning( - '{stage} is a "callback" stage ' + logger.debug( + '%s is a "callback" stage ' "(has a command and no dependencies) and thus always " - "considered as changed.".format(stage=self) + "considered as changed.", + self, ) return True @@ -248,9 +249,9 @@ def changed_outs(self): return False - def changed_stage(self, warn=False): + def changed_stage(self): changed = self.md5 != self.compute_md5() - if changed and warn: + if changed: logger.debug(self._changed_stage_entry()) return changed @@ -259,14 +260,12 @@ def changed(self): is_changed = ( # Short-circuit order: stage md5 is fast, # deps are expected to change - self.changed_stage(warn=True) + self.changed_stage() or self.changed_deps() or self.changed_outs() ) if is_changed: - logger.info("%s changed.", self) - else: - logger.info("%s didn't change.", self) + logger.debug("%s changed.", self) return is_changed @rwlocked(write=["outs"]) @@ -294,8 +293,8 @@ def remove(self, force=False, remove_outs=True): @rwlocked(read=["deps"], write=["outs"]) def reproduce(self, interactive=False, **kwargs): - if not (kwargs.get("force", False) or self.changed()): + logger.info("Stage '%s' didn't change, skipping…", self.addressing) return None msg = ( @@ -375,6 +374,10 @@ def save_outs(self): for out in self.outs: out.save() + def ignore_outs(self): + for out in self.outs: + out.ignore() + @staticmethod def _changed_entries(entries): return [str(entry) for entry in entries if entry.changed_checksum()] @@ -548,8 +551,8 @@ def _status_stage(self, ret): if self.cmd_changed: ret.append("changed command") - def changed_stage(self, warn=False): - if self.cmd_changed and warn: + def changed_stage(self): + if self.cmd_changed: logger.debug(self._changed_stage_entry()) return self.cmd_changed diff --git a/dvc/stage/run.py b/dvc/stage/run.py index 54b1bfbc79..ebac0ed612 100644 --- a/dvc/stage/run.py +++ b/dvc/stage/run.py @@ -79,11 +79,14 @@ def cmd_run(stage, *args, **kwargs): def _is_cached(stage): - return ( + cached = ( not stage.is_callback and not stage.always_changed and stage.already_cached() ) + if cached: + logger.info("Stage '%s' is cached, skipping run…", stage.addressing) + return cached def restored_from_cache(stage): @@ -93,7 +96,12 @@ def restored_from_cache(stage): return False # restore stage from build cache stage_cache.restore(stage) - return stage.outs_cached() + restored = stage.outs_cached() + if restored: + logger.info( + "Restored stage %s from run-cache, skipping…", stage.addressing + ) + return restored def run_stage(stage, dry=False, force=False, run_cache=False): @@ -102,10 +110,13 @@ def run_stage(stage, dry=False, force=False, run_cache=False): run_cache and restored_from_cache(stage) ) if stage_cached: - logger.info("Stage is cached, skipping.") stage.checkout() return - logger.info("Running command:\n\t%s", stage.cmd) + callback_str = "callback " if stage.is_callback else "" + logger.info( + "Running %s" "stage %s with command:", callback_str, stage.addressing, + ) + logger.info("\t%s", stage.cmd) if not dry: cmd_run(stage) From e893366c5ddf031867a6fb3944365cb37d87858d Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 18:55:54 +0545 Subject: [PATCH 2/7] Remove 'run' from log message --- dvc/stage/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/stage/run.py b/dvc/stage/run.py index ebac0ed612..1174afc065 100644 --- a/dvc/stage/run.py +++ b/dvc/stage/run.py @@ -85,7 +85,7 @@ def _is_cached(stage): and stage.already_cached() ) if cached: - logger.info("Stage '%s' is cached, skipping run…", stage.addressing) + logger.info("Stage '%s' is cached, skipping…", stage.addressing) return cached From 26572e41e891fbbc6a13aec02a366b8d990ccb75 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 19:10:54 +0545 Subject: [PATCH 3/7] fix args passing in imports --- dvc/stage/imports.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dvc/stage/imports.py b/dvc/stage/imports.py index 6e7dded379..957e018e36 100644 --- a/dvc/stage/imports.py +++ b/dvc/stage/imports.py @@ -23,11 +23,7 @@ def sync_import(stage, dry=False, force=False): if dry: return - if ( - not force - and not stage.changed_stage(warn=True) - and stage.already_cached() - ): + if not force and not stage.changed_stage() and stage.already_cached(): stage.outs[0].checkout() else: stage.deps[0].download(stage.outs[0]) From dbfe38a15fa791409986220e49b8ba923c4df7ba Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 19:46:56 +0545 Subject: [PATCH 4/7] tests: fix test --- tests/unit/stage/test_run.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/stage/test_run.py b/tests/unit/stage/test_run.py index b0595051a1..d5a88460b9 100644 --- a/tests/unit/stage/test_run.py +++ b/tests/unit/stage/test_run.py @@ -6,5 +6,9 @@ def test_run_stage_dry(caplog): with caplog.at_level(level=logging.INFO, logger="dvc"): - run_stage(Stage(None, cmd="mycmd arg1 arg2"), dry=True) - assert caplog.messages == ["Running command:\n\tmycmd arg1 arg2"] + stage = Stage(None, "stage.dvc", cmd="mycmd arg1 arg2") + run_stage(stage, dry=True) + assert caplog.messages == [ + "Running callback stage 'stage.dvc' with command:", + "\t" + "mycmd arg1 arg2", + ] From d5424ce7de74172dbf1e1d84cc391a8fb045f043 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 19:47:24 +0545 Subject: [PATCH 5/7] run: Add quotes around stage name --- dvc/stage/run.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dvc/stage/run.py b/dvc/stage/run.py index 1174afc065..c2cf17f79c 100644 --- a/dvc/stage/run.py +++ b/dvc/stage/run.py @@ -115,7 +115,9 @@ def run_stage(stage, dry=False, force=False, run_cache=False): callback_str = "callback " if stage.is_callback else "" logger.info( - "Running %s" "stage %s with command:", callback_str, stage.addressing, + "Running %s" "stage '%s' with command:", + callback_str, + stage.addressing, ) logger.info("\t%s", stage.cmd) if not dry: From e5828b54039e31256f671814b5a3e0afd6b01c1f Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 19:58:10 +0545 Subject: [PATCH 6/7] run: add quotes for run-cache restore log --- dvc/stage/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/stage/run.py b/dvc/stage/run.py index c2cf17f79c..a39137ff9b 100644 --- a/dvc/stage/run.py +++ b/dvc/stage/run.py @@ -99,7 +99,7 @@ def restored_from_cache(stage): restored = stage.outs_cached() if restored: logger.info( - "Restored stage %s from run-cache, skipping…", stage.addressing + "Restored stage '%s' from run-cache, skipping…", stage.addressing ) return restored From 7c15229deac5fda77ebee7907902685e9056a5bb Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Wed, 20 May 2020 19:03:57 +0545 Subject: [PATCH 7/7] log: use bright/dim for ui --- dvc/dvcfile.py | 14 +++++++++----- dvc/stage/__init__.py | 7 +++++-- dvc/stage/run.py | 17 +++++++++++------ dvc/utils/__init__.py | 11 +++++++++++ tests/unit/stage/test_run.py | 5 ++++- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/dvc/dvcfile.py b/dvc/dvcfile.py index efac9c5021..036d4413df 100644 --- a/dvc/dvcfile.py +++ b/dvc/dvcfile.py @@ -15,7 +15,7 @@ StageFileIsNotDvcFileError, ) from dvc.stage.loader import SingleStageLoader, StageLoader -from dvc.utils import relpath +from dvc.utils import relpath, styled from dvc.utils.collections import apply_diff from dvc.utils.stage import ( dump_stage_file, @@ -194,7 +194,9 @@ def _dump_pipeline_file(self, stage): with open(self.path) as fd: data = parse_stage_for_update(fd.read(), self.path) else: - logger.info("'%s' does not exist, creating…", self.relpath) + logger.info( + "%s does not exist, creating…", styled(self.relpath, "bold") + ) open(self.path, "w+").close() data["stages"] = data.get("stages", {}) @@ -206,7 +208,9 @@ def _dump_pipeline_file(self, stage): data["stages"].update(stage_data) logger.info( - "Adding stage '%s' to '%s'…", stage.name, self.relpath, + "Adding stage '%s' to '%s'…", + styled(stage.name, "bold"), + styled(self.relpath, "bold"), ) dump_stage_file(self.path, data) self.repo.scm.track_file(relpath(self.path)) @@ -252,7 +256,7 @@ def dump(self, stage, **kwargs): stage_data = serialize.to_lockfile(stage) if not self.exists(): modified = True - logger.info("Generating lock file…") + logger.info(styled("Generating lock file…", "dim")) data = stage_data open(self.path, "w+").close() else: @@ -262,7 +266,7 @@ def dump(self, stage, **kwargs): stage.name, {} ) if modified: - logger.info("Updating lock file…") + logger.info(styled("Updating lock file…", "dim")) data.update(stage_data) dump_stage_file(self.path, data) if modified: diff --git a/dvc/stage/__init__.py b/dvc/stage/__init__.py index 5d1c0c2589..be1b8f796e 100644 --- a/dvc/stage/__init__.py +++ b/dvc/stage/__init__.py @@ -8,7 +8,7 @@ import dvc.dependency as dependency import dvc.prompt as prompt from dvc.exceptions import CheckoutError, DvcException -from dvc.utils import relpath +from dvc.utils import relpath, styled from . import params from .decorators import rwlocked @@ -294,7 +294,10 @@ def remove(self, force=False, remove_outs=True): @rwlocked(read=["deps"], write=["outs"]) def reproduce(self, interactive=False, **kwargs): if not (kwargs.get("force", False) or self.changed()): - logger.info("Stage '%s' didn't change, skipping…", self.addressing) + logger.info( + "Stage '%s' didn't change, skipping…", + styled(self.addressing, "bold"), + ) return None msg = ( diff --git a/dvc/stage/run.py b/dvc/stage/run.py index a39137ff9b..162e04b09f 100644 --- a/dvc/stage/run.py +++ b/dvc/stage/run.py @@ -4,7 +4,7 @@ import subprocess import threading -from dvc.utils import fix_env +from dvc.utils import fix_env, styled from .decorators import unlocked_repo from .exceptions import StageCmdFailedError @@ -85,7 +85,9 @@ def _is_cached(stage): and stage.already_cached() ) if cached: - logger.info("Stage '%s' is cached, skipping…", stage.addressing) + logger.info( + "Stage %s is cached, skipping…", styled(stage.addressing, "bold"), + ) return cached @@ -99,7 +101,8 @@ def restored_from_cache(stage): restored = stage.outs_cached() if restored: logger.info( - "Restored stage '%s' from run-cache, skipping…", stage.addressing + "Restored stage %s from run-cache, skipping…", + styled(stage.addressing, "bold"), ) return restored @@ -113,11 +116,13 @@ def run_stage(stage, dry=False, force=False, run_cache=False): stage.checkout() return - callback_str = "callback " if stage.is_callback else "" + callback_str = ( + "{} ".format(styled("callback", "bold")) if stage.is_callback else "" + ) logger.info( - "Running %s" "stage '%s' with command:", + "Running %s" "stage %s with command:", callback_str, - stage.addressing, + styled(stage.addressing, "bold"), ) logger.info("\t%s", stage.cmd) if not dry: diff --git a/dvc/utils/__init__.py b/dvc/utils/__init__.py index 3055371682..f88cb86235 100644 --- a/dvc/utils/__init__.py +++ b/dvc/utils/__init__.py @@ -270,6 +270,17 @@ def colorize(message, color=None): ) +def styled(message, style=None): + if not style: + return message + styles = { + "bold": colorama.Style.BRIGHT, + "normal": colorama.Style.NORMAL, + "dim": colorama.Style.DIM, + } + return f"{styles[style]}{message}{styles['normal']}" + + def boxify(message, border_color=None): """Put a message inside a box. diff --git a/tests/unit/stage/test_run.py b/tests/unit/stage/test_run.py index d5a88460b9..07f798295e 100644 --- a/tests/unit/stage/test_run.py +++ b/tests/unit/stage/test_run.py @@ -2,6 +2,7 @@ from dvc.stage import Stage from dvc.stage.run import run_stage +from dvc.utils import styled def test_run_stage_dry(caplog): @@ -9,6 +10,8 @@ def test_run_stage_dry(caplog): stage = Stage(None, "stage.dvc", cmd="mycmd arg1 arg2") run_stage(stage, dry=True) assert caplog.messages == [ - "Running callback stage 'stage.dvc' with command:", + "Running {} stage {} with command:".format( + styled("callback", "bold"), styled("stage.dvc", "bold") + ), "\t" + "mycmd arg1 arg2", ]