From 5a711e8b9ec28c080d590753d3e6a0d68440701d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Wed, 19 Feb 2020 17:04:31 +0100 Subject: [PATCH] Stage: get checksum from repo if no checksum in stage file --- dvc/output/base.py | 21 ++++++++++---- tests/func/test_stage.py | 1 + tests/unit/output/test_output.py | 50 +++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/dvc/output/base.py b/dvc/output/base.py index d59276da7a..d07010ad6a 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -409,12 +409,23 @@ def get_used_cache(self, **kwargs): cache.external[dep.repo_pair].add(dep.def_path) return cache - if not self.info: - logger.warning( - "Output '{}'({}) is missing version info. Cache for it will " - "not be collected. Use `dvc repro` to get your pipeline up to " - "date.".format(self, self.stage) + if not self.checksum: + msg = ( + "Output '{}'({}) is missing version info. " + "Cache for it will not be collected. " + "Use `dvc repro` to get your pipeline up to date.".format( + self, self.stage + ) ) + if self.exists: + msg += ( + "\n" + "You can also use `dvc commit {stage}` to associate " + "existing '{out}' with '{stage}'.".format( + out=self, stage=self.stage.relpath + ) + ) + logger.warning(msg) return NamedCache() ret = NamedCache.make(self.scheme, self.checksum, str(self)) diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index ce4991fd80..949e8b5671 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -1,6 +1,7 @@ import os import tempfile + from dvc.main import main from dvc.output.local import OutputLOCAL from dvc.remote.local import RemoteLOCAL diff --git a/tests/unit/output/test_output.py b/tests/unit/output/test_output.py index 2cf30e9680..fb9325471b 100644 --- a/tests/unit/output/test_output.py +++ b/tests/unit/output/test_output.py @@ -1,8 +1,12 @@ +import logging + import pytest +from funcy import first from voluptuous import Schema, MultipleInvalid -from dvc.output import CHECKSUM_SCHEMA +from dvc.cache import NamedCache +from dvc.output import CHECKSUM_SCHEMA, OutputBase @pytest.mark.parametrize( @@ -40,3 +44,47 @@ def test_checksum_schema(value, expected): def test_checksum_schema_fail(value): with pytest.raises(MultipleInvalid): Schema(CHECKSUM_SCHEMA)(value)["md5"] + + +@pytest.mark.parametrize( + "exists, expected_message", + [ + ( + False, + ( + "Output 'path'(Stage stage.dvc) is missing version info. " + "Cache for it will not be collected. " + "Use `dvc repro` to get your pipeline up to date." + ), + ), + ( + True, + ( + "Output 'path'(Stage stage.dvc) is missing version info. " + "Cache for it will not be collected. " + "Use `dvc repro` to get your pipeline up to date.\n" + "You can also use `dvc commit stage.dvc` to associate " + "existing 'path' with 'stage.dvc'." + ), + ), + ], +) +def test_get_used_cache(exists, expected_message, mocker, caplog): + stage = mocker.MagicMock() + mocker.patch.object(stage, "__str__", return_value="Stage stage.dvc") + mocker.patch.object(stage, "relpath", "stage.dvc") + + output = OutputBase(stage, "path") + + mocker.patch.object(output, "use_cache", True) + mocker.patch.object(stage, "is_repo_import", False) + mocker.patch.object( + OutputBase, "checksum", new_callable=mocker.PropertyMock + ).return_value = None + mocker.patch.object( + OutputBase, "exists", new_callable=mocker.PropertyMock + ).return_value = exists + + with caplog.at_level(logging.WARNING, logger="dvc"): + assert isinstance(output.get_used_cache(), NamedCache) + assert first(caplog.messages) == expected_message