From a03628921335c6018cca89e786e1eadc5504bf66 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 13 Feb 2023 15:35:29 +0100 Subject: [PATCH 1/7] Early draft of log_artifact --- src/dvclive/live.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index ba7c2573..79d44514 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -74,6 +74,7 @@ def __init__( self._experiment_rev: Optional[str] = None self._inside_dvc_exp: bool = False self._dvc_repo = None + self._include_untracked: List[str] = [] self._init_dvc() self._latest_studio_step = self.step if resume else -1 @@ -124,6 +125,7 @@ def _init_dvc(self): self._dvc_repo.scm, self._baseline_rev ) mark_dvclive_only_started() + self._include_untracked.append(self.dir) else: logger.warning( "Can't save experiment without a DVC Repo." @@ -299,6 +301,19 @@ def log_param( """Saves the given parameter value to yaml""" self.log_params({name: val}) + def log_artifact( + self, + path: str, + ): + """Saves the given parameter value to yaml""" + if self._dvc_repo is not None and not self._inside_dvc_exp: + dvc_file = self._dvc_repo.add(path)[0].addressing + if self._save_dvc_exp: + self._include_untracked.append(dvc_file) + self._include_untracked.append( + str(Path(dvc_file).parent / ".gitignore") + ) + def make_summary(self, update_step: bool = True): if self._step is not None and update_step: self.summary["step"] = self.step @@ -371,7 +386,9 @@ def end(self): and self._save_dvc_exp ): self._experiment_rev = self._dvc_repo.experiments.save( - name=self._exp_name, include_untracked=self.dir, force=True + name=self._exp_name, + include_untracked=self._include_untracked, + force=True, ) mark_dvclive_only_ended() From f3b4e0f7e1cd8a59ff4711d93bdec8c5ae36a26e Mon Sep 17 00:00:00 2001 From: karajan1001 Date: Wed, 15 Feb 2023 16:03:28 +0800 Subject: [PATCH 2/7] Implement log_artifact method for live fix: #378 --- src/dvclive/error.py | 9 +++++++++ src/dvclive/live.py | 28 ++++++++++++++++++++++++---- tests/test_dvc.py | 15 ++++++++++++--- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/dvclive/error.py b/src/dvclive/error.py index 674e5340..e33f62e7 100644 --- a/src/dvclive/error.py +++ b/src/dvclive/error.py @@ -27,3 +27,12 @@ class InvalidParameterTypeError(DvcLiveError): def __init__(self, val: Any): self.val = val super().__init__(f"Parameter type {type(val)} is not supported.") + + +class DuplicateArtifactError(DvcLiveError): + def __init__(self, name): + self.name = name + super().__init__( + f"File '{name}' had already been a output of another " + "dvc stage, can't add it as a artifact of dvclive." + ) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index 79d44514..437f5da6 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -16,7 +16,12 @@ mark_dvclive_only_ended, mark_dvclive_only_started, ) -from .error import InvalidDataTypeError, InvalidParameterTypeError, InvalidPlotTypeError +from .error import ( + DuplicateArtifactError, + InvalidDataTypeError, + InvalidParameterTypeError, + InvalidPlotTypeError, +) from .plots import PLOT_TYPES, SKLEARN_PLOTS, Image, Metric, NumpyEncoder from .report import make_report from .serialize import dump_json, dump_yaml, load_yaml @@ -56,6 +61,7 @@ def __init__( self._images: Dict[str, Any] = {} self._params: Dict[str, Any] = {} self._plots: Dict[str, Any] = {} + self._outs: Set[str] = set() self._inside_with = False self._dvcyaml = dvcyaml @@ -305,9 +311,23 @@ def log_artifact( self, path: str, ): - """Saves the given parameter value to yaml""" - if self._dvc_repo is not None and not self._inside_dvc_exp: - dvc_file = self._dvc_repo.add(path)[0].addressing + from dvc.exceptions import OutputDuplicationError + + """Saves a local file or directory as an artifact""" + if not isinstance(path, str): + raise InvalidDataTypeError("artifact", type(path)) + + if self._dvc_repo is not None: + if path in self._outs or self._inside_dvc_exp: + stage = self._dvc_repo.commit(path, force=True) + else: + try: + stage = self._dvc_repo.add(path) + except OutputDuplicationError: + raise DuplicateArtifactError(path) + self._outs.add(path) + dvc_file = stage[0].addressing + if self._save_dvc_exp: self._include_untracked.append(dvc_file) self._include_untracked.append( diff --git a/tests/test_dvc.py b/tests/test_dvc.py index 3b78465a..a80c9452 100644 --- a/tests/test_dvc.py +++ b/tests/test_dvc.py @@ -1,4 +1,6 @@ # pylint: disable=unused-argument,protected-access +from pathlib import Path + import pytest from dvc.repo import Repo from PIL import Image @@ -90,14 +92,21 @@ def test_exp_save_on_end(tmp_dir, mocker, save): dvc_repo.index.stages = [] dvc_repo.scm.get_rev.return_value = "current_rev" dvc_repo.scm.get_ref.return_value = None + (tmp_dir / "data").write_text("data") with mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo): live = Live(save_dvc_exp=save) + live.log_artifact("data") live.end() if save: assert live._baseline_rev is not None assert live._exp_name != "dvclive-exp" + dvcfile = dvc_repo.add()[0].addressing + assert dvcfile.exists() + gitignore = str(Path(dvcfile).parent / ".gitignore") dvc_repo.experiments.save.assert_called_with( - name=live._exp_name, include_untracked=live.dir, force=True + name=live._exp_name, + include_untracked=[live.dir, dvcfile, gitignore], + force=True, ) else: assert live._baseline_rev is not None @@ -134,7 +143,7 @@ def test_exp_save_run_on_dvc_repro(tmp_dir, mocker): live.end() dvc_repo.experiments.save.assert_called_with( - name=live._exp_name, include_untracked=live.dir, force=True + name=live._exp_name, include_untracked=[live.dir], force=True ) @@ -181,5 +190,5 @@ def test_exp_save_with_dvc_files(tmp_dir, mocker): live.end() dvc_repo.experiments.save.assert_called_with( - name=live._exp_name, include_untracked=live.dir, force=True + name=live._exp_name, include_untracked=[live.dir], force=True ) From 497fb213194115208a2348903c6c0ad5486185a1 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 20 Feb 2023 15:07:38 +0100 Subject: [PATCH 3/7] tests: Create `mocked_dvc_repo` and `dvc_repo` fixtures. --- tests/conftest.py | 21 +++++++++++++++++++++ tests/test_dvc.py | 45 ++++++++++++--------------------------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d4d208ef..21f4ff70 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,27 @@ def tmp_dir(tmp_path, monkeypatch): yield tmp_path +@pytest.fixture +def mocked_dvc_repo(mocker): + _dvc_repo = mocker.MagicMock() + _dvc_repo.index.stages = [] + _dvc_repo.scm.get_rev.return_value = "current_rev" + _dvc_repo.scm.get_ref.return_value = None + mocker.patch("dvclive.live.get_dvc_repo", return_value=_dvc_repo) + return _dvc_repo + + +@pytest.fixture +def dvc_repo(tmp_dir): + from dvc.repo import Repo + from scmrepo.git import Git + + Git.init(tmp_dir) + repo = Repo.init(tmp_dir) + repo.scm.add_commit(".", "init") + return repo + + @pytest.fixture(autouse=True) def capture_wrap(): # https://github.com/pytest-dev/pytest/issues/5502#issuecomment-678368525 diff --git a/tests/test_dvc.py b/tests/test_dvc.py index a80c9452..f502d481 100644 --- a/tests/test_dvc.py +++ b/tests/test_dvc.py @@ -1,5 +1,4 @@ # pylint: disable=unused-argument,protected-access -from pathlib import Path import pytest from dvc.repo import Repo @@ -87,31 +86,21 @@ def test_make_dvcyaml_all_plots(tmp_dir): @pytest.mark.parametrize("save", [True, False]) -def test_exp_save_on_end(tmp_dir, mocker, save): - dvc_repo = mocker.MagicMock() - dvc_repo.index.stages = [] - dvc_repo.scm.get_rev.return_value = "current_rev" - dvc_repo.scm.get_ref.return_value = None - (tmp_dir / "data").write_text("data") - with mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo): - live = Live(save_dvc_exp=save) - live.log_artifact("data") - live.end() +def test_exp_save_on_end(tmp_dir, save, mocked_dvc_repo): + live = Live(save_dvc_exp=save) + live.end() if save: assert live._baseline_rev is not None assert live._exp_name != "dvclive-exp" - dvcfile = dvc_repo.add()[0].addressing - assert dvcfile.exists() - gitignore = str(Path(dvcfile).parent / ".gitignore") - dvc_repo.experiments.save.assert_called_with( + mocked_dvc_repo.experiments.save.assert_called_with( name=live._exp_name, - include_untracked=[live.dir, dvcfile, gitignore], + include_untracked=[live.dir], force=True, ) else: assert live._baseline_rev is not None assert live._exp_name == "dvclive-exp" - dvc_repo.experiments.save.assert_not_called() + mocked_dvc_repo.experiments.save.assert_not_called() def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker): @@ -148,14 +137,9 @@ def test_exp_save_run_on_dvc_repro(tmp_dir, mocker): @pytest.mark.parametrize("dvcyaml", [True, False]) -def test_dvcyaml_on_next_step(tmp_dir, mocker, dvcyaml): - dvc_repo = mocker.MagicMock() - dvc_repo.index.stages = [] - dvc_repo.scm.get_rev.return_value = "current_rev" - dvc_repo.scm.get_ref.return_value = None - with mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo): - live = Live(dvcyaml=dvcyaml) - live.next_step() +def test_dvcyaml_on_next_step(tmp_dir, dvcyaml, mocked_dvc_repo): + live = Live(dvcyaml=dvcyaml) + live.next_step() if dvcyaml: assert (tmp_dir / live.dvc_file).exists() else: @@ -163,14 +147,9 @@ def test_dvcyaml_on_next_step(tmp_dir, mocker, dvcyaml): @pytest.mark.parametrize("dvcyaml", [True, False]) -def test_dvcyaml_on_end(tmp_dir, mocker, dvcyaml): - dvc_repo = mocker.MagicMock() - dvc_repo.index.stages = [] - dvc_repo.scm.get_rev.return_value = "current_rev" - dvc_repo.scm.get_ref.return_value = None - with mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo): - live = Live(dvcyaml=dvcyaml) - live.end() +def test_dvcyaml_on_end(tmp_dir, dvcyaml, mocked_dvc_repo): + live = Live(dvcyaml=dvcyaml) + live.end() if dvcyaml: assert (tmp_dir / live.dvc_file).exists() else: From e5f5564df6219ed712594e45b56fe42d4ab23844 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 20 Feb 2023 15:09:25 +0100 Subject: [PATCH 4/7] log_artifact: Changes from review. - Don't use `commit` - Don't raise error - Add `test_log_artifact` --- src/dvclive/error.py | 9 -------- src/dvclive/live.py | 33 +++++++++++--------------- tests/test_log_artifact.py | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 29 deletions(-) create mode 100644 tests/test_log_artifact.py diff --git a/src/dvclive/error.py b/src/dvclive/error.py index e33f62e7..674e5340 100644 --- a/src/dvclive/error.py +++ b/src/dvclive/error.py @@ -27,12 +27,3 @@ class InvalidParameterTypeError(DvcLiveError): def __init__(self, val: Any): self.val = val super().__init__(f"Parameter type {type(val)} is not supported.") - - -class DuplicateArtifactError(DvcLiveError): - def __init__(self, name): - self.name = name - super().__init__( - f"File '{name}' had already been a output of another " - "dvc stage, can't add it as a artifact of dvclive." - ) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index 437f5da6..bd81c687 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -16,12 +16,7 @@ mark_dvclive_only_ended, mark_dvclive_only_started, ) -from .error import ( - DuplicateArtifactError, - InvalidDataTypeError, - InvalidParameterTypeError, - InvalidPlotTypeError, -) +from .error import InvalidDataTypeError, InvalidParameterTypeError, InvalidPlotTypeError from .plots import PLOT_TYPES, SKLEARN_PLOTS, Image, Metric, NumpyEncoder from .report import make_report from .serialize import dump_json, dump_yaml, load_yaml @@ -40,6 +35,7 @@ logger.setLevel(os.getenv(env.DVCLIVE_LOGLEVEL, "INFO").upper()) ParamLike = Union[int, float, str, bool, List["ParamLike"], Dict[str, "ParamLike"]] +StrPath = Union[str, Path] class Live: @@ -309,23 +305,20 @@ def log_param( def log_artifact( self, - path: str, + path: StrPath, ): - from dvc.exceptions import OutputDuplicationError - - """Saves a local file or directory as an artifact""" - if not isinstance(path, str): - raise InvalidDataTypeError("artifact", type(path)) + """Tracks a local file or directory with DVC""" + if not isinstance(path, (str, Path)): + raise InvalidDataTypeError(path, type(path)) if self._dvc_repo is not None: - if path in self._outs or self._inside_dvc_exp: - stage = self._dvc_repo.commit(path, force=True) - else: - try: - stage = self._dvc_repo.add(path) - except OutputDuplicationError: - raise DuplicateArtifactError(path) - self._outs.add(path) + try: + stage = self._dvc_repo.add(path) + except Exception as e: + logger.warning(f"Failed to dvc add {path}: {e}") + return + + self._outs.add(path) dvc_file = stage[0].addressing if self._save_dvc_exp: diff --git a/tests/test_log_artifact.py b/tests/test_log_artifact.py new file mode 100644 index 00000000..624ea33e --- /dev/null +++ b/tests/test_log_artifact.py @@ -0,0 +1,47 @@ +# pylint: disable=unused-argument,protected-access +from dvclive import Live + + +def test_log_artifact(tmp_dir, dvc_repo): + data = tmp_dir / "data" + data.touch() + with Live() as live: + live.log_artifact("data") + assert data.with_suffix(".dvc").exists() + + +def test_log_artifact_on_existing_dvc_file(tmp_dir, dvc_repo): + data = tmp_dir / "data" + data.write_text("foo") + with Live() as live: + live.log_artifact("data") + + prev_content = data.with_suffix(".dvc").read_text() + + with Live() as live: + data.write_text("bar") + live.log_artifact("data") + + assert data.with_suffix(".dvc").read_text() != prev_content + + +def test_log_artifact_twice(tmp_dir, dvc_repo): + data = tmp_dir / "data" + with Live() as live: + for i in range(2): + data.write_text(str(i)) + live.log_artifact("data") + assert data.with_suffix(".dvc").exists() + + +def test_log_artifact_with_save_dvc_exp(tmp_dir, mocker, mocked_dvc_repo): + stage = mocker.MagicMock() + stage.addressing = "data" + mocked_dvc_repo.add.return_value = [stage] + with Live(save_dvc_exp=True) as live: + live.log_artifact("data") + mocked_dvc_repo.experiments.save.assert_called_with( + name=live._exp_name, + include_untracked=[live.dir, "data", ".gitignore"], + force=True, + ) From dca22cf9ad0614132682ad7c582f219fdfb5a473 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 20 Feb 2023 15:20:44 +0100 Subject: [PATCH 5/7] pylint fixes --- src/dvclive/live.py | 2 +- tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index bd81c687..9d6f136e 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -314,7 +314,7 @@ def log_artifact( if self._dvc_repo is not None: try: stage = self._dvc_repo.add(path) - except Exception as e: + except Exception as e: # pylint: disable=broad-except logger.warning(f"Failed to dvc add {path}: {e}") return diff --git a/tests/conftest.py b/tests/conftest.py index 21f4ff70..1a3ec47b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,7 +20,7 @@ def mocked_dvc_repo(mocker): @pytest.fixture -def dvc_repo(tmp_dir): +def dvc_repo(tmp_dir): # pylint: disable=redefined-outer-name from dvc.repo import Repo from scmrepo.git import Git From a8db6f0e5354fe48d5a4cd5a5ea66607c64ed8c3 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 20 Feb 2023 15:32:39 +0100 Subject: [PATCH 6/7] mypy fixes --- src/dvclive/live.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index 9d6f136e..4b012e36 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -57,7 +57,7 @@ def __init__( self._images: Dict[str, Any] = {} self._params: Dict[str, Any] = {} self._plots: Dict[str, Any] = {} - self._outs: Set[str] = set() + self._outs: Set[StrPath] = set() self._inside_with = False self._dvcyaml = dvcyaml @@ -295,18 +295,11 @@ def log_params(self, params: Dict[str, ParamLike]): self._dump_params() logger.debug(f"Logged {params} parameters to {self.params_file}") - def log_param( - self, - name: str, - val: ParamLike, - ): + def log_param(self, name: str, val: ParamLike): """Saves the given parameter value to yaml""" self.log_params({name: val}) - def log_artifact( - self, - path: StrPath, - ): + def log_artifact(self, path: StrPath): """Tracks a local file or directory with DVC""" if not isinstance(path, (str, Path)): raise InvalidDataTypeError(path, type(path)) From 58075d43b3edc0bd3976a44be4b76a08bcf21a8f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Mar 2023 11:28:49 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/dvclive/live.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dvclive/live.py b/src/dvclive/live.py index 33bffde2..e641452d 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -399,8 +399,8 @@ def end(self): try: self._experiment_rev = self._dvc_repo.experiments.save( - name=self._exp_name, - include_untracked=self._include_untracked, + name=self._exp_name, + include_untracked=self._include_untracked, force=True, ) except DvcException as e: