Summary of this patch (commentary only; everything before the first
"diff --git" header is not part of any hunk):

  * dvc/remote/base.py: when the checkout target already exists, log at
    info level with a reworded message instead of emitting a warning.
  * dvc/state.py: split State.remove_unused_links() into
    get_unused_links(), which only returns the unused relative paths, and
    remove_links(), which removes the links it is given.
  * dvc/repo/checkout.py: _cleanup_unused_links() logs every link it is
    about to remove and returns whether anything was removed; _checkout()
    uses that result to skip "Nothing to do" when links were cleaned up.
  * tests/func/test_state.py: cover get_unused_links() and remove_links().

diff --git a/dvc/remote/base.py b/dvc/remote/base.py
index be5929fd21..f7586b8893 100644
--- a/dvc/remote/base.py
+++ b/dvc/remote/base.py
@@ -878,8 +878,8 @@ def _checkout_file(
         """The file is changed we need to checkout a new copy"""
         cache_info = self.checksum_to_path_info(checksum)
         if self.exists(path_info):
-            logger.warning(
-                "data '{}' exists. Removing before checkout.", path_info
+            logger.info(
+                "The file '{}' already exists. It will be replaced.", path_info
             )
             self.safe_remove(path_info, force=force)
 
diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py
index 26f9e18e81..2d9c5de4e6 100644
--- a/dvc/repo/checkout.py
+++ b/dvc/repo/checkout.py
@@ -15,7 +15,14 @@ def _cleanup_unused_links(repo):
         for out in stage.outs
         if out.scheme == "local"
     ]
-    repo.state.remove_unused_links(used)
+
+    unused = repo.state.get_unused_links(used)
+    for link in unused:
+        logger.info(
+            "Removing '{}' as it already exists in the current worktree.", link
+        )
+    repo.state.remove_links(unused)
+    return bool(unused)
 
 
 def get_all_files_numbers(pairs):
@@ -34,9 +41,10 @@ def _checkout(
 ):
     from dvc.stage import StageFileDoesNotExistError, StageFileBadNameError
 
+    cleaned = False
     if not targets:
         targets = [None]
-        _cleanup_unused_links(self)
+        cleaned = _cleanup_unused_links(self)
 
     pairs = set()
     for target in targets:
@@ -52,7 +60,7 @@ def _checkout(
             raise CheckoutErrorSuggestGit(target) from exc
 
     total = get_all_files_numbers(pairs)
-    if total == 0:
+    if total == 0 and not cleaned:
         logger.info("Nothing to do")
     failed = []
     with Tqdm(
diff --git a/dvc/state.py b/dvc/state.py
index 6c9106d1e0..d54393aede 100644
--- a/dvc/state.py
+++ b/dvc/state.py
@@ -443,36 +443,37 @@ def save_link(self, path_info):
         )
         self._execute(cmd, (relative_path, self._to_sqlite(inode), mtime))
 
-    def remove_unused_links(self, used):
-        """Removes all saved links except the ones that are used.
+    def get_unused_links(self, used):
+        """Returns all saved links except the ones that are used.
 
         Args:
-            used (list): list of used links that should not be removed.
+            used (list): list of used links
         """
         unused = []
 
         self._execute("SELECT * FROM {}".format(self.LINK_STATE_TABLE))
         for row in self.cursor:
-            relpath, inode, mtime = row
+            relative_path, inode, mtime = row
             inode = self._from_sqlite(inode)
-            path = os.path.join(self.root_dir, relpath)
+            path = os.path.join(self.root_dir, relative_path)
 
-            if path in used:
-                continue
-
-            if not os.path.exists(path):
+            if path in used or not os.path.exists(path):
                 continue
 
             actual_inode = get_inode(path)
             actual_mtime, _ = get_mtime_and_size(path, self.repo.tree)
 
-            if inode == actual_inode and mtime == actual_mtime:
-                logger.debug("Removing '{}' as unused link.", path)
-                remove(path)
-                unused.append(relpath)
+            if (inode, mtime) == (actual_inode, actual_mtime):
+                unused.append(relative_path)
+
+        return unused
+
+    def remove_links(self, links):
+        for link in links:
+            remove(os.path.join(self.root_dir, link))
 
         for chunk_unused in to_chunks(
-            unused, chunk_size=SQLITE_MAX_VARIABLES_NUMBER
+            links, chunk_size=SQLITE_MAX_VARIABLES_NUMBER
         ):
             cmd = "DELETE FROM {} WHERE path IN ({})".format(
                 self.LINK_STATE_TABLE, ",".join(["?"] * len(chunk_unused))
diff --git a/tests/func/test_state.py b/tests/func/test_state.py
index 810338506c..8e841d8c55 100644
--- a/tests/func/test_state.py
+++ b/tests/func/test_state.py
@@ -1,4 +1,5 @@
 import mock
+import os
 
 from dvc.path_info import PathInfo
 from dvc.state import State
@@ -69,16 +70,33 @@ def test_get_state_record_for_inode(get_inode_mock, tmp_dir, dvc):
     assert ret is not None
 
 
-def test_remove_unused_links(tmp_dir, dvc):
-    assert len(tmp_dir.dvc_gen("foo", "foo_content")) == 1
-    assert len(tmp_dir.dvc_gen("bar", "bar_content")) == 1
+def test_remove_links(tmp_dir, dvc):
+    tmp_dir.dvc_gen({"foo": "foo_content", "bar": "bar_content"})
 
-    cmd_count_links = "SELECT count(*) FROM {}".format(State.LINK_STATE_TABLE)
     with dvc.state:
+        cmd_count_links = "SELECT count(*) FROM {}".format(
+            State.LINK_STATE_TABLE
+        )
         result = dvc.state._execute(cmd_count_links).fetchone()[0]
         assert result == 2
 
-        dvc.state.remove_unused_links([])
+        dvc.state.remove_links(["foo", "bar"])
 
         result = dvc.state._execute(cmd_count_links).fetchone()[0]
         assert result == 0
+
+
+def test_get_unused_links(tmp_dir, dvc):
+    tmp_dir.dvc_gen({"foo": "foo_content", "bar": "bar_content"})
+
+    with dvc.state:
+        links = [os.path.join(dvc.root_dir, link) for link in ["foo", "bar"]]
+        assert set(dvc.state.get_unused_links([])) == {"foo", "bar"}
+        assert set(dvc.state.get_unused_links(links[:1])) == {"bar"}
+        assert set(dvc.state.get_unused_links(links)) == set()
+        assert set(
+            dvc.state.get_unused_links(
+                used=links[:1]
+                + [os.path.join(dvc.root_dir, "not-existing-file")]
+            )
+        ) == {"bar"}