diff --git a/dvc/external_repo.py b/dvc/external_repo.py
index 8e46569b33..144fd46a80 100644
--- a/dvc/external_repo.py
+++ b/dvc/external_repo.py
@@ -66,7 +66,7 @@ def __init__(self, root_dir, url):
         super().__init__(root_dir)
         self.url = url
         self._set_cache_dir()
-        self._set_upstream()
+        self._fix_upstream()
 
     def pull_to(self, path, to_info):
         """
@@ -123,21 +123,51 @@ def _set_cache_dir(self):
 
         self.cache.local.cache_dir = cache_dir
 
-    def _set_upstream(self):
-        # check if the URL is local and no default remote is present
-        # add default remote pointing to the original repo's cache location
-        if os.path.isdir(self.url):
-            if not self.config["core"].get("remote"):
-                src_repo = Repo(self.url)
-                try:
-                    cache_dir = src_repo.cache.local.cache_dir
-                finally:
-                    src_repo.close()
-
-                self.config["remote"]["auto-generated-upstream"] = {
-                    "url": cache_dir
-                }
-                self.config["core"]["remote"] = "auto-generated-upstream"
+    def _fix_upstream(self):
+        """Repair the default remote when ``self.url`` is a local directory.
+
+        With a default remote configured, its URL is restored from the source
+        repo's config; otherwise a remote pointing at the source repo's local
+        cache is added. Does nothing when ``self.url`` is not a directory.
+        """
+        if not os.path.isdir(self.url):
+            return
+
+        remote_name = self.config["core"].get("remote")
+        src_repo = Repo(self.url)
+        try:
+            if remote_name:
+                self._fix_local_remote(src_repo, remote_name)
+            else:
+                self._add_upstream(src_repo)
+        finally:
+            # Always release the source repo, even if the fix-up fails.
+            src_repo.close()
+
+    def _fix_local_remote(self, src_repo, remote_name):
+        """Copy the URL of remote ``remote_name`` from ``src_repo``'s config.
+
+        If a remote URL is relative to the source repo, it will have changed
+        upon config load and been made relative to this new repo. Restore
+        the old one here.
+        """
+        new_remote = self.config["remote"][remote_name]
+        # The source repo's config may lack this remote or its URL; keep
+        # our own entry as-is in that case instead of raising KeyError.
+        old_remote = src_repo.config["remote"].get(remote_name) or {}
+        old_url = old_remote.get("url")
+        if old_url is not None and new_remote.get("url") != old_url:
+            new_remote["url"] = old_url
+
+    def _add_upstream(self, src_repo):
+        """Make ``src_repo``'s local cache the default remote of this repo.
+
+        Fills the empty upstream entry with a new remote pointing to the
+        original repo's cache location.
+        """
+        cache_dir = src_repo.cache.local.cache_dir
+        self.config["remote"]["auto-generated-upstream"] = {"url": cache_dir}
+        self.config["core"]["remote"] = "auto-generated-upstream"
 
 
 class ExternalGitRepo:
diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py
index d1a23de462..fbbaf37fda 100644
--- a/tests/func/test_external_repo.py
+++ b/tests/func/test_external_repo.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 
 from mock import patch
 from dvc.compat import fspath
@@ -6,6 +7,7 @@
 from dvc.external_repo import external_repo
 from dvc.scm.git import Git
 from dvc.remote import RemoteLOCAL
+from dvc.utils import relpath
 
 
 def test_external_repo(erepo_dir):
@@ -88,3 +90,32 @@ def test_pull_subdir_file(tmp_dir, erepo_dir):
 
     assert dest.is_file()
     assert dest.read_text() == "contents"
+
+
+def test_relative_remote(erepo_dir, tmp_dir):
+    # these steps reproduce the script on this issue:
+    # https://github.com/iterative/dvc/issues/2756
+    with erepo_dir.chdir():
+        erepo_dir.dvc_gen("file", "contents", commit="create file")
+
+    upstream_dir = tmp_dir
+    upstream_url = relpath(upstream_dir, erepo_dir)
+    with erepo_dir.dvc.config.edit() as conf:
+        conf["remote"]["upstream"] = {"url": upstream_url}
+        conf["core"]["remote"] = "upstream"
+
+    erepo_dir.scm_add(
+        erepo_dir.dvc.config.files["repo"], commit="Update dvc config"
+    )
+    erepo_dir.dvc.push()
+
+    (erepo_dir / "file").unlink()
+    shutil.rmtree(erepo_dir.dvc.cache.local.cache_dir)
+
+    url = fspath(erepo_dir)
+
+    with external_repo(url) as repo:
+        assert os.path.isabs(repo.config["remote"]["upstream"]["url"])
+        assert os.path.isdir(repo.config["remote"]["upstream"]["url"])
+        with repo.open_by_relpath("file") as fd:
+            assert fd.read() == "contents"