From 48b646b943ede94f70cef5f222401a0d0f90d109 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Fri, 21 Feb 2020 12:42:18 -0300 Subject: [PATCH 01/12] erepo: fix relative urls for remotes (#2756) --- dvc/external_repo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 8e46569b33..b67fa56c55 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -124,11 +124,23 @@ def _set_cache_dir(self): self.cache.local.cache_dir = cache_dir def _set_upstream(self): - # check if the URL is local and no default remote is present - # add default remote pointing to the original repo's cache location + # check if the URL is local and: + # - if no default remote is present, then add default remote + # pointing to the original repo's cache location + # - if a default remote is present, make the URL + # relative to the original repo if os.path.isdir(self.url): - if not self.config["core"].get("remote"): - src_repo = Repo(self.url) + src_repo = Repo(self.url) + remote_name = self.config["core"].get("remote") + if remote_name: + remote_cfg = self.config["remote"][remote_name] + old_remote_url = src_repo.config["remote"][remote_name]["url"] + if remote_cfg["url"] != old_remote_url: + remote_cfg["url"] = os.path.join( + src_repo.root_dir, old_remote_url + ) + src_repo.close() + else: try: cache_dir = src_repo.cache.local.cache_dir finally: From ecb44a7be5259d560c4af44ca80b281518451809 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Fri, 21 Feb 2020 16:54:44 -0300 Subject: [PATCH 02/12] erepo: add test case for relative path remotes (#2756) --- tests/func/test_external_repo.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index d1a23de462..53fcff3589 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -1,4 +1,5 @@ import os +import shutil from mock import patch from dvc.compat import fspath @@ -6,6 +7,7 @@ from dvc.external_repo import external_repo from dvc.scm.git import Git from dvc.remote import RemoteLOCAL +from dvc.utils import relpath def test_external_repo(erepo_dir): @@ -88,3 +90,29 @@ def test_pull_subdir_file(tmp_dir, erepo_dir): assert dest.is_file() assert dest.read_text() == "contents" + + +def test_relative_remote(erepo_dir, tmp_dir, tmp_path_factory): + # these steps reproduce the script on this issue: + # https://github.com/iterative/dvc/issues/2756 + with erepo_dir.chdir(): + erepo_dir.dvc_gen("file", "contents", commit="create file") + + upstream_dir = fspath(tmp_path_factory.mktemp("upstream")) + upstream_url = relpath(upstream_dir, erepo_dir) + with erepo_dir.dvc.config.edit() as conf: + conf["remote"]["upstream"] = {"url": upstream_url} + conf["core"]["remote"] = "upstream" + + erepo_dir.dvc.push() + + os.remove(erepo_dir / "file") + shutil.rmtree(erepo_dir / ".dvc" / "cache") + + url = fspath(erepo_dir) + + with external_repo(url) as repo: + assert os.path.isabs(repo.config["remote"]["upstream"]["url"]) + assert os.path.isdir(repo.config["remote"]["upstream"]["url"]) + with repo.open_by_relpath("file") as fd: + assert fd.read() == "contents" From 8bea319abe01242d1539d8894b15e7154074c986 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Fri, 21 Feb 2020 17:07:29 -0300 Subject: [PATCH 03/12] Remove unused argument --- tests/func/test_external_repo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index 53fcff3589..13d970a477 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -92,7 +92,7 @@ def test_pull_subdir_file(tmp_dir, erepo_dir): assert dest.read_text() == "contents" -def test_relative_remote(erepo_dir, tmp_dir, tmp_path_factory): +def test_relative_remote(erepo_dir, tmp_path_factory): # these steps reproduce the script on this issue: # https://github.com/iterative/dvc/issues/2756 with erepo_dir.chdir(): From 505d35c9f90d544ba42e8914fd4c348353dcf795 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Fri, 21 Feb 2020 17:18:38 -0300 Subject: [PATCH 04/12] erepo: move handling of relative remotes to a separate method (#2756) --- dvc/external_repo.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index b67fa56c55..94ef118dd5 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -66,6 +66,7 @@ def __init__(self, root_dir, url): super().__init__(root_dir) self.url = url self._set_cache_dir() + self._set_relative_upstream() self._set_upstream() def pull_to(self, path, to_info): @@ -123,16 +124,14 @@ def _set_cache_dir(self): self.cache.local.cache_dir = cache_dir - def _set_upstream(self): - # check if the URL is local and: - # - if no default remote is present, then add default remote - # pointing to the original repo's cache location - # - if a default remote is present, make the URL - # relative to the original repo + def _set_relative_upstream(self): + # check if the URL is local and if a default relative-path + # remote is present, if so, make the URL relative to the + # original repo if os.path.isdir(self.url): - src_repo = Repo(self.url) remote_name = self.config["core"].get("remote") if remote_name: + src_repo = Repo(self.url) remote_cfg = self.config["remote"][remote_name] old_remote_url = src_repo.config["remote"][remote_name]["url"] if remote_cfg["url"] != old_remote_url: @@ -140,7 +139,13 @@ def _set_upstream(self): src_repo.root_dir, old_remote_url ) src_repo.close() - else: + + def _set_upstream(self): + # check if the URL is local and no default remote is present + # add default remote pointing to the original repo's cache location + if os.path.isdir(self.url): + if not self.config["core"].get("remote"): + src_repo = Repo(self.url) try: cache_dir = src_repo.cache.local.cache_dir finally: From 89764693620ef44ca2d6a377de6aa749135676f3 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Fri, 21 Feb 2020 17:54:49 -0300 Subject: [PATCH 05/12] erepo: don't manually build paths on test (#2756) --- tests/func/test_external_repo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index 13d970a477..7fdd1b7066 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -106,8 +106,9 @@ def test_relative_remote(erepo_dir, tmp_path_factory): erepo_dir.dvc.push() - os.remove(erepo_dir / "file") - shutil.rmtree(erepo_dir / ".dvc" / "cache") + with erepo_dir.chdir(): + os.remove("file") + shutil.rmtree(erepo_dir.dvc.cache.local.cache_dir) url = fspath(erepo_dir) From ce3b511ba91d2f7c1040834ca597b7555717c938 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Sun, 23 Feb 2020 11:25:02 -0300 Subject: [PATCH 06/12] erepo: refactor handling of missing or relative upstream (#2756) --- dvc/external_repo.py | 66 +++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 94ef118dd5..e1a628626d 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -66,8 +66,7 @@ def __init__(self, root_dir, url): super().__init__(root_dir) self.url = url self._set_cache_dir() - self._set_relative_upstream() - self._set_upstream() + self._fix_upstream() def pull_to(self, path, to_info): """ @@ -124,37 +123,40 @@ def _set_cache_dir(self): self.cache.local.cache_dir = cache_dir - def _set_relative_upstream(self): - # check if the URL is local and if a default relative-path - # remote is present, if so, make the URL relative to the - # original repo + def _fix_upstream(self): if os.path.isdir(self.url): - remote_name = self.config["core"].get("remote") - if remote_name: - src_repo = Repo(self.url) - remote_cfg = self.config["remote"][remote_name] - old_remote_url = src_repo.config["remote"][remote_name]["url"] - if remote_cfg["url"] != old_remote_url: - remote_cfg["url"] = os.path.join( - src_repo.root_dir, old_remote_url - ) - src_repo.close() - - def _set_upstream(self): - # check if the URL is local and no default remote is present - # add default remote pointing to the original repo's cache location - if os.path.isdir(self.url): - if not self.config["core"].get("remote"): - src_repo = Repo(self.url) - try: - cache_dir = src_repo.cache.local.cache_dir - finally: - src_repo.close() - - self.config["remote"]["auto-generated-upstream"] = { - "url": cache_dir - } - self.config["core"]["remote"] = "auto-generated-upstream" + upstream_name = self.config["core"].get("remote") + if upstream_name: + self._fix_local_remote(upstream_name) + else: + self._add_upstream() + + def _fix_local_remote(self, remote_name): + # Keep the relative path for a local remote relative + # to the original repo. + src_repo = Repo(self.url) + try: + new_remote = self.config["remote"][remote_name] + old_remote = src_repo.config["remote"][remote_name] + # NOTE: it is assumed that only local+relative urls + # will be different from their old version. Revise this + # test if that stops being true. + if new_remote["url"] != old_remote["url"]: + new_remote["url"] = old_remote["url"] + finally: + src_repo.close() + + def _add_upstream(self): + # Fill the empty upstream entry with a new remote pointing to the + # original repo's cache location. + src_repo = Repo(self.url) + try: + cache_dir = src_repo.cache.local.cache_dir + finally: + src_repo.close() + + self.config["remote"]["auto-generated-upstream"] = {"url": cache_dir} + self.config["core"]["remote"] = "auto-generated-upstream" class ExternalGitRepo: From 9b6261f5d2dd0df9415e5773979c589c58ca5cb2 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Sun, 23 Feb 2020 12:14:21 -0300 Subject: [PATCH 07/12] erepo: move handling of source repo resource to `_fix_upstream` (#2756) --- dvc/external_repo.py | 45 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index e1a628626d..59d21951c3 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -126,35 +126,30 @@ def _set_cache_dir(self): def _fix_upstream(self): if os.path.isdir(self.url): upstream_name = self.config["core"].get("remote") - if upstream_name: - self._fix_local_remote(upstream_name) - else: - self._add_upstream() - - def _fix_local_remote(self, remote_name): + src_repo = Repo(self.url) + try: + if upstream_name: + self._fix_local_remote(src_repo, upstream_name) + else: + self._add_upstream(src_repo) + finally: + src_repo.close() + + def _fix_local_remote(self, src_repo, remote_name): # Keep the relative path for a local remote relative # to the original repo. - src_repo = Repo(self.url) - try: - new_remote = self.config["remote"][remote_name] - old_remote = src_repo.config["remote"][remote_name] - # NOTE: it is assumed that only local+relative urls - # will be different from their old version. Revise this - # test if that stops being true. - if new_remote["url"] != old_remote["url"]: - new_remote["url"] = old_remote["url"] - finally: - src_repo.close() - - def _add_upstream(self): + new_remote = self.config["remote"][remote_name] + old_remote = src_repo.config["remote"][remote_name] + # NOTE: it is assumed that only local+relative urls + # will be different from their old version. Revise this + # test if that stops being true. + if new_remote["url"] != old_remote["url"]: + new_remote["url"] = old_remote["url"] + + def _add_upstream(self, src_repo): # Fill the empty upstream entry with a new remote pointing to the # original repo's cache location. - src_repo = Repo(self.url) - try: - cache_dir = src_repo.cache.local.cache_dir - finally: - src_repo.close() - + cache_dir = src_repo.cache.local.cache_dir self.config["remote"]["auto-generated-upstream"] = {"url": cache_dir} self.config["core"]["remote"] = "auto-generated-upstream" From 80bd6eb0970409b7d0ff88b0b32c252aee1e0ef3 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Mon, 24 Feb 2020 14:41:47 -0300 Subject: [PATCH 08/12] erepo: improve relative remote tests (#2756) - use `tmp_dir` fixture - commit config file changes --- tests/func/test_external_repo.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index 7fdd1b7066..aa3284d055 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -92,22 +92,23 @@ def test_pull_subdir_file(tmp_dir, erepo_dir): assert dest.read_text() == "contents" -def test_relative_remote(erepo_dir, tmp_path_factory): +def test_relative_remote(erepo_dir, tmp_dir): # these steps reproduce the script on this issue: # https://github.com/iterative/dvc/issues/2756 with erepo_dir.chdir(): erepo_dir.dvc_gen("file", "contents", commit="create file") - upstream_dir = fspath(tmp_path_factory.mktemp("upstream")) + upstream_dir = tmp_dir upstream_url = relpath(upstream_dir, erepo_dir) with erepo_dir.dvc.config.edit() as conf: conf["remote"]["upstream"] = {"url": upstream_url} conf["core"]["remote"] = "upstream" + erepo_dir.scm_add(erepo_dir.dvc.config.files["repo"]) + erepo_dir.scm.commit("Update dvc config") erepo_dir.dvc.push() - with erepo_dir.chdir(): - os.remove("file") + (erepo_dir / "file").unlink() shutil.rmtree(erepo_dir.dvc.cache.local.cache_dir) url = fspath(erepo_dir) From 3bc32b714e864a4b8e334b9908599067740ca6c3 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Mon, 24 Feb 2020 14:42:15 -0300 Subject: [PATCH 09/12] erepo: Remove comment (#2756) --- dvc/external_repo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 59d21951c3..aa699b1121 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -140,9 +140,6 @@ def _fix_local_remote(self, src_repo, remote_name): # to the original repo. new_remote = self.config["remote"][remote_name] old_remote = src_repo.config["remote"][remote_name] - # NOTE: it is assumed that only local+relative urls - # will be different from their old version. Revise this - # test if that stops being true. if new_remote["url"] != old_remote["url"]: new_remote["url"] = old_remote["url"] From 0790e2646c1919a5c7d316e2ce1f75b1f4b02bff Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Mon, 24 Feb 2020 14:46:38 -0300 Subject: [PATCH 10/12] erepo: use scm_add to commit too, no need for separate scm.commit call (#2756) --- tests/func/test_external_repo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index aa3284d055..fbbaf37fda 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -104,8 +104,9 @@ def test_relative_remote(erepo_dir, tmp_dir): conf["remote"]["upstream"] = {"url": upstream_url} conf["core"]["remote"] = "upstream" - erepo_dir.scm_add(erepo_dir.dvc.config.files["repo"]) - erepo_dir.scm.commit("Update dvc config") + erepo_dir.scm_add( + erepo_dir.dvc.config.files["repo"], commit="Update dvc config" + ) erepo_dir.dvc.push() (erepo_dir / "file").unlink() From 07e28c2e68ae6637d5a771d97a9ca7e42b4b28cb Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Tue, 25 Feb 2020 23:33:42 -0300 Subject: [PATCH 11/12] erepo: minor refactoring in `_fix_upstream` (#2756) --- dvc/external_repo.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index aa699b1121..8ea8d9a059 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -124,20 +124,20 @@ def _set_cache_dir(self): self.cache.local.cache_dir = cache_dir def _fix_upstream(self): - if os.path.isdir(self.url): - upstream_name = self.config["core"].get("remote") - src_repo = Repo(self.url) - try: - if upstream_name: - self._fix_local_remote(src_repo, upstream_name) - else: - self._add_upstream(src_repo) - finally: - src_repo.close() + if not os.path.isdir(self.url): + return + + remote_name = self.config["core"].get("remote") + src_repo = Repo(self.url) + try: + if remote_name: + self._fix_local_remote(src_repo, remote_name) + else: + self._add_upstream(src_repo) + finally: + src_repo.close() def _fix_local_remote(self, src_repo, remote_name): - # Keep the relative path for a local remote relative - # to the original repo. new_remote = self.config["remote"][remote_name] old_remote = src_repo.config["remote"][remote_name] if new_remote["url"] != old_remote["url"]: From 565e4a5079a3bbaf6adb957f51d381bbba8138f6 Mon Sep 17 00:00:00 2001 From: Bruno Deferrari Date: Wed, 26 Feb 2020 01:31:44 -0300 Subject: [PATCH 12/12] erepo: add comment explaining that old relative paths are restored (#2756) --- dvc/external_repo.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 8ea8d9a059..144fd46a80 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -138,6 +138,9 @@ def _fix_upstream(self): src_repo.close() def _fix_local_remote(self, src_repo, remote_name): + # If a remote URL is relative to the source repo, + # it will have changed upon config load and made + # relative to this new repo. Restore the old one here. new_remote = self.config["remote"][remote_name] old_remote = src_repo.config["remote"][remote_name] if new_remote["url"] != old_remote["url"]: