From f1c1233dfeca8eed04877b870b24c1971bd05813 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 8 Jun 2020 15:37:27 +0900 Subject: [PATCH 1/3] remote: move get_file_checksum() into tree * makes RemoteTree and RepoTree consistent with regard to checksum calculation --- dvc/remote/azure.py | 6 +++--- dvc/remote/base.py | 3 +++ dvc/remote/gdrive.py | 6 +++--- dvc/remote/gs.py | 28 ++++++++++++------------ dvc/remote/hdfs.py | 44 +++++++++++++++++++------------------- dvc/remote/http.py | 38 ++++++++++++++++---------------- dvc/remote/local.py | 6 +++--- dvc/remote/s3.py | 8 +++---- dvc/remote/ssh/__init__.py | 14 ++++++------ 9 files changed, 77 insertions(+), 76 deletions(-) diff --git a/dvc/remote/azure.py b/dvc/remote/azure.py index 4feb57f40b..c09ef47a0f 100644 --- a/dvc/remote/azure.py +++ b/dvc/remote/azure.py @@ -108,6 +108,9 @@ def remove(self, path_info): logger.debug(f"Removing {path_info}") self.blob_service.delete_blob(path_info.bucket, path_info.path) + def get_file_checksum(self, path_info): + return self.get_etag(path_info) + def _upload( self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs ): @@ -138,6 +141,3 @@ class AzureRemote(BaseRemote): COPY_POLL_SECONDS = 5 LIST_OBJECT_PAGE_SIZE = 5000 TREE_CLS = AzureRemoteTree - - def get_file_checksum(self, path_info): - return self.tree.get_etag(path_info) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 46ae988a28..089e731d61 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -165,6 +165,9 @@ def hardlink(self, from_info, to_info): def reflink(self, from_info, to_info): raise RemoteActionNotImplemented("reflink", self.scheme) + def get_file_checksum(self, path_info): + raise NotImplementedError + @staticmethod def _handle_transfer_exception(from_info, to_info, exception, operation): if isinstance(exception, OSError) and exception.errno == errno.EMFILE: diff --git a/dvc/remote/gdrive.py b/dvc/remote/gdrive.py index 0204a8997e..83cd2b3f6e 100644 --- 
a/dvc/remote/gdrive.py +++ b/dvc/remote/gdrive.py @@ -545,6 +545,9 @@ def remove(self, path_info): item_id = self._get_item_id(path_info) self.gdrive_delete_file(item_id) + def get_file_checksum(self, path_info): + raise NotImplementedError + def _upload(self, from_file, to_info, name=None, no_progress_bar=False): dirname = to_info.parent assert dirname @@ -567,6 +570,3 @@ class GDriveRemote(BaseRemote): TRAVERSE_WEIGHT_MULTIPLIER = 1 TRAVERSE_PREFIX_LEN = 2 TREE_CLS = GDriveRemoteTree - - def get_file_checksum(self, path_info): - raise NotImplementedError diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py index 4d71198b77..c59c2da719 100644 --- a/dvc/remote/gs.py +++ b/dvc/remote/gs.py @@ -157,6 +157,20 @@ def copy(self, from_info, to_info): to_bucket = self.gs.bucket(to_info.bucket) from_bucket.copy_blob(blob, to_bucket, new_name=to_info.path) + def get_file_checksum(self, path_info): + import base64 + import codecs + + bucket = path_info.bucket + path = path_info.path + blob = self.gs.bucket(bucket).get_blob(path) + if not blob: + return None + + b64_md5 = blob.md5_hash + md5 = base64.b64decode(b64_md5) + return codecs.getencoder("hex")(md5)[0].decode("utf-8") + def _upload(self, from_file, to_info, name=None, no_progress_bar=False): bucket = self.gs.bucket(to_info.bucket) _upload_to_bucket( @@ -186,17 +200,3 @@ class GSRemote(BaseRemote): REQUIRES = {"google-cloud-storage": "google.cloud.storage"} PARAM_CHECKSUM = "md5" TREE_CLS = GSRemoteTree - - def get_file_checksum(self, path_info): - import base64 - import codecs - - bucket = path_info.bucket - path = path_info.path - blob = self.tree.gs.bucket(bucket).get_blob(path) - if not blob: - return None - - b64_md5 = blob.md5_hash - md5 = base64.b64decode(b64_md5) - return codecs.getencoder("hex")(md5)[0].decode("utf-8") diff --git a/dvc/remote/hdfs.py b/dvc/remote/hdfs.py index e39ebc318b..8c77cbdce8 100644 --- a/dvc/remote/hdfs.py +++ b/dvc/remote/hdfs.py @@ -122,28 +122,6 @@ def copy(self, from_info, to_info, 
**_kwargs): self.remove(tmp_info) raise - def _upload(self, from_file, to_info, **_kwargs): - with self.hdfs(to_info) as hdfs: - hdfs.mkdir(posixpath.dirname(to_info.path)) - tmp_file = tmp_fname(to_info.path) - with open(from_file, "rb") as fobj: - hdfs.upload(tmp_file, fobj) - hdfs.rename(tmp_file, to_info.path) - - def _download(self, from_info, to_file, **_kwargs): - with self.hdfs(from_info) as hdfs: - with open(to_file, "wb+") as fobj: - hdfs.download(from_info.path, fobj) - - -class HDFSRemote(BaseRemote): - scheme = Schemes.HDFS - REGEX = r"^hdfs://((?P<user>.*)@)?.*$" - PARAM_CHECKSUM = "checksum" - REQUIRES = {"pyarrow": "pyarrow"} - TRAVERSE_PREFIX_LEN = 2 - TREE_CLS = HDFSRemoteTree - def hadoop_fs(self, cmd, user=None): cmd = "hadoop fs -" + cmd if user: @@ -182,3 +160,25 @@ def get_file_checksum(self, path_info): f"checksum {path_info.path}", user=path_info.user ) return self._group(regex, stdout, "checksum") + + def _upload(self, from_file, to_info, **_kwargs): + with self.hdfs(to_info) as hdfs: + hdfs.mkdir(posixpath.dirname(to_info.path)) + tmp_file = tmp_fname(to_info.path) + with open(from_file, "rb") as fobj: + hdfs.upload(tmp_file, fobj) + hdfs.rename(tmp_file, to_info.path) + + def _download(self, from_info, to_file, **_kwargs): + with self.hdfs(from_info) as hdfs: + with open(to_file, "wb+") as fobj: + hdfs.download(from_info.path, fobj) + + +class HDFSRemote(BaseRemote): + scheme = Schemes.HDFS + REGEX = r"^hdfs://((?P<user>.*)@)?.*$" + PARAM_CHECKSUM = "checksum" + REQUIRES = {"pyarrow": "pyarrow"} + TRAVERSE_PREFIX_LEN = 2 + TREE_CLS = HDFSRemoteTree diff --git a/dvc/remote/http.py b/dvc/remote/http.py index 154550bd16..8a45fc3b90 100644 --- a/dvc/remote/http.py +++ b/dvc/remote/http.py @@ -121,6 +121,25 @@ def request(self, method, url, **kwargs): def exists(self, path_info): return bool(self.request("HEAD", path_info.url)) + def get_file_checksum(self, path_info): + url = path_info.url + headers = self.request("HEAD", url).headers + etag = 
headers.get("ETag") or headers.get("Content-MD5") + + if not etag: + raise DvcException( + "could not find an ETag or " + "Content-MD5 header for '{url}'".format(url=url) + ) + + if etag.startswith("W/"): + raise DvcException( + "Weak ETags are not supported." + " (Etag: '{etag}', URL: '{url}')".format(etag=etag, url=url) + ) + + return etag + def _download(self, from_info, to_file, name=None, no_progress_bar=False): response = self.request("GET", from_info.url, stream=True) if response.status_code != 200: @@ -174,25 +193,6 @@ class HTTPRemote(BaseRemote): CAN_TRAVERSE = False TREE_CLS = HTTPRemoteTree - def get_file_checksum(self, path_info): - url = path_info.url - headers = self.tree.request("HEAD", url).headers - etag = headers.get("ETag") or headers.get("Content-MD5") - - if not etag: - raise DvcException( - "could not find an ETag or " - "Content-MD5 header for '{url}'".format(url=url) - ) - - if etag.startswith("W/"): - raise DvcException( - "Weak ETags are not supported." - " (Etag: '{etag}', URL: '{url}')".format(etag=etag, url=url) - ) - - return etag - def list_cache_paths(self, prefix=None, progress_callback=None): raise NotImplementedError diff --git a/dvc/remote/local.py b/dvc/remote/local.py index 12ed0e691b..56b9d964fd 100644 --- a/dvc/remote/local.py +++ b/dvc/remote/local.py @@ -209,6 +209,9 @@ def reflink(self, from_info, to_info): os.chmod(tmp_info, self.file_mode) os.rename(tmp_info, to_info) + def get_file_checksum(self, path_info): + return file_md5(path_info)[0] + @staticmethod def getsize(path_info): return os.path.getsize(path_info) @@ -318,9 +321,6 @@ def _verify_link(self, path_info, link_type): super()._verify_link(path_info, link_type) - def get_file_checksum(self, path_info): - return file_md5(path_info)[0] - def cache_exists(self, checksums, jobs=None, name=None): return [ checksum diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 642a743bb5..2fb079dbda 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -305,6 +305,9 @@ def 
_copy(cls, s3, from_info, to_info, extra_args): if etag != cached_etag: raise ETagMismatchError(etag, cached_etag) + def get_file_checksum(self, path_info): + return self.get_etag(self.s3, path_info.bucket, path_info.path) + def _upload(self, from_file, to_info, name=None, no_progress_bar=False): total = os.path.getsize(from_file) with Tqdm( @@ -338,8 +341,3 @@ class S3Remote(BaseRemote): REQUIRES = {"boto3": "boto3"} PARAM_CHECKSUM = "etag" TREE_CLS = S3RemoteTree - - def get_file_checksum(self, path_info): - return self.tree.get_etag( - self.tree.s3, path_info.bucket, path_info.path - ) diff --git a/dvc/remote/ssh/__init__.py b/dvc/remote/ssh/__init__.py index 54a968d4f4..3048810330 100644 --- a/dvc/remote/ssh/__init__.py +++ b/dvc/remote/ssh/__init__.py @@ -225,6 +225,13 @@ def reflink(self, from_info, to_info): with self.ssh(from_info) as ssh: ssh.reflink(from_info.path, to_info.path) + def get_file_checksum(self, path_info): + if path_info.scheme != self.scheme: + raise NotImplementedError + + with self.ssh(path_info) as ssh: + return ssh.md5(path_info.path) + def getsize(self, path_info): with self.ssh(path_info) as ssh: return ssh.getsize(path_info.path) @@ -265,13 +272,6 @@ class SSHRemote(BaseRemote): DEFAULT_CACHE_TYPES = ["copy"] - def get_file_checksum(self, path_info): - if path_info.scheme != self.scheme: - raise NotImplementedError - - with self.tree.ssh(path_info) as ssh: - return ssh.md5(path_info.path) - def list_cache_paths(self, prefix=None, progress_callback=None): if prefix: root = posixpath.join(self.path_info.path, prefix[:2]) From 45f531415a85893a22bddba7a57fcb6202ff3a7e Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 8 Jun 2020 15:38:52 +0900 Subject: [PATCH 2/3] remote: save() now takes explicit tree parameter * when tree is remote.tree, save will be a move + link operation (same as default existing behavior) * when saving path from a different tree, save will be a copy operation --- dvc/external_repo.py | 3 +- dvc/output/base.py 
| 2 +- dvc/remote/base.py | 178 ++++++++++++++++++++----------------------- 3 files changed, 86 insertions(+), 97 deletions(-) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index 2c66be806c..2efa0414c9 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -126,8 +126,9 @@ def download_update(result): raise PathMissingError(path, self.url) save_info = self.local_cache.save( path, + self.repo_tree, None, - tree=self.repo_tree, + save_link=False, download_callback=download_update, ) save_infos.append(save_info) diff --git a/dvc/output/base.py b/dvc/output/base.py index f6eceec9f2..8806a4d844 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -267,7 +267,7 @@ def save(self): def commit(self): if self.use_cache: - self.cache.save(self.path_info, self.info) + self.cache.save(self.path_info, self.cache.tree, self.info) def dumpd(self): ret = copy(self.info) diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 089e731d61..f217a45f8a 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -400,17 +400,14 @@ def supported(cls, config): def cache(self): return getattr(self.repo.cache, self.scheme) - def get_file_checksum(self, path_info): - raise NotImplementedError - - def _calculate_checksums(self, file_infos): + def _calculate_checksums(self, file_infos, tree): file_infos = list(file_infos) with Tqdm( total=len(file_infos), unit="md5", desc="Computing file/dir hashes (only done once)", ) as pbar: - worker = pbar.wrap_fn(self.get_file_checksum) + worker = pbar.wrap_fn(tree.get_file_checksum) with ThreadPoolExecutor( max_workers=self.checksum_jobs ) as executor: @@ -418,33 +415,29 @@ def _calculate_checksums(self, file_infos): checksums = dict(zip(file_infos, tasks)) return checksums - def _collect_dir(self, path_info, tree=None, save_tree=False, **kwargs): + def _collect_dir(self, path_info, tree, save_tree=False, **kwargs): file_infos = set() - if tree: - walk_files = tree.walk_files - else: - walk_files = self.tree.walk_files - - 
for fname in walk_files(path_info, **kwargs): + for fname in tree.walk_files(path_info, **kwargs): if DvcIgnore.DVCIGNORE_FILE == fname.name: raise DvcIgnoreInCollectedDirError(fname.parent) file_infos.add(fname) - if tree: - checksums = {fi: tree.get_file_checksum(fi) for fi in file_infos} - if save_tree: - for fi, checksum in checksums.items(): - self._save_file(fi, checksum, tree=tree, **kwargs) - else: - checksums = {fi: self.state.get(fi) for fi in file_infos} - not_in_state = { - fi for fi, checksum in checksums.items() if checksum is None - } + checksums = {fi: self.state.get(fi) for fi in file_infos} + not_in_state = { + fi for fi, checksum in checksums.items() if checksum is None + } - new_checksums = self._calculate_checksums(not_in_state) - checksums.update(new_checksums) + new_checksums = self._calculate_checksums(not_in_state, tree) + checksums.update(new_checksums) + + if save_tree: + logger.debug("_collect_dir save_tree") + logger.debug(f"{kwargs}") + for fi, checksum in checksums.items(): + logger.debug(f"_collect_dir saving '{fi}' '{checksum}'") + self._save_file(fi, tree, checksum, **kwargs) result = [ { @@ -466,12 +459,16 @@ def _collect_dir(self, path_info, tree=None, save_tree=False, **kwargs): return sorted(result, key=itemgetter(self.PARAM_RELPATH)) def get_dir_checksum(self, path_info, tree=None): + if not tree: + tree = self.tree + if not self.cache: raise RemoteCacheRequiredError(path_info) - dir_info = self._collect_dir(path_info, tree=None) - if tree: - # don't save state entry for path_info if it is a tree path + dir_info = self._collect_dir(path_info, tree) + if tree != self.tree: + # don't save state entry for path_info if it is from a different + # tree path_info = None return self._save_dir_info(dir_info, path_info) @@ -493,11 +490,12 @@ def _get_dir_info_checksum(self, dir_info): with open(tmp, "w+") as fobj: json.dump(dir_info, fobj, sort_keys=True) + tree = self.cache.tree from_info = PathInfo(tmp) - to_info = 
self.cache.path_info / tmp_fname("") - self.cache.tree.upload(from_info, to_info, no_progress_bar=True) + to_info = tree.path_info / tmp_fname("") + tree.upload(from_info, to_info, no_progress_bar=True) - checksum = self.get_file_checksum(to_info) + self.CHECKSUM_DIR_SUFFIX + checksum = tree.get_file_checksum(to_info) + self.CHECKSUM_DIR_SUFFIX return checksum, to_info def get_dir_cache(self, checksum): @@ -547,10 +545,13 @@ def is_dir_checksum(cls, checksum): return False return checksum.endswith(cls.CHECKSUM_DIR_SUFFIX) - def get_checksum(self, path_info): + def get_checksum(self, path_info, tree=None): assert isinstance(path_info, str) or path_info.scheme == self.scheme - if not self.tree.exists(path_info): + if not tree: + tree = self.tree + + if not tree.exists(path_info): return None checksum = self.state.get(path_info) @@ -561,27 +562,25 @@ def get_checksum(self, path_info): if ( checksum and self.is_dir_checksum(checksum) - and not self.tree.exists( - self.cache.checksum_to_path_info(checksum) - ) + and not tree.exists(self.cache.checksum_to_path_info(checksum)) ): checksum = None if checksum: return checksum - if self.tree.isdir(path_info): - checksum = self.get_dir_checksum(path_info) + if tree.isdir(path_info): + checksum = self.get_dir_checksum(path_info, tree) else: - checksum = self.get_file_checksum(path_info) + checksum = tree.get_file_checksum(path_info) - if checksum: + if checksum and self.tree.exists(path_info): self.state.save(path_info, checksum) return checksum - def save_info(self, path_info): - return {self.PARAM_CHECKSUM: self.get_checksum(path_info)} + def save_info(self, path_info, tree=None): + return {self.PARAM_CHECKSUM: self.get_checksum(path_info, tree=tree)} def changed(self, path_info, checksum_info): """Checks if data has changed. 
@@ -680,25 +679,11 @@ def _do_link(self, from_info, to_info, link_method): "Created '%s': %s -> %s", self.cache_types[0], from_info, to_info, ) - def _save_file( - self, path_info, checksum, save_link=True, tree=None, **kwargs - ): + def _save_file(self, path_info, tree, checksum, save_link=True, **kwargs): assert checksum cache_info = self.checksum_to_path_info(checksum) - if tree: - if self.changed_cache(checksum): - with tree.open(path_info, mode="rb") as fobj: - # if tree has fetch enabled, DVC out will be fetched on - # open and we do not need to read/copy any data - if not ( - tree.isdvc(path_info, strict=False) and tree.fetch - ): - self.tree.copy_fobj(fobj, cache_info) - callback = kwargs.get("download_callback") - if callback: - callback(1) - else: + if tree == self.tree: if self.changed_cache(checksum): self.tree.move(path_info, cache_info, mode=self.CACHE_MODE) self.link(cache_info, path_info) @@ -713,12 +698,23 @@ def _save_file( if save_link: self.state.save_link(path_info) - - # we need to update path and cache, since in case of reflink, - # or copy cache type moving original file results in updates on - # next executed command, which causes md5 recalculation - if not tree or is_working_tree(tree): + # we need to update path and cache, since in case of reflink, + # or copy cache type moving original file results in updates on + # next executed command, which causes md5 recalculation self.state.save(path_info, checksum) + else: + if self.changed_cache(checksum): + with tree.open(path_info, mode="rb") as fobj: + # if tree has fetch enabled, DVC out will be fetched on + # open and we do not need to read/copy any data + if not ( + tree.isdvc(path_info, strict=False) and tree.fetch + ): + self.tree.copy_fobj(fobj, cache_info) + callback = kwargs.get("download_callback") + if callback: + callback(1) + self.state.save(cache_info, checksum) return {self.PARAM_CHECKSUM: checksum} @@ -744,15 +740,8 @@ def _cache_is_copy(self, path_info): 
self.cache_type_confirmed = True return self.cache_types[0] == "copy" - def _save_dir( - self, path_info, checksum, save_link=True, tree=None, **kwargs - ): - if tree: - dir_info = self._collect_dir( - path_info, tree=tree, save_tree=True, **kwargs - ) - checksum = self._save_dir_info(dir_info) - else: + def _save_dir(self, path_info, tree, checksum, save_link=True, **kwargs): + if checksum: dir_info = self.get_dir_cache(checksum) for entry in Tqdm( @@ -760,10 +749,18 @@ def _save_dir( ): entry_info = path_info / entry[self.PARAM_RELPATH] entry_checksum = entry[self.PARAM_CHECKSUM] - self._save_file(entry_info, entry_checksum, save_link=False) + self._save_file( + entry_info, tree, entry_checksum, save_link=False + ) if save_link: self.state.save_link(path_info) + self.state.save(path_info, checksum) + else: + dir_info = self._collect_dir( + path_info, tree, save_tree=True, **kwargs + ) + checksum = self._save_dir_info(dir_info) cache_info = self.checksum_to_path_info(checksum) self.state.save(cache_info, checksum) @@ -775,42 +772,33 @@ def _save_dir( def protect(path_info): pass - def save( - self, path_info, checksum_info, save_link=True, tree=None, **kwargs - ): + def save(self, path_info, tree, checksum_info, save_link=True, **kwargs): if path_info.scheme != self.scheme: raise RemoteActionNotImplemented( f"save {path_info.scheme} -> {self.scheme}", self.scheme, ) - if tree: - if tree.isdir(path_info): - # save checksum will be computed during tree walk - checksum = None - else: - checksum = tree.get_file_checksum(path_info) - else: + if checksum_info: checksum = checksum_info[self.PARAM_CHECKSUM] - return self._save(path_info, checksum, save_link, tree, **kwargs) - - def _save(self, path_info, checksum, save_link=True, tree=None, **kwargs): - if tree: - logger.debug("Saving tree path '%s' to cache.", path_info) + elif tree.isdir(path_info): + # for dirs, save checksum will be computed during tree walk + checksum = None else: + checksum = 
tree.get_file_checksum(path_info) + return self._save(path_info, tree, checksum, save_link, **kwargs) + + def _save(self, path_info, tree, checksum, save_link=True, **kwargs): + if checksum: to_info = self.checksum_to_path_info(checksum) logger.debug("Saving '%s' to '%s'.", path_info, to_info) - - if tree: - isdir = tree.isdir - save_link = False else: - isdir = self.tree.isdir + logger.debug("Saving tree path '%s' to cache.", path_info) - if isdir(path_info): + if tree.isdir(path_info): return self._save_dir( - path_info, checksum, save_link, tree, **kwargs + path_info, tree, checksum, save_link, **kwargs ) - return self._save_file(path_info, checksum, save_link, tree, **kwargs) + return self._save_file(path_info, tree, checksum, save_link, **kwargs) def open(self, *args, **kwargs): return self.tree.open(*args, **kwargs) From 046622777e8ae68014bdad9428cb286be15391a5 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 8 Jun 2020 15:40:25 +0900 Subject: [PATCH 3/3] tests: update for moved remote/tree functions --- tests/func/test_data_cloud.py | 5 +++-- tests/func/test_tree.py | 2 +- tests/unit/remote/test_azure.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index 9783fbd7a7..fbd49da466 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -24,6 +24,7 @@ SSHRemote, ) from dvc.remote.base import STATUS_DELETED, STATUS_NEW, STATUS_OK +from dvc.remote.local import LocalRemoteTree from dvc.stage.exceptions import StageNotFound from dvc.utils import file_md5 from dvc.utils.fs import remove @@ -614,7 +615,7 @@ def test(self): def test_checksum_recalculation(mocker, dvc, tmp_dir): tmp_dir.gen({"foo": "foo"}) - test_get_file_checksum = mocker.spy(LocalRemote, "get_file_checksum") + test_get_file_checksum = mocker.spy(LocalRemoteTree, "get_file_checksum") url = Local.get_url() ret = main(["remote", "add", "-d", TEST_REMOTE, url]) assert ret == 0 @@ -693,7 
+694,7 @@ def test_verify_checksums( remove("dir") remove(dvc.cache.local.cache_dir) - checksum_spy = mocker.spy(dvc.cache.local, "get_file_checksum") + checksum_spy = mocker.spy(dvc.cache.local.tree, "get_file_checksum") dvc.pull() assert checksum_spy.call_count == 0 diff --git a/tests/func/test_tree.py b/tests/func/test_tree.py index 1df0000d5e..daffd364e1 100644 --- a/tests/func/test_tree.py +++ b/tests/func/test_tree.py @@ -218,7 +218,7 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote): with erepo_dir.dvc.state: cache = dvc.cache.local with cache.state: - cache.save(PathInfo(erepo_dir / "dir"), None, tree=tree) + cache.save(PathInfo(erepo_dir / "dir"), tree, None) for checksum in expected: assert os.path.exists(cache.checksum_to_path_info(checksum)) diff --git a/tests/unit/remote/test_azure.py b/tests/unit/remote/test_azure.py index b49f2ee631..c9009e9f71 100644 --- a/tests/unit/remote/test_azure.py +++ b/tests/unit/remote/test_azure.py @@ -42,7 +42,7 @@ def test_get_file_checksum(tmp_dir): to_info = remote.tree.PATH_CLS(Azure.get_url()) remote.tree.upload(PathInfo("foo"), to_info) assert remote.tree.exists(to_info) - checksum = remote.get_file_checksum(to_info) + checksum = remote.tree.get_file_checksum(to_info) assert checksum assert isinstance(checksum, str) assert checksum.strip("'").strip('"') == checksum