Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 34 additions & 16 deletions dvc/cache/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def _get_plans(self, download, remote, status_info, status):
path_infos = []
names = []
hashes = []
missing = []
for md5, info in Tqdm(
status_info.items(), desc="Analysing status", unit="file"
):
Expand All @@ -263,6 +264,8 @@ def _get_plans(self, download, remote, status_info, status):
path_infos.append(remote.tree.hash_to_path_info(md5))
names.append(info["name"])
hashes.append(md5)
elif info["status"] == STATUS_MISSING:
missing.append(md5)

if download:
to_infos = cache
Expand All @@ -271,7 +274,7 @@ def _get_plans(self, download, remote, status_info, status):
to_infos = path_infos
from_infos = cache

return from_infos, to_infos, names, hashes
return (from_infos, to_infos, names, hashes), missing

def _process(
self,
Expand Down Expand Up @@ -312,8 +315,10 @@ def _process(
download=download,
)

dir_plans = self._get_plans(download, remote, dir_status, status)
file_plans = self._get_plans(download, remote, file_status, status)
dir_plans, _ = self._get_plans(download, remote, dir_status, status)
file_plans, missing_files = self._get_plans(
download, remote, file_status, status
)

total = len(dir_plans[0]) + len(file_plans[0])
if total == 0:
Expand All @@ -339,19 +344,32 @@ def _process(
)
dir_futures = {}
for from_info, to_info, name, dir_hash in zip(*dir_plans):
wait_futures = {
future
for file_hash, future in file_futures.items()
if file_hash in dir_contents[dir_hash]
}
dir_futures[dir_hash] = executor.submit(
self._dir_upload,
func,
wait_futures,
from_info,
to_info,
name,
)
# if for some reason a file contained in this dir is
# missing both locally and in the remote, we want to
# push whatever file content we have, but should not
# push .dir file
for file_hash in missing_files:
if file_hash in dir_contents[dir_hash]:
logger.debug(
"directory '%s' contains missing files,"
"skipping .dir file upload",
name,
)
break
else:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't the else be indented to match the if?

Copy link
Copy Markdown
Contributor Author

@pmrowla pmrowla Aug 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's intended to be for ... else ..., if we get through the list of missing files and none of them belong to the current directory, then we hit the else case and queue the .dir file upload

wait_futures = {
future
for file_hash, future in file_futures.items()
if file_hash in dir_contents[dir_hash]
}
dir_futures[dir_hash] = executor.submit(
self._dir_upload,
func,
wait_futures,
from_info,
to_info,
name,
)
fails = sum(
future.result()
for future in concat(
Expand Down
20 changes: 20 additions & 0 deletions tests/func/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,3 +412,23 @@ def test_protect_local_remote(tmp_dir, dvc, local_remote):

assert os.path.exists(remote_cache_file)
assert stat.S_IMODE(os.stat(remote_cache_file).st_mode) == 0o444


def test_push_incomplete_dir(tmp_dir, dvc, local_remote):
    """Pushing a directory with a locally-missing member file must upload
    the files that are present but must NOT upload the .dir manifest.

    If the .dir file were pushed, the remote would advertise a complete
    directory that it cannot actually serve.
    """
    (stage,) = tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    remote = dvc.cloud.get_remote("upstream")

    cache = dvc.cache.local
    dir_hash = stage.outs[0].checksum
    used = stage.get_used_cache(remote=remote)

    # remove one of the cache files for directory
    file_hashes = list(used.child_keys(cache.tree.scheme, dir_hash))
    remove(cache.tree.hash_to_path_info(file_hashes[0]))

    dvc.push()
    # .dir file must be withheld because the directory is incomplete
    assert not remote.tree.exists(remote.tree.hash_to_path_info(dir_hash))
    # the locally-missing member obviously cannot have been uploaded
    assert not remote.tree.exists(
        remote.tree.hash_to_path_info(file_hashes[0])
    )
    # but the member we still have locally must have been pushed
    assert remote.tree.exists(remote.tree.hash_to_path_info(file_hashes[1]))