-
Notifications
You must be signed in to change notification settings - Fork 1.3k
dvc: support group cache mode
#2298
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5aae88b
9c5fd0c
b2fe3f7
713e288
24428e4
640e737
9955f1c
2b36d25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,13 +4,15 @@ | |
|
|
||
| from dvc.scheme import Schemes | ||
| from dvc.remote.local.slow_link_detection import slow_link_guard | ||
| from dvc.utils.compat import str, makedirs, fspath_py35 | ||
| from dvc.utils.compat import str, fspath_py35 | ||
|
|
||
| import os | ||
| import stat | ||
| import errno | ||
| from shortuuid import uuid | ||
| import shutil | ||
| import logging | ||
| from functools import partial | ||
|
|
||
| from dvc.system import System | ||
| from dvc.remote.base import ( | ||
|
|
@@ -29,6 +31,7 @@ | |
| walk_files, | ||
| relpath, | ||
| dvc_walk, | ||
| makedirs, | ||
| ) | ||
| from dvc.config import Config | ||
| from dvc.exceptions import DvcException | ||
|
|
@@ -56,10 +59,19 @@ class RemoteLOCAL(RemoteBASE): | |
| "reflink": System.reflink, | ||
| } | ||
|
|
||
| SHARED_MODE_MAP = {None: (0o644, 0o755), "group": (0o664, 0o775)} | ||
|
efiop marked this conversation as resolved.
Outdated
|
||
|
|
||
| def __init__(self, repo, config): | ||
| super(RemoteLOCAL, self).__init__(repo, config) | ||
| self.protected = config.get(Config.SECTION_CACHE_PROTECTED, False) | ||
|
|
||
| shared = config.get(Config.SECTION_CACHE_SHARED) | ||
| self._file_mode, self._dir_mode = self.SHARED_MODE_MAP[shared] | ||
|
|
||
| if self.protected: | ||
| # cache files are set to be read-only for everyone | ||
| self._file_mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH | ||
|
efiop marked this conversation as resolved.
Outdated
|
||
|
|
||
| types = config.get(Config.SECTION_CACHE_TYPE, None) | ||
| if types: | ||
| if isinstance(types, str): | ||
|
|
@@ -119,10 +131,12 @@ def exists(self, path_info): | |
| return os.path.lexists(fspath_py35(path_info)) | ||
|
|
||
| def makedirs(self, path_info): | ||
| if not self.exists(path_info): | ||
| os.makedirs(fspath_py35(path_info)) | ||
| makedirs(path_info, exist_ok=True, mode=self._dir_mode) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤔 a little bit confusing to use a method with the same name under the method, what do you think about using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But this is |
||
|
|
||
| def link(self, from_info, to_info, link_type=None): | ||
| def link(self, from_info, to_info): | ||
| self._link(from_info, to_info, self.cache_types) | ||
|
|
||
| def _link(self, from_info, to_info, link_types): | ||
| from_path = from_info.fspath | ||
| to_path = to_info.fspath | ||
|
|
||
|
|
@@ -140,11 +154,6 @@ def link(self, from_info, to_info, link_type=None): | |
| logger.debug(msg) | ||
| return | ||
|
|
||
| if not link_type: | ||
| link_types = self.cache_types | ||
| else: | ||
| link_types = [link_type] | ||
|
|
||
| self._try_links(from_info, to_info, link_types) | ||
|
|
||
| @classmethod | ||
|
|
@@ -156,7 +165,7 @@ def _get_link_method(cls, link_type): | |
| "Cache type: '{}' not supported!".format(link_type) | ||
| ) | ||
|
|
||
| def _link(self, from_info, to_info, link_method): | ||
| def _do_link(self, from_info, to_info, link_method): | ||
| if self.exists(to_info): | ||
| raise DvcException("Link '{}' already exists!".format(to_info)) | ||
| else: | ||
|
|
@@ -179,7 +188,7 @@ def _try_links(self, from_info, to_info, link_types): | |
| while i > 0: | ||
| link_method = self._get_link_method(link_types[0]) | ||
| try: | ||
| self._link(from_info, to_info, link_method) | ||
| self._do_link(from_info, to_info, link_method) | ||
| return | ||
|
|
||
| except DvcException as exc: | ||
|
|
@@ -234,20 +243,17 @@ def move(self, from_info, to_info): | |
| if from_info.scheme != "local" or to_info.scheme != "local": | ||
| raise NotImplementedError | ||
|
|
||
| inp = from_info.fspath | ||
| outp = to_info.fspath | ||
| if self.isfile(from_info): | ||
| mode = self._file_mode | ||
| else: | ||
| mode = self._dir_mode | ||
|
|
||
| # moving in two stages to make the whole operation atomic in | ||
| # case inp and outp are in different filesystems and actual | ||
| # physical copying of data is happening | ||
| tmp = "{}.{}".format(outp, str(uuid())) | ||
| move(inp, tmp) | ||
| move(tmp, outp) | ||
| move(from_info, to_info, mode=mode) | ||
|
|
||
| def cache_exists(self, md5s, jobs=None): | ||
| def cache_exists(self, checksums, jobs=None): | ||
| return [ | ||
| checksum | ||
| for checksum in progress(md5s) | ||
| for checksum in progress(checksums) | ||
| if not self.changed_cache_file(checksum) | ||
| ] | ||
|
|
||
|
|
@@ -373,7 +379,11 @@ def _process( | |
| ) | ||
|
|
||
| if download: | ||
| func = remote.download | ||
| func = partial( | ||
| remote.download, | ||
| dir_mode=self._dir_mode, | ||
| file_mode=self._file_mode, | ||
| ) | ||
| status = STATUS_DELETED | ||
| else: | ||
| func = remote.upload | ||
|
|
@@ -487,9 +497,20 @@ def unprotect(self, path_info): | |
|
|
||
| @staticmethod | ||
| def protect(path_info): | ||
| os.chmod( | ||
| fspath_py35(path_info), stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH | ||
| ) | ||
| path = fspath_py35(path_info) | ||
| mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH | ||
|
|
||
| try: | ||
| os.chmod(path, mode) | ||
| except OSError as exc: | ||
| # In share cache scenario, we might not own the cache file, so we | ||
| # need to check if cache file is already protected. | ||
| if exc.errno not in [errno.EPERM, errno.EACCES]: | ||
| raise | ||
|
|
||
| actual = os.stat(path).st_mode | ||
| if actual & mode != mode: | ||
| raise | ||
|
efiop marked this conversation as resolved.
Outdated
|
||
|
|
||
| def _get_unpacked_dir_path_info(self, checksum): | ||
| info = self.checksum_to_path_info(checksum) | ||
|
|
@@ -524,8 +545,13 @@ def _create_unpacked_dir(self, checksum, dir_info, unpacked_dir_info): | |
| entry[self.PARAM_CHECKSUM] | ||
| ) | ||
| relative_path = entry[self.PARAM_RELPATH] | ||
| self.link( | ||
| entry_cache_info, unpacked_dir_info / relative_path, "hardlink" | ||
| # In shared cache mode some cache files might not be owned by the | ||
| # user, so we need to use symlinks because, unless | ||
| # /proc/sys/fs/protected_hardlinks is disabled, the user is not | ||
| # allowed to create hardlinks to files that he doesn't own. | ||
| link_types = ["hardlink", "symlink"] | ||
| self._link( | ||
| entry_cache_info, unpacked_dir_info / relative_path, link_types | ||
| ) | ||
|
|
||
| self.state.save(unpacked_dir_info, checksum) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this implies that
cacheislocal, right? are there any downsides of doing this? like a remote cache withsshfor exampleThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really, if remote supports dirs then it should support
makedirs. So currently ssh and local support makedirs.