Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dvc/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, repo):
cache_dir = config.get(Config.SECTION_CACHE_DIR, default_cache_dir)
cache_type = config.get(Config.SECTION_CACHE_TYPE)
protected = config.get(Config.SECTION_CACHE_PROTECTED)
shared = config.get(Config.SECTION_CACHE_SHARED)

settings = {
Config.PRIVATE_CWD: config.get(
Expand All @@ -54,6 +55,7 @@ def __init__(self, repo):
Config.SECTION_REMOTE_URL: cache_dir,
Config.SECTION_CACHE_TYPE: cache_type,
Config.SECTION_CACHE_PROTECTED: protected,
Config.SECTION_CACHE_SHARED: shared,
}

self.local = Remote(repo, **settings)
Expand Down
3 changes: 3 additions & 0 deletions dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ class Config(object): # pylint: disable=too-many-instance-attributes
SECTION_CACHE_TYPE = "type"
SECTION_CACHE_TYPE_SCHEMA = supported_cache_type
SECTION_CACHE_PROTECTED = "protected"
SECTION_CACHE_SHARED = "shared"
SECTION_CACHE_SHARED_SCHEMA = And(Use(str.lower), Choices("group"))
SECTION_CACHE_LOCAL = "local"
SECTION_CACHE_S3 = "s3"
SECTION_CACHE_GS = "gs"
Expand All @@ -177,6 +179,7 @@ class Config(object): # pylint: disable=too-many-instance-attributes
Optional(SECTION_CACHE_DIR): str,
Optional(SECTION_CACHE_TYPE, default=None): SECTION_CACHE_TYPE_SCHEMA,
Optional(SECTION_CACHE_PROTECTED, default=False): BOOL_SCHEMA,
Optional(SECTION_CACHE_SHARED): SECTION_CACHE_SHARED_SCHEMA,
Optional(PRIVATE_CWD): str,
Optional(SECTION_CACHE_SLOW_LINK_WARNING, default=True): BOOL_SCHEMA,
}
Expand Down
33 changes: 25 additions & 8 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import unicode_literals

from dvc.ignore import DvcIgnore
from dvc.utils.compat import str, basestring, urlparse, fspath_py35, makedirs
from dvc.utils.compat import str, basestring, urlparse

import os
import json
Expand All @@ -20,7 +20,14 @@
DvcIgnoreInCollectedDirError,
)
from dvc.progress import progress, ProgressCallback
from dvc.utils import LARGE_DIR_SIZE, tmp_fname, to_chunks, move, relpath
from dvc.utils import (
LARGE_DIR_SIZE,
tmp_fname,
to_chunks,
move,
relpath,
makedirs,
)
from dvc.state import StateNoop
from dvc.path_info import PathInfo, URLInfo

Expand Down Expand Up @@ -202,6 +209,7 @@ def get_dir_checksum(self, path_info):
checksum, tmp_info = self._get_dir_info_checksum(dir_info)
new_info = self.cache.checksum_to_path_info(checksum)
if self.cache.changed_cache_file(checksum):
self.cache.makedirs(new_info.parent)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this implies that cache is local, right? are there any downsides of doing this? like a remote cache with ssh for example

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, if remote supports dirs then it should support makedirs. So currently ssh and local support makedirs.

self.cache.move(tmp_info, new_info)

self.state.save(path_info, checksum)
Expand Down Expand Up @@ -348,14 +356,15 @@ def changed(self, path_info, checksum_info):
logger.debug("'{}' hasn't changed.".format(path_info))
return False

def link(self, from_info, to_info, link_type=None):
def link(self, from_info, to_info):
self.copy(from_info, to_info)

def _save_file(self, path_info, checksum, save_link=True):
assert checksum

cache_info = self.checksum_to_path_info(checksum)
if self.changed_cache(checksum):
self.makedirs(cache_info.parent)
self.move(path_info, cache_info)
else:
self.remove(path_info)
Expand Down Expand Up @@ -456,7 +465,15 @@ def upload(self, from_info, to_info, name=None, no_progress_bar=False):

return 0

def download(self, from_info, to_info, name=None, no_progress_bar=False):
def download(
self,
from_info,
to_info,
name=None,
no_progress_bar=False,
file_mode=None,
dir_mode=None,
):
if not hasattr(self, "_download"):
raise RemoteActionNotImplemented("download", self.scheme)

Expand All @@ -479,7 +496,7 @@ def download(self, from_info, to_info, name=None, no_progress_bar=False):
# lets at least show start and finish
progress.update_target(name, 0, None)

makedirs(fspath_py35(to_info.parent), exist_ok=True)
makedirs(to_info.parent, exist_ok=True, mode=dir_mode)
tmp_file = tmp_fname(to_info)

try:
Expand All @@ -491,7 +508,7 @@ def download(self, from_info, to_info, name=None, no_progress_bar=False):
logger.exception(msg.format(from_info, to_info))
return 1 # 1 fail

move(tmp_file, fspath_py35(to_info))
move(tmp_file, to_info, mode=file_mode)

if not no_progress_bar:
progress.finish_target(name)
Expand Down Expand Up @@ -756,7 +773,7 @@ def checkout(
msg = "Checking out '{}' with cache '{}'."
logger.debug(msg.format(str(path_info), checksum))

return self._checkout(path_info, checksum, force, progress_callback)
self._checkout(path_info, checksum, force, progress_callback)

def _checkout(
self, path_info, checksum, force=False, progress_callback=None
Expand Down Expand Up @@ -786,5 +803,5 @@ def extract_used_local_checksums(self, cinfos):
def _changed_unpacked_dir(self, checksum):
return True

def _update_unpacked_dir(self, checksum, progress_callback=None):
def _update_unpacked_dir(self, checksum):
pass
80 changes: 53 additions & 27 deletions dvc/remote/local/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@

from dvc.scheme import Schemes
from dvc.remote.local.slow_link_detection import slow_link_guard
from dvc.utils.compat import str, makedirs, fspath_py35
from dvc.utils.compat import str, fspath_py35

import os
import stat
import errno
from shortuuid import uuid
import shutil
import logging
from functools import partial

from dvc.system import System
from dvc.remote.base import (
Expand All @@ -29,6 +31,7 @@
walk_files,
relpath,
dvc_walk,
makedirs,
)
from dvc.config import Config
from dvc.exceptions import DvcException
Expand Down Expand Up @@ -56,10 +59,19 @@ class RemoteLOCAL(RemoteBASE):
"reflink": System.reflink,
}

SHARED_MODE_MAP = {None: (0o644, 0o755), "group": (0o664, 0o775)}
Comment thread
efiop marked this conversation as resolved.
Outdated

def __init__(self, repo, config):
super(RemoteLOCAL, self).__init__(repo, config)
self.protected = config.get(Config.SECTION_CACHE_PROTECTED, False)

shared = config.get(Config.SECTION_CACHE_SHARED)
self._file_mode, self._dir_mode = self.SHARED_MODE_MAP[shared]

if self.protected:
# cache files are set to be read-only for everyone
self._file_mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH
Comment thread
efiop marked this conversation as resolved.
Outdated

types = config.get(Config.SECTION_CACHE_TYPE, None)
if types:
if isinstance(types, str):
Expand Down Expand Up @@ -119,10 +131,12 @@ def exists(self, path_info):
return os.path.lexists(fspath_py35(path_info))

def makedirs(self, path_info):
if not self.exists(path_info):
os.makedirs(fspath_py35(path_info))
makedirs(path_info, exist_ok=True, mode=self._dir_mode)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 a little bit confusing to use a method with the same name under the method, what do you think about using utils.makedirs? or import it differently?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But this is makedirs and RemoteLOCAL.makedirs(or self.makedirs), so it doesn't create collisions. If we were to change this one to utils.makedirs, then we would have to change move,remove and other helpers too. Feels unnecessary.


def link(self, from_info, to_info, link_type=None):
def link(self, from_info, to_info):
self._link(from_info, to_info, self.cache_types)

def _link(self, from_info, to_info, link_types):
from_path = from_info.fspath
to_path = to_info.fspath

Expand All @@ -140,11 +154,6 @@ def link(self, from_info, to_info, link_type=None):
logger.debug(msg)
return

if not link_type:
link_types = self.cache_types
else:
link_types = [link_type]

self._try_links(from_info, to_info, link_types)

@classmethod
Expand All @@ -156,7 +165,7 @@ def _get_link_method(cls, link_type):
"Cache type: '{}' not supported!".format(link_type)
)

def _link(self, from_info, to_info, link_method):
def _do_link(self, from_info, to_info, link_method):
if self.exists(to_info):
raise DvcException("Link '{}' already exists!".format(to_info))
else:
Expand All @@ -179,7 +188,7 @@ def _try_links(self, from_info, to_info, link_types):
while i > 0:
link_method = self._get_link_method(link_types[0])
try:
self._link(from_info, to_info, link_method)
self._do_link(from_info, to_info, link_method)
return

except DvcException as exc:
Expand Down Expand Up @@ -234,20 +243,17 @@ def move(self, from_info, to_info):
if from_info.scheme != "local" or to_info.scheme != "local":
raise NotImplementedError

inp = from_info.fspath
outp = to_info.fspath
if self.isfile(from_info):
mode = self._file_mode
else:
mode = self._dir_mode

# moving in two stages to make the whole operation atomic in
# case inp and outp are in different filesystems and actual
# physical copying of data is happening
tmp = "{}.{}".format(outp, str(uuid()))
move(inp, tmp)
move(tmp, outp)
move(from_info, to_info, mode=mode)

def cache_exists(self, md5s, jobs=None):
def cache_exists(self, checksums, jobs=None):
return [
checksum
for checksum in progress(md5s)
for checksum in progress(checksums)
if not self.changed_cache_file(checksum)
]

Expand Down Expand Up @@ -373,7 +379,11 @@ def _process(
)

if download:
func = remote.download
func = partial(
remote.download,
dir_mode=self._dir_mode,
file_mode=self._file_mode,
)
status = STATUS_DELETED
else:
func = remote.upload
Expand Down Expand Up @@ -487,9 +497,20 @@ def unprotect(self, path_info):

@staticmethod
def protect(path_info):
os.chmod(
fspath_py35(path_info), stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH
)
path = fspath_py35(path_info)
mode = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH

try:
os.chmod(path, mode)
except OSError as exc:
# In share cache scenario, we might not own the cache file, so we
# need to check if cache file is already protected.
if exc.errno not in [errno.EPERM, errno.EACCES]:
raise

actual = os.stat(path).st_mode
if actual & mode != mode:
raise
Comment thread
efiop marked this conversation as resolved.
Outdated

def _get_unpacked_dir_path_info(self, checksum):
info = self.checksum_to_path_info(checksum)
Expand Down Expand Up @@ -524,8 +545,13 @@ def _create_unpacked_dir(self, checksum, dir_info, unpacked_dir_info):
entry[self.PARAM_CHECKSUM]
)
relative_path = entry[self.PARAM_RELPATH]
self.link(
entry_cache_info, unpacked_dir_info / relative_path, "hardlink"
# In shared cache mode some cache files might not be owned by the
# user, so we need to use symlinks because, unless
# /proc/sys/fs/protected_hardlinks is disabled, the user is not
# allowed to create hardlinks to files that he doesn't own.
link_types = ["hardlink", "symlink"]
self._link(
entry_cache_info, unpacked_dir_info / relative_path, link_types
)

self.state.save(unpacked_dir_info, checksum)
Expand Down
Loading