Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import unicode_literals

from dvc.utils.compat import str, open, urlparse
from dvc.utils import checksum as modchecksum

import os
import errno
Expand Down Expand Up @@ -52,6 +53,19 @@ def supported_loglevel(level):
return level in ["info", "debug", "warning", "error"]


def supported_checksum_local(checksum_types):
    """Checks if hash config option has a valid value.

    The value is accepted when ``modchecksum.checksum_types_from_str``
    returns a truthy result for it, given
    ``modchecksum.LOCAL_SUPPORTED_CHECKSUM_TYPES`` as the second argument.

    Args:
        checksum_types (list/string): hash name(s).

    Returns:
        bool: True if the helper's result is truthy, False otherwise.
    """
    # bool() collapses the former "if ...: return True / return False" pair
    # without changing the result.
    return bool(
        modchecksum.checksum_types_from_str(
            checksum_types, modchecksum.LOCAL_SUPPORTED_CHECKSUM_TYPES
        )
    )
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not simply:

return bool(modchecksum.checksum_types_from_str(...))

?



def supported_cloud(cloud):
"""Checks if obsoleted cloud option has a valid value.

Expand Down Expand Up @@ -106,7 +120,7 @@ def is_percent(val):
Returns:
bool: True if 0<=value<=100, False otherwise.
"""
return int(val) >= 0 and int(val) <= 100
return 0 <= int(val) <= 100


class Config(object): # pylint: disable=too-many-instance-attributes
Expand Down Expand Up @@ -145,6 +159,15 @@ class Config(object): # pylint: disable=too-many-instance-attributes
SECTION_CORE_ANALYTICS = "analytics"
SECTION_CORE_ANALYTICS_SCHEMA = BOOL_SCHEMA

SECTION_CHECKSUM = "checksum"
SECTION_CHECKSUM_LOCAL = "local"
SECTION_CHECKSUM_LOCAL_SCHEMA = supported_checksum_local
SECTION_CHECKSUM_SCHEMA = {
Optional(
SECTION_CHECKSUM_LOCAL, default=None
): SECTION_CHECKSUM_LOCAL_SCHEMA
}

SECTION_CACHE = "cache"
SECTION_CACHE_DIR = "dir"
SECTION_CACHE_TYPE = "type"
Expand Down Expand Up @@ -278,6 +301,7 @@ class Config(object): # pylint: disable=too-many-instance-attributes
Optional(Regex(SECTION_REMOTE_REGEX)): SECTION_REMOTE_SCHEMA,
Optional(SECTION_CACHE, default={}): SECTION_CACHE_SCHEMA,
Optional(SECTION_STATE, default={}): SECTION_STATE_SCHEMA,
Optional(SECTION_CHECKSUM, default={}): SECTION_CHECKSUM_SCHEMA,
# backward compatibility
Optional(SECTION_AWS, default={}): SECTION_AWS_SCHEMA,
Optional(SECTION_GCP, default={}): SECTION_GCP_SCHEMA,
Expand Down
22 changes: 3 additions & 19 deletions dvc/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import schema

from dvc.scheme import Schemes
from dvc.utils import checksum as modchecksum
from dvc.utils.compat import urlparse, str

from dvc.output.base import OutputBase
Expand All @@ -13,9 +14,6 @@
from dvc.output.ssh import OutputSSH

from dvc.remote import Remote
from dvc.remote.s3 import RemoteS3
from dvc.remote.hdfs import RemoteHDFS
from dvc.remote.local import RemoteLOCAL

OUTS = [
OutputHDFS,
Expand All @@ -33,23 +31,9 @@
Schemes.LOCAL: OutputLOCAL,
}

# NOTE: currently there are only 3 possible checksum names:
#
# 1) md5 (LOCAL, SSH, GS);
# 2) etag (S3);
# 3) checksum (HDFS);
#
# so when a few types of outputs share the same name, we only need
# specify it once.
CHECKSUM_SCHEMA = {
schema.Optional(RemoteLOCAL.PARAM_CHECKSUM): schema.Or(str, None),
schema.Optional(RemoteS3.PARAM_CHECKSUM): schema.Or(str, None),
schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None),
}

TAGS_SCHEMA = {schema.Optional(str): CHECKSUM_SCHEMA}
TAGS_SCHEMA = {schema.Optional(str): modchecksum.CHECKSUM_SCHEMA}

SCHEMA = CHECKSUM_SCHEMA.copy()
SCHEMA = modchecksum.CHECKSUM_SCHEMA.copy()
SCHEMA[OutputBase.PARAM_PATH] = str
SCHEMA[schema.Optional(OutputBase.PARAM_CACHE)] = bool
SCHEMA[schema.Optional(OutputBase.PARAM_METRIC)] = OutputBase.METRIC_SCHEMA
Expand Down
22 changes: 14 additions & 8 deletions dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,20 @@ def cache_path(self):
def sep(self):
return "/"

@property
def checksum_type(self):
    """Return the first remote checksum type that is a key of ``self.info``.

    Candidates are examined in the order produced by
    ``self.remote.checksum_types()``.

    Returns:
        The first candidate found in ``self.info``, or None when none of
        them is present.
    """
    candidates = self.remote.checksum_types()
    return next((name for name in candidates if name in self.info), None)

@property
def checksum(self):
return self.info.get(self.remote.PARAM_CHECKSUM)
for t in self.remote.checksum_types():
info = self.info.get(t)
if info:
return info
return None
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be:

return next((self.info[t] for t in self.remote.checksum_types() if t in self.info), None)
# or
return next(filter(bool, map(self.info.get, self.remote.checksum_types())), None) 

:)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or

return self.info[self.checksum]


@property
def is_dir_checksum(self):
Expand All @@ -154,18 +165,13 @@ def exists(self):
return self.remote.exists(self.path_info)

def changed_checksum(self):
return (
self.checksum
!= self.remote.save_info(self.path_info)[
self.remote.PARAM_CHECKSUM
]
)
return self.cache.changed_checksum(self.path_info, self.info)

def changed_cache(self):
if not self.use_cache or not self.checksum:
return True

return self.cache.changed_cache(self.checksum)
return self.cache.changed_cache(self.checksum, self.checksum_type)

def status(self):
if self.checksum and self.use_cache and self.changed_cache():
Expand Down
3 changes: 3 additions & 0 deletions dvc/remote/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
BlockBlobService = None

from dvc.utils import tmp_fname, move
from dvc.utils import checksum as modchecksum
from dvc.utils.compat import urlparse, makedirs
from dvc.progress import progress
from dvc.config import Config
Expand Down Expand Up @@ -44,6 +45,8 @@ class RemoteAZURE(RemoteBASE):
PARAM_CHECKSUM = "etag"
COPY_POLL_SECONDS = 5

SUPPORTED_CHECKSUM_TYPES = modchecksum.AZURE_SUPPORTED_CHECKSUM_TYPES

def __init__(self, repo, config):
super(RemoteAZURE, self).__init__(repo, config)

Expand Down
Loading