From 0640ed265d104cb0d489a92a08f20dffc412783f Mon Sep 17 00:00:00 2001 From: Steffan Sluis Date: Fri, 22 May 2020 11:46:40 +0200 Subject: [PATCH 1/3] azure: support external dependencies and outputs - Implement get_file_checksum with get_etag - Implement AzureDependency and AzureOutput classes - Add AzureOutput to OUTS, OUTS_MAP and CHECKSUMS_SCHEMA - Add AzureDependency to DEPS and DEPS_MAP - Add unit tests for AzureOutput and AzureDependency Fixes #3540. --- dvc/dependency/__init__.py | 3 +++ dvc/dependency/azure.py | 6 ++++++ dvc/output/__init__.py | 5 +++++ dvc/output/azure.py | 6 ++++++ dvc/remote/azure.py | 7 +++++++ tests/unit/dependency/test_azure.py | 7 +++++++ tests/unit/output/test_azure.py | 7 +++++++ 7 files changed, 41 insertions(+) create mode 100644 dvc/dependency/azure.py create mode 100644 dvc/output/azure.py create mode 100644 tests/unit/dependency/test_azure.py create mode 100644 tests/unit/output/test_azure.py diff --git a/dvc/dependency/__init__.py b/dvc/dependency/__init__.py index 45de5bf57d..1fa2073e24 100644 --- a/dvc/dependency/__init__.py +++ b/dvc/dependency/__init__.py @@ -2,6 +2,7 @@ from urllib.parse import urlparse import dvc.output as output +from dvc.dependency.azure import AzureDependency from dvc.dependency.gs import GSDependency from dvc.dependency.hdfs import HDFSDependency from dvc.dependency.http import HTTPDependency @@ -17,6 +18,7 @@ from .repo import RepoDependency DEPS = [ + AzureDependency, GSDependency, HDFSDependency, HTTPDependency, @@ -30,6 +32,7 @@ Schemes.LOCAL: LocalDependency, Schemes.SSH: SSHDependency, Schemes.S3: S3Dependency, + Schemes.AZURE: AzureDependency, Schemes.GS: GSDependency, Schemes.HDFS: HDFSDependency, Schemes.HTTP: HTTPDependency, diff --git a/dvc/dependency/azure.py b/dvc/dependency/azure.py new file mode 100644 index 0000000000..809e227fed --- /dev/null +++ b/dvc/dependency/azure.py @@ -0,0 +1,6 @@ +from dvc.dependency.base import BaseDependency +from dvc.output.azure import AzureOutput + + 
+class AzureDependency(BaseDependency, AzureOutput): + pass diff --git a/dvc/output/__init__.py b/dvc/output/__init__.py index 492b0e4862..c083868398 100644 --- a/dvc/output/__init__.py +++ b/dvc/output/__init__.py @@ -3,18 +3,21 @@ from voluptuous import And, Any, Coerce, Length, Lower, Required, SetTo from dvc.output.base import BaseOutput +from dvc.output.azure import AzureOutput from dvc.output.gs import GSOutput from dvc.output.hdfs import HDFSOutput from dvc.output.local import LocalOutput from dvc.output.s3 import S3Output from dvc.output.ssh import SSHOutput from dvc.remote import Remote +from dvc.remote.azure import AzureRemote from dvc.remote.hdfs import HDFSRemote from dvc.remote.local import LocalRemote from dvc.remote.s3 import S3Remote from dvc.scheme import Schemes OUTS = [ + AzureOutput, HDFSOutput, S3Output, GSOutput, @@ -23,6 +26,7 @@ ] OUTS_MAP = { + Schemes.AZURE: AzureOutput, Schemes.HDFS: HDFSOutput, Schemes.S3: S3Output, Schemes.GS: GSOutput, @@ -45,6 +49,7 @@ # so when a few types of outputs share the same name, we only need # specify it once. 
CHECKSUMS_SCHEMA = { + AzureRemote.PARAM_CHECKSUM: CHECKSUM_SCHEMA, LocalRemote.PARAM_CHECKSUM: CHECKSUM_SCHEMA, S3Remote.PARAM_CHECKSUM: CHECKSUM_SCHEMA, HDFSRemote.PARAM_CHECKSUM: CHECKSUM_SCHEMA, diff --git a/dvc/output/azure.py b/dvc/output/azure.py new file mode 100644 index 0000000000..a47ca3b8ea --- /dev/null +++ b/dvc/output/azure.py @@ -0,0 +1,6 @@ +from dvc.output.base import BaseOutput +from dvc.remote.azure import AzureRemote + + +class AzureOutput(BaseOutput): + REMOTE = AzureRemote diff --git a/dvc/remote/azure.py b/dvc/remote/azure.py index e39ba2a061..0253014668 100644 --- a/dvc/remote/azure.py +++ b/dvc/remote/azure.py @@ -81,6 +81,13 @@ def blob_service(self): blob_service.create_container(self.path_info.bucket) return blob_service + def get_etag(self, path_info): + etag = self.blob_service.get_blob_properties(path_info.bucket, path_info.path).properties.etag + return etag.strip('"') + + def get_file_checksum(self, path_info): + return self.get_etag(path_info) + def remove(self, path_info): if path_info.scheme != self.scheme: raise NotImplementedError diff --git a/tests/unit/dependency/test_azure.py b/tests/unit/dependency/test_azure.py new file mode 100644 index 0000000000..fff7910615 --- /dev/null +++ b/tests/unit/dependency/test_azure.py @@ -0,0 +1,7 @@ +from dvc.dependency.azure import AzureDependency +from tests.unit.dependency.test_local import TestLocalDependency + + +class TestAzureDependency(TestLocalDependency): + def _get_cls(self): + return AzureDependency diff --git a/tests/unit/output/test_azure.py b/tests/unit/output/test_azure.py new file mode 100644 index 0000000000..2564f94690 --- /dev/null +++ b/tests/unit/output/test_azure.py @@ -0,0 +1,7 @@ +from dvc.output.azure import AzureOutput +from tests.unit.output.test_local import TestLocalOutput + + +class TestAzureOutput(TestLocalOutput): + def _get_cls(self): + return AzureOutput From 0cd3bda7fb622e186c1a5fbd56343c8cd200850f Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Fri, 22 
May 2020 11:05:33 +0000 Subject: [PATCH 2/3] Restyled by black --- dvc/remote/azure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dvc/remote/azure.py b/dvc/remote/azure.py index 0253014668..873934df42 100644 --- a/dvc/remote/azure.py +++ b/dvc/remote/azure.py @@ -82,7 +82,9 @@ def blob_service(self): return blob_service def get_etag(self, path_info): - etag = self.blob_service.get_blob_properties(path_info.bucket, path_info.path).properties.etag + etag = self.blob_service.get_blob_properties( + path_info.bucket, path_info.path + ).properties.etag return etag.strip('"') def get_file_checksum(self, path_info): From 728381f61619129be155cfe1eadee7e26a81d68b Mon Sep 17 00:00:00 2001 From: "Restyled.io" Date: Fri, 22 May 2020 11:05:35 +0000 Subject: [PATCH 3/3] Restyled by isort --- dvc/output/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dvc/output/__init__.py b/dvc/output/__init__.py index c083868398..0d3a7650b6 100644 --- a/dvc/output/__init__.py +++ b/dvc/output/__init__.py @@ -2,8 +2,8 @@ from voluptuous import And, Any, Coerce, Length, Lower, Required, SetTo -from dvc.output.base import BaseOutput from dvc.output.azure import AzureOutput +from dvc.output.base import BaseOutput from dvc.output.gs import GSOutput from dvc.output.hdfs import HDFSOutput from dvc.output.local import LocalOutput