Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions dvc/dependency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from urllib.parse import urlparse

import dvc.output as output
from dvc.dependency.azure import AzureDependency
from dvc.dependency.gs import GSDependency
from dvc.dependency.hdfs import HDFSDependency
from dvc.dependency.http import HTTPDependency
Expand All @@ -17,6 +18,7 @@
from .repo import RepoDependency

DEPS = [
AzureDependency,
GSDependency,
HDFSDependency,
HTTPDependency,
Expand All @@ -30,6 +32,7 @@
Schemes.LOCAL: LocalDependency,
Schemes.SSH: SSHDependency,
Schemes.S3: S3Dependency,
Schemes.AZURE: AzureDependency,
Schemes.GS: GSDependency,
Schemes.HDFS: HDFSDependency,
Schemes.HTTP: HTTPDependency,
Expand Down
7 changes: 7 additions & 0 deletions dvc/dependency/azure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dvc.dependency.base import BaseDependency
from dvc.output.base import BaseOutput
from dvc.remote.azure import AzureRemote


class AzureDependency(BaseDependency, BaseOutput):
    """Dependency variant whose storage backend is ``AzureRemote``.

    The class adds no behaviour of its own: it combines ``BaseDependency``
    with ``BaseOutput`` and only selects the remote implementation.
    """

    # Remote class used to access the data behind this dependency.
    REMOTE = AzureRemote
15 changes: 11 additions & 4 deletions dvc/remote/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,8 @@ def __init__(self, repo, config):
self.path_info = self.path_cls(url)

if not self.path_info.bucket:
self.path_info = self.path_cls.from_parts(
scheme=self.scheme,
netloc=os.getenv("AZURE_STORAGE_CONTAINER_NAME"),
)
container = os.getenv("AZURE_STORAGE_CONTAINER_NAME")
self.path_info = self.path_cls(f"azure://{container}")

self.connection_string = config.get("connection_string") or os.getenv(
"AZURE_STORAGE_CONNECTION_STRING"
Expand All @@ -58,6 +56,15 @@ def blob_service(self):
blob_service.create_container(self.path_info.bucket)
return blob_service

def get_etag(self, path_info):
    """Return the ETag of the blob addressed by ``path_info``.

    The blob's properties are fetched through ``self.blob_service`` using
    ``path_info.bucket`` and ``path_info.path``; the ETag found there is
    returned with any leading/trailing double quotes removed.
    """
    blob = self.blob_service.get_blob_properties(
        path_info.bucket, path_info.path
    )
    raw_etag = blob.properties.etag
    # Only the bare value is wanted, so drop double quotes on either end.
    return raw_etag.strip('"')

def get_file_checksum(self, path_info):
    """Return the checksum for ``path_info``, which is the blob's ETag.

    This delegates entirely to ``get_etag``.
    """
    checksum = self.get_etag(path_info)
    return checksum

def remove(self, path_info):
if path_info.scheme != self.scheme:
raise NotImplementedError
Expand Down
7 changes: 7 additions & 0 deletions tests/unit/dependency/test_azure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from dvc.dependency.azure import AzureDependency
from tests.unit.dependency.test_local import TestLocalDependency


class TestAzureDependency(TestLocalDependency):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@steffansluis, are you sure it's running locally?

AzureRemote parses the path via a regex:
https://github.com/iterative/dvc/blob/865d355554b27be5ea49640c14556c46f7906327/dvc/remote/azure.py#L22-L27

And in TestLocalDependency, the path has the following form, which fails the regex parsing:
https://github.com/iterative/dvc/blob/f5055940a2f806ccdf8be0193115641effd69416/tests/unit/dependency/test_local.py#L14

The same thing is happening in TestAzureOutput. Maybe change the path that's passed to AzureRemote in the tests?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Having done `source tests/remotes_env`.)

Running `python -m tests tests/unit/output/test_azure.py tests/unit/dependency/test_azure.py` shows (among other things) the following output:

[gw1] [ 25%] PASSED tests/unit/output/test_azure.py::TestLocalOutput::test_save_missing                                                                                                       
[gw3] [ 50%] PASSED tests/unit/dependency/test_azure.py::TestLocalDependency::test_save_missing                                                                                               
[gw0] [ 75%] PASSED tests/unit/output/test_azure.py::TestAzureOutput::test_save_missing                                                                                                       
[gw2] [100%] PASSED tests/unit/dependency/test_azure.py::TestAzureDependency::test_save_missing                                                                                               

The tests are basic copies of the tests for S3: https://github.com/iterative/dvc/blob/f5055940a2f806ccdf8be0193115641effd69416/tests/unit/dependency/test_s3.py#L5-L7

And looking at the tests for the HDFS output (which also uses regexes), this should be fine?
https://github.com/iterative/dvc/blob/f5055940a2f806ccdf8be0193115641effd69416/tests/unit/dependency/test_hdfs.py#L5-L7

def _get_cls(self):
    """Return the class under test: ``AzureDependency``."""
    cls_under_test = AzureDependency
    return cls_under_test