Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions dvc/remote/azure.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import os
import threading
from datetime import datetime, timedelta

Expand All @@ -16,23 +15,38 @@
class AzureRemoteTree(BaseRemoteTree):
scheme = Schemes.AZURE
PATH_CLS = CloudURLInfo
REQUIRES = {"azure-storage-blob": "azure.storage.blob"}
REQUIRES = {
"azure-storage-blob": "azure.storage.blob",
"azure-cli-core": "azure.cli.core",
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried that this dependency is too heavy; I might go back to the plain knack solution instead.

}
PARAM_CHECKSUM = "etag"
COPY_POLL_SECONDS = 5
LIST_OBJECT_PAGE_SIZE = 5000

def __init__(self, repo, config):
from azure.cli.core import get_default_cli

super().__init__(repo, config)

# NOTE: az_config takes care of env vars
az_config = get_default_cli().config

url = config.get("url", "azure://")
self.path_info = self.PATH_CLS(url)

if not self.path_info.bucket:
container = os.getenv("AZURE_STORAGE_CONTAINER_NAME")
container = az_config.get("storage", "container_name", None)
self.path_info = self.PATH_CLS(f"azure://{container}")

self.connection_string = config.get("connection_string") or os.getenv(
"AZURE_STORAGE_CONNECTION_STRING"
self._conn_kwargs = {
opt: config.get(opt) or az_config.get("storage", opt, None)
for opt in ["connection_string", "sas_token"]
}
self._conn_kwargs["account_name"] = az_config.get(
"storage", "account", None
)
self._conn_kwargs["account_key"] = az_config.get(
"storage", "key", None
)

@wrap_prop(threading.Lock())
Expand All @@ -43,10 +57,8 @@ def blob_service(self):
from azure.common import AzureMissingResourceHttpError

logger.debug(f"URL {self.path_info}")
logger.debug(f"Connection string {self.connection_string}")
blob_service = BlockBlobService(
connection_string=self.connection_string
)
logger.debug(f"Connection options {self._conn_kwargs}")
blob_service = BlockBlobService(**self._conn_kwargs)
logger.debug(f"Container name {self.path_info.bucket}")
try: # verify that container exists
blob_service.list_blobs(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def run(self):
gs = ["google-cloud-storage==1.19.0"]
gdrive = ["pydrive2>=1.4.14"]
s3 = ["boto3>=1.9.201"]
azure = ["azure-storage-blob==2.1.0"]
azure = ["azure-storage-blob==2.1.0", "azure-cli-core>=2.0.70"]
oss = ["oss2==2.6.1"]
ssh = ["paramiko>=2.5.0"]
hdfs = ["pyarrow>=0.17.0"]
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/remote/test_azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_init_env_var(monkeypatch, dvc):
config = {"url": "azure://"}
tree = AzureRemoteTree(dvc, config)
assert tree.path_info == "azure://" + container_name
assert tree.connection_string == connection_string
assert tree._conn_kwargs["connection_string"] == connection_string


def test_init(dvc):
Expand All @@ -29,7 +29,7 @@ def test_init(dvc):
config = {"url": url, "connection_string": connection_string}
tree = AzureRemoteTree(dvc, config)
assert tree.path_info == url
assert tree.connection_string == connection_string
assert tree._conn_kwargs["connection_string"] == connection_string


def test_get_file_hash(tmp_dir):
Expand Down