Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 43 additions & 19 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,17 @@ bool AzureOptions::Equals(const AzureOptions& other) const {
return false;
}
switch (credential_kind_) {
case CredentialKind::kDefault:
case CredentialKind::kAnonymous:
return true;
case CredentialKind::kTokenCredential:
return token_credential_ == other.token_credential_;
case CredentialKind::kStorageSharedKeyCredential:
case CredentialKind::kStorageSharedKey:
return storage_shared_key_credential_->AccountName ==
other.storage_shared_key_credential_->AccountName;
case CredentialKind::kClientSecret:
case CredentialKind::kManagedIdentity:
case CredentialKind::kWorkloadIdentity:
return token_credential_->GetCredentialName() ==
other.token_credential_->GetCredentialName();
}
DCHECK(false);
return false;
Expand Down Expand Up @@ -113,8 +117,19 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}

Status AzureOptions::ConfigureDefaultCredential() {
credential_kind_ = CredentialKind::kDefault;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Status AzureOptions::ConfigureAnonymousCredential() {
credential_kind_ = CredentialKind::kAnonymous;
return Status::OK();
}

Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) {
credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
credential_kind_ = CredentialKind::kStorageSharedKey;
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account name");
}
Expand All @@ -126,27 +141,21 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_ke
Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret) {
credential_kind_ = CredentialKind::kTokenCredential;
credential_kind_ = CredentialKind::kClientSecret;
token_credential_ = std::make_shared<Azure::Identity::ClientSecretCredential>(
tenant_id, client_id, client_secret);
return Status::OK();
}

Status AzureOptions::ConfigureDefaultCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}

Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) {
credential_kind_ = CredentialKind::kTokenCredential;
credential_kind_ = CredentialKind::kManagedIdentity;
token_credential_ =
std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
return Status::OK();
}

Status AzureOptions::ConfigureWorkloadIdentityCredential() {
credential_kind_ = CredentialKind::kTokenCredential;
credential_kind_ = CredentialKind::kWorkloadIdentity;
token_credential_ = std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
return Status::OK();
}
Expand All @@ -158,11 +167,18 @@ Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceC
}
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name));
case CredentialKind::kDefault:
if (!token_credential_) {
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
}
[[fallthrough]];
case CredentialKind::kClientSecret:
case CredentialKind::kManagedIdentity:
case CredentialKind::kWorkloadIdentity:
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
case CredentialKind::kStorageSharedKey:
return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name),
storage_shared_key_credential_);
}
Expand All @@ -176,11 +192,19 @@ AzureOptions::MakeDataLakeServiceClient() const {
}
switch (credential_kind_) {
case CredentialKind::kAnonymous:
break;
case CredentialKind::kTokenCredential:
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name));
case CredentialKind::kDefault:
if (!token_credential_) {
token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
}
[[fallthrough]];
case CredentialKind::kClientSecret:
case CredentialKind::kManagedIdentity:
case CredentialKind::kWorkloadIdentity:
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name), token_credential_);
case CredentialKind::kStorageSharedKeyCredential:
case CredentialKind::kStorageSharedKey:
return std::make_unique<DataLake::DataLakeServiceClient>(
AccountDfsUrl(account_name), storage_shared_key_credential_);
}
Expand Down
37 changes: 26 additions & 11 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,23 @@ namespace arrow::fs {
class TestAzureFileSystem;

/// Options for the AzureFileSystem implementation.
///
/// By default, authentication is handled by the Azure SDK's credential chain
/// which may read from multiple environment variables, such as:
/// - `AZURE_TENANT_ID`
/// - `AZURE_CLIENT_ID`
/// - `AZURE_CLIENT_SECRET`
/// - `AZURE_AUTHORITY_HOST`
/// - `AZURE_CLIENT_CERTIFICATE_PATH`
/// - `AZURE_FEDERATED_TOKEN_FILE`
///
/// Functions are provided for explicit configuration of credentials if that is preferred.
struct ARROW_EXPORT AzureOptions {
/// \brief account name of the Azure Storage account.
/// \brief The name of the Azure Storage Account being accessed.
///
/// All service URLs will be constructed using this storage account name.
/// `ConfigureAccountKeyCredential` assumes the user wants to authenticate
/// this account.
std::string account_name;

/// \brief hostname[:port] of the Azure Blob Storage Service.
Expand Down Expand Up @@ -92,30 +107,30 @@ struct ARROW_EXPORT AzureOptions {

private:
enum class CredentialKind {
kDefault,
kAnonymous,
kTokenCredential,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;
kStorageSharedKey,
kClientSecret,
kManagedIdentity,
kWorkloadIdentity,
Comment on lines +112 to +115
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we really need to change this. I would rather just keep kTokenCredential to cover all the credentials that are based on https://github.com/Azure/azure-sdk-for-cpp/blob/e5e675440b44ace7d7a9e7bc303f877c06b59ea5/sdk/core/azure-core/inc/azure/core/credentials/credentials.hpp#L68

Copy link
Contributor Author

@felipecrv felipecrv Jan 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need this to support Equals. Think of it as runtime type-information that describes which concrete implementation of TokenCredential is being used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides that, we would need to make a distinction between kDefault and all the others kToken at least (to support the implicit default behavior). It's clearer if we then make a distinction on all of them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would have done it differently but I don't feel strongly.

} credential_kind_ = CredentialKind::kDefault;

std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;
mutable std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about always creating Azure::Identity::DefaultAzureCredential instead of using mutable?

diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index e98d56c73..5099cd25d 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -110,7 +110,6 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const {
 
 Status AzureOptions::ConfigureDefaultCredential() {
   credential_kind_ = CredentialKind::kDefault;
-  token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
   return Status::OK();
 }
 
@@ -160,10 +159,9 @@ Result<std::unique_ptr<Blobs::BlobServiceClient>> AzureOptions::MakeBlobServiceC
     case CredentialKind::kAnonymous:
       return std::make_unique<Blobs::BlobServiceClient>(AccountBlobUrl(account_name));
     case CredentialKind::kDefault:
-      if (!token_credential_) {
-        token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
-      }
-      [[fallthrough]];
+      return std::make_unique<Blobs::BlobServiceClient>(
+          AccountBlobUrl(account_name),
+          std::make_shared<Azure::Identity::DefaultAzureCredential>());
     case CredentialKind::kClientSecret:
     case CredentialKind::kManagedIdentity:
     case CredentialKind::kWorkloadIdentity:
@@ -186,10 +184,9 @@ AzureOptions::MakeDataLakeServiceClient() const {
       return std::make_unique<DataLake::DataLakeServiceClient>(
           AccountDfsUrl(account_name));
     case CredentialKind::kDefault:
-      if (!token_credential_) {
-        token_credential_ = std::make_shared<Azure::Identity::DefaultAzureCredential>();
-      }
-      [[fallthrough]];
+      return std::make_unique<DataLake::DataLakeServiceClient>(
+          AccountDfsUrl(account_name),
+          std::make_shared<Azure::Identity::DefaultAzureCredential>());
     case CredentialKind::kClientSecret:
     case CredentialKind::kManagedIdentity:
     case CredentialKind::kWorkloadIdentity:
diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h
index 55f89ba47..ba612f799 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -117,7 +117,7 @@ struct ARROW_EXPORT AzureOptions {
 
   std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
       storage_shared_key_credential_;
-  mutable std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
+  std::shared_ptr<Azure::Core::Credentials::TokenCredential> token_credential_;
 
  public:
   AzureOptions();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Creating DefaultAzureCredential is a lot of non-trivial work: env vars, multiple allocations and probes. We can't assume it to be a cheap operation. mutable is a common solution to lazily-initialized class members and this one has a very well-defined behavior: it's set once, then it doesn't change at all anymore.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And it's important that we do mutate it in ConfigureDefaultCredential because the user might want to explicitly initialize the credential immediately after doing something with the env variables used by the Azure SDK or right before unsetting the variables.


public:
AzureOptions();
~AzureOptions();

Status ConfigureDefaultCredential();

Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());

Status ConfigureWorkloadIdentityCredential();

Status ConfigureAnonymousCredential();
Status ConfigureAccountKeyCredential(const std::string& account_key);

Status ConfigureClientSecretCredential(const std::string& tenant_id,
const std::string& client_id,
const std::string& client_secret);
Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());
Status ConfigureWorkloadIdentityCredential();

bool Equals(const AzureOptions& other) const;

Expand Down
30 changes: 23 additions & 7 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -280,22 +280,38 @@ TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) {
ASSERT_RAISES(Invalid, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) {
TEST(AzureFileSystem, InitializeWithDefaultCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(
options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) {
TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
AzureOptions explictly_default_options;
explictly_default_options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(explictly_default_options.ConfigureDefaultCredential());
ASSERT_TRUE(options.Equals(explictly_default_options));
}

TEST(AzureFileSystem, InitializeWithAnonymousCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureAnonymousCredential());
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeWithClientSecretCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(
options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret"));
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) {
TEST(AzureFileSystem, InitializeWithManagedIdentityCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
Expand All @@ -305,7 +321,7 @@ TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) {
EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
}

TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) {
TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
Expand Down