From fdf593102bce8363f3e15a8f8e9f5eba4ce0509d Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 3 Jan 2024 22:00:42 -0300 Subject: [PATCH 1/6] GH-39449: [C++] Use default Azure credentials implicitly and support anonymous credentials explictly --- cpp/src/arrow/filesystem/azurefs.cc | 39 +++++++++++++++++------- cpp/src/arrow/filesystem/azurefs.h | 20 ++++++------ cpp/src/arrow/filesystem/azurefs_test.cc | 24 +++++++++++---- 3 files changed, 55 insertions(+), 28 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 9569eff2e47..eb7e363e629 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -73,7 +73,8 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } switch (credential_kind_) { - case CredentialKind::kAnonymous: + case CredentialKind::kDefaultCredential: + case CredentialKind::kAnonymousCredential: return true; case CredentialKind::kTokenCredential: return token_credential_ == other.token_credential_; @@ -113,6 +114,17 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } +Status AzureOptions::ConfigureDefaultCredential() { + credential_kind_ = CredentialKind::kDefaultCredential; + token_credential_ = std::make_shared(); + return Status::OK(); +} + +Status AzureOptions::ConfigureAnonymousCredential() { + credential_kind_ = CredentialKind::kAnonymousCredential; + return Status::OK(); +} + Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) { credential_kind_ = CredentialKind::kStorageSharedKeyCredential; if (account_name.empty()) { @@ -132,12 +144,6 @@ Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_i return Status::OK(); } -Status AzureOptions::ConfigureDefaultCredential() { - credential_kind_ = CredentialKind::kTokenCredential; - token_credential_ = std::make_shared(); - return Status::OK(); -} - Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = @@ -157,8 +163,13 @@ Result> AzureOptions::MakeBlobServiceC return Status::Invalid("AzureOptions doesn't contain a valid account name"); } switch (credential_kind_) { - case CredentialKind::kAnonymous: - break; + case CredentialKind::kAnonymousCredential: + return std::make_unique(AccountBlobUrl(account_name)); + case CredentialKind::kDefaultCredential: + if (!token_credential_) { + token_credential_ = std::make_shared(); + } + [[fallthrough]]; case CredentialKind::kTokenCredential: return std::make_unique(AccountBlobUrl(account_name), token_credential_); @@ -175,8 +186,14 @@ AzureOptions::MakeDataLakeServiceClient() const { return Status::Invalid("AzureOptions doesn't contain a valid account name"); } switch (credential_kind_) { - case CredentialKind::kAnonymous: - break; + case CredentialKind::kAnonymousCredential: + return std::make_unique( + AccountDfsUrl(account_name)); + case CredentialKind::kDefaultCredential: + if (!token_credential_) { + token_credential_ = std::make_shared(); + } + [[fallthrough]]; case CredentialKind::kTokenCredential: return std::make_unique( AccountDfsUrl(account_name), token_credential_); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 78e0a8148c6..a3a8331d60c 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -48,7 +48,7 @@ class TestAzureFileSystem; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - /// \brief account name of the Azure Storage account. + /// \brief Name of the Azure Storage Account. std::string account_name; /// \brief hostname[:port] of the Azure Blob Storage Service. @@ -92,30 +92,28 @@ struct ARROW_EXPORT AzureOptions { private: enum class CredentialKind { - kAnonymous, - kTokenCredential, + kDefaultCredential, + kAnonymousCredential, kStorageSharedKeyCredential, - } credential_kind_ = CredentialKind::kAnonymous; + kTokenCredential, + } credential_kind_ = CredentialKind::kDefaultCredential; - std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; + mutable std::shared_ptr token_credential_; public: AzureOptions(); ~AzureOptions(); Status ConfigureDefaultCredential(); - - Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); - - Status ConfigureWorkloadIdentityCredential(); - + Status ConfigureAnonymousCredential(); Status ConfigureAccountKeyCredential(const std::string& account_key); - Status ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret); + Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); + Status ConfigureWorkloadIdentityCredential(); bool Equals(const AzureOptions& other) const; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index ff94578b041..31f3b643200 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -280,22 +280,34 @@ TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) { ASSERT_RAISES(Invalid, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) { +TEST(AzureFileSystem, InitializeWithDefaultCredential) { AzureOptions options; options.account_name = "dummy-account-name"; - ARROW_EXPECT_OK( - options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { +TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) { AzureOptions options; options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); + + AzureOptions explictly_default_options; + explictly_default_options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(explictly_default_options.ConfigureDefaultCredential()); + ASSERT_TRUE(options.Equals(explictly_default_options)); +} + +TEST(AzureFileSystem, InitializeWithClientSecretCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { +TEST(AzureFileSystem, InitializeWithManagedIdentityCredential) { AzureOptions options; options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential()); @@ -305,7 +317,7 @@ TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options)); } -TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) { +TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) { AzureOptions options; options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential()); From 3e2adff00e99a9e03247b280fc84f875164d6b7a Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 3 Jan 2024 22:43:54 -0300 Subject: [PATCH 2/6] Add one enum entry per kind of credential configuration --- cpp/src/arrow/filesystem/azurefs.cc | 45 +++++++++++++++++------------ cpp/src/arrow/filesystem/azurefs.h | 12 ++++---- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index eb7e363e629..730adabd48b 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -73,14 +73,17 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } switch (credential_kind_) { - case CredentialKind::kDefaultCredential: - case CredentialKind::kAnonymousCredential: + case CredentialKind::kDefault: + case CredentialKind::kAnonymous: return true; - case CredentialKind::kTokenCredential: - return token_credential_ == other.token_credential_; - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return storage_shared_key_credential_->AccountName == other.storage_shared_key_credential_->AccountName; + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: + return token_credential_->GetCredentialName() == + other.token_credential_->GetCredentialName(); } DCHECK(false); return false; @@ -115,18 +118,18 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { } Status AzureOptions::ConfigureDefaultCredential() { - credential_kind_ = CredentialKind::kDefaultCredential; + credential_kind_ = CredentialKind::kDefault; token_credential_ = std::make_shared(); return Status::OK(); } Status AzureOptions::ConfigureAnonymousCredential() { - credential_kind_ = CredentialKind::kAnonymousCredential; + credential_kind_ = CredentialKind::kAnonymous; return Status::OK(); } Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) { - credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + credential_kind_ = CredentialKind::kStorageSharedKey; if (account_name.empty()) { return Status::Invalid("AzureOptions doesn't contain a valid account name"); } @@ -138,21 +141,21 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_ke Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret) { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kClientSecret; token_credential_ = std::make_shared( tenant_id, client_id, client_secret); return Status::OK(); } Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kManagedIdentity; token_credential_ = std::make_shared(client_id); return Status::OK(); } Status AzureOptions::ConfigureWorkloadIdentityCredential() { - credential_kind_ = CredentialKind::kTokenCredential; + credential_kind_ = CredentialKind::kWorkloadIdentity; token_credential_ = std::make_shared(); return Status::OK(); } @@ -163,17 +166,19 @@ Result> AzureOptions::MakeBlobServiceC return Status::Invalid("AzureOptions doesn't contain a valid account name"); } switch (credential_kind_) { - case CredentialKind::kAnonymousCredential: + case CredentialKind::kAnonymous: return std::make_unique(AccountBlobUrl(account_name)); - case CredentialKind::kDefaultCredential: + case CredentialKind::kDefault: if (!token_credential_) { token_credential_ = std::make_shared(); } [[fallthrough]]; - case CredentialKind::kTokenCredential: + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: return std::make_unique(AccountBlobUrl(account_name), token_credential_); - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return std::make_unique(AccountBlobUrl(account_name), storage_shared_key_credential_); } @@ -186,18 +191,20 @@ AzureOptions::MakeDataLakeServiceClient() const { return Status::Invalid("AzureOptions doesn't contain a valid account name"); } switch (credential_kind_) { - case CredentialKind::kAnonymousCredential: + case CredentialKind::kAnonymous: return std::make_unique( AccountDfsUrl(account_name)); - case CredentialKind::kDefaultCredential: + case CredentialKind::kDefault: if (!token_credential_) { token_credential_ = std::make_shared(); } [[fallthrough]]; - case CredentialKind::kTokenCredential: + case CredentialKind::kClientSecret: + case CredentialKind::kManagedIdentity: + case CredentialKind::kWorkloadIdentity: return std::make_unique( AccountDfsUrl(account_name), token_credential_); - case CredentialKind::kStorageSharedKeyCredential: + case CredentialKind::kStorageSharedKey: return std::make_unique( AccountDfsUrl(account_name), storage_shared_key_credential_); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index a3a8331d60c..011f1a884f3 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -92,11 +92,13 @@ struct ARROW_EXPORT AzureOptions { private: enum class CredentialKind { - kDefaultCredential, - kAnonymousCredential, - kStorageSharedKeyCredential, - kTokenCredential, - } credential_kind_ = CredentialKind::kDefaultCredential; + kDefault, + kAnonymous, + kStorageSharedKey, + kClientSecret, + kManagedIdentity, + kWorkloadIdentity, + } credential_kind_ = CredentialKind::kDefault; std::shared_ptr storage_shared_key_credential_; From ea37431b98759149b934938faf89401885a57189 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 4 Jan 2024 12:41:14 -0300 Subject: [PATCH 3/6] Expand explanation of AzureOptions::account_name --- cpp/src/arrow/filesystem/azurefs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 011f1a884f3..784c8a5a6e0 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -48,7 +48,11 @@ class TestAzureFileSystem; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - /// \brief Name of the Azure Storage Account. + /// \brief The name of the Azure Storage Account being accessed. + /// + /// All service URLs will be constructed using this storage account name. + /// `ConfigureAccountKeyCredential` assumes the user wants to authenticate + /// this account. std::string account_name; /// \brief hostname[:port] of the Azure Blob Storage Service. From 90d457dc0200d582db33bc975d428678fab0f22b Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 4 Jan 2024 12:42:55 -0300 Subject: [PATCH 4/6] Add test for ConfigureAnonymousCredential() --- cpp/src/arrow/filesystem/azurefs_test.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 31f3b643200..0c905f0409b 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -299,6 +299,13 @@ TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) { ASSERT_TRUE(options.Equals(explictly_default_options)); } +TEST(AzureFileSystem, InitializeWithAnonymousCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureAnonymousCredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeWithClientSecretCredential) { AzureOptions options; options.account_name = "dummy-account-name"; From 32d16900a5416e41b73efed759274591391061d5 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 4 Jan 2024 12:57:04 -0300 Subject: [PATCH 5/6] Explain authentication in the AzureOptions docstring --- cpp/src/arrow/filesystem/azurefs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 784c8a5a6e0..55f89ba4776 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -47,6 +47,17 @@ namespace arrow::fs { class TestAzureFileSystem; /// Options for the AzureFileSystem implementation. +/// +/// By default, authentication is handled by the Azure SDK's credential chain +/// which may read from multiple environment variables, such as: +/// - `AZURE_TENANT_ID` +/// - `AZURE_CLIENT_ID` +/// - `AZURE_CLIENT_SECRET` +/// - `AZURE_AUTHORITY_HOST` +/// - `AZURE_CLIENT_CERTIFICATE_PATH` +/// - `AZURE_FEDERATED_TOKEN_FILE` +/// +/// Functions are provided for explicit configuration of credentials if that is preferred. struct ARROW_EXPORT AzureOptions { /// \brief The name of the Azure Storage Account being accessed. /// From 3460eb7cfba41c8a4230044e11bf2cc8af232894 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 4 Jan 2024 21:12:59 -0300 Subject: [PATCH 6/6] Fix unit test Co-authored-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs_test.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 0c905f0409b..6104b04411b 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -290,9 +290,6 @@ TEST(AzureFileSystem, InitializeWithDefaultCredential) { TEST(AzureFileSystem, InitializeWithDefaultCredentialImplicitly) { AzureOptions options; options.account_name = "dummy-account-name"; - ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); - EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); - AzureOptions explictly_default_options; explictly_default_options.account_name = "dummy-account-name"; ARROW_EXPECT_OK(explictly_default_options.ConfigureDefaultCredential());