From 2d0b1833961f8eca9176d7797afe35d97c4748bf Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Apr 2024 16:21:09 +0900 Subject: [PATCH 1/4] GH-41095: [C++][FS][Azure] Add support for CopyFile with hierarchical namespace support We need to add SAS (Shared Access Signatures) token for source URL. --- cpp/src/arrow/filesystem/azurefs.cc | 45 ++++++++++++++++++++++++----- cpp/src/arrow/filesystem/azurefs.h | 7 +++++ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index bb8309a2474..8d7e62c1e80 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -277,6 +277,13 @@ std::string BuildBaseUrl(const std::string& scheme, const std::string& authority url += "/"; return url; } + +template +Status ExceptionToStatus(const Storage::StorageException& exception, + PrefixArgs&&... prefix_args) { + return Status::IOError(std::forward(prefix_args)..., " Azure Error: [", + exception.ErrorCode, "] ", exception.what()); +} } // namespace std::string AzureOptions::AccountBlobUrl(const std::string& account_name) const { @@ -381,6 +388,24 @@ AzureOptions::MakeDataLakeServiceClient() const { return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } +Result AzureOptions::GenerateSASToken( + Storage::Sas::BlobSasBuilder* builder) const { + if (storage_shared_key_credential_) { + return builder->GenerateSasToken(*storage_shared_key_credential_); + } else { + // This part isn't tested. This may not work. + ARROW_ASSIGN_OR_RAISE(auto client, MakeBlobServiceClient()); + try { + auto delegation_key_response = client->GetUserDelegationKey(builder->ExpiresOn); + + return builder->GenerateSasToken(delegation_key_response.Value, account_name); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus(exception, "GetUserDelegationKey failed for '", + client->GetUrl(), "'."); + } + } +} + namespace { // An AzureFileSystem represents an Azure storage account. An AzureLocation describes a @@ -450,13 +475,6 @@ struct AzureLocation { } }; -template -Status ExceptionToStatus(const Storage::StorageException& exception, - PrefixArgs&&... prefix_args) { - return Status::IOError(std::forward(prefix_args)..., " Azure Error: [", - exception.ErrorCode, "] ", exception.what()); -} - Status PathNotFound(const AzureLocation& location) { return ::arrow::fs::internal::PathNotFound(location.all); } @@ -2868,8 +2886,19 @@ class AzureFileSystem::Impl { if (src == dest) { return Status::OK(); } + std::string sas_token; + { + Storage::Sas::BlobSasBuilder builder; + std::chrono::seconds available_period(60); + builder.ExpiresOn = std::chrono::system_clock::now() + available_period; + builder.BlobContainerName = src.container; + builder.BlobName = src.path; + builder.Resource = Storage::Sas::BlobSasResource::Blob; + builder.SetPermissions(Storage::Sas::BlobSasPermissions::Read); + ARROW_ASSIGN_OR_RAISE(sas_token, options_.GenerateSASToken(&builder)); + } + auto src_url = GetBlobClient(src.container, src.path).GetUrl() + sas_token; auto dest_blob_client = GetBlobClient(dest.container, dest.path); - auto src_url = GetBlobClient(src.container, src.path).GetUrl(); if (!dest.path.empty()) { auto dest_parent = dest.parent(); if (!dest_parent.path.empty()) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 350014954f0..5c75c31cc87 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -37,6 +37,10 @@ namespace Azure::Storage::Blobs { class BlobServiceClient; } +namespace Azure::Storage::Sas { +class BlobSasBuilder; +} + namespace Azure::Storage::Files::DataLake { class DataLakeFileSystemClient; class DataLakeServiceClient; @@ -196,6 +200,9 @@ struct ARROW_EXPORT AzureOptions { Result> MakeDataLakeServiceClient() const; + + Result GenerateSASToken( + Azure::Storage::Sas::BlobSasBuilder* builder) const; }; /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and From 7fa1fbf5fb0b289710376db91050a21c4a296b13 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 19 Apr 2024 06:23:49 +0900 Subject: [PATCH 2/4] Pass Blobs::BlobServiceClient --- cpp/src/arrow/filesystem/azurefs.cc | 6 +++--- cpp/src/arrow/filesystem/azurefs.h | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8d7e62c1e80..f6c0cc4f4c7 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -389,12 +389,11 @@ AzureOptions::MakeDataLakeServiceClient() const { } Result AzureOptions::GenerateSASToken( - Storage::Sas::BlobSasBuilder* builder) const { + Storage::Sas::BlobSasBuilder* builder, Blobs::BlobServiceClient* client) const { if (storage_shared_key_credential_) { return builder->GenerateSasToken(*storage_shared_key_credential_); } else { // This part isn't tested. This may not work. - ARROW_ASSIGN_OR_RAISE(auto client, MakeBlobServiceClient()); try { auto delegation_key_response = client->GetUserDelegationKey(builder->ExpiresOn); @@ -2895,7 +2894,8 @@ class AzureFileSystem::Impl { builder.BlobName = src.path; builder.Resource = Storage::Sas::BlobSasResource::Blob; builder.SetPermissions(Storage::Sas::BlobSasPermissions::Read); - ARROW_ASSIGN_OR_RAISE(sas_token, options_.GenerateSASToken(&builder)); + ARROW_ASSIGN_OR_RAISE( + sas_token, options_.GenerateSASToken(&builder, blob_service_client_.get())); } auto src_url = GetBlobClient(src.container, src.path).GetUrl() + sas_token; auto dest_blob_client = GetBlobClient(dest.container, dest.path); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 5c75c31cc87..667b4e372ae 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -38,7 +38,7 @@ class BlobServiceClient; } namespace Azure::Storage::Sas { -class BlobSasBuilder; +struct BlobSasBuilder; } namespace Azure::Storage::Files::DataLake { @@ -202,7 +202,8 @@ struct ARROW_EXPORT AzureOptions { MakeDataLakeServiceClient() const; Result GenerateSASToken( - Azure::Storage::Sas::BlobSasBuilder* builder) const; + Azure::Storage::Sas::BlobSasBuilder* builder, + Azure::Storage::Blobs::BlobServiceClient* client) const; }; /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and From 6631c57ce7e34a8597b9dca85c78f35cd22c99eb Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 21 Apr 2024 12:37:22 +0900 Subject: [PATCH 3/4] Add TODO --- cpp/src/arrow/filesystem/azurefs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index f6c0cc4f4c7..2dfdbb7a740 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -393,7 +393,7 @@ Result AzureOptions::GenerateSASToken( if (storage_shared_key_credential_) { return builder->GenerateSasToken(*storage_shared_key_credential_); } else { - // This part isn't tested. This may not work. + // GH-39344: This part isn't tested. This may not work. try { auto delegation_key_response = client->GetUserDelegationKey(builder->ExpiresOn); From a0be2e8cf7f1c46d57d6a89cf091e3ef2387b521 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 21 Apr 2024 12:38:07 +0900 Subject: [PATCH 4/4] Remove needless new line --- cpp/src/arrow/filesystem/azurefs.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 2dfdbb7a740..ac563b13458 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -396,7 +396,6 @@ Result AzureOptions::GenerateSASToken( // GH-39344: This part isn't tested. This may not work. try { auto delegation_key_response = client->GetUserDelegationKey(builder->ExpiresOn); - return builder->GenerateSasToken(delegation_key_response.Value, account_name); } catch (const Storage::StorageException& exception) { return ExceptionToStatus(exception, "GetUserDelegationKey failed for '",