From 704b69ec8b5861d745bea0f5d6301cfc3f7c0729 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 3 Mar 2024 18:28:40 +0900 Subject: [PATCH 1/9] GH-40028: [C++][FS][Azure] Add AzureFileSystem support to FileSystemFromUri() --- cpp/src/arrow/filesystem/azurefs.cc | 127 +++++++++++++ cpp/src/arrow/filesystem/azurefs.h | 8 + cpp/src/arrow/filesystem/azurefs_test.cc | 223 +++++++++++++++++++++++ cpp/src/arrow/filesystem/filesystem.cc | 18 +- 4 files changed, 373 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8ae33b88188..8df8f0ee1aa 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -65,6 +65,133 @@ AzureOptions::AzureOptions() = default; AzureOptions::~AzureOptions() = default; +Result AzureOptions::FromUri(const arrow::internal::Uri& uri, + std::string* out_path) { + AzureOptions options; + const auto host = uri.host(); + std::string container; + std::string path; + if (arrow::internal::EndsWith(host, options.blob_storage_authority)) { + options.account_name = + host.substr(0, host.size() - options.blob_storage_authority.size()); + auto components = internal::SplitAbstractPath(uri.path()); + if (!components.empty()) { + container = components[0]; + path = internal::JoinAbstractPath(components.begin() + 1, components.end()); + } + } else if (arrow::internal::EndsWith(host, options.dfs_storage_authority)) { + options.account_name = + host.substr(0, host.size() - options.dfs_storage_authority.size()); + container = uri.username(); + path = uri.path(); + } else { + options.account_name = uri.username(); + std::string host_port = host; + const auto port_text = uri.port_text(); + if (!port_text.empty()) { + host_port += ":" + port_text; + } + options.blob_storage_authority = host_port; + options.dfs_storage_authority = host_port; + if (uri.scheme() == "abfs") { + options.blob_storage_scheme = "http"; + options.dfs_storage_scheme = "http"; + } + auto 
components = internal::SplitAbstractPath(uri.path()); + if (!components.empty()) { + container = components[0]; + path = internal::JoinAbstractPath(components.begin() + 1, components.end()); + } + } + const auto account_key = uri.password(); + + if (container.empty()) { + return Status::Invalid("Missing container name in Azure Blob File System URI"); + } + if (out_path != nullptr) { + *out_path = std::string(internal::ConcatAbstractPath(container, path)); + } + + std::unordered_map options_map; + ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); + for (const auto& kv : options_items) { + options_map.emplace(kv.first, kv.second); + } + + CredentialKind credential_kind = options.account_name.empty() + ? CredentialKind::kAnonymous + : CredentialKind::kDefault; + std::string tenant_id; + std::string client_id; + std::string client_secret; + for (const auto& kv : options_map) { + if (kv.first == "blob_storage_authority") { + options.blob_storage_authority = kv.second; + } else if (kv.first == "dfs_storage_authority") { + options.dfs_storage_authority = kv.second; + } else if (kv.first == "blob_storage_scheme") { + options.blob_storage_scheme = kv.second; + } else if (kv.first == "dfs_storage_scheme") { + options.dfs_storage_scheme = kv.second; + } else if (kv.first == "credential_kind") { + if (kv.second == "default") { + credential_kind = CredentialKind::kDefault; + } else if (kv.second == "anonymous") { + credential_kind = CredentialKind::kAnonymous; + } else if (kv.second == "storage_shared_key") { + credential_kind = CredentialKind::kStorageSharedKey; + } else if (kv.second == "client_secret") { + credential_kind = CredentialKind::kClientSecret; + } else if (kv.second == "managed_identity") { + credential_kind = CredentialKind::kManagedIdentity; + } else if (kv.second == "workload_identity") { + credential_kind = CredentialKind::kWorkloadIdentity; + } else { + return Status::Invalid("Unexpected credential_kind: '", kv.second, "'"); + } + } else if 
(kv.first == "tenant_id") { + tenant_id = kv.second; + } else if (kv.first == "client_id") { + client_id = kv.second; + } else if (kv.first == "client_secret") { + client_secret = kv.second; + } else { + return Status::Invalid( + "Unexpected query parameter in Azure Blob File System URI: '", kv.first, "'"); + } + } + + switch (credential_kind) { + case CredentialKind::kDefault: + break; + case CredentialKind::kAnonymous: + RETURN_NOT_OK(options.ConfigureAnonymousCredential()); + break; + case CredentialKind::kStorageSharedKey: + RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_key)); + break; + case CredentialKind::kClientSecret: + RETURN_NOT_OK( + options.ConfigureClientSecretCredential(tenant_id, client_id, client_secret)); + break; + case CredentialKind::kManagedIdentity: + RETURN_NOT_OK(options.ConfigureManagedIdentityCredential(client_id)); + break; + case CredentialKind::kWorkloadIdentity: + RETURN_NOT_OK(options.ConfigureWorkloadIdentityCredential()); + break; + } + + return options; +} + +Result AzureOptions::FromUri(const std::string& uri_string, + std::string* out_path) { + arrow::internal::Uri uri; + RETURN_NOT_OK(uri.Parse(uri_string)); + return FromUri(uri, out_path); +} + bool AzureOptions::Equals(const AzureOptions& other) const { // TODO(GH-38598): update here when more auth methods are added. const bool equals = blob_storage_authority == other.blob_storage_authority && diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 2a131e40c05..704759d0389 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -45,6 +45,7 @@ class DataLakeServiceClient; namespace arrow::fs { class TestAzureFileSystem; +class TestAzureOptions; /// Options for the AzureFileSystem implementation. /// @@ -59,6 +60,8 @@ class TestAzureFileSystem; /// /// Functions are provided for explicit configuration of credentials if that is preferred. 
struct ARROW_EXPORT AzureOptions { + friend class TestAzureOptions; + /// \brief The name of the Azure Storage Account being accessed. /// /// All service URLs will be constructed using this storage account name. @@ -123,6 +126,11 @@ struct ARROW_EXPORT AzureOptions { AzureOptions(); ~AzureOptions(); + /// Initialize from URIs such as "abfs://container/blog". + static Result FromUri(const arrow::internal::Uri& uri, + std::string* out_path); + static Result FromUri(const std::string& uri, std::string* out_path); + Status ConfigureDefaultCredential(); Status ConfigureAnonymousCredential(); Status ConfigureAccountKeyCredential(const std::string& account_key); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index f21876f03cc..7efc64b605b 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -336,6 +336,229 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } +class TestAzureOptions : public ::testing::Test { + public: + void TestFromUriBlobStorage() { + AzureOptions default_options; + std::string path; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob", + &path)); + ASSERT_EQ(options.account_name, "account"); + ASSERT_EQ(options.blob_storage_authority, default_options.blob_storage_authority); + ASSERT_EQ(options.dfs_storage_authority, default_options.dfs_storage_authority); + ASSERT_EQ(options.blob_storage_scheme, default_options.blob_storage_scheme); + ASSERT_EQ(options.dfs_storage_scheme, default_options.dfs_storage_scheme); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(path, "container/dir/blob"); + } + + void TestFromUriBlobStorageEmptyContainer() { + ASSERT_RAISES( + Invalid, AzureOptions::FromUri("abfs://account.blob.core.windows.net/", nullptr)); + } + + void TestFromUriDfsStorage() { + AzureOptions default_options; + 
std::string path; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://file_system@account.dfs.core.windows.net/dir/file", + &path)); + ASSERT_EQ(options.account_name, "account"); + ASSERT_EQ(options.blob_storage_authority, default_options.blob_storage_authority); + ASSERT_EQ(options.dfs_storage_authority, default_options.dfs_storage_authority); + ASSERT_EQ(options.blob_storage_scheme, default_options.blob_storage_scheme); + ASSERT_EQ(options.dfs_storage_scheme, default_options.dfs_storage_scheme); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(path, "file_system/dir/file"); + } + + void TestFromUriDfsStorageEmptyContainer() { + ASSERT_RAISES(Invalid, AzureOptions::FromUri( + "abfs://account.dfs.core.windows.net/dir/file", nullptr)); + } + + void TestFromUriAbfs() { + std::string path; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri( + "abfs://account:password@127.0.0.1:10000/container/dir/blob", &path)); + ASSERT_EQ(options.account_name, "account"); + ASSERT_EQ(options.blob_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.blob_storage_scheme, "http"); + ASSERT_EQ(options.dfs_storage_scheme, "http"); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(path, "container/dir/blob"); + } + + void TestFromUriAbfsEmptyContainer() { + ASSERT_RAISES(Invalid, AzureOptions::FromUri( + "abfs://account:password@127.0.0.1:10000/", nullptr)); + } + + void TestFromUriAbfss() { + std::string path; + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri( + "abfss://account:password@127.0.0.1:10000/container/dir/blob", &path)); + ASSERT_EQ(options.account_name, "account"); + ASSERT_EQ(options.blob_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.blob_storage_scheme, "https"); + ASSERT_EQ(options.dfs_storage_scheme, 
"https"); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(path, "container/dir/blob"); + } + + void TestFromUriCredentialDefault() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=default", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + } + + void TestFromUriCredentialAnonymous() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=anonymous", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kAnonymous); + } + + void TestFromUriCredentialStorageSharedKey() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=storage_shared_key", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kStorageSharedKey); + } + + void TestFromUriCredentialClientSecret() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=client_secret&" + "tenant_id=tenant-id&" + "client_id=client-id&" + "client_secret=client-secret", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kClientSecret); + } + + void TestFromUriCredentialManagedIdentity() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "credential_kind=managed_identity&" + "client_id=client-id", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kManagedIdentity); + } + + void TestFromUriCredentialWorkloadIdentity() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" 
+ "credential_kind=workload_identity", + nullptr)); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kWorkloadIdentity); + } + + void TestFromUriCredentialInvalid() { + ASSERT_RAISES(Invalid, AzureOptions::FromUri( + "abfs://file_system@account.dfs.core.windows.net/dir/file?" + "credential_kind=invalid", + nullptr)); + } + void TestFromUriBlobStorageAuthority() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "blob_storage_authority=.blob.local", + nullptr)); + ASSERT_EQ(options.blob_storage_authority, ".blob.local"); + } + + void TestFromUriDfsStorageAuthority() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://file_system@account.dfs.core.windows.net/dir/file?" + "dfs_storage_authority=.dfs.local", + nullptr)); + ASSERT_EQ(options.dfs_storage_authority, ".dfs.local"); + } + + void TestFromUriBlobStorageScheme() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" + "blob_storage_scheme=http", + nullptr)); + ASSERT_EQ(options.blob_storage_scheme, "http"); + } + + void TestFromUriDfsStorageScheme() { + ASSERT_OK_AND_ASSIGN( + auto options, + AzureOptions::FromUri("abfs://file_system@account.dfs.core.windows.net/dir/file?" + "dfs_storage_scheme=http", + nullptr)); + ASSERT_EQ(options.dfs_storage_scheme, "http"); + } + + void TestFromUriInvalidQueryParameter() { + ASSERT_RAISES(Invalid, AzureOptions::FromUri( + "abfs://file_system@account.dfs.core.windows.net/dir/file?" 
+ "unknown=invalid", + nullptr)); + } +}; + +TEST_F(TestAzureOptions, FromUriBlobStorage) { TestFromUriBlobStorage(); } +TEST_F(TestAzureOptions, FromUriBlobStorageEmptyContainer) { + TestFromUriBlobStorageEmptyContainer(); +} +TEST_F(TestAzureOptions, FromUriDfsStorage) { TestFromUriDfsStorage(); } +TEST_F(TestAzureOptions, FromUriDfsStorageEmptyContainer) { + TestFromUriDfsStorageEmptyContainer(); +} +TEST_F(TestAzureOptions, FromUriAbfs) { TestFromUriAbfs(); } +TEST_F(TestAzureOptions, FromUriAbfsEmptyContainer) { TestFromUriAbfsEmptyContainer(); } +TEST_F(TestAzureOptions, FromUriAbfss) { TestFromUriAbfss(); } +TEST_F(TestAzureOptions, FromUriCredentialDefault) { TestFromUriCredentialDefault(); } +TEST_F(TestAzureOptions, FromUriCredentialAnonymous) { TestFromUriCredentialAnonymous(); } +TEST_F(TestAzureOptions, FromUriCredentialStorageSharedKey) { + TestFromUriCredentialStorageSharedKey(); +} +TEST_F(TestAzureOptions, FromUriCredentialClientSecret) { + TestFromUriCredentialClientSecret(); +} +TEST_F(TestAzureOptions, FromUriCredentialManagedIdentity) { + TestFromUriCredentialManagedIdentity(); +} +TEST_F(TestAzureOptions, FromUriCredentialWorkloadIdentity) { + TestFromUriCredentialWorkloadIdentity(); +} +TEST_F(TestAzureOptions, FromUriCredentialInvalid) { TestFromUriCredentialInvalid(); } +TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) { + TestFromUriBlobStorageAuthority(); +} +TEST_F(TestAzureOptions, FromUriDfsStorageAuthority) { TestFromUriDfsStorageAuthority(); } +TEST_F(TestAzureOptions, FromUriBlobStorageScheme) { TestFromUriBlobStorageScheme(); } +TEST_F(TestAzureOptions, FromUriDfsStorageScheme) { TestFromUriDfsStorageScheme(); } +TEST_F(TestAzureOptions, FromUriInvalidQueryParameter) { + TestFromUriInvalidQueryParameter(); +} + struct PreexistingData { public: using RNG = random::pcg32_fast; diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 810e9c179b1..1fb74d41298 100644 --- 
a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -21,12 +21,15 @@ #include "arrow/util/config.h" #include "arrow/filesystem/filesystem.h" -#ifdef ARROW_HDFS -#include "arrow/filesystem/hdfs.h" +#ifdef ARROW_AZURE +#include "arrow/filesystem/azurefs.h" #endif #ifdef ARROW_GCS #include "arrow/filesystem/gcsfs.h" #endif +#ifdef ARROW_HDFS +#include "arrow/filesystem/hdfs.h" +#endif #ifdef ARROW_S3 #include "arrow/filesystem/s3fs.h" #endif @@ -690,6 +693,16 @@ Result> FileSystemFromUriReal(const Uri& uri, } return std::make_shared(options, io_context); } + if (scheme == "abfs" || scheme == "abfss") { +#ifdef ARROW_AZURE + ARROW_ASSIGN_OR_RAISE(auto options, AzureOptions::FromUri(uri, out_path)); + return AzureFileSystem::Make(options, io_context); +#else + return Status::NotImplemented( + "Got Azure Blob File System URI but Arrow compiled without Azure Blob File " + "System support"); +#endif + } if (scheme == "gs" || scheme == "gcs") { #ifdef ARROW_GCS ARROW_ASSIGN_OR_RAISE(auto options, GcsOptions::FromUri(uri, out_path)); @@ -698,7 +711,6 @@ Result> FileSystemFromUriReal(const Uri& uri, return Status::NotImplemented("Got GCS URI but Arrow compiled without GCS support"); #endif } - if (scheme == "hdfs" || scheme == "viewfs") { #ifdef ARROW_HDFS ARROW_ASSIGN_OR_RAISE(auto options, HdfsOptions::FromUri(uri)); From 9e7d75e5b9afa37a336162478964465271d71c24 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 4 Mar 2024 05:29:59 +0900 Subject: [PATCH 2/9] Update example URIs --- cpp/src/arrow/filesystem/azurefs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 704759d0389..335f950f32a 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -126,7 +126,9 @@ struct ARROW_EXPORT AzureOptions { AzureOptions(); ~AzureOptions(); - /// Initialize from URIs such as "abfs://container/blog". 
+ /// Initialize from URIs such as + /// "abfs://account.blob.core.windows.net/container/dir/blob" and + /// "abfs://file_system@account.dfs.core.windows.net/dir/file". static Result FromUri(const arrow::internal::Uri& uri, std::string* out_path); static Result FromUri(const std::string& uri, std::string* out_path); From 81076bc19631ab371a71eb8f3915c28402b82dfc Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 4 Mar 2024 05:35:21 +0900 Subject: [PATCH 3/9] Add the document URI for abfs:// and abfss:// --- cpp/src/arrow/filesystem/filesystem.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 1fb74d41298..c787117fe80 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -693,6 +693,11 @@ Result> FileSystemFromUriReal(const Uri& uri, } return std::make_shared(options, io_context); } + /// "abfs" and "abfss" schemes are taken from + /// the Azure Data Lake Storage Gen2 URI: + /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri + /// + /// abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>/<file_name> if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE ARROW_ASSIGN_OR_RAISE(auto options, AzureOptions::FromUri(uri, out_path)); return AzureFileSystem::Make(options, io_context); From fb692ded5b728d26adecd88e22964cebf4ace11e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 5 Mar 2024 14:09:20 +0900 Subject: [PATCH 4/9] Remove needless empty container check --- cpp/src/arrow/filesystem/azurefs.cc | 3 --- cpp/src/arrow/filesystem/azurefs_test.cc | 22 ---------------------- 2 files changed, 25 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8df8f0ee1aa..8e3fe951c60 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -105,9 +105,6 @@ Result AzureOptions::FromUri(const arrow::internal::Uri& uri, } const auto account_key = uri.password(); - if 
(container.empty()) { - return Status::Invalid("Missing container name in Azure Blob File System URI"); - } if (out_path != nullptr) { *out_path = std::string(internal::ConcatAbstractPath(container, path)); } diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 7efc64b605b..f1c14f3550c 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -354,11 +354,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(path, "container/dir/blob"); } - void TestFromUriBlobStorageEmptyContainer() { - ASSERT_RAISES( - Invalid, AzureOptions::FromUri("abfs://account.blob.core.windows.net/", nullptr)); - } - void TestFromUriDfsStorage() { AzureOptions default_options; std::string path; @@ -375,11 +370,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(path, "file_system/dir/file"); } - void TestFromUriDfsStorageEmptyContainer() { - ASSERT_RAISES(Invalid, AzureOptions::FromUri( - "abfs://account.dfs.core.windows.net/dir/file", nullptr)); - } - void TestFromUriAbfs() { std::string path; ASSERT_OK_AND_ASSIGN( @@ -395,11 +385,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(path, "container/dir/blob"); } - void TestFromUriAbfsEmptyContainer() { - ASSERT_RAISES(Invalid, AzureOptions::FromUri( - "abfs://account:password@127.0.0.1:10000/", nullptr)); - } - void TestFromUriAbfss() { std::string path; ASSERT_OK_AND_ASSIGN( @@ -524,15 +509,8 @@ class TestAzureOptions : public ::testing::Test { }; TEST_F(TestAzureOptions, FromUriBlobStorage) { TestFromUriBlobStorage(); } -TEST_F(TestAzureOptions, FromUriBlobStorageEmptyContainer) { - TestFromUriBlobStorageEmptyContainer(); -} TEST_F(TestAzureOptions, FromUriDfsStorage) { TestFromUriDfsStorage(); } -TEST_F(TestAzureOptions, FromUriDfsStorageEmptyContainer) { - TestFromUriDfsStorageEmptyContainer(); -} TEST_F(TestAzureOptions, FromUriAbfs) { TestFromUriAbfs(); } -TEST_F(TestAzureOptions, 
FromUriAbfsEmptyContainer) { TestFromUriAbfsEmptyContainer(); } TEST_F(TestAzureOptions, FromUriAbfss) { TestFromUriAbfss(); } TEST_F(TestAzureOptions, FromUriCredentialDefault) { TestFromUriCredentialDefault(); } TEST_F(TestAzureOptions, FromUriCredentialAnonymous) { TestFromUriCredentialAnonymous(); } From 376017f54d8c2be7915042e3cf53effa1ef13aee Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 5 Mar 2024 14:10:47 +0900 Subject: [PATCH 5/9] Remove needless options_map --- cpp/src/arrow/filesystem/azurefs.cc | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8e3fe951c60..cef4c7c1a93 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -109,19 +109,14 @@ Result AzureOptions::FromUri(const arrow::internal::Uri& uri, *out_path = std::string(internal::ConcatAbstractPath(container, path)); } - std::unordered_map options_map; - ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); - for (const auto& kv : options_items) { - options_map.emplace(kv.first, kv.second); - } - CredentialKind credential_kind = options.account_name.empty() ? CredentialKind::kAnonymous : CredentialKind::kDefault; std::string tenant_id; std::string client_id; std::string client_secret; - for (const auto& kv : options_map) { + ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); + for (const auto& kv : options_items) { if (kv.first == "blob_storage_authority") { options.blob_storage_authority = kv.second; } else if (kv.first == "dfs_storage_authority") { From b8e6b837fe22ccecc81c9384f75bede681d4f1d6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 7 Mar 2024 11:55:53 +0900 Subject: [PATCH 6/9] Implement discussed spec Supported formats: 1. abfs[s]://[:<password>@]<account>.blob.core.windows.net[/<container>[/<path>]] 2. abfs[s]://<container>[:<password>]@<account>.dfs.core.windows.net[/path] 3. abfs[s]://[<account[:<password>]@]<host[.domain]>[<:port>][/<container>[/path]] 4. 
abfs[s]://[<account[:<password>]@]<container>[/path] Added query parameters: * enable_tls: It replaces blob_storage_scheme and dfs_storage_scheme parameters. Removed query parameters: * blob_storage_scheme: Replaced with enable_tls. * dfs_storage_scheme: Replaced with enable_tls. Changed query parameters: * credential_kind: Accepts only "default", "anonymous" and "workload_identity". --- cpp/src/arrow/filesystem/azurefs.cc | 185 ++++++++++++++--------- cpp/src/arrow/filesystem/azurefs.h | 49 +++++- cpp/src/arrow/filesystem/azurefs_test.cc | 53 +++---- cpp/src/arrow/filesystem/filesystem.cc | 5 - 4 files changed, 186 insertions(+), 106 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index cef4c7c1a93..6fd5679a339 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -65,115 +65,162 @@ AzureOptions::AzureOptions() = default; AzureOptions::~AzureOptions() = default; -Result AzureOptions::FromUri(const arrow::internal::Uri& uri, - std::string* out_path) { - AzureOptions options; +void AzureOptions::ExtractFromUriSchemeAndHierPart(const arrow::internal::Uri& uri, + std::string* out_path) { const auto host = uri.host(); - std::string container; std::string path; - if (arrow::internal::EndsWith(host, options.blob_storage_authority)) { - options.account_name = - host.substr(0, host.size() - options.blob_storage_authority.size()); - auto components = internal::SplitAbstractPath(uri.path()); - if (!components.empty()) { - container = components[0]; - path = internal::JoinAbstractPath(components.begin() + 1, components.end()); - } - } else if (arrow::internal::EndsWith(host, options.dfs_storage_authority)) { - options.account_name = - host.substr(0, host.size() - options.dfs_storage_authority.size()); - container = uri.username(); - path = uri.path(); + if (arrow::internal::EndsWith(host, blob_storage_authority)) { + account_name = host.substr(0, host.size() - blob_storage_authority.size()); + path = 
internal::RemoveLeadingSlash(uri.path()); + } else if (arrow::internal::EndsWith(host, dfs_storage_authority)) { + account_name = host.substr(0, host.size() - dfs_storage_authority.size()); + path = internal::ConcatAbstractPath(uri.username(), uri.path()); } else { - options.account_name = uri.username(); - std::string host_port = host; + account_name = uri.username(); const auto port_text = uri.port_text(); - if (!port_text.empty()) { - host_port += ":" + port_text; - } - options.blob_storage_authority = host_port; - options.dfs_storage_authority = host_port; - if (uri.scheme() == "abfs") { - options.blob_storage_scheme = "http"; - options.dfs_storage_scheme = "http"; - } - auto components = internal::SplitAbstractPath(uri.path()); - if (!components.empty()) { - container = components[0]; - path = internal::JoinAbstractPath(components.begin() + 1, components.end()); + if (host.find(".") == std::string::npos && port_text.empty()) { + // abfs://container/dir/file + path = internal::ConcatAbstractPath(host, uri.path()); + } else { + // abfs://host.domain/container/dir/file + // abfs://host.domain:port/container/dir/file + // abfs://host:port/container/dir/file + std::string host_port = host; + if (!port_text.empty()) { + host_port += ":" + port_text; + } + blob_storage_authority = host_port; + dfs_storage_authority = host_port; + path = internal::RemoveLeadingSlash(uri.path()); } } - const auto account_key = uri.password(); - if (out_path != nullptr) { - *out_path = std::string(internal::ConcatAbstractPath(container, path)); + *out_path = path; } +} - CredentialKind credential_kind = options.account_name.empty() - ? 
CredentialKind::kAnonymous - : CredentialKind::kDefault; +Status AzureOptions::ExtractFromUriQuery(const arrow::internal::Uri& uri) { + const auto account_key = uri.password(); + std::optional credential_kind; + std::optional credential_kind_value; std::string tenant_id; std::string client_id; std::string client_secret; ARROW_ASSIGN_OR_RAISE(const auto options_items, uri.query_items()); for (const auto& kv : options_items) { if (kv.first == "blob_storage_authority") { - options.blob_storage_authority = kv.second; + blob_storage_authority = kv.second; } else if (kv.first == "dfs_storage_authority") { - options.dfs_storage_authority = kv.second; - } else if (kv.first == "blob_storage_scheme") { - options.blob_storage_scheme = kv.second; - } else if (kv.first == "dfs_storage_scheme") { - options.dfs_storage_scheme = kv.second; + dfs_storage_authority = kv.second; } else if (kv.first == "credential_kind") { if (kv.second == "default") { credential_kind = CredentialKind::kDefault; } else if (kv.second == "anonymous") { credential_kind = CredentialKind::kAnonymous; - } else if (kv.second == "storage_shared_key") { - credential_kind = CredentialKind::kStorageSharedKey; - } else if (kv.second == "client_secret") { - credential_kind = CredentialKind::kClientSecret; - } else if (kv.second == "managed_identity") { - credential_kind = CredentialKind::kManagedIdentity; } else if (kv.second == "workload_identity") { credential_kind = CredentialKind::kWorkloadIdentity; } else { + // Other credential kinds should be inferred from the given + // parameters automatically. 
return Status::Invalid("Unexpected credential_kind: '", kv.second, "'"); } + credential_kind_value = kv.second; } else if (kv.first == "tenant_id") { tenant_id = kv.second; } else if (kv.first == "client_id") { client_id = kv.second; } else if (kv.first == "client_secret") { client_secret = kv.second; + } else if (kv.first == "enable_tls") { + ARROW_ASSIGN_OR_RAISE(auto enable_tls, ::arrow::internal::ParseBoolean(kv.second)); + if (enable_tls) { + blob_storage_scheme = "https"; + dfs_storage_scheme = "https"; + } else { + blob_storage_scheme = "http"; + dfs_storage_scheme = "http"; + } } else { return Status::Invalid( "Unexpected query parameter in Azure Blob File System URI: '", kv.first, "'"); } } - switch (credential_kind) { - case CredentialKind::kDefault: - break; - case CredentialKind::kAnonymous: - RETURN_NOT_OK(options.ConfigureAnonymousCredential()); - break; - case CredentialKind::kStorageSharedKey: - RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_key)); - break; - case CredentialKind::kClientSecret: - RETURN_NOT_OK( - options.ConfigureClientSecretCredential(tenant_id, client_id, client_secret)); - break; - case CredentialKind::kManagedIdentity: - RETURN_NOT_OK(options.ConfigureManagedIdentityCredential(client_id)); - break; - case CredentialKind::kWorkloadIdentity: - RETURN_NOT_OK(options.ConfigureWorkloadIdentityCredential()); - break; + if (credential_kind) { + if (!account_key.empty()) { + return Status::Invalid("Password must not be specified with credential_kind=", + *credential_kind_value); + } + if (!tenant_id.empty()) { + return Status::Invalid("tenant_id must not be specified with credential_kind=", + *credential_kind_value); + } + if (!client_id.empty()) { + return Status::Invalid("client_id must not be specified with credential_kind=", + *credential_kind_value); + } + if (!client_secret.empty()) { + return Status::Invalid("client_secret must not be specified with credential_kind=", + *credential_kind_value); + } + + switch 
(*credential_kind) { + case CredentialKind::kAnonymous: + RETURN_NOT_OK(ConfigureAnonymousCredential()); + break; + case CredentialKind::kWorkloadIdentity: + RETURN_NOT_OK(ConfigureWorkloadIdentityCredential()); + break; + default: + // Default credential + break; + } + } else { + if (!account_key.empty()) { + // With password + if (!tenant_id.empty()) { + return Status::Invalid("tenant_id must not be specified with password"); + } + if (!client_id.empty()) { + return Status::Invalid("client_id must not be specified with password"); + } + if (!client_secret.empty()) { + return Status::Invalid("client_secret must not be specified with password"); + } + RETURN_NOT_OK(ConfigureAccountKeyCredential(account_key)); + } else { + // Without password + if (tenant_id.empty() && client_id.empty() && client_secret.empty()) { + // No related parameters + if (account_name.empty()) { + RETURN_NOT_OK(ConfigureAnonymousCredential()); + } else { + // Default credential + } + } else { + // One or more tenant_id, client_id or client_secret are specified + if (client_id.empty()) { + return Status::Invalid("client_id must be specified"); + } + if (tenant_id.empty() && client_secret.empty()) { + RETURN_NOT_OK(ConfigureManagedIdentityCredential(client_id)); + } else if (!tenant_id.empty() && !client_secret.empty()) { + RETURN_NOT_OK( + ConfigureClientSecretCredential(tenant_id, client_id, client_secret)); + } else { + return Status::Invalid("Both of tenant_id and client_secret must be specified"); + } + } + } } + return Status::OK(); +} +Result AzureOptions::FromUri(const arrow::internal::Uri& uri, + std::string* out_path) { + AzureOptions options; + options.ExtractFromUriSchemeAndHierPart(uri, out_path); + RETURN_NOT_OK(options.ExtractFromUriQuery(uri)); return options; } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 335f950f32a..8ff3f0485fc 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -126,9 
+126,52 @@ struct ARROW_EXPORT AzureOptions { AzureOptions(); ~AzureOptions(); - /// Initialize from URIs such as - /// "abfs://account.blob.core.windows.net/container/dir/blob" and - /// "abfs://file_system@account.dfs.core.windows.net/dir/file". + private: + void ExtractFromUriSchemeAndHierPart(const arrow::internal::Uri& uri, + std::string* out_path); + Status ExtractFromUriQuery(const arrow::internal::Uri& uri); + + public: + /// \brief Construct a new AzureOptions from an URI. + /// + /// Supported formats: + /// + /// 1. abfs[s]://[:@].blob.core.windows.net[/[/]] + /// 2. abfs[s]://[:]@.dfs.core.windows.net[/path] + /// 3. abfs[s]://[]@][<:port>][/[/path]] + /// 4. abfs[s]://[]@][/path] + /// + /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: + /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri + /// + /// 3. is for Azure Blob Storage compatible service including Azurite. + /// + /// 4. is a shorter version of 1. and 2. + /// + /// Note that there is no difference between abfs and abfss. HTTPS is + /// used with abfs by default. You can force to use HTTP by specifying + /// "enable_tls=false" query. + /// + /// Supported query parameters: + /// + /// * blob_storage_authority: Set AzureOptions::blob_storage_authority + /// * dfs_storage_authority: Set AzureOptions::dfs_storage_authority + /// * enable_tls: If it's "false" or "0", HTTP not HTTPS is used. + /// * credential_kind: One of "default", "anonymous", + /// "workload_identity". If "default" is specified, it's just + /// ignored. If "anonymous" is specified, + /// AzureOptions::ConfigureAnonymousCredential() is called. If + /// "workload_identity" is specified, + /// AzureOptions::ConfigureWorkloadIdentityCredential() is called. + /// * tenant_id: You must specify "client_id" and "client_secret" + /// too. AzureOptions::ConfigureClientSecretCredential() is called. 
+ /// * client_id: If you don't specify "tenant_id" and + /// "client_secret", + /// AzureOptions::ConfigureManagedIdentityCredential() is + /// called. If you specify "tenant_id" and "client_secret" too, + /// AzureOptions::ConfigureClientSecretCredential() is called. + /// * client_secret: You must specify "tenant_id" and "client_id" + /// too. AzureOptions::ConfigureClientSecretCredential() is called. static Result FromUri(const arrow::internal::Uri& uri, std::string* out_path); static Result FromUri(const std::string& uri, std::string* out_path); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index f1c14f3550c..0ce84043a53 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -379,9 +379,9 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.account_name, "account"); ASSERT_EQ(options.blob_storage_authority, "127.0.0.1:10000"); ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); - ASSERT_EQ(options.blob_storage_scheme, "http"); - ASSERT_EQ(options.dfs_storage_scheme, "http"); - ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(options.blob_storage_scheme, "https"); + ASSERT_EQ(options.dfs_storage_scheme, "https"); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kStorageSharedKey); ASSERT_EQ(path, "container/dir/blob"); } @@ -396,7 +396,23 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); ASSERT_EQ(options.blob_storage_scheme, "https"); ASSERT_EQ(options.dfs_storage_scheme, "https"); - ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kDefault); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kStorageSharedKey); + ASSERT_EQ(path, "container/dir/blob"); + } + + void TestFromUriEnableTls() { + std::string path; + ASSERT_OK_AND_ASSIGN(auto options, + AzureOptions::FromUri( + 
"abfs://account:password@127.0.0.1:10000/container/dir/blob?" + "enable_tls=false", + &path)); + ASSERT_EQ(options.account_name, "account"); + ASSERT_EQ(options.blob_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.dfs_storage_authority, "127.0.0.1:10000"); + ASSERT_EQ(options.blob_storage_scheme, "http"); + ASSERT_EQ(options.dfs_storage_scheme, "http"); + ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kStorageSharedKey); ASSERT_EQ(path, "container/dir/blob"); } @@ -421,9 +437,9 @@ class TestAzureOptions : public ::testing::Test { void TestFromUriCredentialStorageSharedKey() { ASSERT_OK_AND_ASSIGN( auto options, - AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" - "credential_kind=storage_shared_key", - nullptr)); + AzureOptions::FromUri( + "abfs://:password@account.blob.core.windows.net/container/dir/blob", + nullptr)); ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kStorageSharedKey); } @@ -431,7 +447,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_OK_AND_ASSIGN( auto options, AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" - "credential_kind=client_secret&" "tenant_id=tenant-id&" "client_id=client-id&" "client_secret=client-secret", @@ -443,7 +458,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_OK_AND_ASSIGN( auto options, AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" - "credential_kind=managed_identity&" "client_id=client-id", nullptr)); ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kManagedIdentity); @@ -482,24 +496,6 @@ class TestAzureOptions : public ::testing::Test { ASSERT_EQ(options.dfs_storage_authority, ".dfs.local"); } - void TestFromUriBlobStorageScheme() { - ASSERT_OK_AND_ASSIGN( - auto options, - AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?" 
- "blob_storage_scheme=http", - nullptr)); - ASSERT_EQ(options.blob_storage_scheme, "http"); - } - - void TestFromUriDfsStorageScheme() { - ASSERT_OK_AND_ASSIGN( - auto options, - AzureOptions::FromUri("abfs://file_system@account.dfs.core.windows.net/dir/file?" - "dfs_storage_scheme=http", - nullptr)); - ASSERT_EQ(options.dfs_storage_scheme, "http"); - } - void TestFromUriInvalidQueryParameter() { ASSERT_RAISES(Invalid, AzureOptions::FromUri( "abfs://file_system@account.dfs.core.windows.net/dir/file?" @@ -512,6 +508,7 @@ TEST_F(TestAzureOptions, FromUriBlobStorage) { TestFromUriBlobStorage(); } TEST_F(TestAzureOptions, FromUriDfsStorage) { TestFromUriDfsStorage(); } TEST_F(TestAzureOptions, FromUriAbfs) { TestFromUriAbfs(); } TEST_F(TestAzureOptions, FromUriAbfss) { TestFromUriAbfss(); } +TEST_F(TestAzureOptions, FromUriEnableTls) { TestFromUriEnableTls(); } TEST_F(TestAzureOptions, FromUriCredentialDefault) { TestFromUriCredentialDefault(); } TEST_F(TestAzureOptions, FromUriCredentialAnonymous) { TestFromUriCredentialAnonymous(); } TEST_F(TestAzureOptions, FromUriCredentialStorageSharedKey) { @@ -531,8 +528,6 @@ TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) { TestFromUriBlobStorageAuthority(); } TEST_F(TestAzureOptions, FromUriDfsStorageAuthority) { TestFromUriDfsStorageAuthority(); } -TEST_F(TestAzureOptions, FromUriBlobStorageScheme) { TestFromUriBlobStorageScheme(); } -TEST_F(TestAzureOptions, FromUriDfsStorageScheme) { TestFromUriDfsStorageScheme(); } TEST_F(TestAzureOptions, FromUriInvalidQueryParameter) { TestFromUriInvalidQueryParameter(); } diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index c787117fe80..1fb74d41298 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -693,11 +693,6 @@ Result> FileSystemFromUriReal(const Uri& uri, } return std::make_shared(options, io_context); } - /// "abfs" and "abfss" schemes are taken from - /// the Azure Data Lake 
Storage Gen2 URI: - /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri - /// - /// abfs[s]://@.dfs.core.windows.net// if (scheme == "abfs" || scheme == "abfss") { #ifdef ARROW_AZURE ARROW_ASSIGN_OR_RAISE(auto options, AzureOptions::FromUri(uri, out_path)); From 5927be8b14e8891383a8a1e967b07a39fb5bc477 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 7 Mar 2024 22:05:14 +0900 Subject: [PATCH 7/9] Escape special characters --- cpp/src/arrow/filesystem/azurefs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 8ff3f0485fc..d5d2411d271 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -136,10 +136,10 @@ struct ARROW_EXPORT AzureOptions { /// /// Supported formats: /// - /// 1. abfs[s]://[:@].blob.core.windows.net[/[/]] - /// 2. abfs[s]://[:]@.dfs.core.windows.net[/path] - /// 3. abfs[s]://[]@][<:port>][/[/path]] - /// 4. abfs[s]://[]@][/path] + /// 1. abfs[s]://[:\@]\.blob.core.windows.net[/\[/\]] + /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net[/path] + /// 3. abfs[s]://[\]@]\[\<:port\>][/\[/path]] + /// 4. abfs[s]://[\]@]\[/path] /// /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri From 0a76645a66a7c622b38fb60114ac8c80f2b1c7ee Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 7 Mar 2024 22:40:21 +0900 Subject: [PATCH 8/9] Fix lint error --- cpp/src/arrow/filesystem/azurefs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index d5d2411d271..fd937b94650 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -136,9 +136,11 @@ struct ARROW_EXPORT AzureOptions { /// /// Supported formats: /// - /// 1. 
abfs[s]://[:\@]\.blob.core.windows.net[/\[/\]] + /// 1. + /// abfs[s]://[:\@]\.blob.core.windows.net[/\[/\]] /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net[/path] - /// 3. abfs[s]://[\]@]\[\<:port\>][/\[/path]] + /// 3. + /// abfs[s]://[\]@]\[\<:port\>][/\[/path]] /// 4. abfs[s]://[\]@]\[/path] /// /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: From 58d948f15e06c3c79d4346cb38e5376b2d31c652 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 8 Mar 2024 13:41:41 +0900 Subject: [PATCH 9/9] Fix lint error --- cpp/src/arrow/filesystem/azurefs.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index fd937b94650..6218bf574e8 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -136,11 +136,12 @@ struct ARROW_EXPORT AzureOptions { /// /// Supported formats: /// - /// 1. - /// abfs[s]://[:\@]\.blob.core.windows.net[/\[/\]] - /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net[/path] - /// 3. - /// abfs[s]://[\]@]\[\<:port\>][/\[/path]] + /// 1. abfs[s]://[:\@]\.blob.core.windows.net + /// [/\[/\]] + /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net + /// [/path] + /// 3. abfs[s]://[\]@]\[\<:port\>] + /// [/\[/path]] /// 4. abfs[s]://[\]@]\[/path] /// /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: