From c8a0d8bd2fd3fa687da92a80e9c7576fe2fe92a9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 8 Dec 2023 17:50:46 -0300 Subject: [PATCH 01/26] Introduce BodyTextView() and IsContainerNotFound() --- cpp/src/arrow/filesystem/azurefs.cc | 97 ++++++++++++++++------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 824a8fb5314..2455ebfc1e8 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -66,9 +66,8 @@ Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_n namespace { -// An AzureFileSystem represents a single Azure storage -// account. AzureLocation describes a container and path within -// that storage account. +// An AzureFileSystem represents an Azure storage account. An AzureLocation describes a +// container in that storage account and a path within that container. struct AzureLocation { std::string all; std::string container; @@ -82,8 +81,8 @@ struct AzureLocation { // path_parts = [testdir, testfile.txt] if (internal::IsLikelyUri(string)) { return Status::Invalid( - "Expected an Azure object location of the form 'container/path...', got a URI: " - "'", + "Expected an Azure object location of the form 'container/path...'," + " got a URI: '", string, "'"); } auto first_sep = string.find_first_of(internal::kSep); @@ -130,11 +129,7 @@ struct AzureLocation { private: Status Validate() { auto status = internal::ValidateAbstractPathParts(path_parts); - if (!status.ok()) { - return Status::Invalid(status.message(), " in location ", all); - } else { - return status; - } + return status.ok() ? status : Status::Invalid(status.message(), " in location ", all); } }; @@ -153,23 +148,41 @@ Status ValidateFileLocation(const AzureLocation& location) { if (location.path.empty()) { return NotAFile(location); } - ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path)); - return Status::OK(); + return internal::AssertNoTrailingSlash(location.path); +} + +std::string_view BodyTextView(const Azure::Core::Http::RawResponse& raw_response) { + const auto& body = raw_response.GetBody(); +#ifndef NDEBUG + auto& headers = raw_response.GetHeaders(); + auto content_type = headers.find("Content-Type"); + if (content_type != headers.end()) { + DCHECK_EQ(std::string_view{content_type->second}.substr(5), "text/"); + } +#endif + return std::string_view{reinterpret_cast(body.data()), body.size()}; } Status StatusFromErrorResponse(const std::string& url, - Azure::Core::Http::RawResponse* raw_response, + const Azure::Core::Http::RawResponse& raw_response, const std::string& context) { - const auto& body = raw_response->GetBody(); // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that // error response body has useful information for the error. - std::string_view body_text(reinterpret_cast(body.data()), body.size()); - return Status::IOError(context, ": ", url, ": ", raw_response->GetReasonPhrase(), " (", - static_cast(raw_response->GetStatusCode()), + auto body_text = BodyTextView(raw_response); + return Status::IOError(context, ": ", url, ": ", raw_response.GetReasonPhrase(), " (", + static_cast(raw_response.GetStatusCode()), "): ", body_text); } +bool IsContainerNotFound(const Azure::Storage::StorageException& exception) { + if (exception.ErrorCode == "ContainerNotFound") { + DCHECK_EQ(exception.StatusCode, Azure::Core::Http::HttpStatusCode::NotFound); + return true; + } + return false; +} + template std::string FormatValue(typename TypeTraits::CType value) { struct StringAppender { @@ -701,7 +714,7 @@ class AzureFileSystem::Impl { AzureOptions options_; internal::HierarchicalNamespaceDetector hierarchical_namespace_; - explicit Impl(AzureOptions options, io::IOContext io_context) + Impl(AzureOptions options, io::IOContext io_context) : io_context_(io_context), options_(std::move(options)) {} Status Init() { @@ -710,8 +723,7 @@ class AzureFileSystem::Impl { datalake_service_client_ = std::make_unique( options_.account_dfs_url, options_.storage_credentials_provider); - RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get())); - return Status::OK(); + return hierarchical_namespace_.Init(datalake_service_client_.get()); } const AzureOptions& options() const { return options_; } @@ -722,12 +734,10 @@ class AzureFileSystem::Impl { info.set_path(location.all); if (location.container.empty()) { - // The location is invalid if the container is empty but not - // path. + // The location is invalid if the container is empty but the path is not. DCHECK(location.path.empty()); - // The location must refer to the root of the Azure storage - // account. This is a directory, and there isn't any extra - // metadata to fetch. + // This location must be derived from the root path. FileInfo should describe it + // as a directory and there isn't any extra metadata to fetch. info.set_type(FileType::Directory); return info; } @@ -739,10 +749,10 @@ class AzureFileSystem::Impl { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + if (IsContainerNotFound(exception)) { info.set_type(FileType::NotFound); return info; } @@ -753,6 +763,8 @@ class AzureFileSystem::Impl { exception); } } + + // There is a path to search within the container. auto file_client = datalake_service_client_->GetFileSystemClient(location.container) .GetFileClient(location.path); try { @@ -770,7 +782,7 @@ class AzureFileSystem::Impl { info.set_size(properties.Value.FileSize); } info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; } catch (const Azure::Storage::StorageException& exception) { if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { @@ -785,11 +797,9 @@ class AzureFileSystem::Impl { // On flat namespace accounts there are no real directories. Directories are only // implied by using `/` in the blob name. Azure::Storage::Blobs::ListBlobsOptions list_blob_options; - // If listing the prefix `path.path_to_file` with trailing slash returns at least // one result then `path` refers to an implied directory. - auto prefix = internal::EnsureTrailingSlash(location.path); - list_blob_options.Prefix = prefix; + list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); // We only need to know if there is at least one result, so minimise page size // for efficiency. list_blob_options.PageSizeHint = 1; @@ -798,15 +808,13 @@ class AzureFileSystem::Impl { auto paged_list_result = blob_service_client_->GetBlobContainerClient(location.container) .ListBlobs(list_blob_options); - if (paged_list_result.Blobs.size() > 0) { - info.set_type(FileType::Directory); - } else { - info.set_type(FileType::NotFound); - } + auto file_type = paged_list_result.Blobs.size() > 0 ? FileType::Directory + : FileType::NotFound; + info.set_type(file_type); return info; } catch (const Azure::Storage::StorageException& exception) { return internal::ExceptionToStatus( - "ListBlobs for '" + prefix + + "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the path should be considered an implied directory.", exception); @@ -840,7 +848,7 @@ class AzureFileSystem::Impl { return Status::OK(); } - static FileInfo FileInfoFromBlob(const std::string& container, + static FileInfo FileInfoFromBlob(std::string_view container, const Azure::Storage::Blobs::Models::BlobItem& blob) { auto path = internal::ConcatAbstractPath(container, blob.Name); if (internal::HasTrailingSlash(blob.Name)) { @@ -852,7 +860,7 @@ class AzureFileSystem::Impl { return info; } - static FileInfo DirectoryFileInfoFromPath(const std::string& path) { + static FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -965,7 +973,7 @@ class AzureFileSystem::Impl { } } } catch (const Azure::Storage::StorageException& exception) { - if (exception.ErrorCode == "ContainerNotFound") { + if (IsContainerNotFound(exception)) { found = false; } else { return internal::ExceptionToStatus( @@ -1070,7 +1078,7 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to create a container: " + location.container); } } catch (const Azure::Storage::StorageException& exception) { @@ -1098,8 +1106,7 @@ class AzureFileSystem::Impl { if (response.Value.Created) { return Status::OK(); } else { - return StatusFromErrorResponse(directory_client.GetUrl(), - response.RawResponse.get(), + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, "Failed to create a directory: " + location.path); } } catch (const Azure::Storage::StorageException& exception) { @@ -1264,7 +1271,7 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to delete a container: " + location.container); } } catch (const Azure::Storage::StorageException& exception) { @@ -1287,7 +1294,7 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - directory_client.GetUrl(), response.RawResponse.get(), + directory_client.GetUrl(), *response.RawResponse, "Failed to delete a directory: " + location.path); } } catch (const Azure::Storage::StorageException& exception) { From b0ba6aec46d158d91715719dbcce7ca18772b9f8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 8 Dec 2023 20:33:24 -0300 Subject: [PATCH 02/26] Explain better what ADLS Gen is and how it interacts with Blobs Storage --- cpp/src/arrow/filesystem/azurefs.h | 38 ++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2865b059ef..f189d89ffcb 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -90,21 +90,35 @@ struct ARROW_EXPORT AzureOptions { bool Equals(const AzureOptions& other) const; }; -/// \brief Azure-backed FileSystem implementation for ABFS and ADLS. +/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and +/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. /// -/// ABFS (Azure Blob Storage - https://azure.microsoft.com/en-us/products/storage/blobs/) -/// object-based cloud storage system. +/// ADLS Gen2 isn't a dedicated service or account type. It's a set of capabilities that +/// support high throughput analytic workloads, built on Azure Blob Storage. All the data +/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage account. +/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop +/// compatibility. ADLS Gen1 exists as a separate object that will retired on 2024-02-29 +/// and new ADLS accounts use Gen2 instead. /// -/// ADLS (Azure Data Lake Storage - -/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/) -/// is a scalable data storage system designed for big-data applications. -/// ADLS provides filesystem semantics, file-level security, and Hadoop -/// compatibility. Gen1 exists as a separate object that will retired -/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is -/// implemented on top of ABFS. +/// ADLS Gen2 and Blob APIs can operate on the same data, but there are +/// some limitations [3]. The ones that are relevant to this +/// implementation are listed here: /// -/// TODO: GH-18014 Complete the internal implementation -/// and review the documentation +/// - You can't use Blob APIs, and ADLS APIs to write to the same instance of a file. If +/// you write to a file by using ADLS APIs then that file's blocks won't be visible +/// to calls to the GetBlockList Blob API. The only exception is when you're +/// overwriting. +/// - When you use the ListBlobs operation without specifying a delimiter, the results +/// include both directories and blobs. If you choose to use a delimiter, use only a +/// forward slash (/) -- the only supported delimiter. +/// - If you use the DeleteBlob API to delete a directory, that directory is deleted only +/// if it's empty. This means that you can't use the Blob API delete directories +/// recursively. +/// +/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs +/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage +/// [3]: +/// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues class ARROW_EXPORT AzureFileSystem : public FileSystem { public: ~AzureFileSystem() override = default; From 2a13efc61df83cecc8aa9d0ecf1406286461ede9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 8 Dec 2023 21:46:25 -0300 Subject: [PATCH 03/26] Change AzureOptions towards how it should work --- cpp/src/arrow/filesystem/azurefs.cc | 85 ++++++++++++++++------ cpp/src/arrow/filesystem/azurefs.h | 91 ++++++++++++------------ cpp/src/arrow/filesystem/azurefs_test.cc | 14 ++-- 3 files changed, 116 insertions(+), 74 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 2455ebfc1e8..914cc01a4ae 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -33,37 +33,81 @@ #include "arrow/util/logging.h" #include "arrow/util/string.h" -namespace arrow { -namespace fs { +namespace arrow::fs { + +namespace Blobs = Azure::Storage::Blobs; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; +namespace Storage = Azure::Storage; // ----------------------------------------------------------------------- // AzureOptions Implementation AzureOptions::AzureOptions() = default; +AzureOptions::~AzureOptions() = default; + bool AzureOptions::Equals(const AzureOptions& other) const { - return (account_dfs_url == other.account_dfs_url && - account_blob_url == other.account_blob_url && - credentials_kind == other.credentials_kind && - default_metadata == other.default_metadata); + // TODO(GH-38598): update here when more auth methods are added. + const bool ret = backend == other.backend && + default_metadata == other.default_metadata && + account_blob_url_ == other.account_blob_url_ && + account_dfs_url_ == other.account_dfs_url_ && + credential_kind_ == other.credential_kind_; + if (!ret) { + return false; + } + switch (credential_kind_) { + case CredentialKind::kAnonymous: + return true; + case CredentialKind::kStorageSharedKeyCredential: + return storage_shared_key_credential_->AccountName == + other.storage_shared_key_credential_->AccountName; + } + DCHECK(false); + return false; } -Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key) { - if (this->backend == AzureBackend::Azurite) { - account_blob_url = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/"; +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key) { + if (this->backend == AzureBackend::kAzurite) { + account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; + account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; } else { - account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url = "https://" + account_name + ".blob.core.windows.net/"; + account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; + account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; } - storage_credentials_provider = - std::make_shared(account_name, - account_key); - credentials_kind = AzureCredentialsKind::StorageCredentials; + credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + storage_shared_key_credential_ = + std::make_shared(account_name, account_key); return Status::OK(); } +std::unique_ptr AzureOptions::MakeBlobServiceClient() const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique(account_blob_url_, + storage_shared_key_credential_); + } + DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration"; + return nullptr; +} + +std::unique_ptr AzureOptions::MakeDataLakeServiceClient() + const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique( + account_dfs_url_, storage_shared_key_credential_); + } + DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration"; + return nullptr; +} + namespace { // An AzureFileSystem represents an Azure storage account. An AzureLocation describes a @@ -718,11 +762,8 @@ class AzureFileSystem::Impl { : io_context_(io_context), options_(std::move(options)) {} Status Init() { - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = - std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); + blob_service_client_ = options_.MakeBlobServiceClient(); + datalake_service_client_ = options_.MakeDataLakeServiceClient(); return hierarchical_namespace_.Init(datalake_service_client_.get()); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index f189d89ffcb..5ecf932046b 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -25,69 +25,73 @@ #include "arrow/util/macros.h" #include "arrow/util/uri.h" -namespace Azure { -namespace Core { -namespace Credentials { - +namespace Azure::Core::Credentials { class TokenCredential; +} -} // namespace Credentials -} // namespace Core -namespace Storage { - +namespace Azure::Storage { class StorageSharedKeyCredential; +} -} // namespace Storage -} // namespace Azure - -namespace arrow { -namespace fs { - -enum class AzureCredentialsKind : int8_t { - /// Anonymous access (no credentials used), public - Anonymous, - /// Use explicitly-provided access key pair - StorageCredentials, - /// Use ServicePrincipleCredentials - ServicePrincipleCredentials, - /// Use Sas Token to authenticate - Sas, - /// Use Connection String - ConnectionString -}; +namespace Azure::Storage::Blobs { +class BlobServiceClient; +} + +namespace Azure::Storage::Files::DataLake { +class DataLakeServiceClient; +} + +namespace arrow::fs { enum class AzureBackend : bool { - /// Official Azure Remote Backend - Azure, - /// Local Simulated Storage - Azurite + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite }; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - std::string account_dfs_url; - std::string account_blob_url; - AzureBackend backend = AzureBackend::Azure; - AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous; + /// \brief The backend to connect to: Azure or Azurite (for testing). + AzureBackend backend = AzureBackend::kAzure; - std::string sas_token; - std::string connection_string; - std::shared_ptr - storage_credentials_provider; - std::shared_ptr - service_principle_credentials_provider; + // TODO(GH-38598): Add support for more auth methods. + // std::string connection_string; + // std::string sas_token; /// \brief Default metadata for OpenOutputStream. /// /// This will be ignored if non-empty metadata is passed to OpenOutputStream. std::shared_ptr default_metadata; + private: + std::string account_blob_url_; + std::string account_dfs_url_; + + enum class CredentialKind { + kAnonymous, + kStorageSharedKeyCredential, + } credential_kind_ = CredentialKind::kAnonymous; + + std::shared_ptr + storage_shared_key_credential_; + + public: AzureOptions(); + ~AzureOptions(); - Status ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key); bool Equals(const AzureOptions& other) const; + + const std::string& AccountBlobUrl() const { return account_blob_url_; } + const std::string& AccountDfsUrl() const { return account_dfs_url_; } + + std::unique_ptr MakeBlobServiceClient() const; + + std::unique_ptr + MakeDataLakeServiceClient() const; }; /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and @@ -177,5 +181,4 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { std::unique_ptr impl_; }; -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 792c63b2094..3bab50e23bc 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -217,10 +217,8 @@ class AzureFileSystemTest : public ::testing::Test { } // Stop-gap solution before GH-39119 is fixed. container_name_ = "z" + RandomChars(31); - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); + blob_service_client_ = options_.MakeBlobServiceClient(); + datalake_service_client_ = options_.MakeDataLakeServiceClient(); ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); auto container_client = CreateContainer(container_name_); @@ -388,8 +386,8 @@ class AzuriteFileSystemTest : public AzureFileSystemTest { ARROW_EXPECT_OK(GetAzuriteEnv()->status()); ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize()); AzureOptions options; - options.backend = AzureBackend::Azurite; - ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials( + options.backend = AzureBackend::kAzurite; + ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential( GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key())); return options; } @@ -413,7 +411,7 @@ class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"); if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); + RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_name, account_key)); return options; } return Status::Cancelled( @@ -444,7 +442,7 @@ class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME"); if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); + RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_name, account_key)); return options; } return Status::Cancelled( From fe9a92311f4ae9bbbf8835005cc86fdde0c8462e Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 8 Dec 2023 21:58:01 -0300 Subject: [PATCH 04/26] Use namespace aliases to shorten the references to Azure:: types --- cpp/src/arrow/filesystem/azurefs.cc | 217 +++++++++++------------ cpp/src/arrow/filesystem/azurefs_test.cc | 13 +- 2 files changed, 113 insertions(+), 117 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 914cc01a4ae..045ebbf4083 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -195,7 +195,7 @@ Status ValidateFileLocation(const AzureLocation& location) { return internal::AssertNoTrailingSlash(location.path); } -std::string_view BodyTextView(const Azure::Core::Http::RawResponse& raw_response) { +std::string_view BodyTextView(const Core::Http::RawResponse& raw_response) { const auto& body = raw_response.GetBody(); #ifndef NDEBUG auto& headers = raw_response.GetHeaders(); @@ -208,7 +208,7 @@ std::string_view BodyTextView(const Azure::Core::Http::RawResponse& raw_response } Status StatusFromErrorResponse(const std::string& url, - const Azure::Core::Http::RawResponse& raw_response, + const Core::Http::RawResponse& raw_response, const std::string& context) { // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that @@ -219,9 +219,9 @@ Status StatusFromErrorResponse(const std::string& url, "): ", body_text); } -bool IsContainerNotFound(const Azure::Storage::StorageException& exception) { +bool IsContainerNotFound(const Storage::StorageException& exception) { if (exception.ErrorCode == "ContainerNotFound") { - DCHECK_EQ(exception.StatusCode, Azure::Core::Http::HttpStatusCode::NotFound); + DCHECK_EQ(exception.StatusCode, Core::Http::HttpStatusCode::NotFound); return true; } return false; @@ -242,7 +242,7 @@ std::string FormatValue(typename TypeTraits::CType value) { } std::shared_ptr PropertiesToMetadata( - const Azure::Storage::Blobs::Models::BlobProperties& properties) { + const Blobs::Models::BlobProperties& properties) { auto metadata = std::make_shared(); // Not supported yet: // * properties.ObjectReplicationSourceProperties @@ -373,7 +373,7 @@ std::shared_ptr PropertiesToMetadata( class ObjectInputFile final : public io::RandomAccessFile { public: - ObjectInputFile(std::shared_ptr blob_client, + ObjectInputFile(std::shared_ptr blob_client, const io::IOContext& io_context, AzureLocation location, int64_t size = kNoSize) : blob_client_(std::move(blob_client)), @@ -391,8 +391,8 @@ class ObjectInputFile final : public io::RandomAccessFile { content_length_ = properties.Value.BlobSize; metadata_ = PropertiesToMetadata(properties.Value); return Status::OK(); - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } return internal::ExceptionToStatus( @@ -468,14 +468,14 @@ class ObjectInputFile final : public io::RandomAccessFile { } // Read the desired range of bytes - Azure::Core::Http::HttpRange range{position, nbytes}; - Azure::Storage::Blobs::DownloadBlobToOptions download_options; + Core::Http::HttpRange range{position, nbytes}; + Storage::Blobs::DownloadBlobToOptions download_options; download_options.Range = range; try { return blob_client_ ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + "' at position " + std::to_string(position) + " for " + std::to_string(nbytes) + @@ -516,7 +516,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } private: - std::shared_ptr blob_client_; + std::shared_ptr blob_client_; const io::IOContext io_context_; AzureLocation location_; @@ -526,11 +526,10 @@ class ObjectInputFile final : public io::RandomAccessFile { std::shared_ptr metadata_; }; -Status CreateEmptyBlockBlob( - std::shared_ptr block_blob_client) { +Status CreateEmptyBlockBlob(std::shared_ptr block_blob_client) { try { block_blob_client->UploadFrom(nullptr, 0); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. There is no existing blob at this " @@ -541,11 +540,11 @@ Status CreateEmptyBlockBlob( return Status::OK(); } -Result GetBlockList( - std::shared_ptr block_blob_client) { +Result GetBlockList( + std::shared_ptr block_blob_client) { try { return block_blob_client->GetBlockList().Value; - } catch (Azure::Storage::StorageException& exception) { + } catch (Storage::StorageException& exception) { return internal::ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Cannot write to a file without first " @@ -554,19 +553,19 @@ Result GetBlockList( } } -Azure::Storage::Metadata ArrowMetadataToAzureMetadata( +Storage::Metadata ArrowMetadataToAzureMetadata( const std::shared_ptr& arrow_metadata) { - Azure::Storage::Metadata azure_metadata; + Storage::Metadata azure_metadata; for (auto key_value : arrow_metadata->sorted_pairs()) { azure_metadata[key_value.first] = key_value.second; } return azure_metadata; } -Status CommitBlockList( - std::shared_ptr block_blob_client, - const std::vector& block_ids, const Azure::Storage::Metadata& metadata) { - Azure::Storage::Blobs::CommitBlockListOptions options; +Status CommitBlockList(std::shared_ptr block_blob_client, + const std::vector& block_ids, + const Storage::Metadata& metadata) { + Blobs::CommitBlockListOptions options; options.Metadata = metadata; try { // CommitBlockList puts all block_ids in the latest element. That means in the case of @@ -574,7 +573,7 @@ Status CommitBlockList( // previously committed blocks. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body block_blob_client->CommitBlockList(block_ids, options); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Committing is required to flush an " @@ -586,11 +585,10 @@ Status CommitBlockList( class ObjectAppendStream final : public io::OutputStream { public: - ObjectAppendStream( - std::shared_ptr block_blob_client, - const io::IOContext& io_context, const AzureLocation& location, - const std::shared_ptr& metadata, - const AzureOptions& options, int64_t size = kNoSize) + ObjectAppendStream(std::shared_ptr block_blob_client, + const io::IOContext& io_context, const AzureLocation& location, + const std::shared_ptr& metadata, + const AzureOptions& options, int64_t size = kNoSize) : block_blob_client_(std::move(block_blob_client)), io_context_(io_context), location_(location), @@ -617,8 +615,8 @@ class ObjectAppendStream final : public io::OutputStream { auto properties = block_blob_client_->GetProperties(); content_length_ = properties.Value.BlobSize; pos_ = content_length_; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { return internal::ExceptionToStatus( @@ -691,7 +689,7 @@ class ObjectAppendStream final : public io::OutputStream { std::shared_ptr owned_buffer = nullptr) { RETURN_NOT_OK(CheckClosed("append")); auto append_data = reinterpret_cast(data); - Azure::Core::IO::MemoryBodyStream block_content(append_data, nbytes); + Core::IO::MemoryBodyStream block_content(append_data, nbytes); if (block_content.Length() == 0) { return Status::OK(); } @@ -714,12 +712,12 @@ class ObjectAppendStream final : public io::OutputStream { // if the blob was previously created with one block, with id `00001-arrow` then the // next block we append will conflict with that, and cause corruption. new_block_id += "-arrow"; - new_block_id = Azure::Core::Convert::Base64Encode( + new_block_id = Core::Convert::Base64Encode( std::vector(new_block_id.begin(), new_block_id.end())); try { block_blob_client_->StageBlock(new_block_id, block_content); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + @@ -733,7 +731,7 @@ class ObjectAppendStream final : public io::OutputStream { return Status::OK(); } - std::shared_ptr block_blob_client_; + std::shared_ptr block_blob_client_; const io::IOContext io_context_; const AzureLocation location_; @@ -741,7 +739,7 @@ class ObjectAppendStream final : public io::OutputStream { int64_t pos_ = 0; int64_t content_length_ = kNoSize; std::vector block_ids_; - Azure::Storage::Metadata metadata_; + Storage::Metadata metadata_; }; } // namespace @@ -752,9 +750,9 @@ class ObjectAppendStream final : public io::OutputStream { class AzureFileSystem::Impl { public: io::IOContext io_context_; - std::unique_ptr + std::unique_ptr datalake_service_client_; - std::unique_ptr blob_service_client_; + std::unique_ptr blob_service_client_; AzureOptions options_; internal::HierarchicalNamespaceDetector hierarchical_namespace_; @@ -792,7 +790,7 @@ class AzureFileSystem::Impl { info.set_mtime( std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { if (IsContainerNotFound(exception)) { info.set_type(FileType::NotFound); return info; @@ -816,6 +814,8 @@ class AzureFileSystem::Impl { // For a path with a trailing slash a hierarchical namespace may return a blob // with that trailing slash removed. For consistency with flat namespace and // other filesystems we chose to return NotFound. + // + // NOTE(felipecrv): could this be an empty directory marker? info.set_type(FileType::NotFound); return info; } else { @@ -825,8 +825,8 @@ class AzureFileSystem::Impl { info.set_mtime( std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, hierarchical_namespace_.Enabled(location.container)); if (hierarchical_namespace_enabled) { @@ -837,7 +837,7 @@ class AzureFileSystem::Impl { } // On flat namespace accounts there are no real directories. Directories are only // implied by using `/` in the blob name. - Azure::Storage::Blobs::ListBlobsOptions list_blob_options; + Blobs::ListBlobsOptions list_blob_options; // If listing the prefix `path.path_to_file` with trailing slash returns at least // one result then `path` refers to an implied directory. list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); @@ -853,7 +853,7 @@ class AzureFileSystem::Impl { : FileType::NotFound; info.set_type(file_type); return info; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " @@ -871,9 +871,8 @@ class AzureFileSystem::Impl { private: template - Status VisitContainers(const Azure::Core::Context& context, - OnContainer&& on_container) const { - Azure::Storage::Blobs::ListBlobContainersOptions options; + Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { + Blobs::ListBlobContainersOptions options; try { auto container_list_response = blob_service_client_->ListBlobContainers(options, context); @@ -883,14 +882,14 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(on_container(container)); } } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus("Failed to list account containers.", exception); } return Status::OK(); } static FileInfo FileInfoFromBlob(std::string_view container, - const Azure::Storage::Blobs::Models::BlobItem& blob) { + const Blobs::Models::BlobItem& blob) { auto path = internal::ConcatAbstractPath(container, blob.Name); if (internal::HasTrailingSlash(blob.Name)) { return DirectoryFileInfoFromPath(path); @@ -919,13 +918,13 @@ class AzureFileSystem::Impl { /// \pre container_client is the client for the container named like the first /// segment of select.base_dir. Status GetFileInfoWithSelectorFromContainer( - const Azure::Storage::Blobs::BlobContainerClient& container_client, - const Azure::Core::Context& context, Azure::Nullable page_size_hint, - const FileSelector& select, FileInfoVector* acc_results) { + const Blobs::BlobContainerClient& container_client, const Core::Context& context, + Azure::Nullable page_size_hint, const FileSelector& select, + FileInfoVector* acc_results) { ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); bool found = false; - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (internal::IsEmptyPath(base_location.path)) { // If the base_dir is the root of the container, then we want to list all blobs in // the container and the Prefix should be empty and not even include the trailing @@ -936,7 +935,7 @@ class AzureFileSystem::Impl { options.Prefix = internal::EnsureTrailingSlash(base_location.path); } options.PageSizeHint = page_size_hint; - options.Include = Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata; + options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata; auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { if (select.recursive && select.max_recursion > 0) { @@ -952,15 +951,14 @@ class AzureFileSystem::Impl { return Status::OK(); }; - auto process_blob = - [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept { - // blob.Name has trailing slash only when Prefix is an empty - // directory marker blob for the directory we're listing - // from, and we should skip it. - if (!internal::HasTrailingSlash(blob.Name)) { - acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); - } - }; + auto process_blob = [&](const Blobs::Models::BlobItem& blob) noexcept { + // blob.Name has trailing slash only when Prefix is an empty + // directory marker blob for the directory we're listing + // from, and we should skip it. + if (!internal::HasTrailingSlash(blob.Name)) { + acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); + } + }; auto process_prefix = [&](const std::string& prefix) noexcept -> Status { const auto path = internal::ConcatAbstractPath(base_location.container, prefix); acc_results->push_back(DirectoryFileInfoFromPath(path)); @@ -1013,7 +1011,7 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index])); } } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { if (IsContainerNotFound(exception)) { found = false; } else { @@ -1030,7 +1028,7 @@ class AzureFileSystem::Impl { } public: - Status GetFileInfoWithSelector(const Azure::Core::Context& context, + Status GetFileInfoWithSelector(const Core::Context& context, Azure::Nullable page_size_hint, const FileSelector& select, FileInfoVector* acc_results) { @@ -1040,29 +1038,28 @@ class AzureFileSystem::Impl { // Without a container, the base_location is equivalent to the filesystem // root -- `/`. FileSelector::allow_not_found doesn't matter in this case // because the root always exists. - auto on_container = - [&](const Azure::Storage::Blobs::Models::BlobContainerItem& container) { - // Deleted containers are not listed by ListContainers. - DCHECK(!container.IsDeleted); - - // Every container is considered a directory. - FileInfo info{container.Name, FileType::Directory}; - info.set_mtime( - std::chrono::system_clock::time_point{container.Details.LastModified}); - acc_results->push_back(std::move(info)); - - // Recurse into containers (subdirectories) if requested. - if (select.recursive && select.max_recursion > 0) { - FileSelector sub_select; - sub_select.base_dir = container.Name; - sub_select.allow_not_found = true; - sub_select.recursive = true; - sub_select.max_recursion = select.max_recursion - 1; - ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context, page_size_hint, - sub_select, acc_results)); - } - return Status::OK(); - }; + auto on_container = [&](const Blobs::Models::BlobContainerItem& container) { + // Deleted containers are not listed by ListContainers. + DCHECK(!container.IsDeleted); + + // Every container is considered a directory. + FileInfo info{container.Name, FileType::Directory}; + info.set_mtime( + std::chrono::system_clock::time_point{container.Details.LastModified}); + acc_results->push_back(std::move(info)); + + // Recurse into containers (subdirectories) if requested. + if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = container.Name; + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + ARROW_RETURN_NOT_OK( + GetFileInfoWithSelector(context, page_size_hint, sub_select, acc_results)); + } + return Status::OK(); + }; return VisitContainers(context, std::move(on_container)); } @@ -1075,7 +1072,7 @@ class AzureFileSystem::Impl { Result> OpenInputFile(const AzureLocation& location, AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1095,7 +1092,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(info.path())); RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1122,7 +1119,7 @@ class AzureFileSystem::Impl { container_client.GetUrl(), *response.RawResponse, "Failed to create a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to create a container: " + location.container + ": " + container_client.GetUrl(), @@ -1150,7 +1147,7 @@ class AzureFileSystem::Impl { return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, "Failed to create a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to create a directory: " + location.path + ": " + directory_client.GetUrl(), @@ -1167,7 +1164,7 @@ class AzureFileSystem::Impl { blob_service_client_->GetBlobContainerClient(location.container); try { container_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to create a container: " + location.container + " (" + container_client.GetUrl() + ")", @@ -1189,7 +1186,7 @@ class AzureFileSystem::Impl { .GetDirectoryClient(location.path); try { directory_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to create a directory: " + location.path + " (" + directory_client.GetUrl() + ")", @@ -1206,7 +1203,7 @@ class AzureFileSystem::Impl { AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto block_blob_client = std::make_shared( + auto block_blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlockBlobClient(location.path)); @@ -1228,7 +1225,7 @@ class AzureFileSystem::Impl { bool missing_dir_ok) { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); } @@ -1248,15 +1245,14 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); } try { container_client.SubmitBatch(batch); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to delete blobs in a directory: " + location.path + ": " + container_client.GetUrl(), @@ -1269,7 +1265,7 @@ class AzureFileSystem::Impl { try { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { success = false; } if (!success) { @@ -1288,7 +1284,7 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to list blobs in a directory: " + location.path + ": " + container_client.GetUrl(), @@ -1315,7 +1311,7 @@ class AzureFileSystem::Impl { container_client.GetUrl(), *response.RawResponse, "Failed to delete a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to delete a container: " + location.container + ": " + container_client.GetUrl(), @@ -1338,7 +1334,7 @@ class AzureFileSystem::Impl { directory_client.GetUrl(), *response.RawResponse, "Failed to delete a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to delete a directory: " + location.path + ": " + directory_client.GetUrl(), @@ -1370,7 +1366,7 @@ class AzureFileSystem::Impl { file_system_client.GetDirectoryClient(path.Name); try { sub_directory_client.DeleteRecursive(); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to delete a sub directory: " + location.container + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), @@ -1380,7 +1376,7 @@ class AzureFileSystem::Impl { auto sub_file_client = file_system_client.GetFileClient(path.Name); try { sub_file_client.Delete(); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to delete a sub file: " + location.container + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), @@ -1389,9 +1385,9 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { if (missing_dir_ok && - exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { return Status::OK(); } else { return internal::ExceptionToStatus( @@ -1419,7 +1415,7 @@ class AzureFileSystem::Impl { .GetUrl(); try { dest_blob_client.CopyFromUri(src_url); - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { return internal::ExceptionToStatus( "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", exception); @@ -1447,7 +1443,7 @@ Result AzureFileSystem::GetFileInfo(const std::string& path) { } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { - Azure::Core::Context context; + Core::Context context; Azure::Nullable page_size_hint; // unspecified FileInfoVector results; RETURN_NOT_OK( @@ -1539,5 +1535,4 @@ AzureFileSystem::AzureFileSystem(const AzureOptions& options, default_async_is_sync_ = false; } -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 3bab50e23bc..b5e0434849e 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -71,7 +71,8 @@ using ::testing::Not; using ::testing::NotNull; namespace Blobs = Azure::Storage::Blobs; -namespace Files = Azure::Storage::Files; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; auto const* kLoremIpsum = R"""( Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor @@ -87,8 +88,8 @@ class AzuriteEnv : public ::testing::Environment { AzuriteEnv() { account_name_ = "devstoreaccount1"; account_key_ = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/" - "KBHBeksoGMGw=="; + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw=="; auto exe_path = bp::search_path("azurite"); if (exe_path.empty()) { auto error = std::string("Could not find Azurite emulator."); @@ -197,7 +198,7 @@ class AzureFileSystemTest : public ::testing::Test { public: std::shared_ptr fs_; std::unique_ptr blob_service_client_; - std::unique_ptr datalake_service_client_; + std::unique_ptr datalake_service_client_; AzureOptions options_; std::mt19937_64 generator_; std::string container_name_; @@ -1200,7 +1201,7 @@ TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { .GetBlockBlobClient(path) .GetProperties() .Value.Metadata; - EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); // Check that explicit metadata overrides the defaults. ASSERT_OK_AND_ASSIGN( @@ -1213,7 +1214,7 @@ TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { .GetProperties() .Value.Metadata; // Defaults are overwritten and not merged. - EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); } TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { From aa485259d63c8f93ddd1ae1a0558985c392ea1c8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 12 Dec 2023 20:12:45 -0300 Subject: [PATCH 05/26] Refactor the Azure FS tests --- cpp/src/arrow/filesystem/azurefs_test.cc | 762 ++++++++++++++--------- 1 file changed, 451 insertions(+), 311 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index b5e0434849e..84cc18a31d2 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -36,6 +36,7 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +#include #include #include @@ -74,54 +75,110 @@ namespace Blobs = Azure::Storage::Blobs; namespace Core = Azure::Core; namespace DataLake = Azure::Storage::Files::DataLake; -auto const* kLoremIpsum = R"""( -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor -incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis -nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. -Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu -fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in -culpa qui officia deserunt mollit anim id est laborum. -)"""; +class BaseAzureEnv : public ::testing::Environment { + protected: + std::string account_name_; + std::string account_key_; + + BaseAzureEnv(std::string account_name, std::string account_key) + : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {} -class AzuriteEnv : public ::testing::Environment { public: - AzuriteEnv() { - account_name_ = "devstoreaccount1"; - account_key_ = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" - "K1SZFPTOtr/KBHBeksoGMGw=="; - auto exe_path = bp::search_path("azurite"); - if (exe_path.empty()) { - auto error = std::string("Could not find Azurite emulator."); - status_ = Status::Invalid(error); - return; + ~BaseAzureEnv() override = default; + + const std::string& account_name() const { return account_name_; } + const std::string& account_key() const { return account_key_; } + + virtual AzureBackend backend() const = 0; + + virtual bool WithHierarchicalNamespace() const { return false; } + + virtual Result GetDebugLogSize() { return 0; } + virtual Status DumpDebugLog(int64_t position) { + return Status::NotImplemented("BaseAzureEnv::DumpDebugLog"); + } +}; + +template +class AzureEnvImpl : public BaseAzureEnv { + protected: + static Result MakeAndAddToGlobalTestEnvironment() { + ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make()); + auto* heap_ptr = instance.release(); + ::testing::AddGlobalTestEnvironment(heap_ptr); + return heap_ptr; + } + + using BaseAzureEnv::BaseAzureEnv; + + static Result> MakeFromEnvVars( + const std::string& account_name_var, const std::string& account_key_var) { + const auto account_name = std::getenv(account_name_var.c_str()); + const auto account_key = std::getenv(account_key_var.c_str()); + if (!account_name) { + return Status::Cancelled(account_name_var + " not set."); } - auto temp_dir_ = *TemporaryDir::Make("azurefs-test-"); - auto debug_log_path_result = temp_dir_->path().Join("debug.log"); - if (!debug_log_path_result.ok()) { - status_ = debug_log_path_result.status(); - return; + if (!account_key) { + return Status::Cancelled(account_key_var + " not set."); } - debug_log_path_ = *debug_log_path_result; - server_process_ = - bp::child(boost::this_process::environment(), exe_path, "--silent", "--location", - temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString()); - if (!(server_process_.valid() && server_process_.running())) { - auto error = "Could not start Azurite emulator."; - server_process_.terminate(); - server_process_.wait(); - status_ = Status::Invalid(error); - return; + return std::unique_ptr{new AzureEnvClass(account_name, account_key)}; + } + + public: + ~AzureEnvImpl() override = default; + + static Result GetInstance() { + static auto env = MakeAndAddToGlobalTestEnvironment(); + if (env.ok()) { + return env; } - status_ = Status::OK(); + return env.status(); } + AzureBackend backend() const final { return AzureEnvClass::kBackend; } +}; + +class AzuriteEnv : public AzureEnvImpl { + private: + std::unique_ptr temp_dir_; + arrow::internal::PlatformFilename debug_log_path_; + bp::child server_process_; + + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzurite; + ~AzuriteEnv() override { server_process_.terminate(); server_process_.wait(); } - Result GetDebugLogSize() { + static Result> Make() { + auto self = std::unique_ptr( + new AzuriteEnv("devstoreaccount1", + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw==")); + auto exe_path = bp::search_path("azurite"); + if (exe_path.empty()) { + return Status::Invalid("Could not find Azurite emulator."); + } + ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-")); + ARROW_ASSIGN_OR_RAISE(self->debug_log_path_, + self->temp_dir_->path().Join("debug.log")); + auto server_process = bp::child( + boost::this_process::environment(), exe_path, "--silent", "--location", + self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString()); + if (!server_process.valid() || !server_process.running()) { + server_process.terminate(); + server_process.wait(); + return Status::Invalid("Could not start Azurite emulator."); + } + self->server_process_ = std::move(server_process); + return self; + } + + Result GetDebugLogSize() override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return 0; @@ -132,7 +189,7 @@ class AzuriteEnv : public ::testing::Environment { return arrow::internal::FileTell(file_descriptor.fd()); } - Status DumpDebugLog(int64_t position = 0) { + Status DumpDebugLog(int64_t position) override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return Status::OK(); @@ -158,25 +215,39 @@ class AzuriteEnv : public ::testing::Environment { std::cerr << std::endl; return Status::OK(); } +}; - const std::string& account_name() const { return account_name_; } - const std::string& account_key() const { return account_key_; } - const Status status() const { return status_; } - +class AzureFlatNSEnv : public AzureEnvImpl { private: - std::string account_name_; - std::string account_key_; - bp::child server_process_; - Status status_; - std::unique_ptr temp_dir_; - arrow::internal::PlatformFilename debug_log_path_; + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + ~AzureFlatNSEnv() override = default; + + static Result> Make() { + return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME", + "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); + } }; -auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv); +class AzureHierarchicalNSEnv : public AzureEnvImpl { + private: + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzure; -AzuriteEnv* GetAzuriteEnv() { - return ::arrow::internal::checked_cast(azurite_env); -} + ~AzureHierarchicalNSEnv() override = default; + + static Result> Make() { + return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME", + "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); + } + + bool WithHierarchicalNamespace() const final { return true; } +}; // Placeholder tests // TODO: GH-18014 Remove once a proper test is added @@ -194,42 +265,76 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } +auto const* kLoremIpsum = R"""( +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum. +)"""; + class AzureFileSystemTest : public ::testing::Test { - public: + protected: + // Set in constructor + std::mt19937_64 generator_; + + // Set in SetUp() + int64_t debug_log_start_ = 0; + bool set_up_succeeded_ = false; + AzureOptions options_; + std::shared_ptr fs_; std::unique_ptr blob_service_client_; std::unique_ptr datalake_service_client_; - AzureOptions options_; - std::mt19937_64 generator_; + + // Set in SetUpPreexistingData() std::string container_name_; - bool suite_skipped_ = false; + public: AzureFileSystemTest() : generator_(std::random_device()()) {} - virtual Result MakeOptions() = 0; + virtual Result GetAzureEnv() const = 0; + + static Result MakeOptions(BaseAzureEnv* env) { + AzureOptions options; + options.backend = env->backend(); + ARROW_EXPECT_OK( + options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + return options; + } void SetUp() override { - auto options = MakeOptions(); - if (options.ok()) { - options_ = *options; + auto make_options = [this]() -> Result { + ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv()); + EXPECT_THAT(env, NotNull()); + ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize()); + return MakeOptions(env); + }; + auto options_res = make_options(); + if (!options_res.ok() && options_res.status().IsCancelled()) { + GTEST_SKIP() << options_res.status().message(); } else { - suite_skipped_ = true; - GTEST_SKIP() << options.status().message(); + EXPECT_OK_AND_ASSIGN(options_, std::move(options_res)); } - // Stop-gap solution before GH-39119 is fixed. - container_name_ = "z" + RandomChars(31); + + ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); blob_service_client_ = options_.MakeBlobServiceClient(); datalake_service_client_ = options_.MakeDataLakeServiceClient(); - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - auto container_client = CreateContainer(container_name_); + set_up_succeeded_ = true; - auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName()); - blob_client.UploadFrom(reinterpret_cast(kLoremIpsum), - strlen(kLoremIpsum)); + SetUpPreexistingData(); + } + + void SetUpPreexistingData() { + // Stop-gap solution before GH-39119 is fixed. + container_name_ = "z" + RandomChars(31); + auto container_client = CreateContainer(container_name_); + CreateBlob(container_client, PreexistingObjectName(), kLoremIpsum); } void TearDown() override { - if (!suite_skipped_) { + if (set_up_succeeded_) { auto containers = blob_service_client_->ListBlobContainers(); for (auto container : containers.BlobContainers) { auto container_client = @@ -237,6 +342,13 @@ class AzureFileSystemTest : public ::testing::Test { container_client.DeleteIfExists(); } } + if (HasFailure()) { + // XXX: This may not include all logs in the target test because + // Azurite doesn't flush debug logs immediately... You may want + // to check the log manually... + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + ARROW_IGNORE_EXPR(env->DumpDebugLog(debug_log_start_)); + } } Blobs::BlobContainerClient CreateContainer(const std::string& name) { @@ -299,9 +411,6 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Close()); } - void RunGetFileInfoObjectWithNestedStructureTest(); - void RunGetFileInfoObjectTest(); - struct HierarchicalPaths { std::string container; std::string directory; @@ -379,120 +488,128 @@ class AzureFileSystemTest : public ::testing::Test { AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); } -}; -class AzuriteFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - EXPECT_THAT(GetAzuriteEnv(), NotNull()); - ARROW_EXPECT_OK(GetAzuriteEnv()->status()); - ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize()); - AzureOptions options; - options.backend = AzureBackend::kAzurite; - ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential( - GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key())); - return options; + bool WithHierarchicalNamespace() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->WithHierarchicalNamespace(); } - void TearDown() override { - AzureFileSystemTest::TearDown(); - if (HasFailure()) { - // XXX: This may not include all logs in the target test because - // Azurite doesn't flush debug logs immediately... You may want - // to check the log manually... - ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_)); + // Tests that are called from more than one implementation of AzureFileSystemTest + + void DetectHierarchicalNamespaceTest(); + void GetFileInfoObjectTest(); + void GetFileInfoObjectWithNestedStructureTest(); + + void DeleteDirSuccessEmptyTest() { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + + if (WithHierarchicalNamespace()) { + ASSERT_OK(fs_->CreateDir(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() and DeleteDir() do nothing. + ASSERT_OK(fs_->CreateDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } } - int64_t debug_log_start_ = 0; -}; + void CreateDirSuccessContainerAndDirectoryTest() { + const auto path = PreexistingContainerPath() + RandomDirectoryName(); + ASSERT_OK(fs_->CreateDir(path, false)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + } + } -class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_name, account_key)); - return options; + void CreateDirRecursiveSuccessContainerOnlyTest() { + auto container_name = RandomContainerName(); + ASSERT_OK(fs_->CreateDir(container_name, true)); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + } + + void CreateDirRecursiveSuccessDirectoryOnlyTest() { + const auto parent = PreexistingContainerPath() + RandomDirectoryName(); + const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } - return Status::Cancelled( - "Connection details not provided for a real flat namespace " - "account."); } -}; -// How to enable this test: -// -// You need an Azure account. You should be able to create a free -// account at https://azure.microsoft.com/en-gb/free/ . You should be -// able to create a storage account through the portal Web UI. -// -// See also the official document how to create a storage account: -// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -// -// A few suggestions on configuration: -// -// * Use Standard general-purpose v2 not premium -// * Use LRS redundancy -// * Obviously you need to enable hierarchical namespace. -// * Set the default access tier to hot -// * SFTP, NFS and file shares are not required. -class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredential(account_name, account_key)); - return options; + void CreateDirRecursiveSuccessContainerAndDirectoryTest() { + auto container_name = RandomContainerName(); + const auto parent = + internal::ConcatAbstractPath(container_name, RandomDirectoryName()); + const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } - return Status::Cancelled( - "Connection details not provided for a real hierarchical namespace " - "account."); } -}; -TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void DeleteDirContentsSuccessNonexistentTest() { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void DeleteDirContentsFailureNonexistentTest() { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + } +}; -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} +void AzureFileSystemTest::DetectHierarchicalNamespaceTest() { + // Check the environments are implemented and injected here correctly. + auto expected = WithHierarchicalNamespace(); -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); + ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(PreexistingContainerName())); } -TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); -} - -TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { - AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); +void AzureFileSystemTest::GetFileInfoObjectTest() { + auto object_properties = + blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + .GetBlobClient(PreexistingObjectName()) + .GetProperties() + .Value; - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, + std::chrono::system_clock::time_point{object_properties.LastModified}, + static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); } -void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() { +void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; @@ -536,12 +653,78 @@ void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() { FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); +// How to enable the non-Azurite tests: +// +// You need an Azure account. You should be able to create a free account [1]. +// Through the portal Web UI, you should create a storage account [2]. +// +// +// A few suggestions on configuration: +// +// * Use Standard general-purpose v2 not premium +// * Use LRS redundancy +// * Set the default access tier to hot +// * SFTP, NFS and file shares are not required. +// +// You need to enable Hierarchical Namespace on the storage account +// +// You must not enable Hierarchical Namespace on the storage account used for +// AzureFlatNSFileSystemTest, but you must enable it on the storage account +// used for AzureHierarchicalNSFileSystemTest. +// +// The credentials should be placed in the correct environment variables: +// +// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME +// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY +// +// [1]: https://azure.microsoft.com/en-gb/free/ +// [2]: +// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account +class AzureFlatNSFileSystemTest : public AzureFileSystemTest { + public: + using AzureFileSystemTest::AzureFileSystemTest; + + Result GetAzureEnv() const final { + return AzureFlatNSEnv::GetInstance(); + } +}; + +class AzureHierarchicalNSFileSystemTest : public AzureFileSystemTest { + public: + using AzureFileSystemTest::AzureFileSystemTest; + + Result GetAzureEnv() const final { + return AzureHierarchicalNSEnv::GetInstance(); + } +}; + +class AzuriteFileSystemTest : public AzureFileSystemTest { + public: + using AzureFileSystemTest::AzureFileSystemTest; + + Result GetAzureEnv() const final { return AzuriteEnv::GetInstance(); } +}; + +// Tests using a real storage account *without Hierarchical Namespace enabled* + +TEST_F(AzureFlatNSFileSystemTest, DetectHierarchicalNamespace) { + this->DetectHierarchicalNamespaceTest(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); +// Tests using a real storage account *with Hierarchical Namespace enabled* + +TEST_F(AzureHierarchicalNSFileSystemTest, DetectHierarchicalNamespace) { + this->DetectHierarchicalNamespaceTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, GetFileInfoObject) { + this->GetFileInfoObjectTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, GetFileInfoObjectWithNestedStructure) { + this->GetFileInfoObjectWithNestedStructureTest(); datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) .GetDirectoryClient("test-empty-object-dir") .Create(); @@ -550,25 +733,107 @@ TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStru FileType::Directory); } -void AzureFileSystemTest::RunGetFileInfoObjectTest() { - auto object_properties = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlobClient(PreexistingObjectName()) - .GetProperties() - .Value; +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessEmpty) { + return DeleteDirSuccessEmptyTest(); +} - AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, - std::chrono::system_clock::time_point(object_properties.LastModified), - static_cast(object_properties.BlobSize)); +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) { + const auto path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write(std::string_view("hello"))); + ASSERT_OK(output->Close()); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { + const auto parent = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(parent)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirSuccessContainerAndDirectory) { + this->CreateDirSuccessContainerAndDirectoryTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { + this->CreateDirRecursiveSuccessContainerOnlyTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { + this->CreateDirRecursiveSuccessDirectoryOnlyTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, + CreateDirRecursiveSuccessContainerAndDirectory) { + this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) { + HierarchicalPaths paths; + CreateHierarchicalData(paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } +} + +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessNonexistent) { + this->DeleteDirContentsSuccessNonexistentTest(); +} + +TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) { + this->DeleteDirContentsFailureNonexistentTest(); +} + +// Tests using Azurite (the local Azure emulator) + +TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { + this->DetectHierarchicalNamespaceTest(); +} + +TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); + ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); + ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); +} + +TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { this->GetFileInfoObjectTest(); } + +TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { + this->GetFileInfoObjectWithNestedStructureTest(); +} + +TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { + AssertFileInfo(fs_.get(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest(); } +TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { + AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); + + AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { - RunGetFileInfoObjectTest(); + // URI + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); } TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { @@ -756,17 +1021,7 @@ TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { } TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + this->CreateDirSuccessContainerAndDirectoryTest(); } TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { @@ -778,57 +1033,16 @@ TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + this->CreateDirRecursiveSuccessContainerOnlyTest(); } TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, - CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + this->CreateDirRecursiveSuccessDirectoryOnlyTest(); } TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); } TEST_F(AzuriteFileSystemTest, CreateDirUri) { @@ -844,14 +1058,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { } TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + this->DeleteDirSuccessEmptyTest(); } TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { @@ -889,45 +1096,6 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirFailureNonexistent) { - const auto path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveBlob) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveDirectory) { - const auto parent = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - TEST_F(AzuriteFileSystemTest, DeleteDirUri) { ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); } @@ -963,39 +1131,11 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + this->DeleteDirContentsSuccessNonexistentTest(); } TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { - HierarchicalPaths paths; - CreateHierarchicalData(paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); - } -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + this->DeleteDirContentsFailureNonexistentTest(); } TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { @@ -1183,7 +1323,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { ASSERT_RAISES(Invalid, stream->Tell()); } -TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { +TEST_F(AzuriteFileSystemTest, WriteMetadata) { options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_)); From 0a8bf1a22bc1717ed95ec88b9b43ee748bc61e4c Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 12 Dec 2023 21:41:13 -0300 Subject: [PATCH 06/26] azurefs_test.cc: Use TYPED_TEST_SUITE to generate tests for all envs --- cpp/src/arrow/filesystem/azurefs_test.cc | 131 ++++++++--------------- 1 file changed, 47 insertions(+), 84 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 84cc18a31d2..d94530d08ad 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -651,8 +651,25 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { FileType::NotFound); AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di", FileType::NotFound); + + if (WithHierarchicalNamespace()) { + datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) + .GetDirectoryClient("test-empty-object-dir") + .Create(); + + AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", + FileType::Directory); + } } +template +class AzureFileSystemTestImpl : public AzureFileSystemTest { + public: + using AzureFileSystemTest::AzureFileSystemTest; + + Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } +}; + // How to enable the non-Azurite tests: // // You need an Azure account. You should be able to create a free account [1]. @@ -682,61 +699,54 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { // [1]: https://azure.microsoft.com/en-gb/free/ // [2]: // https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -class AzureFlatNSFileSystemTest : public AzureFileSystemTest { - public: - using AzureFileSystemTest::AzureFileSystemTest; +using AzureFlatNSFileSystemTest = AzureFileSystemTestImpl; +using AzureHierarchicalNSFileSystemTest = AzureFileSystemTestImpl; +using AzuriteFileSystemTest = AzureFileSystemTestImpl; - Result GetAzureEnv() const final { - return AzureFlatNSEnv::GetInstance(); - } -}; - -class AzureHierarchicalNSFileSystemTest : public AzureFileSystemTest { - public: - using AzureFileSystemTest::AzureFileSystemTest; - - Result GetAzureEnv() const final { - return AzureHierarchicalNSEnv::GetInstance(); - } -}; +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) -class AzuriteFileSystemTest : public AzureFileSystemTest { - public: - using AzureFileSystemTest::AzureFileSystemTest; +template +using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; - Result GetAzureEnv() const final { return AzuriteEnv::GetInstance(); } -}; +using AllEnvironments = + ::testing::Types; -// Tests using a real storage account *without Hierarchical Namespace enabled* +TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); -TEST_F(AzureFlatNSFileSystemTest, DetectHierarchicalNamespace) { +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { this->DetectHierarchicalNamespaceTest(); } -// Tests using a real storage account *with Hierarchical Namespace enabled* - -TEST_F(AzureHierarchicalNSFileSystemTest, DetectHierarchicalNamespace) { - this->DetectHierarchicalNamespaceTest(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { + this->GetFileInfoObjectTest(); } -TEST_F(AzureHierarchicalNSFileSystemTest, GetFileInfoObject) { - this->GetFileInfoObjectTest(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { + this->DeleteDirSuccessEmptyTest(); } -TEST_F(AzureHierarchicalNSFileSystemTest, GetFileInfoObjectWithNestedStructure) { +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { this->GetFileInfoObjectWithNestedStructureTest(); - datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) - .GetDirectoryClient("test-empty-object-dir") - .Create(); +} - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", - FileType::Directory); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { + this->CreateDirSuccessContainerAndDirectoryTest(); } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessEmpty) { - return DeleteDirSuccessEmptyTest(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { + this->CreateDirRecursiveSuccessContainerOnlyTest(); } +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { + this->CreateDirRecursiveSuccessDirectoryOnlyTest(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { + this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); +} + +// Tests using a real storage account *with Hierarchical Namespace enabled* + TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) { const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); @@ -767,23 +777,6 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } -TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirSuccessContainerAndDirectory) { - this->CreateDirSuccessContainerAndDirectoryTest(); -} - -TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - this->CreateDirRecursiveSuccessContainerOnlyTest(); -} - -TEST_F(AzureHierarchicalNSFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - this->CreateDirRecursiveSuccessDirectoryOnlyTest(); -} - -TEST_F(AzureHierarchicalNSFileSystemTest, - CreateDirRecursiveSuccessContainerAndDirectory) { - this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); -} - TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) { HierarchicalPaths paths; CreateHierarchicalData(paths); @@ -804,22 +797,12 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) { // Tests using Azurite (the local Azure emulator) -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { - this->DetectHierarchicalNamespaceTest(); -} - TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { this->GetFileInfoObjectTest(); } - -TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { - this->GetFileInfoObjectWithNestedStructureTest(); -} - TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { AssertFileInfo(fs_.get(), "", FileType::Directory); @@ -1020,10 +1003,6 @@ TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { - this->CreateDirSuccessContainerAndDirectoryTest(); -} - TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); @@ -1033,18 +1012,6 @@ TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - this->CreateDirRecursiveSuccessContainerOnlyTest(); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - this->CreateDirRecursiveSuccessDirectoryOnlyTest(); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { - this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); -} - TEST_F(AzuriteFileSystemTest, CreateDirUri) { ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); } @@ -1057,10 +1024,6 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) { - this->DeleteDirSuccessEmptyTest(); -} - TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { const auto directory_path = internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); From 79145a578fdb08d6764fc3fae360b749e390e80c Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 12 Dec 2023 22:57:58 -0300 Subject: [PATCH 07/26] Make PreexistingData setup explicit --- cpp/src/arrow/filesystem/azurefs_test.cc | 448 ++++++++++++----------- 1 file changed, 242 insertions(+), 206 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index d94530d08ad..80c4ff9dcb5 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -265,7 +265,15 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } -auto const* kLoremIpsum = R"""( +struct PreexistingData { + public: + using RNG = std::mt19937_64; + + public: + const std::string container_name; + static constexpr char const* kObjectName = "test-object-name"; + + static constexpr char const* kLoremIpsum = R"""( Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. @@ -274,10 +282,47 @@ fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. )"""; + public: + explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {} + + // Accessors + std::string ContainerPath() const { return container_name + '/'; } + std::string ObjectPath() const { return ContainerPath() + kObjectName; } + std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; } + + std::string RandomDirectoryPath(RNG& rng) { + return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng)); + } + + // Utilities + + static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } + static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); } + + static std::string RandomLine(int lineno, std::size_t width, RNG& rng) { + auto line = std::to_string(lineno) + ": "; + line += RandomChars(width - line.size() - 1, rng); + line += '\n'; + return line; + } + + static std::size_t RandomIndex(std::size_t end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); + } + + static std::string RandomChars(std::size_t count, RNG& rng) { + auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); + std::uniform_int_distribution d(0, fillers.size() - 1); + std::string s; + std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); + return s; + } +}; + class AzureFileSystemTest : public ::testing::Test { protected: // Set in constructor - std::mt19937_64 generator_; + std::mt19937_64 rng_; // Set in SetUp() int64_t debug_log_start_ = 0; @@ -288,11 +333,8 @@ class AzureFileSystemTest : public ::testing::Test { std::unique_ptr blob_service_client_; std::unique_ptr datalake_service_client_; - // Set in SetUpPreexistingData() - std::string container_name_; - public: - AzureFileSystemTest() : generator_(std::random_device()()) {} + AzureFileSystemTest() : rng_(std::random_device()()) {} virtual Result GetAzureEnv() const = 0; @@ -322,15 +364,6 @@ class AzureFileSystemTest : public ::testing::Test { blob_service_client_ = options_.MakeBlobServiceClient(); datalake_service_client_ = options_.MakeDataLakeServiceClient(); set_up_succeeded_ = true; - - SetUpPreexistingData(); - } - - void SetUpPreexistingData() { - // Stop-gap solution before GH-39119 is fixed. - container_name_ = "z" + RandomChars(31); - auto container_client = CreateContainer(container_name_); - CreateBlob(container_client, PreexistingObjectName(), kLoremIpsum); } void TearDown() override { @@ -365,52 +398,21 @@ class AzureFileSystemTest : public ::testing::Test { return blob_client; } - std::string PreexistingContainerName() const { return container_name_; } - - std::string PreexistingContainerPath() const { - return PreexistingContainerName() + '/'; - } - - static std::string PreexistingObjectName() { return "test-object-name"; } - - std::string PreexistingObjectPath() const { - return PreexistingContainerPath() + PreexistingObjectName(); - } - - std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; } - - std::string RandomLine(int lineno, std::size_t width) { - auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - line.size() - 1); - line += '\n'; - return line; - } - - std::size_t RandomIndex(std::size_t end) { - return std::uniform_int_distribution(0, end - 1)(generator_); - } - - std::string RandomChars(std::size_t count) { - auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); - std::uniform_int_distribution d(0, fillers.size() - 1); - std::string s; - std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; }); - return s; - } - - std::string RandomContainerName() { return RandomChars(32); } - - std::string RandomDirectoryName() { return RandomChars(32); } - - void UploadLines(const std::vector& lines, const char* path_to_file, + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - const auto path = PreexistingContainerPath() + path_to_file; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); } + PreexistingData SetUpPreexistingData() { + PreexistingData data(rng_); + auto container_client = CreateContainer(data.container_name); + CreateBlob(container_client, data.kObjectName, PreexistingData::kLoremIpsum); + return data; + } + struct HierarchicalPaths { std::string container; std::string directory; @@ -418,10 +420,10 @@ class AzureFileSystemTest : public ::testing::Test { }; // Need to use "void" as the return type to use ASSERT_* in this method. - void CreateHierarchicalData(HierarchicalPaths& paths) { - const auto container_path = RandomContainerName(); - const auto directory_path = - internal::ConcatAbstractPath(container_path, RandomDirectoryName()); + void CreateHierarchicalData(HierarchicalPaths* paths) { + auto data = SetUpPreexistingData(); + const auto container_name = data.container_name; + const auto directory_path = data.RandomDirectoryPath(rng_); const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); const auto sub_blob_path = @@ -435,15 +437,15 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), container_path, FileType::Directory); + AssertFileInfo(fs_.get(), container_name, FileType::Directory); AssertFileInfo(fs_.get(), directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - paths.container = container_path; - paths.directory = directory_path; - paths.sub_paths = { + paths->container = container_name; + paths->directory = directory_path; + paths->sub_paths = { sub_directory_path, sub_blob_path, top_blob_path, @@ -470,7 +472,7 @@ class AzureFileSystemTest : public ::testing::Test { } void AssertInfoAllContainersRecursive(const std::vector& infos) { - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "container/emptydir", FileType::Directory); AssertFileInfo(infos[2], "container/otherdir", FileType::Directory); @@ -485,8 +487,6 @@ class AzureFileSystemTest : public ::testing::Test { strlen(kSubData)); AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData)); AssertFileInfo(infos[11], "empty-container", FileType::Directory); - AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); - AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); } bool WithHierarchicalNamespace() const { @@ -501,8 +501,9 @@ class AzureFileSystemTest : public ::testing::Test { void GetFileInfoObjectWithNestedStructureTest(); void DeleteDirSuccessEmptyTest() { + auto data = SetUpPreexistingData(); const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + internal::ConcatAbstractPath(data.container_name, data.RandomDirectoryName(rng_)); if (WithHierarchicalNamespace()) { ASSERT_OK(fs_->CreateDir(directory_path, true)); @@ -520,7 +521,8 @@ class AzureFileSystemTest : public ::testing::Test { } void CreateDirSuccessContainerAndDirectoryTest() { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs_->CreateDir(path, false)); if (WithHierarchicalNamespace()) { arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); @@ -532,13 +534,14 @@ class AzureFileSystemTest : public ::testing::Test { } void CreateDirRecursiveSuccessContainerOnlyTest() { - auto container_name = RandomContainerName(); + auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, true)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } void CreateDirRecursiveSuccessDirectoryOnlyTest() { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); const auto path = internal::ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); if (WithHierarchicalNamespace()) { @@ -553,34 +556,33 @@ class AzureFileSystemTest : public ::testing::Test { } void CreateDirRecursiveSuccessContainerAndDirectoryTest() { - auto container_name = RandomContainerName(); - const auto parent = - internal::ConcatAbstractPath(container_name, RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); const auto path = internal::ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); if (WithHierarchicalNamespace()) { arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); } } void DeleteDirContentsSuccessNonexistentTest() { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } void DeleteDirContentsFailureNonexistentTest() { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); } }; @@ -589,75 +591,75 @@ void AzureFileSystemTest::DetectHierarchicalNamespaceTest() { // Check the environments are implemented and injected here correctly. auto expected = WithHierarchicalNamespace(); + auto data = SetUpPreexistingData(); auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(PreexistingContainerName())); + ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); } void AzureFileSystemTest::GetFileInfoObjectTest() { + auto data = SetUpPreexistingData(); auto object_properties = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlobClient(PreexistingObjectName()) + blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlobClient(data.kObjectName) .GetProperties() .Value; - AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, + AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, std::chrono::system_clock::time_point{object_properties.LastModified}, static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); } void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { + auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; ASSERT_OK_AND_ASSIGN( auto output, - fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{})); - const std::string_view data(kLoremIpsum); - ASSERT_OK(output->Write(data)); + fs_->OpenOutputStream(data.ContainerPath() + kObjectName, /*metadata=*/{})); + const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. - ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream( - PreexistingContainerPath() + "test-object-dir/some_other_dir0", - /*metadata=*/{})); - ASSERT_OK(output->Write(data)); - ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0", - /*metadata=*/{})); - ASSERT_OK(output->Write(data)); + output, + fs_->OpenOutputStream(data.ContainerPath() + "test-object-dir/some_other_dir0", + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, + fs_->OpenOutputStream(data.ContainerPath() + kObjectName + "0", + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir", + AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName, FileType::File); + AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/", + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir", + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir", FileType::Directory); - AssertFileInfo(fs_.get(), - PreexistingContainerPath() + "test-object-dir/some_other_dir/", + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di", + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_di", FileType::NotFound); if (WithHierarchicalNamespace()) { - datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) + datalake_service_client_->GetFileSystemClient(data.container_name) .GetDirectoryClient("test-empty-object-dir") .Create(); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", + AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir", FileType::Directory); } } @@ -748,14 +750,14 @@ TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDi // Tests using a real storage account *with Hierarchical Namespace enabled* TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) { - const auto path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); ASSERT_RAISES(IOError, fs_->DeleteDir(path)); } TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view("hello"))); @@ -766,8 +768,8 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { } TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { - const auto parent = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); const auto path = internal::ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); @@ -778,8 +780,9 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { } TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) { + auto preexisting_data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.directory)); arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); for (const auto& sub_path : paths.sub_paths) { @@ -811,12 +814,13 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { } TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { - AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); + auto data = SetUpPreexistingData(); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); } TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { @@ -828,11 +832,10 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 3); + ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "empty-container", FileType::Directory); - AssertFileInfo(infos[2], container_name_, FileType::Directory); // Empty container select.base_dir = "empty-container"; @@ -898,7 +901,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); @@ -998,7 +1001,7 @@ TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { } TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { - auto container_name = RandomContainerName(); + auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, false)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } @@ -1013,11 +1016,13 @@ TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { } TEST_F(AzuriteFileSystemTest, CreateDirUri) { - ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); + ASSERT_RAISES( + Invalid, + fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { - const auto container_name = RandomContainerName(); + const auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); ASSERT_OK(fs_->DeleteDir(container_name)); @@ -1025,8 +1030,8 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { } TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // support. So the DeleteDir() for nonexistent directory does nothing. ASSERT_OK(fs_->DeleteDir(directory_path)); @@ -1038,8 +1043,8 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). // Because we can't add 257 or more delete blob requests to one SubmitBatch(). int64_t n_blobs = 257; @@ -1060,7 +1065,8 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } TEST_F(AzuriteFileSystemTest, DeleteDirUri) { - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.ContainerPath())); } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { @@ -1068,8 +1074,9 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.container)); arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -1083,8 +1090,9 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -1102,61 +1110,66 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { } TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), destination_path)); + internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), PreexistingObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { - ASSERT_RAISES(IOError, - fs_->CopyFile(PreexistingObjectPath(), - internal::EnsureTrailingSlash(PreexistingObjectPath()))); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), + internal::EnsureTrailingSlash(data.ObjectPath()))); } TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(NotFoundObjectPath(), destination_path)); + internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { - const auto destination_path = - internal::ConcatAbstractPath(RandomContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(PreexistingObjectPath(), destination_path)); + auto data = SetUpPreexistingData(); + const auto destination_path = internal::ConcatAbstractPath( + PreexistingData::RandomContainerName(rng_), "copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); } TEST_F(AzuriteFileSystemTest, CopyFileUri) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(Invalid, - fs_->CopyFile("abfs://" + PreexistingObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, - fs_->CopyFile(PreexistingObjectPath(), "abfs://" + destination_path)); + internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1165,23 +1178,25 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { contents.append(buffer->ToString()); } while (buffer && buffer->size() != 0); - EXPECT_EQ(contents, kLoremIpsum); + EXPECT_EQ(contents, PreexistingData::kLoremIpsum); } TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { + auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; - const auto path = PreexistingContainerPath() + path_to_file; - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + const auto path = data.ContainerPath() + path_to_file; + blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); @@ -1193,23 +1208,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { } TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(NotFoundObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); } TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ContainerPath())); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); } TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); } TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(PreexistingObjectPath() + '/')); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1248,8 +1267,9 @@ std::shared_ptr NormalizerKeyValueMetadata( }; // namespace TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1278,7 +1298,8 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { } TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1287,23 +1308,23 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { } TEST_F(AzuriteFileSystemTest, WriteMetadata) { + auto data = SetUpPreexistingData(); options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_)); std::string path = "object_with_defaults"; - auto location = PreexistingContainerPath() + path; + auto location = data.ContainerPath() + path; ASSERT_OK_AND_ASSIGN(auto output, fs_with_defaults->OpenOutputStream(location, /*metadata=*/{})); - const std::string_view expected(kLoremIpsum); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify the metadata has been set. - auto blob_metadata = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) - .GetProperties() - .Value.Metadata; + auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(path) + .GetProperties() + .Value.Metadata; EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); // Check that explicit metadata overrides the defaults. @@ -1312,7 +1333,7 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); - blob_metadata = blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path) .GetProperties() .Value.Metadata; @@ -1321,9 +1342,10 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { } TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { - const auto path = PreexistingContainerPath() + "test-write-object"; + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath() + "test-write-object"; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); - const std::string_view expected(kLoremIpsum); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); @@ -1337,7 +1359,8 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { } TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { - const auto path = PreexistingContainerPath() + "test-write-object"; + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath() + "test-write-object"; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ @@ -1368,7 +1391,8 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { } TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath() + "test-write-object"; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1383,7 +1407,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected0, std::string_view(inbuf.data(), size)); ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1394,7 +1418,8 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { } TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath() + "test-write-object"; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1409,7 +1434,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { EXPECT_EQ(expected0, std::string_view(inbuf.data())); ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1422,34 +1447,39 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { } TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { - const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-closed.txt"); + auto data = SetUpPreexistingData(); + const auto path = + internal::ConcatAbstractPath(data.container_name, "open-output-stream-closed.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); - ASSERT_RAISES(Invalid, output->Write(kLoremIpsum, std::strlen(kLoremIpsum))); + ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, + std::strlen(PreexistingData::kLoremIpsum))); ASSERT_RAISES(Invalid, output->Flush()); ASSERT_RAISES(Invalid, output->Tell()); } TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) { - const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-uri.txt"); + auto data = SetUpPreexistingData(); + const auto path = + internal::ConcatAbstractPath(data.container_name, "open-output-stream-uri.txt"); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); } TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileMixedReadVsReadAt/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = internal::ConcatAbstractPath( + data.container_name, "OpenInputFileMixedReadVsReadAt/object-name"); - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); @@ -1470,7 +1500,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } // Verify random reads interleave too. - auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(position, buffer.size(), buffer.data())); EXPECT_EQ(size, kLineWidth); @@ -1484,26 +1514,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileRandomSeek/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = internal::ConcatAbstractPath(data.container_name, + "OpenInputFileRandomSeek/object-name"); std::shared_ptr output; - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. - auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK(file->Seek(position)); ASSERT_OK_AND_ASSIGN(auto actual, file->Read(kLineWidth)); @@ -1512,14 +1544,14 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { } TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { + auto data = SetUpPreexistingData(); // Create a test file. const auto path_to_file = "OpenInputFileIoContext/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = data.ContainerPath() + path_to_file; const std::string contents = "The quick brown fox jumps over the lazy dog"; - auto blob_client = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path_to_file); + auto blob_client = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(path_to_file); blob_client.UploadFrom(reinterpret_cast(contents.data()), contents.length()); @@ -1529,7 +1561,8 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { } TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info)); @@ -1539,24 +1572,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { auto constexpr kStart = 16; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(kStart, buffer.size(), buffer.data())); - auto const expected = std::string(kLoremIpsum).substr(kStart); + auto const expected = std::string(PreexistingData::kLoremIpsum).substr(kStart); EXPECT_EQ(std::string(buffer.data(), size), expected); } TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputFile(NotFoundObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); } TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ContainerPath())); ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); } TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(PreexistingObjectPath())); + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Tell()); From 551f11cf9e4836fbf448072f0cadab93a16b53c5 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 12 Dec 2023 23:21:51 -0300 Subject: [PATCH 08/26] fixup! Refactor the Azure FS tests --- cpp/src/arrow/filesystem/azurefs_test.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 80c4ff9dcb5..e3056a60f30 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -677,7 +677,6 @@ class AzureFileSystemTestImpl : public AzureFileSystemTest { // You need an Azure account. You should be able to create a free account [1]. // Through the portal Web UI, you should create a storage account [2]. // -// // A few suggestions on configuration: // // * Use Standard general-purpose v2 not premium @@ -685,8 +684,6 @@ class AzureFileSystemTestImpl : public AzureFileSystemTest { // * Set the default access tier to hot // * SFTP, NFS and file shares are not required. // -// You need to enable Hierarchical Namespace on the storage account -// // You must not enable Hierarchical Namespace on the storage account used for // AzureFlatNSFileSystemTest, but you must enable it on the storage account // used for AzureHierarchicalNSFileSystemTest. From 907c2f7027e59b3685833423d2eb29a68e73630c Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:12:26 -0300 Subject: [PATCH 09/26] Explain all the factories for the Azure envs --- cpp/src/arrow/filesystem/azurefs_test.cc | 32 ++++++++++++++++++------ 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index e3056a60f30..8c7ee8165e3 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -101,16 +101,33 @@ class BaseAzureEnv : public ::testing::Environment { template class AzureEnvImpl : public BaseAzureEnv { - protected: + private: + /// \brief Factory function that registers the singleton instance as a global test + /// environment. Must be called only once per implementation (see GetInstance()). + /// + /// Every BaseAzureEnv implementation defines a static and parameter-less member + /// function called Make() that returns a Result>. + /// This templated function performs the following steps: + /// + /// 1) Calls AzureEnvClass::Make() to get an instance of AzureEnvClass. + /// 2) Passes ownership of the AzureEnvClass instance to the testing environment. + /// 3) Returns a Result wrapping the raw heap-allocated pointer. static Result MakeAndAddToGlobalTestEnvironment() { - ARROW_ASSIGN_OR_RAISE(auto instance, AzureEnvClass::Make()); - auto* heap_ptr = instance.release(); + ARROW_ASSIGN_OR_RAISE(auto env, AzureEnvClass::Make()); + auto* heap_ptr = env.release(); ::testing::AddGlobalTestEnvironment(heap_ptr); return heap_ptr; } + protected: using BaseAzureEnv::BaseAzureEnv; + /// \brief Create an AzureEnvClass instance from environment variables. + /// + /// Reads the account name and key from the environment variables. This can be + /// used in BaseAzureEnv implementations that don't need to do any additional + /// setup to create the singleton instance (e.g. AzureFlatNSEnv, + /// AzureHierarchicalNSEnv). static Result> MakeFromEnvVars( const std::string& account_name_var, const std::string& account_key_var) { const auto account_name = std::getenv(account_name_var.c_str()); @@ -128,11 +145,10 @@ class AzureEnvImpl : public BaseAzureEnv { ~AzureEnvImpl() override = default; static Result GetInstance() { - static auto env = MakeAndAddToGlobalTestEnvironment(); - if (env.ok()) { - return env; - } - return env.status(); + // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by storing the + // Result in a static variable. + static auto singleton_env = MakeAndAddToGlobalTestEnvironment(); + return singleton_env; } AzureBackend backend() const final { return AzureEnvClass::kBackend; } From 0e827a42eebb87146dcbda8ce197ed8467fb87be Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:14:29 -0300 Subject: [PATCH 10/26] Use instead of size_t --- cpp/src/arrow/filesystem/azurefs_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 8c7ee8165e3..f462d8c99b6 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -315,20 +315,20 @@ culpa qui officia deserunt mollit anim id est laborum. static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); } - static std::string RandomLine(int lineno, std::size_t width, RNG& rng) { + static std::string RandomLine(int lineno, int width, RNG& rng) { auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - line.size() - 1, rng); + line += RandomChars(width - static_cast(line.size()) - 1, rng); line += '\n'; return line; } - static std::size_t RandomIndex(std::size_t end, RNG& rng) { - return std::uniform_int_distribution(0, end - 1)(rng); + static int RandomIndex(int end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); } - static std::string RandomChars(std::size_t count, RNG& rng) { + static std::string RandomChars(int count, RNG& rng) { auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); - std::uniform_int_distribution d(0, fillers.size() - 1); + std::uniform_int_distribution d(0, static_cast(fillers.size()) - 1); std::string s; std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); return s; From ffea4e2beb1d075ace8c95ba5a0c37a0c89782d7 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:21:10 -0300 Subject: [PATCH 11/26] Remove bool from enum class AzureBackend --- cpp/src/arrow/filesystem/azurefs.cc | 12 ++++++------ cpp/src/arrow/filesystem/azurefs.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 045ebbf4083..99f3e05c905 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -49,12 +49,12 @@ AzureOptions::~AzureOptions() = default; bool AzureOptions::Equals(const AzureOptions& other) const { // TODO(GH-38598): update here when more auth methods are added. - const bool ret = backend == other.backend && - default_metadata == other.default_metadata && - account_blob_url_ == other.account_blob_url_ && - account_dfs_url_ == other.account_dfs_url_ && - credential_kind_ == other.credential_kind_; - if (!ret) { + const bool equals = backend == other.backend && + default_metadata == other.default_metadata && + account_blob_url_ == other.account_blob_url_ && + account_dfs_url_ == other.account_dfs_url_ && + credential_kind_ == other.credential_kind_; + if (!equals) { return false; } switch (credential_kind_) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 5ecf932046b..1e1f00225b6 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -43,7 +43,7 @@ class DataLakeServiceClient; namespace arrow::fs { -enum class AzureBackend : bool { +enum class AzureBackend { /// \brief Official Azure Remote Backend kAzure, /// \brief Local Simulated Storage From 502997a7bc619a64334462ae16dfb6e12f613a03 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:27:06 -0300 Subject: [PATCH 12/26] Fail if only one of the environment variables is set --- cpp/src/arrow/filesystem/azurefs_test.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index f462d8c99b6..e85176964fc 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -132,11 +132,19 @@ class AzureEnvImpl : public BaseAzureEnv { const std::string& account_name_var, const std::string& account_key_var) { const auto account_name = std::getenv(account_name_var.c_str()); const auto account_key = std::getenv(account_key_var.c_str()); + if (!account_name && !account_key) { + return Status::Cancelled(account_name_var + " and " + account_key_var + + " are not set. Skipping tests."); + } + // If only one of the variables is set. Don't cancel tests, + // fail with a Status::Invalid. if (!account_name) { - return Status::Cancelled(account_name_var + " not set."); + return Status::Invalid(account_name_var + " not set while " + account_key_var + + " is set."); } if (!account_key) { - return Status::Cancelled(account_key_var + " not set."); + return Status::Invalid(account_key_var + " not set while " + account_name_var + + " is set."); } return std::unique_ptr{new AzureEnvClass(account_name, account_key)}; } From e37a7b43be9d4f6cc174c48e002b485b4075f95d Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:51:38 -0300 Subject: [PATCH 13/26] Make PreexistingData::ContainerPath take an argument and use it instead of ConcatAbstractPath where applicable --- cpp/src/arrow/filesystem/azurefs_test.cc | 162 +++++++++++------------ 1 file changed, 74 insertions(+), 88 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index e85176964fc..b8b76b0ce27 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -64,6 +64,7 @@ namespace arrow { using internal::TemporaryDir; namespace fs { +using internal::ConcatAbstractPath; namespace { namespace bp = boost::process; @@ -309,30 +310,20 @@ culpa qui officia deserunt mollit anim id est laborum. public: explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {} - // Accessors - std::string ContainerPath() const { return container_name + '/'; } - std::string ObjectPath() const { return ContainerPath() + kObjectName; } - std::string NotFoundObjectPath() const { return ContainerPath() + "not-found"; } - - std::string RandomDirectoryPath(RNG& rng) { - return internal::ConcatAbstractPath(container_name, RandomDirectoryName(rng)); + // Creates a path by concatenating the container name and the stem. + std::string ContainerPath(std::string_view stem) const { + return ConcatAbstractPath(container_name, stem); } - // Utilities - - static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } - static std::string RandomDirectoryName(RNG& rng) { return RandomChars(32, rng); } + std::string ObjectPath() const { return ContainerPath(kObjectName); } + std::string NotFoundObjectPath() const { return ContainerPath("not-found"); } - static std::string RandomLine(int lineno, int width, RNG& rng) { - auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - static_cast(line.size()) - 1, rng); - line += '\n'; - return line; + std::string RandomDirectoryPath(RNG& rng) const { + return ContainerPath(RandomChars(32, rng)); } - static int RandomIndex(int end, RNG& rng) { - return std::uniform_int_distribution(0, end - 1)(rng); - } + // Utilities + static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } static std::string RandomChars(int count, RNG& rng) { auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); @@ -341,6 +332,17 @@ culpa qui officia deserunt mollit anim id est laborum. std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); return s; } + + static int RandomIndex(int end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); + } + + static std::string RandomLine(int lineno, int width, RNG& rng) { + auto line = std::to_string(lineno) + ": "; + line += RandomChars(width - static_cast(line.size()) - 1, rng); + line += '\n'; + return line; + } }; class AzureFileSystemTest : public ::testing::Test { @@ -446,13 +448,10 @@ class AzureFileSystemTest : public ::testing::Test { // Need to use "void" as the return type to use ASSERT_* in this method. void CreateHierarchicalData(HierarchicalPaths* paths) { auto data = SetUpPreexistingData(); - const auto container_name = data.container_name; const auto directory_path = data.RandomDirectoryPath(rng_); - const auto sub_directory_path = - internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = - internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); @@ -461,13 +460,13 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), container_name, FileType::Directory); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); AssertFileInfo(fs_.get(), directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - paths->container = container_name; + paths->container = data.container_name; paths->directory = directory_path; paths->sub_paths = { sub_directory_path, @@ -526,8 +525,7 @@ class AzureFileSystemTest : public ::testing::Test { void DeleteDirSuccessEmptyTest() { auto data = SetUpPreexistingData(); - const auto directory_path = - internal::ConcatAbstractPath(data.container_name, data.RandomDirectoryName(rng_)); + const auto directory_path = data.RandomDirectoryPath(rng_); if (WithHierarchicalNamespace()) { ASSERT_OK(fs_->CreateDir(directory_path, true)); @@ -566,7 +564,7 @@ class AzureFileSystemTest : public ::testing::Test { void CreateDirRecursiveSuccessDirectoryOnlyTest() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + const auto path = ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); if (WithHierarchicalNamespace()) { arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); @@ -582,7 +580,7 @@ class AzureFileSystemTest : public ::testing::Test { void CreateDirRecursiveSuccessContainerAndDirectoryTest() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + const auto path = ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); if (WithHierarchicalNamespace()) { arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); @@ -641,10 +639,9 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). - constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN( - auto output, - fs_->OpenOutputStream(data.ContainerPath() + kObjectName, /*metadata=*/{})); + const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); @@ -652,30 +649,28 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. ASSERT_OK_AND_ASSIGN( - output, - fs_->OpenOutputStream(data.ContainerPath() + "test-object-dir/some_other_dir0", - /*metadata=*/{})); + output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), + /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream(data.ContainerPath() + kObjectName + "0", + fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName, FileType::File); - AssertFileInfo(fs_.get(), data.ContainerPath() + kObjectName + "/", FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir", - FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/", + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_dir/", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-di", FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-object-dir/some_other_di", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); if (WithHierarchicalNamespace()) { @@ -683,7 +678,7 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { .GetDirectoryClient("test-empty-object-dir") .Create(); - AssertFileInfo(fs_.get(), data.ContainerPath() + "test-empty-object-dir", + AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), FileType::Directory); } } @@ -779,7 +774,7 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) { TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view("hello"))); ASSERT_OK(output->Close()); @@ -791,7 +786,7 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); + const auto path = ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs_->CreateDir(path, true)); arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); @@ -1070,8 +1065,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { // Because we can't add 257 or more delete blob requests to one SubmitBatch(). int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); @@ -1079,15 +1073,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } ASSERT_OK(fs_->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); } } TEST_F(AzuriteFileSystemTest, DeleteDirUri) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.ContainerPath())); + ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { @@ -1132,8 +1125,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { auto data = SetUpPreexistingData(); - const auto destination_path = - internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); @@ -1158,22 +1150,20 @@ TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { auto data = SetUpPreexistingData(); - const auto destination_path = - internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { auto data = SetUpPreexistingData(); - const auto destination_path = internal::ConcatAbstractPath( - PreexistingData::RandomContainerName(rng_), "copy-destionation"); + const auto destination_path = + ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); } TEST_F(AzuriteFileSystemTest, CopyFileUri) { auto data = SetUpPreexistingData(); - const auto destination_path = - internal::ConcatAbstractPath(data.container_name, "copy-destionation"); + const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } @@ -1216,7 +1206,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; - const auto path = data.ContainerPath() + path_to_file; + const auto path = data.ContainerPath(path_to_file); blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); @@ -1235,7 +1225,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ContainerPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); @@ -1333,17 +1323,17 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_)); - std::string path = "object_with_defaults"; - auto location = data.ContainerPath() + path; + std::string blob_path = "object_with_defaults"; + auto full_path = data.ContainerPath(blob_path); ASSERT_OK_AND_ASSIGN(auto output, - fs_with_defaults->OpenOutputStream(location, /*metadata=*/{})); + fs_with_defaults->OpenOutputStream(full_path, /*metadata=*/{})); const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify the metadata has been set. auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) - .GetBlockBlobClient(path) + .GetBlockBlobClient(blob_path) .GetProperties() .Value.Metadata; EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); @@ -1351,11 +1341,11 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { // Check that explicit metadata overrides the defaults. ASSERT_OK_AND_ASSIGN( output, fs_with_defaults->OpenOutputStream( - location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); + full_path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) - .GetBlockBlobClient(path) + .GetBlockBlobClient(blob_path) .GetProperties() .Value.Metadata; // Defaults are overwritten and not merged. @@ -1364,7 +1354,7 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { auto data = SetUpPreexistingData(); - const auto path = data.ContainerPath() + "test-write-object"; + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); @@ -1381,7 +1371,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { auto data = SetUpPreexistingData(); - const auto path = data.ContainerPath() + "test-write-object"; + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ @@ -1413,7 +1403,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { auto data = SetUpPreexistingData(); - const auto path = data.ContainerPath() + "test-write-object"; + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1440,7 +1430,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { auto data = SetUpPreexistingData(); - const auto path = data.ContainerPath() + "test-write-object"; + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1469,8 +1459,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { auto data = SetUpPreexistingData(); - const auto path = - internal::ConcatAbstractPath(data.container_name, "open-output-stream-closed.txt"); + const auto path = data.ContainerPath("open-output-stream-closed.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, @@ -1481,8 +1470,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) { auto data = SetUpPreexistingData(); - const auto path = - internal::ConcatAbstractPath(data.container_name, "open-output-stream-uri.txt"); + const auto path = data.ContainerPath("open-output-stream-uri.txt"); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); } @@ -1497,8 +1485,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); }); - const auto path = internal::ConcatAbstractPath( - data.container_name, "OpenInputFileMixedReadVsReadAt/object-name"); + const auto path = data.ContainerPath("OpenInputFileMixedReadVsReadAt/object-name"); UploadLines(lines, path, kLineCount * kLineWidth); @@ -1545,8 +1532,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); }); - const auto path = internal::ConcatAbstractPath(data.container_name, - "OpenInputFileRandomSeek/object-name"); + const auto path = data.ContainerPath("OpenInputFileRandomSeek/object-name"); std::shared_ptr output; UploadLines(lines, path, kLineCount * kLineWidth); @@ -1567,12 +1553,12 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { auto data = SetUpPreexistingData(); // Create a test file. - const auto path_to_file = "OpenInputFileIoContext/object-name"; - const auto path = data.ContainerPath() + path_to_file; + const auto blob_path = "OpenInputFileIoContext/object-name"; + const auto path = data.ContainerPath(blob_path); const std::string contents = "The quick brown fox jumps over the lazy dog"; auto blob_client = blob_service_client_->GetBlobContainerClient(data.container_name) - .GetBlockBlobClient(path_to_file); + .GetBlockBlobClient(blob_path); blob_client.UploadFrom(reinterpret_cast(contents.data()), contents.length()); @@ -1604,7 +1590,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) { TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ContainerPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name)); ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); From 0a31158dd2b0d51782342b3e247cbfb321c2ada2 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 17:54:17 -0300 Subject: [PATCH 14/26] Simplify condition --- cpp/src/arrow/filesystem/azurefs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index b8b76b0ce27..906eb44f267 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -380,7 +380,7 @@ class AzureFileSystemTest : public ::testing::Test { return MakeOptions(env); }; auto options_res = make_options(); - if (!options_res.ok() && options_res.status().IsCancelled()) { + if (options_res.status().IsCancelled()) { GTEST_SKIP() << options_res.status().message(); } else { EXPECT_OK_AND_ASSIGN(options_, std::move(options_res)); From dd9868f396b34e5dbe9ff42a08cdaf27f1cc31ca Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 18:41:32 -0300 Subject: [PATCH 15/26] Use ASSERT_RAISES instead of ASSERT_NOT_OK --- cpp/src/arrow/filesystem/azurefs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 906eb44f267..0acf9c7bbef 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -819,7 +819,7 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) { TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); + ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); } TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { From 5aab62161ba7f0d942a01f1f1ee39f6c6e2707be Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 19:05:38 -0300 Subject: [PATCH 16/26] Remove redundant std::move --- cpp/src/arrow/filesystem/azurefs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 0acf9c7bbef..1e4894a28b4 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -383,7 +383,7 @@ class AzureFileSystemTest : public ::testing::Test { if (options_res.status().IsCancelled()) { GTEST_SKIP() << options_res.status().message(); } else { - EXPECT_OK_AND_ASSIGN(options_, std::move(options_res)); + EXPECT_OK_AND_ASSIGN(options_, options_res); } ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); From 6b4993dde9a5a825f3e2044b86b4e00b2338a273 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 19:09:36 -0300 Subject: [PATCH 17/26] Make AzureOptions factories return a Result<> --- cpp/src/arrow/filesystem/azurefs.cc | 17 ++++++++--------- cpp/src/arrow/filesystem/azurefs.h | 4 ++-- cpp/src/arrow/filesystem/azurefs_test.cc | 4 ++-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 99f3e05c905..03bffe8f7ce 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -83,7 +83,8 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_na return Status::OK(); } -std::unique_ptr AzureOptions::MakeBlobServiceClient() const { +Result> AzureOptions::MakeBlobServiceClient() + const { switch (credential_kind_) { case CredentialKind::kAnonymous: break; @@ -91,12 +92,11 @@ std::unique_ptr AzureOptions::MakeBlobServiceClient() return std::make_unique(account_blob_url_, storage_shared_key_credential_); } - DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration"; - return nullptr; + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } -std::unique_ptr AzureOptions::MakeDataLakeServiceClient() - const { +Result> +AzureOptions::MakeDataLakeServiceClient() const { switch (credential_kind_) { case CredentialKind::kAnonymous: break; @@ -104,8 +104,7 @@ std::unique_ptr AzureOptions::MakeDataLakeServi return std::make_unique( account_dfs_url_, storage_shared_key_credential_); } - DCHECK(false) << "AzureOptions doesn't contain a valid auth configuration"; - return nullptr; + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } namespace { @@ -760,8 +759,8 @@ class AzureFileSystem::Impl { : io_context_(io_context), options_(std::move(options)) {} Status Init() { - blob_service_client_ = options_.MakeBlobServiceClient(); - datalake_service_client_ = options_.MakeDataLakeServiceClient(); + ARROW_ASSIGN_OR_RAISE(blob_service_client_, options_.MakeBlobServiceClient()); + ARROW_ASSIGN_OR_RAISE(datalake_service_client_, options_.MakeDataLakeServiceClient()); return hierarchical_namespace_.Init(datalake_service_client_.get()); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 1e1f00225b6..a4d30d5fb34 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -88,9 +88,9 @@ struct ARROW_EXPORT AzureOptions { const std::string& AccountBlobUrl() const { return account_blob_url_; } const std::string& AccountDfsUrl() const { return account_dfs_url_; } - std::unique_ptr MakeBlobServiceClient() const; + Result> MakeBlobServiceClient() const; - std::unique_ptr + Result> MakeDataLakeServiceClient() const; }; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 1e4894a28b4..1cf5b479478 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -387,8 +387,8 @@ class AzureFileSystemTest : public ::testing::Test { } ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - blob_service_client_ = options_.MakeBlobServiceClient(); - datalake_service_client_ = options_.MakeDataLakeServiceClient(); + EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); + EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); set_up_succeeded_ = true; } From 7fef9f38b7692a9d828ac97df1bf0ca9c0705d4d Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 19:16:08 -0300 Subject: [PATCH 18/26] Rename all the tests to have Test- in the beginning --- cpp/src/arrow/filesystem/azurefs_test.cc | 180 +++++++++++------------ 1 file changed, 90 insertions(+), 90 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 1cf5b479478..1c6a738d74e 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -345,7 +345,7 @@ culpa qui officia deserunt mollit anim id est laborum. } }; -class AzureFileSystemTest : public ::testing::Test { +class TestAzureFileSystem : public ::testing::Test { protected: // Set in constructor std::mt19937_64 rng_; @@ -360,7 +360,7 @@ class AzureFileSystemTest : public ::testing::Test { std::unique_ptr datalake_service_client_; public: - AzureFileSystemTest() : rng_(std::random_device()()) {} + TestAzureFileSystem() : rng_(std::random_device()()) {} virtual Result GetAzureEnv() const = 0; @@ -517,13 +517,13 @@ class AzureFileSystemTest : public ::testing::Test { return env->WithHierarchicalNamespace(); } - // Tests that are called from more than one implementation of AzureFileSystemTest + // Tests that are called from more than one implementation of TestAzureFileSystem - void DetectHierarchicalNamespaceTest(); - void GetFileInfoObjectTest(); - void GetFileInfoObjectWithNestedStructureTest(); + void TestDetectHierarchicalNamespace(); + void TestGetFileInfoObject(); + void TestGetFileInfoObjectWithNestedStructure(); - void DeleteDirSuccessEmptyTest() { + void TestDeleteDirSuccessEmpty() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); @@ -542,7 +542,7 @@ class AzureFileSystemTest : public ::testing::Test { } } - void CreateDirSuccessContainerAndDirectoryTest() { + void TestCreateDirSuccessContainerAndDirectory() { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs_->CreateDir(path, false)); @@ -555,13 +555,13 @@ class AzureFileSystemTest : public ::testing::Test { } } - void CreateDirRecursiveSuccessContainerOnlyTest() { + void TestCreateDirRecursiveSuccessContainerOnly() { auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, true)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } - void CreateDirRecursiveSuccessDirectoryOnlyTest() { + void TestCreateDirRecursiveSuccessDirectoryOnly() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); @@ -577,7 +577,7 @@ class AzureFileSystemTest : public ::testing::Test { } } - void CreateDirRecursiveSuccessContainerAndDirectoryTest() { + void TestCreateDirRecursiveSuccessContainerAndDirectory() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); @@ -595,21 +595,21 @@ class AzureFileSystemTest : public ::testing::Test { } } - void DeleteDirContentsSuccessNonexistentTest() { + void TestDeleteDirContentsSuccessNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } - void DeleteDirContentsFailureNonexistentTest() { + void TestDeleteDirContentsFailureNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); } }; -void AzureFileSystemTest::DetectHierarchicalNamespaceTest() { +void TestAzureFileSystem::TestDetectHierarchicalNamespace() { // Check the environments are implemented and injected here correctly. auto expected = WithHierarchicalNamespace(); @@ -619,7 +619,7 @@ void AzureFileSystemTest::DetectHierarchicalNamespaceTest() { ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); } -void AzureFileSystemTest::GetFileInfoObjectTest() { +void TestAzureFileSystem::TestGetFileInfoObject() { auto data = SetUpPreexistingData(); auto object_properties = blob_service_client_->GetBlobContainerClient(data.container_name) @@ -635,7 +635,7 @@ void AzureFileSystemTest::GetFileInfoObjectTest() { ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); } -void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { +void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). @@ -684,9 +684,9 @@ void AzureFileSystemTest::GetFileInfoObjectWithNestedStructureTest() { } template -class AzureFileSystemTestImpl : public AzureFileSystemTest { +class AzureFileSystemTestImpl : public TestAzureFileSystem { public: - using AzureFileSystemTest::AzureFileSystemTest; + using TestAzureFileSystem::TestAzureFileSystem; Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } }; @@ -704,8 +704,8 @@ class AzureFileSystemTestImpl : public AzureFileSystemTest { // * SFTP, NFS and file shares are not required. // // You must not enable Hierarchical Namespace on the storage account used for -// AzureFlatNSFileSystemTest, but you must enable it on the storage account -// used for AzureHierarchicalNSFileSystemTest. +// TestAzureFlatNSFileSystem, but you must enable it on the storage account +// used for TestAzureHierarchicalNSFileSystem. // // The credentials should be placed in the correct environment variables: // @@ -717,9 +717,9 @@ class AzureFileSystemTestImpl : public AzureFileSystemTest { // [1]: https://azure.microsoft.com/en-gb/free/ // [2]: // https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -using AzureFlatNSFileSystemTest = AzureFileSystemTestImpl; -using AzureHierarchicalNSFileSystemTest = AzureFileSystemTestImpl; -using AzuriteFileSystemTest = AzureFileSystemTestImpl; +using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; +using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; +using TestAzuriteFileSystem = AzureFileSystemTestImpl; // Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) @@ -732,46 +732,46 @@ using AllEnvironments = TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { - this->DetectHierarchicalNamespaceTest(); + this->TestDetectHierarchicalNamespace(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { - this->GetFileInfoObjectTest(); + this->TestGetFileInfoObject(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { - this->DeleteDirSuccessEmptyTest(); + this->TestDeleteDirSuccessEmpty(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { - this->GetFileInfoObjectWithNestedStructureTest(); + this->TestGetFileInfoObjectWithNestedStructure(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { - this->CreateDirSuccessContainerAndDirectoryTest(); + this->TestCreateDirSuccessContainerAndDirectory(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { - this->CreateDirRecursiveSuccessContainerOnlyTest(); + this->TestCreateDirRecursiveSuccessContainerOnly(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { - this->CreateDirRecursiveSuccessDirectoryOnlyTest(); + this->TestCreateDirRecursiveSuccessDirectoryOnly(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { - this->CreateDirRecursiveSuccessContainerAndDirectoryTest(); + this->TestCreateDirRecursiveSuccessContainerAndDirectory(); } // Tests using a real storage account *with Hierarchical Namespace enabled* -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirFailureNonexistent) { +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); ASSERT_RAISES(IOError, fs_->DeleteDir(path)); } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); @@ -783,7 +783,7 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveBlob) { arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); @@ -795,7 +795,7 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirSuccessHaveDirectory) { arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) { +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { auto preexisting_data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); @@ -806,30 +806,30 @@ TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessExist) { } } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsSuccessNonexistent) { - this->DeleteDirContentsSuccessNonexistentTest(); +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(AzureHierarchicalNSFileSystemTest, DeleteDirContentsFailureNonexistent) { - this->DeleteDirContentsFailureNonexistentTest(); +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); } // Tests using Azurite (the local Azure emulator) -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { +TEST_F(TestAzuriteFileSystem, DetectHierarchicalNamespaceFailsWithMissingContainer) { auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { +TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { AssertFileInfo(fs_.get(), "", FileType::Directory); // URI ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { +TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { auto data = SetUpPreexistingData(); AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); @@ -839,7 +839,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); FileSelector select; @@ -907,7 +907,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { ASSERT_EQ(infos.size(), 4); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { SetUpSmallFileSystemTree(); FileSelector select; @@ -965,7 +965,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile", FileType::File, 10); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { { auto container = CreateContainer("container"); CreateBlob(container, "mydir/emptydir1/"); @@ -1012,32 +1012,32 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { +TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, false)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } -TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzuriteFileSystemTest, CreateDirUri) { +TEST_F(TestAzuriteFileSystem, CreateDirUri) { ASSERT_RAISES( Invalid, fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { const auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); @@ -1045,7 +1045,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace @@ -1054,7 +1054,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; @@ -1078,12 +1078,12 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirUri) { +TEST_F(TestAzuriteFileSystem, DeleteDirUri) { auto data = SetUpPreexistingData(); ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; @@ -1099,7 +1099,7 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; @@ -1115,15 +1115,15 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { - this->DeleteDirContentsSuccessNonexistentTest(); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { - this->DeleteDirContentsFailureNonexistentTest(); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); @@ -1133,7 +1133,7 @@ TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { auto data = SetUpPreexistingData(); ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); @@ -1142,33 +1142,33 @@ TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { auto data = SetUpPreexistingData(); ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), internal::EnsureTrailingSlash(data.ObjectPath()))); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileUri) { +TEST_F(TestAzuriteFileSystem, CopyFileUri) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { auto data = SetUpPreexistingData(); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); @@ -1177,7 +1177,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { auto data = SetUpPreexistingData(); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); @@ -1192,7 +1192,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { EXPECT_EQ(contents, PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); @@ -1203,7 +1203,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; const auto path = data.ContainerPath(path_to_file); @@ -1218,12 +1218,12 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { EXPECT_EQ(size, 0); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { auto data = SetUpPreexistingData(); ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); @@ -1232,12 +1232,12 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { auto data = SetUpPreexistingData(); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { auto data = SetUpPreexistingData(); ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); } @@ -1277,7 +1277,7 @@ std::shared_ptr NormalizerKeyValueMetadata( } }; // namespace -TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { auto data = SetUpPreexistingData(); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); @@ -1308,7 +1308,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { NormalizerKeyValueMetadata(actual)->ToString()); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); @@ -1318,7 +1318,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { ASSERT_RAISES(Invalid, stream->Tell()); } -TEST_F(AzuriteFileSystemTest, WriteMetadata) { +TEST_F(TestAzuriteFileSystem, WriteMetadata) { auto data = SetUpPreexistingData(); options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); @@ -1352,7 +1352,7 @@ TEST_F(AzuriteFileSystemTest, WriteMetadata) { EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { +TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); @@ -1369,7 +1369,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { EXPECT_EQ(expected, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { +TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); @@ -1401,7 +1401,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { EXPECT_EQ(contents, buffers[0] + buffers[1] + buffers[2]); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { +TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); @@ -1428,7 +1428,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected1, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { +TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); @@ -1457,7 +1457,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { std::string(expected0) + std::string(expected1)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { +TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-closed.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); @@ -1468,13 +1468,13 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { ASSERT_RAISES(Invalid, output->Tell()); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) { +TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-uri.txt"); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { +TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; @@ -1521,7 +1521,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { +TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) { auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; @@ -1550,7 +1550,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { +TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) { auto data = SetUpPreexistingData(); // Create a test file. const auto blob_path = "OpenInputFileIoContext/object-name"; @@ -1567,7 +1567,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id()); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { +TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); @@ -1583,12 +1583,12 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { EXPECT_EQ(std::string(buffer.data(), size), expected); } -TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) { +TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) { auto data = SetUpPreexistingData(); ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { +TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name)); ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); @@ -1597,7 +1597,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) { +TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { auto data = SetUpPreexistingData(); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); From 272aa82c503fd7101fe67d21cd4575db59266326 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 19:29:46 -0300 Subject: [PATCH 19/26] Rename hierarchical_namespace_ to hns_detector_ --- cpp/src/arrow/filesystem/azurefs.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 03bffe8f7ce..d36df9ebe37 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -753,7 +753,7 @@ class AzureFileSystem::Impl { datalake_service_client_; std::unique_ptr blob_service_client_; AzureOptions options_; - internal::HierarchicalNamespaceDetector hierarchical_namespace_; + internal::HierarchicalNamespaceDetector hns_detector_; Impl(AzureOptions options, io::IOContext io_context) : io_context_(io_context), options_(std::move(options)) {} @@ -761,7 +761,7 @@ class AzureFileSystem::Impl { Status Init() { ARROW_ASSIGN_OR_RAISE(blob_service_client_, options_.MakeBlobServiceClient()); ARROW_ASSIGN_OR_RAISE(datalake_service_client_, options_.MakeDataLakeServiceClient()); - return hierarchical_namespace_.Init(datalake_service_client_.get()); + return hns_detector_.Init(datalake_service_client_.get()); } const AzureOptions& options() const { return options_; } @@ -827,7 +827,7 @@ class AzureFileSystem::Impl { } catch (const Storage::StorageException& exception) { if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { // If the hierarchical namespace is enabled, then the storage account will have // explicit directories. Neither a file nor a directory was found. @@ -1127,7 +1127,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` @@ -1171,7 +1171,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` @@ -1319,7 +1319,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto directory_client = datalake_service_client_->GetFileSystemClient(location.container) @@ -1351,7 +1351,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto file_system_client = datalake_service_client_->GetFileSystemClient(location.container); From 5e47e2baaf0eff97e34000166724f131cdf55700 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 13 Dec 2023 21:40:13 -0300 Subject: [PATCH 20/26] Fix lint error --- cpp/src/arrow/filesystem/azurefs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index a4d30d5fb34..01dd6a81aec 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -88,7 +88,8 @@ struct ARROW_EXPORT AzureOptions { const std::string& AccountBlobUrl() const { return account_blob_url_; } const std::string& AccountDfsUrl() const { return account_dfs_url_; } - Result> MakeBlobServiceClient() const; + Result> + MakeBlobServiceClient() const; Result> MakeDataLakeServiceClient() const; From bdf8cb37957140d694294e6d80c4950e2da4835d Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 12:32:44 -0300 Subject: [PATCH 21/26] azurefs.cc: Alias the Azure::Core::HTTP namespace as well --- cpp/src/arrow/filesystem/azurefs.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d36df9ebe37..8d506565cf2 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -38,6 +38,7 @@ namespace arrow::fs { namespace Blobs = Azure::Storage::Blobs; namespace Core = Azure::Core; namespace DataLake = Azure::Storage::Files::DataLake; +namespace Http = Azure::Core::Http; namespace Storage = Azure::Storage; // ----------------------------------------------------------------------- @@ -194,7 +195,7 @@ Status ValidateFileLocation(const AzureLocation& location) { return internal::AssertNoTrailingSlash(location.path); } -std::string_view BodyTextView(const Core::Http::RawResponse& raw_response) { +std::string_view BodyTextView(const Http::RawResponse& raw_response) { const auto& body = raw_response.GetBody(); #ifndef NDEBUG auto& headers = raw_response.GetHeaders(); @@ -207,7 +208,7 @@ std::string_view BodyTextView(const Core::Http::RawResponse& raw_response) { } Status StatusFromErrorResponse(const std::string& url, - const Core::Http::RawResponse& raw_response, + const Http::RawResponse& raw_response, const std::string& context) { // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that @@ -220,7 +221,7 @@ Status StatusFromErrorResponse(const std::string& url, bool IsContainerNotFound(const Storage::StorageException& exception) { if (exception.ErrorCode == "ContainerNotFound") { - DCHECK_EQ(exception.StatusCode, Core::Http::HttpStatusCode::NotFound); + DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound); return true; } return false; @@ -391,7 +392,7 @@ class ObjectInputFile final : public io::RandomAccessFile { metadata_ = PropertiesToMetadata(properties.Value); return Status::OK(); } catch (const Storage::StorageException& exception) { - if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } return internal::ExceptionToStatus( @@ -467,7 +468,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } // Read the desired range of bytes - Core::Http::HttpRange range{position, nbytes}; + Http::HttpRange range{position, nbytes}; Storage::Blobs::DownloadBlobToOptions download_options; download_options.Range = range; try { @@ -615,7 +616,7 @@ class ObjectAppendStream final : public io::OutputStream { content_length_ = properties.Value.BlobSize; pos_ = content_length_; } catch (const Storage::StorageException& exception) { - if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { return internal::ExceptionToStatus( @@ -825,7 +826,7 @@ class AzureFileSystem::Impl { std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; } catch (const Storage::StorageException& exception) { - if (exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { @@ -1385,8 +1386,7 @@ class AzureFileSystem::Impl { } } } catch (const Storage::StorageException& exception) { - if (missing_dir_ok && - exception.StatusCode == Core::Http::HttpStatusCode::NotFound) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { return internal::ExceptionToStatus( From e45d94d448e83b8ec6177d4098c3ec2eec9752b4 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 12:43:16 -0300 Subject: [PATCH 22/26] Move ExceptionToStatus to azurefs.cc --- cpp/src/arrow/filesystem/azurefs.cc | 113 +++++++++---------- cpp/src/arrow/filesystem/azurefs_internal.cc | 6 + cpp/src/arrow/filesystem/azurefs_internal.h | 3 - 3 files changed, 60 insertions(+), 62 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8d506565cf2..a1c9a312130 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -177,6 +177,11 @@ struct AzureLocation { } }; +Status ExceptionToStatus(const std::string& prefix, + const Azure::Storage::StorageException& exception) { + return Status::IOError(prefix, " Azure Error: ", exception.what()); +} + Status PathNotFound(const AzureLocation& location) { return ::arrow::fs::internal::PathNotFound(location.all); } @@ -395,7 +400,7 @@ class ObjectInputFile final : public io::RandomAccessFile { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " "without knowing the file size.", @@ -476,12 +481,12 @@ class ObjectInputFile final : public io::RandomAccessFile { ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + - "' at position " + std::to_string(position) + - " for " + std::to_string(nbytes) + - " bytes failed with an Azure error. ReadAt " - "failed to read the required byte range.", - exception); + return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + + "' at position " + std::to_string(position) + " for " + + std::to_string(nbytes) + + " bytes failed with an Azure error. ReadAt " + "failed to read the required byte range.", + exception); } } @@ -530,7 +535,7 @@ Status CreateEmptyBlockBlob(std::shared_ptr block_blob_c try { block_blob_client->UploadFrom(nullptr, 0); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. There is no existing blob at this " "location or the existing blob must be replaced so ObjectAppendStream must " @@ -545,7 +550,7 @@ Result GetBlockList( try { return block_blob_client->GetBlockList().Value; } catch (Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Cannot write to a file without first " "fetching the existing block list.", @@ -574,7 +579,7 @@ Status CommitBlockList(std::shared_ptr block_bl // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body block_blob_client->CommitBlockList(block_ids, options); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Committing is required to flush an " "output/append stream.", @@ -619,7 +624,7 @@ class ObjectAppendStream final : public io::OutputStream { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an " "ObjectAppendStream without knowing whether a file already exists at " @@ -718,7 +723,7 @@ class ObjectAppendStream final : public io::OutputStream { try { block_blob_client_->StageBlock(new_block_id, block_content); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + "' with an unexpected Azure error. Staging new blocks is fundamental to " @@ -795,7 +800,7 @@ class AzureFileSystem::Impl { info.set_type(FileType::NotFound); return info; } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + container_client.GetUrl() + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the container exists.", @@ -854,14 +859,14 @@ class AzureFileSystem::Impl { info.set_type(file_type); return info; } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the path should be considered an implied directory.", exception); } } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + file_client.GetUrl() + "' failed with an unexpected " "Azure error. GetFileInfo is unable to determine whether the path exists.", @@ -883,7 +888,7 @@ class AzureFileSystem::Impl { } } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus("Failed to list account containers.", exception); + return ExceptionToStatus("Failed to list account containers.", exception); } return Status::OK(); } @@ -1015,10 +1020,9 @@ class AzureFileSystem::Impl { if (IsContainerNotFound(exception)) { found = false; } else { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + select.base_dir + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to list blobs in a directory: " + + select.base_dir + ": " + container_client.GetUrl(), + exception); } } @@ -1120,10 +1124,9 @@ class AzureFileSystem::Impl { "Failed to create a container: " + location.container); } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to create a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } @@ -1148,10 +1151,9 @@ class AzureFileSystem::Impl { "Failed to create a directory: " + location.path); } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to create a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } @@ -1165,10 +1167,9 @@ class AzureFileSystem::Impl { try { container_client.CreateIfNotExists(); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + " (" + - container_client.GetUrl() + ")", - exception); + return ExceptionToStatus("Failed to create a container: " + location.container + + " (" + container_client.GetUrl() + ")", + exception); } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, @@ -1187,10 +1188,9 @@ class AzureFileSystem::Impl { try { directory_client.CreateIfNotExists(); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + " (" + - directory_client.GetUrl() + ")", - exception); + return ExceptionToStatus("Failed to create a directory: " + location.path + " (" + + directory_client.GetUrl() + ")", + exception); } } @@ -1253,10 +1253,9 @@ class AzureFileSystem::Impl { try { container_client.SubmitBatch(batch); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete blobs in a directory: " + + location.path + ": " + container_client.GetUrl(), + exception); } std::vector failed_blob_names; for (size_t i = 0; i < deferred_responses.size(); ++i) { @@ -1285,10 +1284,9 @@ class AzureFileSystem::Impl { } } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to list blobs in a directory: " + location.path + + ": " + container_client.GetUrl(), + exception); } return Status::OK(); } @@ -1312,10 +1310,9 @@ class AzureFileSystem::Impl { "Failed to delete a container: " + location.container); } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } @@ -1335,10 +1332,9 @@ class AzureFileSystem::Impl { "Failed to delete a directory: " + location.path); } } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } else { return DeleteDirContentsWithoutHierarchicalNamespace(location, @@ -1367,7 +1363,7 @@ class AzureFileSystem::Impl { try { sub_directory_client.DeleteRecursive(); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "Failed to delete a sub directory: " + location.container + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), exception); @@ -1377,7 +1373,7 @@ class AzureFileSystem::Impl { try { sub_file_client.Delete(); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "Failed to delete a sub file: " + location.container + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), exception); @@ -1389,10 +1385,9 @@ class AzureFileSystem::Impl { if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { - return internal::ExceptionToStatus( - "Failed to delete directory contents: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete directory contents: " + + location.path + ": " + directory_client.GetUrl(), + exception); } } return Status::OK(); @@ -1415,7 +1410,7 @@ class AzureFileSystem::Impl { try { dest_blob_client.CopyFromUri(src_url); } catch (const Storage::StorageException& exception) { - return internal::ExceptionToStatus( + return ExceptionToStatus( "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", exception); } diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc index 3e545d670cb..c8cda0406ab 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ b/cpp/src/arrow/filesystem/azurefs_internal.cc @@ -23,11 +23,17 @@ namespace arrow::fs::internal { +namespace { + +// TODO(GH-38772): Remote azurefs_internal.h/.cc by moving the detector to +// azurefs.cc (which contains a private copy of this helper function already). Status ExceptionToStatus(const std::string& prefix, const Azure::Storage::StorageException& exception) { return Status::IOError(prefix, " Azure Error: ", exception.what()); } +} // namespace + Status HierarchicalNamespaceDetector::Init( Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) { datalake_service_client_ = datalake_service_client; diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index c3da96239a1..92592cf164f 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -25,9 +25,6 @@ namespace arrow::fs::internal { -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception); - class HierarchicalNamespaceDetector { public: Status Init( From 7d2d78946353a198da912502c2189dc172c47782 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 12:54:42 -0300 Subject: [PATCH 23/26] Prevent half-constructed state and remove AzureFileSystem::Init() --- cpp/src/arrow/filesystem/azurefs.cc | 54 ++++++++++++++++------------- cpp/src/arrow/filesystem/azurefs.h | 18 +++++----- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index a1c9a312130..d6761922641 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -753,23 +753,31 @@ class ObjectAppendStream final : public io::OutputStream { // AzureFilesystem Implementation class AzureFileSystem::Impl { - public: + private: io::IOContext io_context_; - std::unique_ptr - datalake_service_client_; - std::unique_ptr blob_service_client_; AzureOptions options_; + internal::HierarchicalNamespaceDetector hns_detector_; + std::unique_ptr datalake_service_client_; + std::unique_ptr blob_service_client_; Impl(AzureOptions options, io::IOContext io_context) - : io_context_(io_context), options_(std::move(options)) {} - - Status Init() { - ARROW_ASSIGN_OR_RAISE(blob_service_client_, options_.MakeBlobServiceClient()); - ARROW_ASSIGN_OR_RAISE(datalake_service_client_, options_.MakeDataLakeServiceClient()); - return hns_detector_.Init(datalake_service_client_.get()); - } + : io_context_(std::move(io_context)), options_(std::move(options)) {} + public: + static Result> Make(AzureOptions options, + io::IOContext io_context) { + auto self = std::unique_ptr( + new AzureFileSystem::Impl(std::move(options), std::move(io_context))); + ARROW_ASSIGN_OR_RAISE(self->blob_service_client_, + self->options_.MakeBlobServiceClient()); + ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_, + self->options_.MakeDataLakeServiceClient()); + RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get())); + return self; + } + + io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } public: @@ -1418,6 +1426,17 @@ class AzureFileSystem::Impl { } }; +AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) + : FileSystem(impl->io_context()), impl_(std::move(impl)) { + default_async_is_sync_ = false; +} + +Result> AzureFileSystem::Make( + const AzureOptions& options, const io::IOContext& io_context) { + ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); + return std::shared_ptr(new AzureFileSystem(std::move(impl))); +} + const AzureOptions& AzureFileSystem::options() const { return impl_->options(); } bool AzureFileSystem::Equals(const FileSystem& other) const { @@ -1516,17 +1535,4 @@ Result> AzureFileSystem::OpenAppendStream( return impl_->OpenAppendStream(location, metadata, false, this); } -Result> AzureFileSystem::Make( - const AzureOptions& options, const io::IOContext& io_context) { - std::shared_ptr ptr(new AzureFileSystem(options, io_context)); - RETURN_NOT_OK(ptr->impl_->Init()); - return ptr; -} - -AzureFileSystem::AzureFileSystem(const AzureOptions& options, - const io::IOContext& io_context) - : FileSystem(io_context), impl_(std::make_unique(options, io_context)) { - default_async_is_sync_ = false; -} - } // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 01dd6a81aec..1266aa2d02b 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -125,9 +125,18 @@ struct ARROW_EXPORT AzureOptions { /// [3]: /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues class ARROW_EXPORT AzureFileSystem : public FileSystem { + private: + class Impl; + std::unique_ptr impl_; + + explicit AzureFileSystem(std::unique_ptr&& impl); + public: ~AzureFileSystem() override = default; + static Result> Make( + const AzureOptions& options, const io::IOContext& = io::default_io_context()); + std::string type_name() const override { return "abfs"; } /// Return the original Azure options when constructing the filesystem @@ -171,15 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { Result> OpenAppendStream( const std::string& path, const std::shared_ptr& metadata = {}) override; - - static Result> Make( - const AzureOptions& options, const io::IOContext& = io::default_io_context()); - - private: - AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); - - class Impl; - std::unique_ptr impl_; }; } // namespace arrow::fs From ccd1f1dfa853594847d92374c21cdb8382d58f22 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 13:19:12 -0300 Subject: [PATCH 24/26] Remove redundant virtual destructors --- cpp/src/arrow/filesystem/azurefs_test.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 1c6a738d74e..463ff4e8daf 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -85,8 +85,6 @@ class BaseAzureEnv : public ::testing::Environment { : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {} public: - ~BaseAzureEnv() override = default; - const std::string& account_name() const { return account_name_; } const std::string& account_key() const { return account_key_; } @@ -151,8 +149,6 @@ class AzureEnvImpl : public BaseAzureEnv { } public: - ~AzureEnvImpl() override = default; - static Result GetInstance() { // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by storing the // Result in a static variable. @@ -249,8 +245,6 @@ class AzureFlatNSEnv : public AzureEnvImpl { public: static const AzureBackend kBackend = AzureBackend::kAzure; - ~AzureFlatNSEnv() override = default; - static Result> Make() { return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME", "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); @@ -264,8 +258,6 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { public: static const AzureBackend kBackend = AzureBackend::kAzure; - ~AzureHierarchicalNSEnv() override = default; - static Result> Make() { return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME", "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); From 3b3c32979255030c45496ad88feb8f53b15d6435 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 13:27:05 -0300 Subject: [PATCH 25/26] typo --- cpp/src/arrow/filesystem/azurefs_internal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc index c8cda0406ab..39c3fb23e3c 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ b/cpp/src/arrow/filesystem/azurefs_internal.cc @@ -25,7 +25,7 @@ namespace arrow::fs::internal { namespace { -// TODO(GH-38772): Remote azurefs_internal.h/.cc by moving the detector to +// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to // azurefs.cc (which contains a private copy of this helper function already). Status ExceptionToStatus(const std::string& prefix, const Azure::Storage::StorageException& exception) { From 4f45c674ee287dd82499f3d2910af886d58143f7 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 13:28:03 -0300 Subject: [PATCH 26/26] Correct ordering of fields --- cpp/src/arrow/filesystem/azurefs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d6761922641..21788536408 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -757,9 +757,9 @@ class AzureFileSystem::Impl { io::IOContext io_context_; AzureOptions options_; - internal::HierarchicalNamespaceDetector hns_detector_; std::unique_ptr datalake_service_client_; std::unique_ptr blob_service_client_; + internal::HierarchicalNamespaceDetector hns_detector_; Impl(AzureOptions options, io::IOContext io_context) : io_context_(std::move(io_context)), options_(std::move(options)) {}