From cc40c20ac8a96bfcf5752cdfea6f918c72e42366 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 27 Nov 2023 16:44:41 +0900 Subject: [PATCH 1/9] GH-38701: [C++][FS][Azure] Implement `DeleteDirContents()` --- cpp/src/arrow/filesystem/azurefs.cc | 181 ++++++++++++++++------- cpp/src/arrow/filesystem/azurefs_test.cc | 77 ++++++++++ 2 files changed, 203 insertions(+), 55 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 4dde275da13..6d2ce1599e9 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -970,6 +970,78 @@ class AzureFileSystem::Impl { return stream; } + private: + Status DeleteDirContentsWihtoutHierarchicalNamespace(const AzureLocation& location, + bool missing_dir_ok) { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = internal::EnsureTrailingSlash(location.path); + // https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks + // + // Only supports up to 256 subrequests in a single batch. The + // size of the body for a batch request can't exceed 4 MB. + const int32_t kNumMaxRequestsInBatch = 256; + options.PageSizeHint = kNumMaxRequestsInBatch; + try { + auto list_response = container_client.ListBlobs(options); + if (!missing_dir_ok && list_response.Blobs.empty()) { + return Status::IOError("Specified directory doesn't exist: ", location.path, ": ", + container_client.GetUrl()); + } + while (list_response.HasPage() && !list_response.Blobs.empty()) { + auto batch = container_client.CreateBatch(); + std::vector> + deferred_responses; + for (const auto& blob_item : list_response.Blobs) { + deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + } + try { + container_client.SubmitBatch(batch); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete blobs in a directory: " + location.path + ": " + + container_client.GetUrl(), + exception); + } + std::vector failed_blob_names; + for (size_t i = 0; i < deferred_responses.size(); ++i) { + const auto& deferred_response = deferred_responses[i]; + bool success = true; + try { + auto delete_result = deferred_response.GetResponse(); + success = delete_result.Value.Deleted; + } catch (const Azure::Storage::StorageException& exception) { + success = false; + } + if (!success) { + const auto& blob_item = list_response.Blobs[i]; + failed_blob_names.push_back(blob_item.Name); + } + } + if (!failed_blob_names.empty()) { + if (failed_blob_names.size() == 1) { + return Status::IOError("Failed to delete a blob: ", failed_blob_names[0], + ": " + container_client.GetUrl()); + } else { + return Status::IOError("Failed to delete blobs: [", + arrow::internal::JoinStrings(failed_blob_names, ", "), + "]: " + container_client.GetUrl()); + } + } + list_response.MoveToNextPage(); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to list blobs in a directory: " + location.path + ": " + + container_client.GetUrl(), + exception); + } + return Status::OK(); + } + + public: Status DeleteDir(const AzureLocation& location) { if (location.container.empty()) { return Status::Invalid("Cannot delete an empty container"); @@ -1017,69 +1089,67 @@ class AzureFileSystem::Impl { exception); } } else { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = internal::EnsureTrailingSlash(location.path); - // https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks - // - // Only supports up to 256 subrequests in a single batch. The - // size of the body for a batch request can't exceed 4 MB. - const int32_t kNumMaxRequestsInBatch = 256; - options.PageSizeHint = kNumMaxRequestsInBatch; + return DeleteDirContentsWihtoutHierarchicalNamespace(location, true); + } + } + + Status DeleteDirContents(const AzureLocation& location, bool missing_dir_ok) { + if (location.container.empty()) { + return internal::InvalidDeleteDirContents(location.all); + } + if (location.path.empty()) { + return internal::InvalidDeleteDirContents(location.all); + } + + ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, + hierarchical_namespace_.Enabled(location.container)); + if (hierarchical_namespace_enabled) { + auto file_system_client = + datalake_service_client_->GetFileSystemClient(location.container); + auto directory_client = file_system_client.GetDirectoryClient(location.path); try { - auto list_response = container_client.ListBlobs(options); - while (list_response.HasPage() && !list_response.Blobs.empty()) { - auto batch = container_client.CreateBatch(); - std::vector> - deferred_responses; - for (const auto& blob_item : list_response.Blobs) { - deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); - } - try { - container_client.SubmitBatch(batch); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); - } - std::vector failed_blob_names; - for (size_t i = 0; i < deferred_responses.size(); ++i) { - const auto& deferred_response = deferred_responses[i]; - bool success = true; - try { - auto delete_result = deferred_response.GetResponse(); - success = delete_result.Value.Deleted; - } catch (const Azure::Storage::StorageException& exception) { - success = false; - } - if (!success) { - const auto& blob_item = list_response.Blobs[i]; - failed_blob_names.push_back(blob_item.Name); - } - } - if (!failed_blob_names.empty()) { - if (failed_blob_names.size() == 1) { - return Status::IOError("Failed to delete a blob: ", failed_blob_names[0], - ": " + container_client.GetUrl()); + auto list_response = directory_client.ListPaths(false); + while (list_response.HasPage() && !list_response.Paths.empty()) { + for (const auto& path : list_response.Paths) { + if (path.IsDirectory) { + auto sub_directory_client = + file_system_client.GetDirectoryClient(path.Name); + try { + sub_directory_client.DeleteRecursive(); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a sub directory: " + location.container + + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), + exception); + } } else { - return Status::IOError( - "Failed to delete blobs: [", - arrow::internal::JoinStrings(failed_blob_names, ", "), - "]: " + container_client.GetUrl()); + auto sub_file_client = file_system_client.GetFileClient(path.Name); + try { + sub_file_client.Delete(); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a sub file: " + location.container + + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), + exception); + } } } list_response.MoveToNextPage(); } } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + if (missing_dir_ok && + exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + return Status::OK(); + } else { + return internal::ExceptionToStatus( + "Failed to delete directory contents: " + location.path + ": " + + directory_client.GetUrl(), + exception); + } } return Status::OK(); + } else { + return DeleteDirContentsWihtoutHierarchicalNamespace(location, missing_dir_ok); } } }; @@ -1121,7 +1191,8 @@ Status AzureFileSystem::DeleteDir(const std::string& path) { } Status AzureFileSystem::DeleteDirContents(const std::string& path, bool missing_dir_ok) { - return Status::NotImplemented("The Azure FileSystem is not fully implemented"); + ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); + return impl_->DeleteDirContents(location, missing_dir_ok); } Status AzureFileSystem::DeleteRootDirContents() { diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 7c86385126d..15e38c76044 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -666,6 +666,83 @@ TEST_F(AzuriteFileSystemTest, DeleteDirUri) { ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); } +TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessExist) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(output->Write(std::string_view("sub"))); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK(output->Write(std::string_view("top"))); + ASSERT_OK(output->Close()); + + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDirContents(directory_path)); + // GH-38772: We may change this to FileType::Directory. + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); +} + +TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); +} + +TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); +} + +TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(output->Write(std::string_view("sub"))); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK(output->Write(std::string_view("top"))); + ASSERT_OK(output->Close()); + + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDirContents(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); +} + +TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); +} + +TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsFailureNonexistent) { + const auto directory_path = + internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); +} + TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); From f9e1846b39a7772975bf6c080a58ea6d4b93f81e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 28 Nov 2023 14:29:45 +0900 Subject: [PATCH 2/9] Use PathNotFound --- cpp/src/arrow/filesystem/azurefs.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 6d2ce1599e9..3276aa01ec1 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -986,8 +986,7 @@ class AzureFileSystem::Impl { try { auto list_response = container_client.ListBlobs(options); if (!missing_dir_ok && list_response.Blobs.empty()) { - return Status::IOError("Specified directory doesn't exist: ", location.path, ": ", - container_client.GetUrl()); + return PathNotFound(location); } while (list_response.HasPage() && !list_response.Blobs.empty()) { auto batch = container_client.CreateBatch(); From 19b4f6d75d989040701c9f1a736cf204aa44ad43 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 28 Nov 2023 14:29:57 +0900 Subject: [PATCH 3/9] Use for --- cpp/src/arrow/filesystem/azurefs.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 3276aa01ec1..8adaa5baf53 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -988,7 +988,10 @@ class AzureFileSystem::Impl { if (!missing_dir_ok && list_response.Blobs.empty()) { return PathNotFound(location); } - while (list_response.HasPage() && !list_response.Blobs.empty()) { + for (; list_response.HasPage(); list_response.MoveToNextPage()) { + if (list_response.Blobs.empty()) { + continue; + } auto batch = container_client.CreateBatch(); std::vector> @@ -1029,7 +1032,6 @@ class AzureFileSystem::Impl { "]: " + container_client.GetUrl()); } } - list_response.MoveToNextPage(); } } catch (const Azure::Storage::StorageException& exception) { return internal::ExceptionToStatus( @@ -1108,7 +1110,7 @@ class AzureFileSystem::Impl { auto directory_client = file_system_client.GetDirectoryClient(location.path); try { auto list_response = directory_client.ListPaths(false); - while (list_response.HasPage() && !list_response.Paths.empty()) { + for (; list_response.HasPage(); list_response.MoveToNextPage()) { for (const auto& path : list_response.Paths) { if (path.IsDirectory) { auto sub_directory_client = @@ -1133,7 +1135,6 @@ class AzureFileSystem::Impl { } } } - list_response.MoveToNextPage(); } } catch (const Azure::Storage::StorageException& exception) { if (missing_dir_ok && From e0bc11bce1e2dd56f32f6be42c0ed059091fc07d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 28 Nov 2023 14:30:04 +0900 Subject: [PATCH 4/9] Add argument name comment --- cpp/src/arrow/filesystem/azurefs.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8adaa5baf53..8ebb4c9334b 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1090,7 +1090,8 @@ class AzureFileSystem::Impl { exception); } } else { - return DeleteDirContentsWihtoutHierarchicalNamespace(location, true); + return DeleteDirContentsWihtoutHierarchicalNamespace(location, + /*missing_dir_ok=*/true); } } From 510cfa03899599f3cea05aed57e42d949f27adde Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 28 Nov 2023 14:30:14 +0900 Subject: [PATCH 5/9] Skip a test on macOS --- cpp/src/arrow/filesystem/azurefs_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 15e38c76044..da828f41f8a 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -667,6 +667,10 @@ TEST_F(AzuriteFileSystemTest, DeleteDirUri) { } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessExist) { +#ifdef __APPLE__ + GTEST_SKIP() << "This test fails by an Azurite problem: " + "https://github.com/Azure/Azurite/pull/2302"; +#endif const auto directory_path = internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); From e573afd64edad8c71ffa03d7e3dd46f913498f03 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 29 Nov 2023 18:09:39 +0900 Subject: [PATCH 6/9] Container isn't root directory --- cpp/src/arrow/filesystem/azurefs.cc | 3 --- cpp/src/arrow/filesystem/azurefs_test.cc | 34 +++++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 8ebb4c9334b..4e97ee641d1 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1099,9 +1099,6 @@ class AzureFileSystem::Impl { if (location.container.empty()) { return internal::InvalidDeleteDirContents(location.all); } - if (location.path.empty()) { - return internal::InvalidDeleteDirContents(location.all); - } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, hierarchical_namespace_.Enabled(location.container)); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index da828f41f8a..24b4da32451 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -666,7 +666,39 @@ TEST_F(AzuriteFileSystemTest, DeleteDirUri) { ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessExist) { +TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { +#ifdef __APPLE__ + GTEST_SKIP() << "This test fails by an Azurite problem: " + "https://github.com/Azure/Azurite/pull/2302"; +#endif + const auto container_path = RandomContainerName(); + const auto directory_path = + internal::ConcatAbstractPath(container_path, RandomDirectoryName()); + const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(output->Write(std::string_view("sub"))); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK(output->Write(std::string_view("top"))); + ASSERT_OK(output->Close()); + + arrow::fs::AssertFileInfo(fs_.get(), container_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDirContents(container_path)); + arrow::fs::AssertFileInfo(fs_.get(), container_path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); +} + +TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; From b1f07d62f20d70437532e65a7ca2a5e22c7bb0a2 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 1 Dec 2023 11:25:05 +0900 Subject: [PATCH 7/9] Prefer null prefix to empty prefix --- cpp/src/arrow/filesystem/azurefs.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 4e97ee641d1..ecc8d06f975 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -976,7 +976,9 @@ class AzureFileSystem::Impl { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); Azure::Storage::Blobs::ListBlobsOptions options; - options.Prefix = internal::EnsureTrailingSlash(location.path); + if (!location.path.empty()) { + options.Prefix = internal::EnsureTrailingSlash(location.path); + } // https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks // // Only supports up to 256 subrequests in a single batch. The From 651769a3a18b5a16c931920b60eb1c8e454c6afd Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 1 Dec 2023 11:25:52 +0900 Subject: [PATCH 8/9] Extract common codes as a convenient method --- cpp/src/arrow/filesystem/azurefs_test.cc | 130 +++++++++++------------ 1 file changed, 61 insertions(+), 69 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 24b4da32451..2ef5f8773c3 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -287,6 +287,45 @@ class AzureFileSystemTest : public ::testing::Test { void RunGetFileInfoObjectWithNestedStructureTest(); void RunGetFileInfoObjectTest(); + + struct HierarchicalPaths { + std::string container; + std::string directory; + std::vector sub_paths; + }; + + // Need to use "void" as the return type to use ASSERT_* in this method. + void CreateHierarchicalData(HierarchicalPaths& paths) { + const auto container_path = RandomContainerName(); + const auto directory_path = + internal::ConcatAbstractPath(container_path, RandomDirectoryName()); + const auto sub_directory_path = + internal::ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = + internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(output->Write(std::string_view("sub"))); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK(output->Write(std::string_view("top"))); + ASSERT_OK(output->Close()); + + AssertFileInfo(fs_.get(), container_path, FileType::Directory); + AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); + AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); + AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + + paths.container = container_path; + paths.directory = directory_path; + paths.sub_paths = { + sub_directory_path, + sub_blob_path, + top_blob_path, + }; + }; }; class AzuriteFileSystemTest : public AzureFileSystemTest { @@ -671,31 +710,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto container_path = RandomContainerName(); - const auto directory_path = - internal::ConcatAbstractPath(container_path, RandomDirectoryName()); - const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); - ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); - ASSERT_OK(output->Write(std::string_view("sub"))); - ASSERT_OK(output->Close()); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); - ASSERT_OK(output->Write(std::string_view("top"))); - ASSERT_OK(output->Close()); - - arrow::fs::AssertFileInfo(fs_.get(), container_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDirContents(container_path)); - arrow::fs::AssertFileInfo(fs_.get(), container_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); + HierarchicalPaths paths; + CreateHierarchicalData(paths); + ASSERT_OK(fs_->DeleteDirContents(paths.container)); + arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { @@ -703,29 +725,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); - ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); - ASSERT_OK(output->Write(std::string_view("sub"))); - ASSERT_OK(output->Close()); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); - ASSERT_OK(output->Write(std::string_view("top"))); - ASSERT_OK(output->Close()); - - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDirContents(directory_path)); + HierarchicalPaths paths; + CreateHierarchicalData(paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } } TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { @@ -742,28 +749,13 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { } TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto sub_directory_path = internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); - ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); - ASSERT_OK(output->Write(std::string_view("sub"))); - ASSERT_OK(output->Close()); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); - ASSERT_OK(output->Write(std::string_view("top"))); - ASSERT_OK(output->Close()); - - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDirContents(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), sub_directory_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), sub_blob_path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), top_blob_path, FileType::NotFound); + HierarchicalPaths paths; + CreateHierarchicalData(paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } } TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { From 2d3067692a17aa48e1b84d5a4ee8aac9b30050b4 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 1 Dec 2023 11:34:25 +0900 Subject: [PATCH 9/9] Fix lint --- cpp/src/arrow/filesystem/azurefs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 2ef5f8773c3..b6f75987693 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -325,7 +325,7 @@ class AzureFileSystemTest : public ::testing::Test { sub_blob_path, top_blob_path, }; - }; + } }; class AzuriteFileSystemTest : public AzureFileSystemTest {