Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cpp/src/arrow/filesystem/gcsfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,9 @@ class GcsFileSystem::Impl {

Result<FileInfoVector> GetFileInfo(const FileSelector& select) {
ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(select.base_dir));
auto prefix = p.object.empty() ? gcs::Prefix() : gcs::Prefix(p.object);
const auto canonical = internal::EnsureTrailingSlash(p.object);
const auto max_depth = internal::Depth(canonical) + select.max_recursion;
auto prefix = p.object.empty() ? gcs::Prefix() : gcs::Prefix(canonical);
auto delimiter = select.recursive ? gcs::Delimiter() : gcs::Delimiter("/");
bool found_directory = false;
FileInfoVector result;
Expand All @@ -311,8 +313,9 @@ class GcsFileSystem::Impl {
return internal::ToArrowStatus(o.status());
}
found_directory = true;
// Skip the directory itself from the results
if (o->name() == p.object) {
// Skip the directory itself from the results, and any result that is "too deep"
// into the recursion.
if (o->name() == p.object || internal::Depth(o->name()) > max_depth) {
continue;
}
auto path = internal::ConcatAbstractPath(o->bucket(), o->name());
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/filesystem/gcsfs_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <unordered_map>
#include <vector>

#include "arrow/filesystem/path_util.h"
#include "arrow/util/key_value_metadata.h"

namespace arrow {
Expand Down Expand Up @@ -258,6 +259,10 @@ Result<std::shared_ptr<const KeyValueMetadata>> FromObjectMetadata(
return result;
}

std::int64_t Depth(arrow::util::string_view path) {
return std::count(path.begin(), path.end(), fs::internal::kSep);
}

} // namespace internal
} // namespace fs
} // namespace arrow
2 changes: 2 additions & 0 deletions cpp/src/arrow/filesystem/gcsfs_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Result<google::cloud::storage::WithObjectMetadata> ToObjectMetadata(
Result<std::shared_ptr<const KeyValueMetadata>> FromObjectMetadata(
google::cloud::storage::ObjectMetadata const& m);

std::int64_t Depth(arrow::util::string_view path);

} // namespace internal
} // namespace fs
} // namespace arrow
72 changes: 61 additions & 11 deletions cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,16 +199,26 @@ class GcsIntegrationTest : public ::testing::Test {

Result<Hierarchy> CreateHierarchy(std::shared_ptr<arrow::fs::FileSystem> fs) {
const char* const kTestFolders[] = {
"a/", "a/0/", "a/0/0/", "a/1/", "a/2/",
"b/",
"b/0/",
"b/0/0/",
"b/1/",
"b/2/",
// Create some additional folders that should not appear in any listing of b/
"aa/",
"ba/",
"c/",
};
constexpr auto kFilesPerFolder = 4;
auto result = Hierarchy{PreexistingBucketPath() + "a/", {}};
constexpr auto kFilesPerFolder = 2;
auto base_dir = internal::ConcatAbstractPath(PreexistingBucketPath(), "b/");
auto result = Hierarchy{base_dir, {}};
for (auto const* f : kTestFolders) {
const auto folder = PreexistingBucketPath() + f;
const auto folder = internal::ConcatAbstractPath(PreexistingBucketPath(), f);
RETURN_NOT_OK(fs->CreateDir(folder, true));
result.contents.push_back(arrow::fs::Dir(folder));
for (int i = 0; i != kFilesPerFolder; ++i) {
const auto filename = folder + "test-file-" + std::to_string(i);
const auto filename =
internal::ConcatAbstractPath(folder, "test-file-" + std::to_string(i));
CreateFile(fs.get(), filename, filename);
result.contents.push_back(arrow::fs::File(filename));
}
Expand Down Expand Up @@ -483,14 +493,19 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorRecursive) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());
ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs));
std::vector<arrow::fs::FileInfo> expected;
std::copy_if(
hierarchy.contents.begin(), hierarchy.contents.end(), std::back_inserter(expected),
[&](const arrow::fs::FileInfo& info) { return hierarchy.base_dir != info.path(); });
std::copy_if(hierarchy.contents.begin(), hierarchy.contents.end(),
std::back_inserter(expected), [&](const arrow::fs::FileInfo& info) {
if (!fs::internal::IsAncestorOf(hierarchy.base_dir, info.path())) {
return false;
}
return hierarchy.base_dir != info.path();
});

auto selector = FileSelector();
selector.base_dir = hierarchy.base_dir;
selector.allow_not_found = false;
selector.recursive = true;
selector.max_recursion = 16;
ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector));
EXPECT_THAT(results, UnorderedElementsAreArray(expected.begin(), expected.end()));
}
Expand All @@ -515,6 +530,34 @@ TEST_F(GcsIntegrationTest, GetFileInfoSelectorNonRecursive) {
EXPECT_THAT(results, UnorderedElementsAreArray(expected.begin(), expected.end()));
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorLimitedRecursion) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());
ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs));

for (const auto max_recursion : {0, 1, 2, 3}) {
SCOPED_TRACE("Testing with max_recursion=" + std::to_string(max_recursion));
const auto max_depth =
internal::Depth(internal::EnsureTrailingSlash(hierarchy.base_dir)) +
max_recursion;
std::vector<arrow::fs::FileInfo> expected;
std::copy_if(hierarchy.contents.begin(), hierarchy.contents.end(),
std::back_inserter(expected), [&](const arrow::fs::FileInfo& info) {
if (info.path() == hierarchy.base_dir) return false;
if (!fs::internal::IsAncestorOf(hierarchy.base_dir, info.path())) {
return false;
}
return internal::Depth(info.path()) <= max_depth;
});
auto selector = FileSelector();
selector.base_dir = hierarchy.base_dir;
selector.allow_not_found = true;
selector.recursive = true;
selector.max_recursion = max_recursion;
ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector));
EXPECT_THAT(results, UnorderedElementsAreArray(expected.begin(), expected.end()));
}
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundTrue) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());

Expand Down Expand Up @@ -591,7 +634,10 @@ TEST_F(GcsIntegrationTest, DeleteDirSuccess) {
arrow::fs::AssertFileInfo(fs.get(), PreexistingBucketPath(), FileType::Directory);
arrow::fs::AssertFileInfo(fs.get(), PreexistingObjectPath(), FileType::File);
for (auto const& info : hierarchy.contents) {
arrow::fs::AssertFileInfo(fs.get(), info.path(), FileType::NotFound);
const auto expected_type = fs::internal::IsAncestorOf(hierarchy.base_dir, info.path())
? FileType::NotFound
: info.type();
arrow::fs::AssertFileInfo(fs.get(), info.path(), expected_type);
}
}

Expand All @@ -604,8 +650,12 @@ TEST_F(GcsIntegrationTest, DeleteDirContentsSuccess) {
arrow::fs::AssertFileInfo(fs.get(), PreexistingBucketPath(), FileType::Directory);
arrow::fs::AssertFileInfo(fs.get(), PreexistingObjectPath(), FileType::File);
for (auto const& info : hierarchy.contents) {
if (info.path() == hierarchy.base_dir) continue;
arrow::fs::AssertFileInfo(fs.get(), info.path(), FileType::NotFound);
auto expected_type = FileType::NotFound;
if (info.path() == hierarchy.base_dir ||
!fs::internal::IsAncestorOf(hierarchy.base_dir, info.path())) {
expected_type = info.type();
}
arrow::fs::AssertFileInfo(fs.get(), info.path(), expected_type);
}
}

Expand Down