Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion cpp/src/arrow/filesystem/gcsfs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,45 @@ class GcsFileSystem::Impl {
path.object.back() == '/' ? FileType::Directory : FileType::File);
}

Result<FileInfoVector> GetFileInfo(const FileSelector& select) {
ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(select.base_dir));
auto prefix = p.object.empty() ? gcs::Prefix() : gcs::Prefix(p.object);
auto delimiter = select.recursive ? gcs::Delimiter() : gcs::Delimiter("/");
bool found_directory = false;
FileInfoVector result;
for (auto const& o : client_.ListObjects(p.bucket, prefix, delimiter)) {
if (!o.ok()) {
if (select.allow_not_found &&
o.status().code() == google::cloud::StatusCode::kNotFound) {
continue;
}
return internal::ToArrowStatus(o.status());
}
found_directory = true;
// Skip the directory itself from the results
if (o->name() == p.object) {
continue;
}
auto path = internal::ConcatAbstractPath(o->bucket(), o->name());
if (o->name().back() == '/') {
result.push_back(
FileInfo(internal::EnsureTrailingSlash(path), FileType::Directory));
continue;
}
auto info = FileInfo(path, FileType::File);
info.set_size(static_cast<int64_t>(o->size()));
// An object has multiple "time" attributes, including the time when its data was
// created, and the time when its metadata was last updated. We use the object
// creation time because the data for an object cannot be changed once created.
info.set_mtime(o->time_created());
result.push_back(std::move(info));
}
if (!found_directory && !select.allow_not_found) {
return Status::IOError("No such file or directory '", select.base_dir, "'");
}
return result;
}

// GCS does not have directories or folders. But folders can be emulated (with some
// limitations) using marker objects. That and listing with prefixes creates the
// illusion of folders.
Expand Down Expand Up @@ -478,7 +517,7 @@ Result<FileInfo> GcsFileSystem::GetFileInfo(const std::string& path) {
}

Result<FileInfoVector> GcsFileSystem::GetFileInfo(const FileSelector& select) {
return Status::NotImplemented("The GCS FileSystem is not fully implemented");
return impl_->GetFileInfo(select);
}

Status GcsFileSystem::CreateDir(const std::string& path, bool recursive) {
Expand Down
88 changes: 87 additions & 1 deletion cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@

namespace arrow {
namespace fs {
/// Custom comparison for FileInfo, we need this to use complex googletest matchers.
inline bool operator==(const FileInfo& a, const FileInfo& b) {
return a.path() == b.path() && a.type() == b.type();
}

namespace {

namespace bp = boost::process;
Expand All @@ -51,6 +56,7 @@ using ::testing::Not;
using ::testing::NotNull;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;
using ::testing::UnorderedElementsAreArray;

auto const* kLoremIpsum = R"""(
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
Expand Down Expand Up @@ -93,7 +99,7 @@ class GcsTestbench : public ::testing::Environment {
error_ = std::move(error);
}

~GcsTestbench() {
~GcsTestbench() override {
// Brutal shutdown, kill the full process group because the GCS testbench may launch
// additional children.
group_.terminate();
Expand Down Expand Up @@ -187,6 +193,29 @@ class GcsIntegrationTest : public ::testing::Test {

std::string RandomFolderName() { return RandomChars(32) + "/"; }

struct Hierarchy {
std::string base_dir;
std::vector<FileInfo> contents;
};

Result<Hierarchy> CreateHierarchy(std::shared_ptr<arrow::fs::FileSystem> fs) {
const char* const kTestFolders[] = {
"a/", "a/0/", "a/0/0/", "a/1/", "a/2/",
};
auto result = Hierarchy{PreexistingBucketPath() + "a/", {}};
for (auto const* f : kTestFolders) {
const auto folder = PreexistingBucketPath() + f;
RETURN_NOT_OK(fs->CreateDir(folder, true));
result.contents.push_back(arrow::fs::Dir(folder));
for (int i = 0; i != 64; ++i) {
const auto filename = folder + "test-file-" + std::to_string(i);
CreateFile(fs.get(), filename, filename);
result.contents.push_back(arrow::fs::File(filename));
}
}
return result;
}

private:
std::string RandomChars(std::size_t count) {
auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
Expand Down Expand Up @@ -399,6 +428,63 @@ TEST_F(GcsIntegrationTest, GetFileInfoObject) {
arrow::fs::AssertFileInfo(fs.get(), PreexistingObjectPath(), FileType::File);
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorRecursive) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());
ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs));
std::vector<arrow::fs::FileInfo> expected;
std::copy_if(
hierarchy.contents.begin(), hierarchy.contents.end(), std::back_inserter(expected),
[&](const arrow::fs::FileInfo& info) { return hierarchy.base_dir != info.path(); });

auto selector = FileSelector();
selector.base_dir = hierarchy.base_dir;
selector.allow_not_found = false;
selector.recursive = true;
ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector));
EXPECT_THAT(results, UnorderedElementsAreArray(expected.begin(), expected.end()));
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorNonRecursive) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());
ASSERT_OK_AND_ASSIGN(auto hierarchy, CreateHierarchy(fs));
std::vector<arrow::fs::FileInfo> expected;
std::copy_if(hierarchy.contents.begin(), hierarchy.contents.end(),
std::back_inserter(expected), [&](const arrow::fs::FileInfo& info) {
if (info.path() == hierarchy.base_dir) return false;
return internal::EnsureTrailingSlash(
internal::GetAbstractPathParent(info.path()).first) ==
hierarchy.base_dir;
});

auto selector = FileSelector();
selector.base_dir = hierarchy.base_dir;
selector.allow_not_found = false;
selector.recursive = false;
ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector));
EXPECT_THAT(results, UnorderedElementsAreArray(expected.begin(), expected.end()));
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundTrue) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());

auto selector = FileSelector();
selector.base_dir = NotFoundObjectPath() + "/";
selector.allow_not_found = true;
selector.recursive = true;
ASSERT_OK_AND_ASSIGN(auto results, fs->GetFileInfo(selector));
EXPECT_THAT(results, IsEmpty());
}

TEST_F(GcsIntegrationTest, GetFileInfoSelectorNotFoundFalse) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());

auto selector = FileSelector();
selector.base_dir = NotFoundObjectPath() + "/";
selector.allow_not_found = false;
selector.recursive = false;
ASSERT_RAISES(IOError, fs->GetFileInfo(selector));
}

TEST_F(GcsIntegrationTest, CreateDirSuccessBucketOnly) {
auto fs = internal::MakeGcsFileSystemForTest(TestGcsOptions());
auto bucket_name = RandomBucketName();
Expand Down