Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cpp/examples/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,13 @@ if(ARROW_GANDIVA)
endif()
add_arrow_example(gandiva_example EXTRA_LINK_LIBS ${GANDIVA_EXAMPLE_LINK_LIBS})
endif()

# Filesystem plugin examples: one module which defines a filesystem and one
# executable which loads that module at runtime.
if(ARROW_FILESYSTEM)
# MODULE: built as a plugin to be loaded dynamically, never linked into the
# usage example below.
add_library(filesystem_definition_example MODULE filesystem_definition_example.cc)
target_link_libraries(filesystem_definition_example ${ARROW_EXAMPLE_LINK_LIBS})

add_arrow_example(filesystem_usage_example)
# NOTE(review): the target is referenced with hyphens here; presumably
# add_arrow_example() derives the target name by replacing underscores with
# hyphens - confirm against the macro definition.
# The generator expression passes the full path of the built module to the
# usage example as the FILESYSTEM_EXAMPLE_LIBPATH string macro.
target_compile_definitions(filesystem-usage-example
PUBLIC FILESYSTEM_EXAMPLE_LIBPATH="$<TARGET_FILE:filesystem_definition_example>"
)
endif()
151 changes: 151 additions & 0 deletions cpp/examples/arrow/filesystem_definition_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <arrow/filesystem/filesystem.h>
#include <arrow/filesystem/filesystem_library.h>
#include <arrow/io/memory.h>
#include <arrow/result.h>
#include <arrow/util/uri.h>

// Demonstrate registering a user-defined Arrow FileSystem outside
// of the Arrow source tree.

using arrow::Result;
using arrow::Status;
namespace io = arrow::io;
namespace fs = arrow::fs;

/// A minimal, read-only FileSystem used to demonstrate how a user-defined
/// filesystem can be implemented outside of the Arrow source tree.
///
/// The root directory contains exactly one file, `kPath`, whose contents are
/// `kContents`. Every operation which would mutate the filesystem returns an
/// IOError.
class ExampleFileSystem : public fs::FileSystem {
 public:
  explicit ExampleFileSystem(const io::IOContext& io_context)
      : fs::FileSystem{io_context} {}

  // This is a mock filesystem whose root directory contains a single file.
  // All operations which would mutate will simply raise an error.
  static constexpr std::string_view kPath = "example_file";
  static constexpr std::string_view kContents = "hello world";

  /// Build the FileInfo describing the single file held by this filesystem.
  static fs::FileInfo info() {
    fs::FileInfo file_info;
    file_info.set_path(std::string{kPath});
    file_info.set_type(fs::FileType::File);
    file_info.set_size(kContents.size());
    return file_info;
  }

  /// Return true if `path` designates the single file of this filesystem.
  ///
  /// One leading slash is tolerated so that rooted paths ("/example_file")
  /// resolve as well. This keeps the per-path operations consistent with
  /// GetFileInfo(const fs::FileSelector&), which accepts both "/" and "" as
  /// the root directory.
  static bool IsExampleFile(std::string_view path) {
    if (!path.empty() && path.front() == '/') {
      path.remove_prefix(1);
    }
    return path == kPath;
  }

  /// Error returned when `path` does not designate anything in this filesystem.
  static Status NotFound(std::string_view path) {
    return Status::IOError("Path does not exist '", path, "'");
  }

  /// Error returned by every operation which would modify the filesystem.
  static Status NoMutation() {
    return Status::IOError("operations which would mutate are not permitted");
  }

  /// Parse `uri_string` and return its path component.
  ///
  /// \return the path component, or the parsing error if `uri_string` is not
  /// a valid URI
  Result<std::string> PathFromUri(const std::string& uri_string) const override {
    ARROW_ASSIGN_OR_RAISE(auto uri, arrow::util::Uri::FromString(uri_string));
    return uri.path();
  }

  std::string type_name() const override { return "example"; }

  /// Two filesystems compare equal when they report the same type name; this
  /// filesystem holds no other state which the comparison looks at.
  bool Equals(const FileSystem& other) const override {
    return type_name() == other.type_name();
  }

  /// \cond FALSE
  using FileSystem::CreateDir;
  using FileSystem::DeleteDirContents;
  using FileSystem::GetFileInfo;
  using FileSystem::OpenAppendStream;
  using FileSystem::OpenOutputStream;
  /// \endcond

  /// Return the info of the single file if `path` designates it (with or
  /// without a leading slash), otherwise a "does not exist" IOError.
  Result<fs::FileInfo> GetFileInfo(const std::string& path) override {
    if (IsExampleFile(path)) {
      return info();
    }
    return NotFound(path);
  }

  /// List the entries selected by `select`.
  ///
  /// Only the root directory ("/" or "") exists. Any other base directory
  /// yields an empty listing when `select.allow_not_found` is set and a
  /// "does not exist" IOError otherwise.
  Result<std::vector<fs::FileInfo>> GetFileInfo(const fs::FileSelector& select) override {
    if (select.base_dir == "/" || select.base_dir == "") {
      return std::vector<fs::FileInfo>{info()};
    }
    if (select.allow_not_found) {
      return std::vector<fs::FileInfo>{};
    }
    return NotFound(select.base_dir);
  }

  // All of the following operations would mutate the filesystem, so each of
  // them unconditionally fails with NoMutation().

  Status CreateDir(const std::string& path, bool recursive) override {
    return NoMutation();
  }

  Status DeleteDir(const std::string& path) override { return NoMutation(); }

  Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override {
    return NoMutation();
  }

  Status DeleteRootDirContents() override { return NoMutation(); }

  Status DeleteFile(const std::string& path) override { return NoMutation(); }

  Status Move(const std::string& src, const std::string& dest) override {
    return NoMutation();
  }

  Status CopyFile(const std::string& src, const std::string& dest) override {
    return NoMutation();
  }

  /// Open the file for sequential reading; delegates to OpenInputFile().
  Result<std::shared_ptr<io::InputStream>> OpenInputStream(
      const std::string& path) override {
    return OpenInputFile(path);
  }

  /// Open the file for random access reading.
  ///
  /// \return a reader over a copy of `kContents` if `path` designates the
  /// single file (with or without a leading slash), otherwise a
  /// "does not exist" IOError
  Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
      const std::string& path) override {
    if (IsExampleFile(path)) {
      return io::BufferReader::FromString(std::string{kContents});
    }
    return NotFound(path);
  }

  // Writing is a mutation as well: both stream factories always fail.

  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
      const std::string& path,
      const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override {
    return NoMutation();
  }

  Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
      const std::string& path,
      const std::shared_ptr<const arrow::KeyValueMetadata>& metadata) override {
    return NoMutation();
  }
};

// Registrar object associating the name "example" with a factory which
// builds an ExampleFileSystem.
fs::FileSystemRegistrar kExampleFileSystemModule{
    "example",
    [](const arrow::util::Uri& uri, const io::IOContext& io_context,
       std::string* out_path) -> Result<std::shared_ptr<fs::FileSystem>> {
      auto example_fs = std::make_shared<ExampleFileSystem>(io_context);
      // The caller may pass a null out_path when it has no use for the path.
      if (out_path != nullptr) {
        ARROW_ASSIGN_OR_RAISE(*out_path, example_fs->PathFromUri(uri.ToString()));
      }
      return example_fs;
    },
};
55 changes: 55 additions & 0 deletions cpp/examples/arrow/filesystem_usage_example.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <iostream>

#include <arrow/filesystem/filesystem.h>
#include <arrow/result.h>

namespace fs = arrow::fs;

// Demonstrate dynamically loading a user-defined Arrow FileSystem

// Load the example filesystem module, resolve a URI against it and print the
// listing of its root directory. The first failing step is returned as-is.
arrow::Status Execute() {
  // FILESYSTEM_EXAMPLE_LIBPATH is expected to be supplied by the build as a
  // string macro holding the path of the module to load.
  ARROW_RETURN_NOT_OK(fs::LoadFileSystemFactories(FILESYSTEM_EXAMPLE_LIBPATH));

  std::string uri = "example:///example_file";
  std::cout << "Uri: " << uri << std::endl;

  // FileSystemFromUri() also reports the path extracted from the URI.
  std::string path;
  ARROW_ASSIGN_OR_RAISE(auto filesystem, fs::FileSystemFromUri(uri, &path));
  std::cout << "Path: " << path << std::endl;

  fs::FileSelector root_selector;
  root_selector.base_dir = "/";
  ARROW_ASSIGN_OR_RAISE(auto root_entries, filesystem->GetFileInfo(root_selector));

  std::cout << "Root directory contains:" << std::endl;
  for (const auto& entry : root_entries) {
    std::cout << "- " << entry << std::endl;
  }
  return arrow::Status::OK();
}

int main() {
auto status = Execute();
if (!status.ok()) {
std::cerr << "Error occurred : " << status.message() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
4 changes: 2 additions & 2 deletions cpp/src/arrow/dataset/partition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ namespace dataset {
namespace {
/// Apply UriUnescape, then ensure the results are valid UTF-8.
Result<std::string> SafeUriUnescape(std::string_view encoded) {
auto decoded = ::arrow::internal::UriUnescape(encoded);
auto decoded = ::arrow::util::UriUnescape(encoded);
if (!util::ValidateUTF8(decoded)) {
return Status::Invalid("Partition segment was not valid UTF-8 after URL decoding: ",
encoded);
Expand Down Expand Up @@ -755,7 +755,7 @@ Result<PartitionPathFormat> HivePartitioning::FormatValues(
// field_index <-> path nesting relation
segments[i] = name + "=" + hive_options_.null_fallback;
} else {
segments[i] = name + "=" + arrow::internal::UriEscape(values[i]->ToString());
segments[i] = name + "=" + arrow::util::UriEscape(values[i]->ToString());
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/dataset/partition_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ TEST_F(TestPartitioning, WriteHiveWithSlashesInValues) {
"experiment/A/f.csv", "experiment/B/f.csv", "experiment/C/k.csv",
"experiment/M/i.csv"};
for (auto partition : unique_partitions) {
encoded_paths.push_back("part=" + arrow::internal::UriEscape(partition));
encoded_paths.push_back("part=" + arrow::util::UriEscape(partition));
}

ASSERT_EQ(all_dirs.size(), encoded_paths.size());
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/engine/substrait/relation_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace arrow {
using internal::checked_cast;
using internal::StartsWith;
using internal::ToChars;
using internal::UriFromAbsolutePath;
using util::UriFromAbsolutePath;

namespace engine {

Expand Down Expand Up @@ -463,7 +463,7 @@ Result<DeclarationInfo> FromProto(const substrait::Rel& rel, const ExtensionSet&
}

// Extract and parse the read relation's source URI
::arrow::internal::Uri item_uri;
::arrow::util::Uri item_uri;
switch (item.path_type_case()) {
case substrait::ReadRel::LocalFiles::FileOrFiles::kUriPath:
RETURN_NOT_OK(item_uri.Parse(item.uri_path()));
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/filesystem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ add_arrow_test(filesystem-test
filesystem_test.cc
localfs_test.cc
EXTRA_LABELS
filesystem)
filesystem
DEFINITIONS
ARROW_FILESYSTEM_EXAMPLE_LIBPATH="$<TARGET_FILE:arrow_filesystem_example>")

if(ARROW_BUILD_BENCHMARKS)
add_arrow_benchmark(localfs_benchmark
Expand Down
16 changes: 12 additions & 4 deletions cpp/src/arrow/filesystem/azurefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,23 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {

bool Equals(const FileSystem& other) const override;

/// \cond FALSE
using FileSystem::CreateDir;
using FileSystem::DeleteDirContents;
using FileSystem::GetFileInfo;
using FileSystem::OpenAppendStream;
using FileSystem::OpenOutputStream;
/// \endcond

Result<FileInfo> GetFileInfo(const std::string& path) override;

Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;

Status CreateDir(const std::string& path, bool recursive = true) override;
Status CreateDir(const std::string& path, bool recursive) override;

Status DeleteDir(const std::string& path) override;

Status DeleteDirContents(const std::string& path, bool missing_dir_ok = false) override;
Status DeleteDirContents(const std::string& path, bool missing_dir_ok) override;

Status DeleteRootDirContents() override;

Expand Down Expand Up @@ -246,11 +254,11 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {

Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
const std::shared_ptr<const KeyValueMetadata>& metadata) override;

Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
const std::shared_ptr<const KeyValueMetadata>& metadata) override;
};

} // namespace arrow::fs
Loading