Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/scripts/integration_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/
# execute cpp tests
pushd ${build_dir}
debug/arrow-io-hdfs-test
debug/arrow-hdfs-test
popd

# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,14 @@ endif()
if(ARROW_FILESYSTEM)
add_subdirectory(filesystem)

# Expose the optional HDFS and S3 backends to the C++ sources as
# preprocessor definitions, so that code can guard backend-specific
# sections with `#ifdef ARROW_HDFS` / `#ifdef ARROW_S3`.
if(ARROW_HDFS)
add_definitions(-DARROW_HDFS)
endif()

if(ARROW_S3)
add_definitions(-DARROW_S3)
endif()

list(APPEND ARROW_SRCS
filesystem/filesystem.cc
filesystem/localfs.cc
Expand All @@ -306,6 +314,10 @@ if(ARROW_FILESYSTEM)
list(APPEND ARROW_SRCS filesystem/s3fs.cc)
endif()

# The HDFS filesystem implementation is only compiled in when HDFS
# support is enabled.
if(ARROW_HDFS)
list(APPEND ARROW_SRCS filesystem/hdfs.cc)
endif()

list(APPEND ARROW_TESTING_SRCS filesystem/test_util.cc)
endif()

Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/filesystem/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,7 @@ if(ARROW_S3)
add_dependencies(arrow-tests arrow-s3fs-narrative-test)
endif()
endif()

# The HDFS filesystem test is only registered when HDFS support is enabled.
if(ARROW_HDFS)
add_arrow_test(hdfs_test)
endif()
1 change: 1 addition & 0 deletions cpp/src/arrow/filesystem/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define ARROW_FILESYSTEM_API_H

#include "arrow/filesystem/filesystem.h" // IWYU pragma: export
#include "arrow/filesystem/hdfs.h" // IWYU pragma: export
#include "arrow/filesystem/localfs.h" // IWYU pragma: export
#include "arrow/filesystem/mockfs.h" // IWYU pragma: export
#include "arrow/filesystem/s3fs.h" // IWYU pragma: export
Expand Down
62 changes: 62 additions & 0 deletions cpp/src/arrow/filesystem/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,30 @@
#include <utility>

#include "arrow/filesystem/filesystem.h"
#ifdef ARROW_HDFS
#include "arrow/filesystem/hdfs.h"
#endif
#include "arrow/filesystem/localfs.h"
#include "arrow/filesystem/mockfs.h"
#include "arrow/filesystem/path_util.h"
#include "arrow/filesystem/util_internal.h"
#include "arrow/io/slow.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/uri.h"

namespace arrow {

using internal::Uri;

namespace fs {

using internal::ConcatAbstractPath;
using internal::EnsureTrailingSlash;
using internal::GetAbstractPathParent;
using internal::kSep;
using internal::RemoveLeadingSlash;
using internal::RemoveTrailingSlash;

std::string ToString(FileType ftype) {
switch (ftype) {
Expand Down Expand Up @@ -328,5 +340,55 @@ Status SlowFileSystem::OpenAppendStream(const std::string& path,
return base_fs_->OpenAppendStream(path, out);
}

// Create a FileSystem from a URI string.
//
// The URI's path component is written to `*out_path` when `out_path` is
// non-null.  `out_fs` is always dereferenced on success.
//
// Dispatch by scheme:
//   - "" or "file"  -> LocalFileSystem
//   - "hdfs"        -> HadoopFileSystem (NotImplemented if built without HDFS)
//   - "mock"        -> MockFileSystem, path returned without leading slashes
//   - anything else -> Status::Invalid
Status FileSystemFromUri(const std::string& uri_string,
                         std::shared_ptr<FileSystem>* out_fs, std::string* out_path) {
  Uri parsed;
  RETURN_NOT_OK(parsed.Parse(uri_string));

  const bool want_path = (out_path != nullptr);
  if (want_path) {
    *out_path = std::string(parsed.path());
  }

  const auto scheme = parsed.scheme();

#ifdef _WIN32
  // Assuming a plain local path starting with a drive letter, e.g "C:/..."
  const bool is_drive_letter = (scheme.size() == 1);
#else
  const bool is_drive_letter = false;
#endif
  if (is_drive_letter && want_path) {
    // The whole input is the path: the "scheme" was really the drive letter.
    *out_path = uri_string;
  }
  if (is_drive_letter || scheme == "" || scheme == "file") {
    *out_fs = std::make_shared<LocalFileSystem>();
    return Status::OK();
  }

  if (scheme == "hdfs") {
#ifdef ARROW_HDFS
    ARROW_ASSIGN_OR_RAISE(auto hdfs_options, HdfsOptions::FromUri(parsed));
    ARROW_ASSIGN_OR_RAISE(auto hdfs_fs, HadoopFileSystem::Make(hdfs_options));
    *out_fs = hdfs_fs;
    return Status::OK();
#else
    return Status::NotImplemented("Arrow compiled without HDFS support");
#endif
  }

  // Other filesystems below do not have an absolute / relative path distinction,
  // normalize path by removing leading slash.
  // XXX perhaps each filesystem should have a path normalization method?
  if (want_path) {
    *out_path = std::string(RemoveLeadingSlash(*out_path));
  }

  if (scheme != "mock") {
    // TODO add support for S3 URIs
    return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
  }

  *out_fs = std::make_shared<internal::MockFileSystem>(internal::CurrentTimePoint());
  return Status::OK();
}

} // namespace fs
} // namespace arrow
14 changes: 14 additions & 0 deletions cpp/src/arrow/filesystem/filesystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <vector>

#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

// The Windows API defines macros from *File resolving to either
Expand Down Expand Up @@ -318,5 +319,18 @@ class ARROW_EXPORT SlowFileSystem : public FileSystem {
std::shared_ptr<io::LatencyGenerator> latencies_;
};

/// \brief EXPERIMENTAL: Create a new FileSystem by URI
///
/// A scheme-less URI is considered a local filesystem path.
/// Recognized schemes are "file", "mock" and "hdfs".
///
/// On Windows, a URI whose scheme is a single character is assumed to be a
/// plain local path starting with a drive letter (e.g. "C:/..."); the whole
/// input string is then returned as the path.
///
/// For the "mock" scheme, leading slashes are removed from the returned path.
///
/// An "hdfs" URI yields a NotImplemented error if Arrow was compiled without
/// HDFS support.  An unrecognized scheme yields an Invalid error.
///
/// \param[in] uri a URI-based path, ex: file:///some/local/path
/// \param[out] out_fs FileSystem instance.  Must be non-null, as it is
///             always written to on success.
/// \param[out] out_path (optional) Path inside the filesystem.  May be null,
///             in which case the path is not returned.
/// \return Status
ARROW_EXPORT
Status FileSystemFromUri(const std::string& uri, std::shared_ptr<FileSystem>* out_fs,
std::string* out_path = NULLPTR);

} // namespace fs
} // namespace arrow
67 changes: 67 additions & 0 deletions cpp/src/arrow/filesystem/filesystem_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,51 @@ TEST(PathUtil, RemoveTrailingSlash) {
ASSERT_EQ("/abc/def", std::string(RemoveTrailingSlash("/abc/def//")));
}

// EnsureLeadingSlash() must return its input prefixed with exactly one slash,
// leaving inputs that already start with a slash unchanged.
TEST(PathUtil, EnsureLeadingSlash) {
  struct Case {
    const char* input;
    const char* expected;
  };
  const Case cases[] = {
      {"", "/"},         {"/", "/"},        {"abc", "/abc"},
      {"abc/", "/abc/"}, {"/abc", "/abc"},  {"/abc/", "/abc/"},
  };
  for (const auto& c : cases) {
    ASSERT_EQ(c.expected, EnsureLeadingSlash(c.input));
  }
}

// RemoveLeadingSlash() must strip every leading slash and leave the rest of
// the path (including any trailing slash) untouched.
TEST(PathUtil, RemoveLeadingSlash) {
  struct Case {
    const char* input;
    const char* expected;
  };
  const Case cases[] = {
      {"", ""},
      {"/", ""},
      {"//", ""},
      {"abc/def", "abc/def"},
      {"/abc/def", "abc/def"},
      {"//abc/def", "abc/def"},
      {"abc/def/", "abc/def/"},
      {"/abc/def/", "abc/def/"},
      {"//abc/def/", "abc/def/"},
  };
  for (const auto& c : cases) {
    ASSERT_EQ(c.expected, std::string(RemoveLeadingSlash(c.input)));
  }
}

// MakeAbstractPathRelative(base, path) must return `path` expressed relative
// to `base`, or an Invalid error when that is not possible.
TEST(PathUtil, MakeAbstractPathRelative) {
  // Root as base
  ASSERT_OK_AND_EQ("", MakeAbstractPathRelative("/", "/"));
  ASSERT_OK_AND_EQ("foo/bar", MakeAbstractPathRelative("/", "/foo/bar"));

  // Path equal to base, with or without trailing slashes on either side
  ASSERT_OK_AND_EQ("", MakeAbstractPathRelative("/foo", "/foo"));
  ASSERT_OK_AND_EQ("", MakeAbstractPathRelative("/foo/", "/foo"));
  ASSERT_OK_AND_EQ("", MakeAbstractPathRelative("/foo", "/foo/"));
  ASSERT_OK_AND_EQ("", MakeAbstractPathRelative("/foo/", "/foo/"));

  // Path strictly below base; a trailing slash on the path is preserved
  ASSERT_OK_AND_EQ("bar", MakeAbstractPathRelative("/foo", "/foo/bar"));
  ASSERT_OK_AND_EQ("bar", MakeAbstractPathRelative("/foo/", "/foo/bar"));
  ASSERT_OK_AND_EQ("bar/", MakeAbstractPathRelative("/foo/", "/foo/bar/"));

  // Not relative to base (a shared string prefix is not enough: "/xxx" is
  // not a parent of "/xxxx")
  ASSERT_RAISES(Invalid, MakeAbstractPathRelative("/xxx", "/foo/bar"));
  ASSERT_RAISES(Invalid, MakeAbstractPathRelative("/xxx", "/xxxx"));

  // Base is not absolute
  ASSERT_RAISES(Invalid, MakeAbstractPathRelative("foo/bar", "foo/bar/baz"));
  ASSERT_RAISES(Invalid, MakeAbstractPathRelative("", "foo/bar/baz"));
}

////////////////////////////////////////////////////////////////////////////
// Generic MockFileSystem tests

Expand Down Expand Up @@ -381,6 +426,28 @@ TEST_F(TestMockFS, Make) {
CheckFiles({{"A/a", time_, ""}});
}

// "mock:" URIs must produce a MockFileSystem and a path without leading
// slashes; any query string is not part of the returned path.
TEST_F(TestMockFS, FileSystemFromUri) {
  struct UriCase {
    const char* uri;
    const char* expected_path;
  };
  const UriCase cases[] = {
      {"mock:", ""},
      {"mock:foo/bar", "foo/bar"},
      {"mock:/foo/bar", "foo/bar"},
      {"mock:/foo/bar/?q=xxx", "foo/bar/"},
      {"mock:///foo/bar", "foo/bar"},
      {"mock:///foo/bar?q=zzz", "foo/bar"},
  };

  std::string actual_path;
  for (const auto& c : cases) {
    ASSERT_OK(FileSystemFromUri(c.uri, &fs_, &actual_path));
    ASSERT_EQ(actual_path, c.expected_path);
    CheckDirs({});  // Ensures it's a MockFileSystem
  }
}

////////////////////////////////////////////////////////////////////////////
// Concrete SubTreeFileSystem tests

Expand Down
Loading