diff --git a/cpp/src/arrow/filesystem/gcsfs.cc b/cpp/src/arrow/filesystem/gcsfs.cc index 82d2439a607..da7b856be47 100644 --- a/cpp/src/arrow/filesystem/gcsfs.cc +++ b/cpp/src/arrow/filesystem/gcsfs.cc @@ -887,6 +887,7 @@ Status GcsFileSystem::CopyFile(const std::string& src, const std::string& dest) Result> GcsFileSystem::OpenInputStream( const std::string& path) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(path)); return impl_->OpenInputStream(p, gcs::Generation(), gcs::ReadFromOffset()); } @@ -897,12 +898,14 @@ Result> GcsFileSystem::OpenInputStream( return Status::IOError("Cannot open directory '", info.path(), "' as an input stream"); } + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(info.path())); ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(info.path())); return impl_->OpenInputStream(p, gcs::Generation(), gcs::ReadFromOffset()); } Result> GcsFileSystem::OpenInputFile( const std::string& path) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(path)); auto metadata = impl_->GetObjectMetadata(p); ARROW_GCS_RETURN_NOT_OK(metadata.status()); @@ -924,6 +927,7 @@ Result> GcsFileSystem::OpenInputFile( return Status::IOError("Cannot open directory '", info.path(), "' as an input stream"); } + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(info.path())); ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(info.path())); auto metadata = impl_->GetObjectMetadata(p); ARROW_GCS_RETURN_NOT_OK(metadata.status()); @@ -941,6 +945,7 @@ Result> GcsFileSystem::OpenInputFile( Result> GcsFileSystem::OpenOutputStream( const std::string& path, const std::shared_ptr& metadata) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); ARROW_ASSIGN_OR_RAISE(auto p, GcsPath::FromString(path)); return impl_->OpenOutputStream(p, metadata); } diff --git a/cpp/src/arrow/filesystem/mockfs.cc b/cpp/src/arrow/filesystem/mockfs.cc index 31dba27326b..69e49b32043 100644 --- a/cpp/src/arrow/filesystem/mockfs.cc +++ b/cpp/src/arrow/filesystem/mockfs.cc @@ -382,6 +382,7 @@ class MockFileSystem::Impl { Result> OpenOutputStream( const std::string& path, bool append, const std::shared_ptr& metadata) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); auto parts = SplitAbstractPath(path); RETURN_NOT_OK(ValidateAbstractPathParts(parts)); @@ -412,6 +413,7 @@ class MockFileSystem::Impl { } Result> OpenInputReader(const std::string& path) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); auto parts = SplitAbstractPath(path); RETURN_NOT_OK(ValidateAbstractPathParts(parts)); @@ -727,6 +729,7 @@ Result> MockFileSystem::OpenOutputStream( Result> MockFileSystem::OpenAppendStream( const std::string& path, const std::shared_ptr& metadata) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(path)); RETURN_NOT_OK(ValidatePath(path)); auto guard = impl_->lock_guard(); diff --git a/cpp/src/arrow/filesystem/path_util.cc b/cpp/src/arrow/filesystem/path_util.cc index 04484dd5bda..1afc3b2a89b 100644 --- a/cpp/src/arrow/filesystem/path_util.cc +++ b/cpp/src/arrow/filesystem/path_util.cc @@ -19,6 +19,7 @@ #include #include "arrow/filesystem/path_util.h" +#include "arrow/filesystem/util_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/util/logging.h" @@ -139,6 +140,13 @@ util::string_view RemoveLeadingSlash(util::string_view key) { return key; } +Status AssertNoTrailingSlash(util::string_view key) { + if (key.back() == '/') { + return NotAFile(key); + } + return Status::OK(); +} + Result MakeAbstractPathRelative(const std::string& base, const std::string& path) { if (base.empty() || base.front() != kSep) { diff --git a/cpp/src/arrow/filesystem/path_util.h b/cpp/src/arrow/filesystem/path_util.h index 75745ee44ce..d4083d3b5c9 100644 --- a/cpp/src/arrow/filesystem/path_util.h +++ b/cpp/src/arrow/filesystem/path_util.h @@ -72,6 +72,9 @@ std::string EnsureTrailingSlash(util::string_view s); ARROW_EXPORT util::string_view RemoveTrailingSlash(util::string_view s); +ARROW_EXPORT +Status AssertNoTrailingSlash(util::string_view s); + ARROW_EXPORT bool IsAncestorOf(util::string_view ancestor, util::string_view descendant); diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 7a3df543506..dd3973ba771 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -2169,6 +2169,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this> OpenInputFile(const std::string& s, S3FileSystem* fs) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(s)); ARROW_ASSIGN_OR_RAISE(auto path, S3Path::FromString(s)); RETURN_NOT_OK(ValidateFilePath(path)); @@ -2179,6 +2180,7 @@ class S3FileSystem::Impl : public std::enable_shared_from_this> OpenInputFile(const FileInfo& info, S3FileSystem* fs) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(info.path())); if (info.type() == FileType::NotFound) { return ::arrow::fs::internal::PathNotFound(info.path()); } @@ -2537,6 +2539,7 @@ Result> S3FileSystem::OpenInputFile( Result> S3FileSystem::OpenOutputStream( const std::string& s, const std::shared_ptr& metadata) { + ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(s)); ARROW_ASSIGN_OR_RAISE(auto path, S3Path::FromString(s)); RETURN_NOT_OK(ValidateFilePath(path)); diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc index 3e197e110b1..d72386a64a9 100644 --- a/cpp/src/arrow/filesystem/test_util.cc +++ b/cpp/src/arrow/filesystem/test_util.cc @@ -899,6 +899,9 @@ void GenericFileSystemTest::TestOpenOutputStream(FileSystem* fs) { ASSERT_RAISES(Invalid, stream->Write("x")); // Stream is closed + // Trailing slash rejected + ASSERT_RAISES(IOError, fs->OpenOutputStream("CD/ghi/")); + // Storing metadata along file auto metadata = KeyValueMetadata::Make({"Content-Type", "Content-Language"}, {"x-arrow/filesystem-test", "fr_FR"}); @@ -931,6 +934,9 @@ void GenericFileSystemTest::TestOpenAppendStream(FileSystem* fs) { std::shared_ptr stream; + // Trailing slash rejected + ASSERT_RAISES(IOError, fs->OpenAppendStream("abc/")); + if (allow_append_to_new_file()) { ASSERT_OK_AND_ASSIGN(stream, fs->OpenAppendStream("abc")); } else { @@ -974,6 +980,9 @@ void GenericFileSystemTest::TestOpenInputStream(FileSystem* fs) { ASSERT_OK(stream->Close()); ASSERT_RAISES(Invalid, stream->Read(1)); // Stream is closed + // Trailing slash rejected + ASSERT_RAISES(IOError, fs->OpenInputStream("AB/abc/")); + // File does not exist AssertRaisesWithErrno(ENOENT, fs->OpenInputStream("AB/def")); AssertRaisesWithErrno(ENOENT, fs->OpenInputStream("def")); @@ -1009,6 +1018,15 @@ void GenericFileSystemTest::TestOpenInputStreamWithFileInfo(FileSystem* fs) { info.set_type(FileType::Unknown); AssertRaisesWithErrno(ENOENT, fs->OpenInputStream(info)); + // Trailing slash rejected + auto maybe_info = fs->GetFileInfo("AB/abc/"); + if (maybe_info.ok()) { + ASSERT_OK_AND_ASSIGN(info, maybe_info); + ASSERT_RAISES(IOError, fs->OpenInputStream(info)); + } else { + ASSERT_RAISES(IOError, maybe_info); + } + // Cannot open directory ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo("AB")); ASSERT_RAISES(IOError, fs->OpenInputStream(info)); @@ -1044,6 +1062,9 @@ void GenericFileSystemTest::TestOpenInputFile(FileSystem* fs) { ASSERT_OK(file->Close()); ASSERT_RAISES(Invalid, file->ReadAt(1, 1)); // Stream is closed + // Trailing slash rejected + ASSERT_RAISES(IOError, fs->OpenInputFile("AB/abc/")); + // File does not exist AssertRaisesWithErrno(ENOENT, fs->OpenInputFile("AB/def")); AssertRaisesWithErrno(ENOENT, fs->OpenInputFile("def")); @@ -1067,6 +1088,9 @@ void GenericFileSystemTest::TestOpenInputFileAsync(FileSystem* fs) { // File does not exist AssertRaisesWithErrno(ENOENT, fs->OpenInputFileAsync("AB/def").result()); + + // Trailing slash rejected + ASSERT_RAISES(IOError, fs->OpenInputFileAsync("AB/abc/").result()); } void GenericFileSystemTest::TestOpenInputFileWithFileInfo(FileSystem* fs) { @@ -1096,6 +1120,15 @@ void GenericFileSystemTest::TestOpenInputFileWithFileInfo(FileSystem* fs) { info.set_type(FileType::Unknown); AssertRaisesWithErrno(ENOENT, fs->OpenInputFile(info)); + // Trailing slash rejected + auto maybe_info = fs->GetFileInfo("AB/abc/"); + if (maybe_info.ok()) { + ASSERT_OK_AND_ASSIGN(info, maybe_info); + ASSERT_RAISES(IOError, fs->OpenInputFile(info)); + } else { + ASSERT_RAISES(IOError, maybe_info); + } + // Cannot open directory ASSERT_OK_AND_ASSIGN(info, fs->GetFileInfo("AB")); ASSERT_RAISES(IOError, fs->OpenInputFile(info)); diff --git a/cpp/src/arrow/filesystem/util_internal.cc b/cpp/src/arrow/filesystem/util_internal.cc index 4851fccd55d..0d2ad709026 100644 --- a/cpp/src/arrow/filesystem/util_internal.cc +++ b/cpp/src/arrow/filesystem/util_internal.cc @@ -56,21 +56,21 @@ Status CopyStream(const std::shared_ptr& src, return Status::OK(); } -Status PathNotFound(const std::string& path) { +Status PathNotFound(util::string_view path) { return Status::IOError("Path does not exist '", path, "'") .WithDetail(StatusDetailFromErrno(ENOENT)); } -Status NotADir(const std::string& path) { +Status NotADir(util::string_view path) { return Status::IOError("Not a directory: '", path, "'") .WithDetail(StatusDetailFromErrno(ENOTDIR)); } -Status NotAFile(const std::string& path) { +Status NotAFile(util::string_view path) { return Status::IOError("Not a regular file: '", path, "'"); } -Status InvalidDeleteDirContents(const std::string& path) { +Status InvalidDeleteDirContents(util::string_view path) { return Status::Invalid( "DeleteDirContents called on invalid path '", path, "'. ", "If you wish to delete the root directory's contents, call DeleteRootDirContents."); diff --git a/cpp/src/arrow/filesystem/util_internal.h b/cpp/src/arrow/filesystem/util_internal.h index 80a29feb850..75a2d3a2ef5 100644 --- a/cpp/src/arrow/filesystem/util_internal.h +++ b/cpp/src/arrow/filesystem/util_internal.h @@ -23,6 +23,7 @@ #include "arrow/filesystem/filesystem.h" #include "arrow/io/interfaces.h" #include "arrow/status.h" +#include "arrow/util/string_view.h" #include "arrow/util/visibility.h" namespace arrow { @@ -38,16 +39,16 @@ Status CopyStream(const std::shared_ptr& src, const io::IOContext& io_context); ARROW_EXPORT -Status PathNotFound(const std::string& path); +Status PathNotFound(util::string_view path); ARROW_EXPORT -Status NotADir(const std::string& path); +Status NotADir(util::string_view path); ARROW_EXPORT -Status NotAFile(const std::string& path); +Status NotAFile(util::string_view path); ARROW_EXPORT -Status InvalidDeleteDirContents(const std::string& path); +Status InvalidDeleteDirContents(util::string_view path); /// \brief Return files matching the glob pattern on the filesystem /// diff --git a/r/R/io.R b/r/R/io.R index 8e72187b431..82e3847df5e 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -241,7 +241,9 @@ mmap_open <- function(path, mode = c("read", "write", "readwrite")) { make_readable_file <- function(file, mmap = TRUE, compression = NULL, filesystem = NULL) { if (inherits(file, "SubTreeFileSystem")) { filesystem <- file$base_fs - file <- file$base_path + # SubTreeFileSystem adds a slash to base_path, but filesystems will reject file names + # with trailing slashes, so we need to remove it here. + file <- sub("/$", "", file$base_path) } if (is.string(file)) { if (is_url(file)) { @@ -303,7 +305,9 @@ make_output_stream <- function(x, filesystem = NULL, compression = NULL) { if (inherits(x, "SubTreeFileSystem")) { filesystem <- x$base_fs - x <- x$base_path + # SubTreeFileSystem adds a slash to base_path, but filesystems will reject file names + # with trailing slashes, so we need to remove it here. + x <- sub("/$", "", x$base_path) } else if (is_url(x)) { fs_and_path <- FileSystem$from_uri(x) filesystem <- fs_and_path$fs @@ -317,7 +321,7 @@ make_output_stream <- function(x, filesystem = NULL, compression = NULL) { assert_that(is.string(x)) if (is.null(filesystem) && is_compressed(compression)) { - CompressedOutputStream$create(x) ##compressed local + CompressedOutputStream$create(x) ## compressed local } else if (is.null(filesystem) && !is_compressed(compression)) { FileOutputStream$create(x) ## uncompressed local } else if (!is.null(filesystem) && is_compressed(compression)) { @@ -333,7 +337,7 @@ detect_compression <- function(path) { } # Remove any trailing slashes, which FileSystem$from_uri may add - path <- gsub("/$", "", path) + path <- sub("/$", "", path) switch(tools::file_ext(path), bz2 = "bz2",