Make FileGetSize() query the size with fstat() instead of seeking.

The previous implementation saved the current offset, seeked to the end
of the file to learn its length, and then seeked back.  The new one calls
fstat() (_fstat64() when _MSC_VER is defined) and reads st_size.  When the
reported size is 0 it additionally calls FileTell() and returns early if
that fails, per the in-code comment that non-seekable files usually have
no size.

Tests added or adjusted by this patch:
  * TestReadableFile.SeekTellSize: Tell() reports 100 after GetSize().
  * TestPipeIO.ReadableFileFails: ReadableFile::Open() on the fd r_ is
    expected to raise IOError.
  * TestMemoryMappedFile.GetSize: GetSize() reports 16384 and Tell()
    reports 0 on a freshly initialised 16384-byte map.
  * TestMemoryMappedFile.ZeroSizeFlie / WriteRead: sentinel initial value
    and a test file name specific to this test file.
  * test_python_file_read: f.size() == len(data) after reading.

NOTE(review): in this copy of the patch the declarations
"std::shared_ptr file;", "std::shared_ptr result;" and
"std::vector buffer(buffer_size);" carry no template arguments; they look
like they were lost in extraction -- confirm against the upstream commit
before applying.

diff --git a/cpp/src/arrow/io/io-file-test.cc b/cpp/src/arrow/io/io-file-test.cc
index d3ef90800f8..78ca0739ed5 100644
--- a/cpp/src/arrow/io/io-file-test.cc
+++ b/cpp/src/arrow/io/io-file-test.cc
@@ -322,6 +322,9 @@ TEST_F(TestReadableFile, SeekTellSize) {
   ASSERT_OK(file_->GetSize(&size));
   ASSERT_EQ(8, size);
 
+  ASSERT_OK(file_->Tell(&position));
+  ASSERT_EQ(100, position);
+
   // does not support zero copy
   ASSERT_FALSE(file_->supports_zero_copy());
 }
@@ -538,6 +541,12 @@ TEST_F(TestPipeIO, TestWrite) {
   ASSERT_EQ(bytes_read, 0);
 }
 
+TEST_F(TestPipeIO, ReadableFileFails) {
+  // ReadableFile fails on non-seekable fd
+  std::shared_ptr file;
+  ASSERT_RAISES(IOError, ReadableFile::Open(r_, &file));
+}
+
 // ----------------------------------------------------------------------
 // Memory map tests
 
@@ -553,7 +562,7 @@ TEST_F(TestMemoryMappedFile, ZeroSizeFlie) {
   std::shared_ptr result;
   ASSERT_OK(InitMemoryMap(0, path, &result));
 
-  int64_t size = 0;
+  int64_t size = -1;
   ASSERT_OK(result->Tell(&size));
   ASSERT_EQ(0, size);
 }
@@ -566,7 +575,7 @@ TEST_F(TestMemoryMappedFile, WriteRead) {
 
   const int reps = 5;
 
-  std::string path = "ipc-write-read-test";
+  std::string path = "io-memory-map-write-read-test";
   std::shared_ptr result;
   ASSERT_OK(InitMemoryMap(reps * buffer_size, path, &result));
 
@@ -582,6 +591,20 @@ TEST_F(TestMemoryMappedFile, WriteRead) {
   }
 }
 
+TEST_F(TestMemoryMappedFile, GetSize) {
+  std::string path = "io-memory-map-get-size";
+  std::shared_ptr result;
+  ASSERT_OK(InitMemoryMap(16384, path, &result));
+
+  int64_t size = -1;
+  ASSERT_OK(result->GetSize(&size));
+  ASSERT_EQ(16384, size);
+
+  int64_t position = -1;
+  ASSERT_OK(result->Tell(&position));
+  ASSERT_EQ(0, position);
+}
+
 TEST_F(TestMemoryMappedFile, ReadOnly) {
   const int64_t buffer_size = 1024;
   std::vector buffer(buffer_size);
diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc
index 10a30df5e57..03edc183886 100644
--- a/cpp/src/arrow/util/io-util.cc
+++ b/cpp/src/arrow/util/io-util.cc
@@ -255,25 +255,32 @@ Status FileSeek(int fd, int64_t pos, int whence) {
 Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
 
 Status FileGetSize(int fd, int64_t* size) {
-  int64_t ret;
-
-  // XXX Should use fstat() instead, but this function also ensures the
-  // file is seekable
-
-  // Save current position
-  int64_t current_position = lseek64_compat(fd, 0, SEEK_CUR);
-  CHECK_LSEEK(current_position);
-
-  // Move to end of the file, which returns the file length
-  ret = lseek64_compat(fd, 0, SEEK_END);
-  CHECK_LSEEK(ret);
-
-  *size = ret;
+#if defined(_MSC_VER)
+  struct __stat64 st;
+#else
+  struct stat st;
+#endif
+  st.st_size = -1;
 
-  // Restore file position
-  ret = lseek64_compat(fd, current_position, SEEK_SET);
-  CHECK_LSEEK(ret);
+#if defined(_MSC_VER)
+  int ret = _fstat64(fd, &st);
+#else
+  int ret = fstat(fd, &st);
+#endif
+  if (ret == -1) {
+    return Status::IOError("error stat()ing file");
+  }
+  if (st.st_size == 0) {
+    // Maybe the file doesn't support getting its size, double-check by
+    // trying to tell() (seekable files usually have a size, while
+    // non-seekable files don't)
+    int64_t position;
+    RETURN_NOT_OK(FileTell(fd, &position));
+  } else if (st.st_size < 0) {
+    return Status::IOError("error getting file size");
+  }
+  *size = st.st_size;
 
   return Status::OK();
 }
 
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 15116007777..02851bee08d 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -100,6 +100,8 @@ def test_python_file_read():
     assert v == b'sample data'
     assert len(v) == 11
 
+    assert f.size() == len(data)
+
     f.close()
 
 