diff --git a/r/NAMESPACE b/r/NAMESPACE index cc5961e5ba1..65d60d846f4 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -8,6 +8,12 @@ S3method("==","arrow::RecordBatch") S3method("==","arrow::ipc::Message") S3method(BufferReader,"arrow::Buffer") S3method(BufferReader,default) +S3method(FeatherTableReader,"arrow::io::RandomAccessFile") +S3method(FeatherTableReader,"arrow::ipc::feather::TableReader") +S3method(FeatherTableReader,character) +S3method(FeatherTableReader,default) +S3method(FeatherTableReader,fs_path) +S3method(FeatherTableWriter,"arrow::io::OutputStream") S3method(FixedSizeBufferWriter,"arrow::Buffer") S3method(FixedSizeBufferWriter,default) S3method(MessageReader,"arrow::io::InputStream") @@ -33,12 +39,6 @@ S3method(buffer,default) S3method(buffer,integer) S3method(buffer,numeric) S3method(buffer,raw) -S3method(feather_table_reader,"arrow::io::RandomAccessFile") -S3method(feather_table_reader,"arrow::ipc::feather::TableReader") -S3method(feather_table_reader,character) -S3method(feather_table_reader,default) -S3method(feather_table_reader,fs_path) -S3method(feather_table_writer,"arrow::io::OutputStream") S3method(length,"arrow::Array") S3method(names,"arrow::RecordBatch") S3method(print,"arrow-enum") @@ -70,6 +70,8 @@ S3method(write_feather_RecordBatch,fs_path) export(BufferOutputStream) export(BufferReader) export(DateUnit) +export(FeatherTableReader) +export(FeatherTableWriter) export(FileMode) export(FileOutputStream) export(FixedSizeBufferWriter) @@ -95,8 +97,6 @@ export(date64) export(decimal) export(default_memory_pool) export(dictionary) -export(feather_table_reader) -export(feather_table_writer) export(field) export(float16) export(float32) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index ccf854927b7..0310eab2027 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -445,8 +445,8 @@ ipc___feather___TableReader__GetColumn <- function(reader, i) { .Call(`_arrow_ipc___feather___TableReader__GetColumn`, reader, i) } -ipc___feather___TableReader__Read <- function(reader) { - .Call(`_arrow_ipc___feather___TableReader__Read`, reader) +ipc___feather___TableReader__Read <- function(reader, columns) { + .Call(`_arrow_ipc___feather___TableReader__Read`, reader, columns) } ipc___feather___TableReader__Open <- function(stream) { diff --git a/r/R/feather.R b/r/R/feather.R index bae71d31bc1..064652145c8 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -35,7 +35,9 @@ num_columns = function() ipc___feather___TableReader__num_columns(self), GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), GetColumn = function(i) shared_ptr(`arrow::Column`, ipc___feather___TableReader__GetColumn(self, i)), - Read = function() shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self)) + Read = function(columns) { + shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) + } ) ) @@ -44,12 +46,12 @@ #' @param stream an OutputStream #' #' @export -feather_table_writer <- function(stream) { - UseMethod("feather_table_writer") +FeatherTableWriter <- function(stream) { + UseMethod("FeatherTableWriter") } #' @export -`feather_table_writer.arrow::io::OutputStream` <- function(stream){ +`FeatherTableWriter.arrow::io::OutputStream` <- function(stream){ unique_ptr(`arrow::ipc::feather::TableWriter`, ipc___feather___TableWriter__Open(stream)) } @@ -107,7 +109,7 @@ write_feather_RecordBatch <- function(data, stream) { #' @export #' @method write_feather_RecordBatch arrow::io::OutputStream `write_feather_RecordBatch.arrow::io::OutputStream` <- function(data, stream) { - ipc___TableWriter__RecordBatch__WriteFeather(feather_table_writer(stream), data) + ipc___TableWriter__RecordBatch__WriteFeather(FeatherTableWriter(stream), data) } #' A arrow::ipc::feather::TableReader to read from a file @@ -117,44 +119,50 @@ write_feather_RecordBatch <- function(data, stream) { #' @param ... extra parameters #' #' @export -feather_table_reader <- function(file, mmap = TRUE, ...){ - UseMethod("feather_table_reader") +FeatherTableReader <- function(file, mmap = TRUE, ...){ + UseMethod("FeatherTableReader") } #' @export -feather_table_reader.default <- function(file, mmap = TRUE, ...) { +FeatherTableReader.default <- function(file, mmap = TRUE, ...) { stop("unsupported") } #' @export -feather_table_reader.character <- function(file, mmap = TRUE, ...) { - feather_table_reader(fs::path_abs(file), mmap = mmap, ...) +FeatherTableReader.character <- function(file, mmap = TRUE, ...) { + FeatherTableReader(fs::path_abs(file), mmap = mmap, ...) } #' @export -feather_table_reader.fs_path <- function(file, mmap = TRUE, ...) { +FeatherTableReader.fs_path <- function(file, mmap = TRUE, ...) { stream <- if(isTRUE(mmap)) mmap_open(file, ...) else ReadableFile(file, ...) - feather_table_reader(stream) + FeatherTableReader(stream) } #' @export -`feather_table_reader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){ +`FeatherTableReader.arrow::io::RandomAccessFile` <- function(file, mmap = TRUE, ...){ unique_ptr(`arrow::ipc::feather::TableReader`, ipc___feather___TableReader__Open(file)) } #' @export -`feather_table_reader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){ +`FeatherTableReader.arrow::ipc::feather::TableReader` <- function(file, mmap = TRUE, ...){ file } #' Read a feather file #' -#' @param file a arrow::ipc::feather::TableReader or whatever the [feather_table_reader()] function can handle +#' @param file a arrow::ipc::feather::TableReader or whatever the [FeatherTableReader()] function can handle +#' @param columns names if the columns to read. The default `NULL` means all columns +#' @param as_tibble should the [arrow::Table][arrow__Table] be converted to a tibble. #' @param ... additional parameters #' -#' @return an arrow::Table +#' @return a data frame if `as_tibble` is `TRUE` (the default), or a [arrow::Table][arrow__Table] otherwise #' #' @export -read_feather <- function(file, ...){ - feather_table_reader(file, ...)$Read() +read_feather <- function(file, columns = NULL, as_tibble = TRUE, ...){ + out <- FeatherTableReader(file, ...)$Read(columns) + if (isTRUE(as_tibble)) { + out <- as_tibble(out) + } + out } diff --git a/r/man/feather_table_reader.Rd b/r/man/FeatherTableReader.Rd similarity index 80% rename from r/man/feather_table_reader.Rd rename to r/man/FeatherTableReader.Rd index fb1c53429f8..15a260bd57c 100644 --- a/r/man/feather_table_reader.Rd +++ b/r/man/FeatherTableReader.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/feather.R -\name{feather_table_reader} -\alias{feather_table_reader} +\name{FeatherTableReader} +\alias{FeatherTableReader} \title{A arrow::ipc::feather::TableReader to read from a file} \usage{ -feather_table_reader(file, mmap = TRUE, ...) +FeatherTableReader(file, mmap = TRUE, ...) } \arguments{ \item{file}{A file path, arrow::io::RandomAccessFile} diff --git a/r/man/feather_table_writer.Rd b/r/man/FeatherTableWriter.Rd similarity index 74% rename from r/man/feather_table_writer.Rd rename to r/man/FeatherTableWriter.Rd index 36035aca120..3acf5971a71 100644 --- a/r/man/feather_table_writer.Rd +++ b/r/man/FeatherTableWriter.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/feather.R -\name{feather_table_writer} -\alias{feather_table_writer} +\name{FeatherTableWriter} +\alias{FeatherTableWriter} \title{Create TableWriter that writes into a stream} \usage{ -feather_table_writer(stream) +FeatherTableWriter(stream) } \arguments{ \item{stream}{an OutputStream} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index e86b86b99e9..31fd36ab65a 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -4,15 +4,19 @@ \alias{read_feather} \title{Read a feather file} \usage{ -read_feather(file, ...) +read_feather(file, columns = NULL, as_tibble = TRUE, ...) } \arguments{ -\item{file}{a arrow::ipc::feather::TableReader or whatever the \code{\link[=feather_table_reader]{feather_table_reader()}} function can handle} +\item{file}{a arrow::ipc::feather::TableReader or whatever the \code{\link[=FeatherTableReader]{FeatherTableReader()}} function can handle} + +\item{columns}{names if the columns to read. The default \code{NULL} means all columns} + +\item{as_tibble}{should the \link[=arrow__Table]{arrow::Table} be converted to a tibble.} \item{...}{additional parameters} } \value{ -an arrow::Table +a data frame if \code{as_tibble} is \code{TRUE} (the default), or a \link[=arrow__Table]{arrow::Table} otherwise } \description{ Read a feather file diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index bca4eafdee4..e5a784eb70c 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -1244,13 +1244,14 @@ BEGIN_RCPP END_RCPP } // ipc___feather___TableReader__Read -std::shared_ptr ipc___feather___TableReader__Read(const std::unique_ptr& reader); -RcppExport SEXP _arrow_ipc___feather___TableReader__Read(SEXP readerSEXP) { +std::shared_ptr ipc___feather___TableReader__Read(const std::unique_ptr& reader, SEXP columns); +RcppExport SEXP _arrow_ipc___feather___TableReader__Read(SEXP readerSEXP, SEXP columnsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const std::unique_ptr& >::type reader(readerSEXP); - rcpp_result_gen = Rcpp::wrap(ipc___feather___TableReader__Read(reader)); + Rcpp::traits::input_parameter< SEXP >::type columns(columnsSEXP); + rcpp_result_gen = Rcpp::wrap(ipc___feather___TableReader__Read(reader, columns)); return rcpp_result_gen; END_RCPP } @@ -2262,7 +2263,7 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_ipc___feather___TableReader__num_columns", (DL_FUNC) &_arrow_ipc___feather___TableReader__num_columns, 1}, {"_arrow_ipc___feather___TableReader__GetColumnName", (DL_FUNC) &_arrow_ipc___feather___TableReader__GetColumnName, 2}, {"_arrow_ipc___feather___TableReader__GetColumn", (DL_FUNC) &_arrow_ipc___feather___TableReader__GetColumn, 2}, - {"_arrow_ipc___feather___TableReader__Read", (DL_FUNC) &_arrow_ipc___feather___TableReader__Read, 1}, + {"_arrow_ipc___feather___TableReader__Read", (DL_FUNC) &_arrow_ipc___feather___TableReader__Read, 2}, {"_arrow_ipc___feather___TableReader__Open", (DL_FUNC) &_arrow_ipc___feather___TableReader__Open, 1}, {"_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, {"_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, diff --git a/r/src/feather.cpp b/r/src/feather.cpp index 7b84deefadb..8389156c384 100644 --- a/r/src/feather.cpp +++ b/r/src/feather.cpp @@ -115,9 +115,37 @@ std::shared_ptr ipc___feather___TableReader__GetColumn( // [[Rcpp::export]] std::shared_ptr ipc___feather___TableReader__Read( - const std::unique_ptr& reader) { + const std::unique_ptr& reader, SEXP columns) { std::shared_ptr table; - STOP_IF_NOT_OK(reader->Read(&table)); + + switch (TYPEOF(columns)) { + case INTSXP: { + R_xlen_t n = XLENGTH(columns); + std::vector indices(n); + int* p_columns = INTEGER(columns); + for (int i = 0; i < n; i++) { + indices[i] = p_columns[i] - 1; + } + STOP_IF_NOT_OK(reader->Read(indices, &table)); + break; + } + case STRSXP: { + R_xlen_t n = XLENGTH(columns); + std::vector names(n); + for (R_xlen_t i = 0; i < n; i++) { + names[i] = CHAR(STRING_ELT(columns, i)); + } + STOP_IF_NOT_OK(reader->Read(names, &table)); + break; + } + case NILSXP: + STOP_IF_NOT_OK(reader->Read(&table)); + break; + default: + Rcpp::stop("incompatible column specification"); + break; + }; + return table; } diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 715017fb586..23fdc58fd78 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -34,25 +34,66 @@ test_that("feather read/write round trip", { expect_true(fs::file_exists(tf3)) tab1 <- read_feather(tf1) - expect_is(tab1, "arrow::Table") + expect_is(tab1, "data.frame") tab2 <- read_feather(tf2) - expect_is(tab2, "arrow::Table") + expect_is(tab2, "data.frame") tab3 <- read_feather(tf3) - expect_is(tab3, "arrow::Table") + expect_is(tab3, "data.frame") # reading directly from arrow::io::MemoryMappedFile tab4 <- read_feather(mmap_open(tf3)) - expect_is(tab4, "arrow::Table") + expect_is(tab4, "data.frame") # reading directly from arrow::io::ReadableFile tab5 <- read_feather(ReadableFile(tf3)) - expect_is(tab5, "arrow::Table") + expect_is(tab5, "data.frame") + + expect_equal(tib, tab1) + expect_equal(tib, tab2) + expect_equal(tib, tab3) + expect_equal(tib, tab4) + expect_equal(tib, tab5) +}) + +test_that("feather handles columns = ", { + tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10]) + + tf1 <- local_tempfile() + write_feather(tib, tf1) + expect_true(fs::file_exists(tf1)) + + tab1 <- read_feather(tf1, columns = c("x", "y")) + expect_is(tab1, "data.frame") + + expect_equal(tib[, c("x", "y")], as_tibble(tab1)) +}) + +test_that("feather handles columns = ", { + tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10]) + + tf1 <- local_tempfile() + write_feather(tib, tf1) + expect_true(fs::file_exists(tf1)) + + tab1 <- read_feather(tf1, columns = 1:2) + expect_is(tab1, "data.frame") + + expect_equal(tib[, c("x", "y")], as_tibble(tab1)) +}) + +test_that("feather read/write round trip", { + tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10]) + + tf1 <- local_tempfile() + write_feather(tib, tf1) + expect_true(fs::file_exists(tf1)) + + tab1 <- read_feather(tf1, as_tibble = FALSE) + expect_is(tab1, "arrow::Table") expect_equal(tib, as_tibble(tab1)) - expect_equal(tib, as_tibble(tab2)) - expect_equal(tib, as_tibble(tab3)) - expect_equal(tib, as_tibble(tab4)) - expect_equal(tib, as_tibble(tab5)) }) + +