From 7dec4ed65ff4dd665a08ef9fc92eb61ac72e8363 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 7 Sep 2020 15:10:14 +0200 Subject: [PATCH 1/5] + Table$SelectColumns() R follow up from #7272 --- r/R/arrowExports.R | 4 ++-- r/R/table.R | 10 ++++++++-- r/man/Table.Rd | 2 ++ r/src/arrowExports.cpp | 14 +++++++------- r/src/table.cpp | 18 +++--------------- r/tests/testthat/test-Table.R | 9 +++++++++ 6 files changed, 31 insertions(+), 26 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index aa48384495e2..98294e600a7b 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1504,8 +1504,8 @@ Table__GetColumnByName <- function(table, name){ .Call(`_arrow_Table__GetColumnByName` , table, name) } -Table__select <- function(table, indices){ - .Call(`_arrow_Table__select` , table, indices) +Table__SelectColumns <- function(table, indices){ + .Call(`_arrow_Table__SelectColumns` , table, indices) } all_record_batches <- function(lst){ diff --git a/r/R/table.R b/r/R/table.R index 20eb9bb0a672..4c2864702c9f 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -55,6 +55,8 @@ #' - `$ColumnNames()`: Get all column names (called by `names(tab)`) #' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name #' - `$field(i)`: Extract a `Field` from the table schema by integer position +#' - `$SelectColumns(indices)`: Return new `Table` with specified columns, given as +#' 0-based indices. #' - `$select(spec)`: Return a new table with a selection of columns. #' This supports the usual `character`, `numeric`, and `logical` selection #' methods as well as "tidy select" expressions. @@ -115,6 +117,10 @@ Table <- R6Class("Table", inherit = ArrowObject, shared_ptr(Table, Table__cast(self, target_schema, options)) }, + SelectColumns = function(indices) { + shared_ptr(Table, Table__SelectColumns(self, indices)) + }, + select = function(spec) { spec <- enquo(spec) if (quo_is_null(spec)) { @@ -122,8 +128,8 @@ Table <- R6Class("Table", inherit = ArrowObject, } else { all_vars <- self$ColumnNames() vars <- vars_select(all_vars, !!spec) - indices <- match(vars, all_vars) - shared_ptr(Table, Table__select(self, indices)) + indices <- match(vars, all_vars) - 1L + self$SelectColumns(indices) } }, diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 2014a30be9e8..6ab1b4967336 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -46,6 +46,8 @@ the following R6 methods that map onto the underlying C++ methods: \item \verb{$ColumnNames()}: Get all column names (called by \code{names(tab)}) \item \verb{$GetColumnByName(name)}: Extract a \code{ChunkedArray} by string name \item \verb{$field(i)}: Extract a \code{Field} from the table schema by integer position +\item \verb{$SelectColumns(indices)}: Return new \code{Table} with specified columns, given as +0-based indices. \item \verb{$select(spec)}: Return a new table with a selection of columns. This supports the usual \code{character}, \code{numeric}, and \code{logical} selection methods as well as "tidy select" expressions. diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index b6c8f2b91c86..9ecad49b63ee 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -5880,17 +5880,17 @@ extern "C" SEXP _arrow_Table__GetColumnByName(SEXP table_sexp, SEXP name_sexp){ // table.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Table__select(const std::shared_ptr& table, cpp11::integers indices); -extern "C" SEXP _arrow_Table__select(SEXP table_sexp, SEXP indices_sexp){ +std::shared_ptr Table__SelectColumns(const std::shared_ptr& table, const std::vector& indices); +extern "C" SEXP _arrow_Table__SelectColumns(SEXP table_sexp, SEXP indices_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type indices(indices_sexp); - return cpp11::as_sexp(Table__select(table, indices)); + arrow::r::Input&>::type indices(indices_sexp); + return cpp11::as_sexp(Table__SelectColumns(table, indices)); END_CPP11 } #else -extern "C" SEXP _arrow_Table__select(SEXP table_sexp, SEXP indices_sexp){ - Rf_error("Cannot call Table__select(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_Table__SelectColumns(SEXP table_sexp, SEXP indices_sexp){ + Rf_error("Cannot call Table__SelectColumns(). Please use arrow::install_arrow() to install required runtime libraries. "); } #endif @@ -6371,7 +6371,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, - { "_arrow_Table__select", (DL_FUNC) &_arrow_Table__select, 2}, + { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, diff --git a/r/src/table.cpp b/r/src/table.cpp index 6a56e8e8f2d9..5024df970092 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -115,21 +115,9 @@ std::shared_ptr Table__GetColumnByName( } // [[arrow::export]] -std::shared_ptr Table__select(const std::shared_ptr& table, - cpp11::integers indices) { - R_xlen_t n = indices.size(); - - std::vector> fields(n); - std::vector> columns(n); - - for (R_xlen_t i = 0; i < n; i++) { - int pos = indices[i] - 1; - fields[i] = table->schema()->field(pos); - columns[i] = table->column(pos); - } - - auto schema = std::make_shared(std::move(fields)); - return arrow::Table::Make(schema, columns); +std::shared_ptr Table__SelectColumns( + const std::shared_ptr& table, const std::vector& indices) { + return ValueOrStop(table->SelectColumns(indices)); } namespace arrow { diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index bd6206ab1979..7e4ac15f54bd 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -349,3 +349,12 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", { expect_identical(as.data.frame(tab), res) }) + +test_that("Table$SelectColumns()", { + tab <- Table$create(x = 1:10, y = 1:10) + + expect_equal(tab$SelectColumns(0L), Table$create(x = 1:10)) + + expect_error(tab$SelectColumns(2:4)) + expect_error(tab$SelectColumns("")) +}) From ab4dd81bfca2b785f91e3d173af7a3dc81fc8739 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 11 Sep 2020 13:20:29 +0200 Subject: [PATCH 2/5] RecordBatch and Table gets $SelectColumns() --- r/R/arrowExports.R | 4 ++-- r/R/csv.R | 9 ++++++++- r/R/json.R | 9 ++++++++- r/R/record-batch.R | 28 ++++++++++++++-------------- r/R/table.R | 22 +++++----------------- r/man/RecordBatch.Rd | 5 ++--- r/man/Table.Rd | 7 ++----- r/src/arrowExports.cpp | 12 ++++++------ r/src/recordbatch.cpp | 4 ++-- 9 files changed, 49 insertions(+), 51 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 98294e600a7b..0d0c37621ce7 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1260,8 +1260,8 @@ RecordBatch__GetColumnByName <- function(batch, name){ .Call(`_arrow_RecordBatch__GetColumnByName` , batch, name) } -RecordBatch__select <- function(batch, indices){ - .Call(`_arrow_RecordBatch__select` , batch, indices) +RecordBatch__SelectColumns <- function(batch, indices){ + .Call(`_arrow_RecordBatch__SelectColumns` , batch, indices) } RecordBatch__Equals <- function(self, other, check_metadata){ diff --git a/r/R/csv.R b/r/R/csv.R index e145a907e28c..67d1c9eb0d80 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -129,7 +129,14 @@ read_delim_arrow <- function(file, convert_options = convert_options ) - tab <- reader$Read()$select(!!enquo(col_select)) + tab <- reader$Read() + + col_select <- enquo(col_select) + if (!quo_is_null(col_select)) { + tab <- tab$SelectColumns( + vars_select(names(tab), !!col_select) + ) + } if (isTRUE(as_data_frame)) { tab <- as.data.frame(tab) diff --git a/r/R/json.R b/r/R/json.R index 1f05f84cfd02..38d65f0f46d0 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -36,7 +36,14 @@ #' df <- read_json_arrow(tf) #' } read_json_arrow <- function(file, col_select = NULL, as_data_frame = TRUE, ...) { - tab <- JsonTableReader$create(file, ...)$Read()$select(!!enquo(col_select)) + tab <- JsonTableReader$create(file, ...)$Read() + + col_select <- enquo(col_select) + if (!quo_is_null(col_select)) { + tab <- tab$SelectColumns( + vars_select(names(tab), !!col_select) + ) + } if (isTRUE(as_data_frame)) { tab <- as.data.frame(tab) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 712000a97ab2..e58263f15d38 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -48,9 +48,8 @@ #' - `$names()`: Get all column names (called by `names(batch)`) #' - `$GetColumnByName(name)`: Extract an `Array` by string name #' - `$RemoveColumn(i)`: Drops a column from the batch by integer position -#' - `$select(spec)`: Return a new record batch with a selection of columns. -#' This supports the usual `character`, `numeric`, and `logical` selection -#' methods as well as "tidy select" expressions. +#' - `$selectColumns(indices)`: Return a new record batch with a selection of columns. Supports +#' 0-based integer indices and character vectors. #' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the #' indicated integer offset and going for the given length, or to the end #' of the table if `NULL`, the default. @@ -84,21 +83,15 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, assert_that(is.string(name)) shared_ptr(Array, RecordBatch__GetColumnByName(self, name)) }, - select = function(spec) { - spec <- enquo(spec) - if (quo_is_null(spec)) { - self - } else { - all_vars <- self$names() - vars <- vars_select(all_vars, !!spec) - indices <- match(vars, all_vars) - shared_ptr(RecordBatch, RecordBatch__select(self, indices)) + SelectColumns = function(indices) { + if (is.character(indices)) { + indices <- match(indices, self$names()) - 1L } + shared_ptr(RecordBatch, RecordBatch__SelectColumns(self, indices)) }, RemoveColumn = function(i){ shared_ptr(RecordBatch, RecordBatch__RemoveColumn(self, i)) }, - Slice = function(offset, length = NULL) { if (is.null(length)) { shared_ptr(RecordBatch, RecordBatch__Slice1(self, offset)) @@ -218,7 +211,14 @@ names.RecordBatch <- function(x) x$names() if (!missing(j)) { # Selecting columns is cheaper than filtering rows, so do it first. # That way, if we're filtering too, we have fewer arrays to filter/slice/take - x <- x$select(j) + j <- enquo(j) + if (!quo_is_null(j)) { + all_vars <- names(x) + vars <- vars_select(all_vars, !!j) + indices <- match(vars, all_vars) - 1L + x <- x$SelectColumns(indices) + } + if (drop && ncol(x) == 1L) { x <- x$column(0) } diff --git a/r/R/table.R b/r/R/table.R index 4c2864702c9f..d5eec4fed08a 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -55,11 +55,8 @@ #' - `$ColumnNames()`: Get all column names (called by `names(tab)`) #' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name #' - `$field(i)`: Extract a `Field` from the table schema by integer position -#' - `$SelectColumns(indices)`: Return new `Table` with specified columns, given as -#' 0-based indices. -#' - `$select(spec)`: Return a new table with a selection of columns. -#' This supports the usual `character`, `numeric`, and `logical` selection -#' methods as well as "tidy select" expressions. +#' - `$SelectColumns(indices)`: Return new `Table` with specified columns. Supports +#' 0-based integer indices and character vectors. #' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the #' indicated integer offset and going for the given length, or to the end #' of the table if `NULL`, the default. @@ -118,19 +115,10 @@ Table <- R6Class("Table", inherit = ArrowObject, }, SelectColumns = function(indices) { - shared_ptr(Table, Table__SelectColumns(self, indices)) - }, - - select = function(spec) { - spec <- enquo(spec) - if (quo_is_null(spec)) { - self - } else { - all_vars <- self$ColumnNames() - vars <- vars_select(all_vars, !!spec) - indices <- match(vars, all_vars) - 1L - self$SelectColumns(indices) + if (is.character(indices)) { + indices <- match(indices, self$ColumnNames()) - 1L } + shared_ptr(Table, Table__SelectColumns(self, indices)) }, Slice = function(offset, length = NULL) { diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index 40c34968d63e..1cf70a00a157 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -48,9 +48,8 @@ the following R6 methods that map onto the underlying C++ methods: \item \verb{$names()}: Get all column names (called by \code{names(batch)}) \item \verb{$GetColumnByName(name)}: Extract an \code{Array} by string name \item \verb{$RemoveColumn(i)}: Drops a column from the batch by integer position -\item \verb{$select(spec)}: Return a new record batch with a selection of columns. -This supports the usual \code{character}, \code{numeric}, and \code{logical} selection -methods as well as "tidy select" expressions. +\item \verb{$selectColumns(indices)}: Return a new record batch with a selection of columns. Supports +0-based integer indices and character vectors. \item \verb{$Slice(offset, length = NULL)}: Create a zero-copy view starting at the indicated integer offset and going for the given length, or to the end of the table if \code{NULL}, the default. diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 6ab1b4967336..e53cba6ba60a 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -46,11 +46,8 @@ the following R6 methods that map onto the underlying C++ methods: \item \verb{$ColumnNames()}: Get all column names (called by \code{names(tab)}) \item \verb{$GetColumnByName(name)}: Extract a \code{ChunkedArray} by string name \item \verb{$field(i)}: Extract a \code{Field} from the table schema by integer position -\item \verb{$SelectColumns(indices)}: Return new \code{Table} with specified columns, given as -0-based indices. -\item \verb{$select(spec)}: Return a new table with a selection of columns. -This supports the usual \code{character}, \code{numeric}, and \code{logical} selection -methods as well as "tidy select" expressions. +\item \verb{$SelectColumns(indices)}: Return new \code{Table} with specified columns. Supports +0-based integer indices and character vectors. \item \verb{$Slice(offset, length = NULL)}: Create a zero-copy view starting at the indicated integer offset and going for the given length, or to the end of the table if \code{NULL}, the default. diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 9ecad49b63ee..b162a795ac86 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -4925,17 +4925,17 @@ extern "C" SEXP _arrow_RecordBatch__GetColumnByName(SEXP batch_sexp, SEXP name_s // recordbatch.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr RecordBatch__select(const std::shared_ptr& batch, cpp11::integers indices); -extern "C" SEXP _arrow_RecordBatch__select(SEXP batch_sexp, SEXP indices_sexp){ +std::shared_ptr RecordBatch__SelectColumns(const std::shared_ptr& batch, cpp11::integers indices); +extern "C" SEXP _arrow_RecordBatch__SelectColumns(SEXP batch_sexp, SEXP indices_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); arrow::r::Input::type indices(indices_sexp); - return cpp11::as_sexp(RecordBatch__select(batch, indices)); + return cpp11::as_sexp(RecordBatch__SelectColumns(batch, indices)); END_CPP11 } #else -extern "C" SEXP _arrow_RecordBatch__select(SEXP batch_sexp, SEXP indices_sexp){ - Rf_error("Cannot call RecordBatch__select(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_RecordBatch__SelectColumns(SEXP batch_sexp, SEXP indices_sexp){ + Rf_error("Cannot call RecordBatch__SelectColumns(). Please use arrow::install_arrow() to install required runtime libraries. "); } #endif @@ -6310,7 +6310,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, { "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, - { "_arrow_RecordBatch__select", (DL_FUNC) &_arrow_RecordBatch__select, 2}, + { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index e9648da55ac3..02b61f60633d 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -77,7 +77,7 @@ std::shared_ptr RecordBatch__GetColumnByName( } // [[arrow::export]] -std::shared_ptr RecordBatch__select( +std::shared_ptr RecordBatch__SelectColumns( const std::shared_ptr& batch, cpp11::integers indices) { R_xlen_t n = indices.size(); auto nrows = batch->num_rows(); @@ -86,7 +86,7 @@ std::shared_ptr RecordBatch__select( std::vector> columns(n); for (R_xlen_t i = 0; i < n; i++) { - int pos = indices[i] - 1; + int pos = indices[i]; fields[i] = batch->schema()->field(pos); columns[i] = batch->column(pos); } From 3668ad588f827c58a9b5d79ffbaf69989e5a2f42 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 11 Sep 2020 13:57:12 +0200 Subject: [PATCH 3/5] $SelectColumns() only supporting 0-based integers, but [ support R style 1-based and character --- r/R/csv.R | 4 +--- r/R/json.R | 4 +--- r/R/record-batch.R | 16 +++++----------- r/R/table.R | 6 +----- r/tests/testthat/test-Table.R | 10 +++++----- 5 files changed, 13 insertions(+), 27 deletions(-) diff --git a/r/R/csv.R b/r/R/csv.R index 67d1c9eb0d80..85ba788025a9 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -133,9 +133,7 @@ read_delim_arrow <- function(file, col_select <- enquo(col_select) if (!quo_is_null(col_select)) { - tab <- tab$SelectColumns( - vars_select(names(tab), !!col_select) - ) + tab <- tab[vars_select(names(tab), !!col_select)] } if (isTRUE(as_data_frame)) { diff --git a/r/R/json.R b/r/R/json.R index 38d65f0f46d0..006a7d7cf956 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -40,9 +40,7 @@ read_json_arrow <- function(file, col_select = NULL, as_data_frame = TRUE, ...) col_select <- enquo(col_select) if (!quo_is_null(col_select)) { - tab <- tab$SelectColumns( - vars_select(names(tab), !!col_select) - ) + tab <- tab[vars_select(names(tab), !!col_select)] } if (isTRUE(as_data_frame)) { diff --git a/r/R/record-batch.R b/r/R/record-batch.R index e58263f15d38..0b0b76e92389 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -48,8 +48,7 @@ #' - `$names()`: Get all column names (called by `names(batch)`) #' - `$GetColumnByName(name)`: Extract an `Array` by string name #' - `$RemoveColumn(i)`: Drops a column from the batch by integer position -#' - `$selectColumns(indices)`: Return a new record batch with a selection of columns. Supports -#' 0-based integer indices and character vectors. +#' - `$selectColumns(indices)`: Return a new record batch with a selection of columns, expressed as 0-based integers. #' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the #' indicated integer offset and going for the given length, or to the end #' of the table if `NULL`, the default. @@ -84,9 +83,6 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, shared_ptr(Array, RecordBatch__GetColumnByName(self, name)) }, SelectColumns = function(indices) { - if (is.character(indices)) { - indices <- match(indices, self$names()) - 1L - } shared_ptr(RecordBatch, RecordBatch__SelectColumns(self, indices)) }, RemoveColumn = function(i){ @@ -211,12 +207,10 @@ names.RecordBatch <- function(x) x$names() if (!missing(j)) { # Selecting columns is cheaper than filtering rows, so do it first. # That way, if we're filtering too, we have fewer arrays to filter/slice/take - j <- enquo(j) - if (!quo_is_null(j)) { - all_vars <- names(x) - vars <- vars_select(all_vars, !!j) - indices <- match(vars, all_vars) - 1L - x <- x$SelectColumns(indices) + if (is_integerish(j)) { + x <- x$SelectColumns(as.integer(j) - 1L) + } else if (is.character(j)) { + x <- x$SelectColumns(match(j, names(x)) - 1L) } if (drop && ncol(x) == 1L) { diff --git a/r/R/table.R b/r/R/table.R index d5eec4fed08a..81ba7fdf030e 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -55,8 +55,7 @@ #' - `$ColumnNames()`: Get all column names (called by `names(tab)`) #' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name #' - `$field(i)`: Extract a `Field` from the table schema by integer position -#' - `$SelectColumns(indices)`: Return new `Table` with specified columns. Supports -#' 0-based integer indices and character vectors. +#' - `$SelectColumns(indices)`: Return new `Table` with specified columns, expressed as 0-based integers. #' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the #' indicated integer offset and going for the given length, or to the end #' of the table if `NULL`, the default. @@ -115,9 +114,6 @@ Table <- R6Class("Table", inherit = ArrowObject, }, SelectColumns = function(indices) { - if (is.character(indices)) { - indices <- match(indices, self$ColumnNames()) - 1L - } shared_ptr(Table, Table__SelectColumns(self, indices)) }, diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 7e4ac15f54bd..a9ea891eaeea 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -130,16 +130,16 @@ test_that("[, [[, $ for Table", { expect_null(tab[["asdf"]]) # List-like column slicing expect_data_frame(tab[2:4], tbl[2:4]) - expect_data_frame(tab[c(1, 0)], tbl[c(1, 0)]) + expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)]) expect_error(tab[[c(4, 3)]]) expect_error(tab[[NA]], "'i' must be character or numeric, not logical") expect_error(tab[[NULL]], "'i' must be character or numeric, not NULL") expect_error(tab[[c("asdf", "jkl;")]], 'length(name) not equal to 1', fixed = TRUE) - expect_error(tab[-3], "Selections can't have negative value") # From tidyselect - expect_error(tab[-3:3], "Selections can't have negative value") # From tidyselect - expect_error(tab[1000]) # This is caught in vctrs, assert more specifically when it stabilizes - expect_error(tab[1:1000]) # same as ^ + expect_error(tab[-3], "Invalid column index") + expect_error(tab[-3:3], "Invalid column index") + expect_error(tab[1000], "Invalid column index") + expect_error(tab[1:1000], "Invalid column index") skip("Table with 0 cols doesn't know how many rows it should have") expect_data_frame(tab[0], tbl[0]) From 0686538b7889f37465befcdc4f24d88837f56874 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 11 Sep 2020 15:13:13 -0700 Subject: [PATCH 4/5] Allow column selection by negative indices --- r/R/record-batch.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 0b0b76e92389..6996112d2685 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -208,6 +208,10 @@ names.RecordBatch <- function(x) x$names() # Selecting columns is cheaper than filtering rows, so do it first. # That way, if we're filtering too, we have fewer arrays to filter/slice/take if (is_integerish(j)) { + if (all(j < 0)) { + # in R, negative j means "everything but j" + j <- setdiff(seq_len(x$num_columns), -1 * j) + } x <- x$SelectColumns(as.integer(j) - 1L) } else if (is.character(j)) { x <- x$SelectColumns(match(j, names(x)) - 1L) From d60683d4a723b438eca3fac23bc42d6aa8e4197f Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 11 Sep 2020 15:42:53 -0700 Subject: [PATCH 5/5] Update test --- r/tests/testthat/test-Table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index a9ea891eaeea..0758edd45097 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -131,12 +131,12 @@ test_that("[, [[, $ for Table", { # List-like column slicing expect_data_frame(tab[2:4], tbl[2:4]) expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)]) + expect_data_frame(tab[-3], tbl[-3]) expect_error(tab[[c(4, 3)]]) expect_error(tab[[NA]], "'i' must be character or numeric, not logical") expect_error(tab[[NULL]], "'i' must be character or numeric, not NULL") expect_error(tab[[c("asdf", "jkl;")]], 'length(name) not equal to 1', fixed = TRUE) - expect_error(tab[-3], "Invalid column index") expect_error(tab[-3:3], "Invalid column index") expect_error(tab[1000], "Invalid column index") expect_error(tab[1:1000], "Invalid column index")