diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 6bd940f8067..e8f57db99c2 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -62,7 +62,7 @@ jobs: strategy: matrix: include: - - cpp_version: "13.0.0" + - cpp_version: "15.0.2" steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 diff --git a/r/NEWS.md b/r/NEWS.md index 1e8a480ef5f..c2690e6248d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -24,6 +24,7 @@ * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. * The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) * R metadata, stored in the Arrow schema to support round-tripping data between R and Arrow/Parquet, is now serialized and deserialized more strictly. This makes it safer to load data from files from unknown sources into R data.frames. (#41969) +* The minimum version of the Arrow C++ library the Arrow R package can be built with has been bumped to 15.0.0. (#42241) # arrow 16.1.0 diff --git a/r/R/parquet.R b/r/R/parquet.R index 0ee6c62601c..88ce1c77128 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -419,6 +419,7 @@ ParquetWriterProperties$create <- function(column_names, #' @section Methods: #' #' - `WriteTable` Write a [Table] to `sink` +#' - `WriteBatch` Write a [RecordBatch] to `sink` #' - `Close` Close the writer. Note: does not close the `sink`. #' [arrow::io::OutputStream][OutputStream] has its own `close()` method. #' @@ -428,8 +429,14 @@ ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = ArrowObject, public = list( WriteTable = function(table, chunk_size) { + assert_is(table, "Table") parquet___arrow___FileWriter__WriteTable(self, table, chunk_size) }, + WriteBatch = function(batch, ...) 
{ + assert_is(batch, "RecordBatch") + table <- Table$create(batch) + self$WriteTable(table, ...) + }, Close = function() parquet___arrow___FileWriter__Close(self) ) ) diff --git a/r/man/ParquetFileWriter.Rd b/r/man/ParquetFileWriter.Rd index f36e85ab6c4..5779e574d46 100644 --- a/r/man/ParquetFileWriter.Rd +++ b/r/man/ParquetFileWriter.Rd @@ -24,6 +24,7 @@ takes the following arguments: \itemize{ \item \code{WriteTable} Write a \link{Table} to \code{sink} +\item \code{WriteBatch} Write a \link{RecordBatch} to \code{sink} \item \code{Close} Close the writer. Note: does not close the \code{sink}. \link[=OutputStream]{arrow::io::OutputStream} has its own \code{close()} method. } diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index a81210f0ad9..d2db11e14a7 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1050,7 +1050,6 @@ class RDictionaryConverter> template struct RConverterTrait; -#if ARROW_VERSION_MAJOR >= 15 template struct RConverterTrait< T, enable_if_t::value && !is_interval_type::value && @@ -1062,14 +1061,6 @@ template struct RConverterTrait> { // not implemented }; -#else -template -struct RConverterTrait< - T, enable_if_t::value && !is_interval_type::value && - !is_extension_type::value>> { - using type = RPrimitiveConverter; -}; -#endif template struct RConverterTrait> { diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index f2359116fda..cc57022600f 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -530,3 +530,31 @@ test_that("thrift string and container size can be specified when reading Parque data <- reader_container$ReadTable() expect_identical(collect.ArrowTabular(data), example_data) }) + +test_that("We can use WriteBatch on ParquetFileWriter", { + tf <- tempfile() + on.exit(unlink(tf)) + sink <- FileOutputStream$create(tf) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- 
ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + + batch <- RecordBatch$create(data.frame(a = 1:10)) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$WriteBatch(batch, chunk_size = 10) + writer$Close() + + tbl <- read_parquet(tf) + expect_equal(nrow(tbl), 30) +}) + +test_that("WriteBatch on ParquetFileWriter errors when called on closed sink", { + sink <- FileOutputStream$create(tempfile()) + sch <- schema(a = int32()) + props <- ParquetWriterProperties$create(column_names = names(sch)) + writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props) + writer$Close() + batch <- RecordBatch$create(data.frame(a = 1:10)) + expect_error(writer$WriteBatch(batch, chunk_size = 10), "Operation on closed file") +}) diff --git a/r/tools/check-versions.R b/r/tools/check-versions.R index 34b2ef680c5..ea7fe93c524 100644 --- a/r/tools/check-versions.R +++ b/r/tools/check-versions.R @@ -24,10 +24,10 @@ release_version_supported <- function(r_version, cpp_version) { r_version <- package_version(r_version) cpp_version <- package_version(cpp_version) major <- function(x) as.numeric(x[1, 1]) - minimum_cpp_version <- package_version("13.0.0") + minimum_cpp_version <- package_version("15.0.0") allow_mismatch <- identical(tolower(Sys.getenv("ARROW_R_ALLOW_CPP_VERSION_MISMATCH", "false")), "true") - # If we allow a version mismatch we still need to cover the minimum version (13.0.0 for now) + # If we allow a version mismatch we still need to cover the minimum version (15.0.0 for now) # we don't allow newer C++ versions as new features without additional feature gates are likely to # break the R package version_valid <- cpp_version >= minimum_cpp_version && major(cpp_version) <= major(r_version) diff --git a/r/tools/test-check-versions.R b/r/tools/test-check-versions.R index f558648bed1..14c0bee3fd8 100644 --- a/r/tools/test-check-versions.R +++ b/r/tools/test-check-versions.R @@ -61,16 +61,24 @@ 
test_that("check_versions without mismatch", { test_that("check_versions with mismatch", { withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "false")) + expect_true( + release_version_supported("15.0.0", "15.0.0") + ) + expect_false( release_version_supported("15.0.0", "13.0.0") ) withr::local_envvar(.new = c(ARROW_R_ALLOW_CPP_VERSION_MISMATCH = "true")) - expect_true( + expect_false( release_version_supported("15.0.0", "13.0.0") ) + expect_true( + release_version_supported("16.0.0", "15.0.0") + ) + expect_false( release_version_supported("15.0.0", "16.0.0") )