From ca1653921d4c572adc40a7dfcc7f24f700974300 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Apr 2021 15:30:47 -0700 Subject: [PATCH 1/5] Raise a better error when failing to convert array/chunkedarray/scalar with nuls --- r/R/array.R | 7 +++++- r/R/chunked-array.R | 7 +++++- r/R/scalar.R | 8 +++++-- r/R/util.R | 8 +++++++ r/tests/testthat/test-Array.R | 16 ++++++++++--- r/tests/testthat/test-chunked-array.R | 33 +++++++++++++++++++++++++++ r/tests/testthat/test-scalar.R | 26 +++++++++++++++++++++ 7 files changed, 98 insertions(+), 7 deletions(-) diff --git a/r/R/array.R b/r/R/array.R index 1d63c5735a7..be431c2d391 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -105,7 +105,12 @@ Array <- R6Class("Array", Array__Diff(self, other) }, data = function() Array__data(self), - as_vector = function() Array__as_vector(self), + as_vector = function() { + tryCatch( + Array__as_vector(self), + error = handle_embedded_nul_error + ) + }, ToString = function() { typ <- paste0("<", self$type$ToString(), ">") paste(typ, Array__ToString(self), sep = "\n") diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index a7f9c8f790c..615fca50039 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -62,7 +62,12 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum, public = list( length = function() ChunkedArray__length(self), chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), - as_vector = function() ChunkedArray__as_vector(self), + as_vector = function() { + tryCatch( + ChunkedArray__as_vector(self), + error = handle_embedded_nul_error + ) + }, Slice = function(offset, length = NULL){ if (is.null(length)) { ChunkedArray__Slice1(self, offset) diff --git a/r/R/scalar.R b/r/R/scalar.R index cbda5964a2c..ef6041fe877 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -32,8 +32,12 @@ Scalar <- R6Class("Scalar", # TODO: document the methods public = list( ToString = function() Scalar__ToString(self), - as_vector = function() Scalar__as_vector(self), - as_array = function() MakeArrayFromScalar(self), + as_vector = function() { + tryCatch( + Scalar__as_vector(self), + error = handle_embedded_nul_error + ) + }, as_array = function() MakeArrayFromScalar(self), Equals = function(other, ...) { inherits(other, "Scalar") && Scalar__Equals(self, other) }, diff --git a/r/R/util.R b/r/R/util.R index 4680381e909..6d9c91b74aa 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -86,3 +86,11 @@ all_names <- function(expr) { is_constant <- function(expr) { length(all_vars(expr)) == 0 } + +handle_embedded_nul_error <- function(e) { + msg <- conditionMessage(e) + if (grepl(" nul ", msg)) { + e$message <- paste0(msg, "; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)") + } + stop(e) +} \ No newline at end of file diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 18e26207ca3..8c06166e93c 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -634,17 +634,27 @@ test_that("Handling string data with embedded nuls", { as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)), as.raw(c(0x74, 0x76))), class = c("arrow_binary", "vctrs_vctr", "list")) - expect_error(rawToChar(raws[[3]]), "nul") # See? + expect_error( + rawToChar(raws[[3]]), + "embedded nul in string: 'ma\\0n'", # See? + fixed = TRUE + ) array_with_nul <- Array$create(raws)$cast(utf8()) - expect_error(as.vector(array_with_nul), "nul") + expect_error( + as.vector(array_with_nul), + "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)", + fixed = TRUE + ) options(arrow.skip_nul = TRUE) + on.exit(options(arrow.skip_nul = NULL)) expect_warning( expect_identical( as.vector(array_with_nul), c("person", "woman", "man", "fan", "camera", "tv") ), - "Stripping '\\\\0' \\(nul\\) from character vector" + "Stripping '\\0' (nul) from character vector", + fixed = TRUE ) }) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index a5ff6ef4812..f006d6d8fb9 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -383,3 +383,36 @@ test_that("Converting a chunked array unifies factors (ARROW-8374)", { expect_identical(ca$as_vector(), res) }) + +test_that("Handling string data with embedded nuls", { + raws <- structure(list( + as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)), + as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)), + as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00 + as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls + as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)), + as.raw(c(0x74, 0x76))), + class = c("arrow_binary", "vctrs_vctr", "list")) + expect_error( + rawToChar(raws[[3]]), + "embedded nul in string: 'ma\\0n'", # See? + fixed = TRUE + ) + array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) + expect_error( + as.vector(array_with_nul), + "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)", + fixed = TRUE + ) + + options(arrow.skip_nul = TRUE) + on.exit(options(arrow.skip_nul = NULL)) + expect_warning( + expect_identical( + as.vector(array_with_nul), + c("person", "woman", "man", "fan", "camera", "tv") + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) +}) \ No newline at end of file diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index e9ef893bbd9..a24fb4f6b29 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -76,3 +76,29 @@ test_that("Scalar$ApproxEquals", { expect_false(a$ApproxEquals(d)) expect_false(a$ApproxEquals(aa)) }) + +test_that("Handling string data with embedded nuls", { + raws <- as.raw(c(0x6d, 0x61, 0x00, 0x6e)) + expect_error( + rawToChar(raws), + "embedded nul in string: 'ma\\0n'", # See? + fixed = TRUE + ) + scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8()) + expect_error( + as.vector(scalar_with_nul), + "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)", + fixed = TRUE + ) + + options(arrow.skip_nul = TRUE) + on.exit(options(arrow.skip_nul = NULL)) + expect_warning( + expect_identical( + as.vector(scalar_with_nul), + "man" + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) +}) \ No newline at end of file From ef21ed1dcfdcc3c3eb496091bdbcc7c72197b5f9 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Apr 2021 15:42:15 -0700 Subject: [PATCH 2/5] Add ArrowTabular wrapping --- r/R/arrow-tabular.R | 5 ++++- r/R/scalar.R | 3 ++- r/tests/testthat/test-RecordBatch.R | 28 ++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index 157b799f3b6..f32111688a2 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -61,7 +61,10 @@ ArrowTabular <- R6Class("ArrowTabular", inherit = ArrowObject, #' @export as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) { - df <- x$to_data_frame() + tryCatch( + df <- x$to_data_frame(), + error = handle_embedded_nul_error + ) if (!is.null(r_metadata <- x$metadata$r)) { df <- apply_arrow_r_metadata(df, .unserialize_arrow_r_metadata(r_metadata)) } diff --git a/r/R/scalar.R b/r/R/scalar.R index ef6041fe877..6afa7255650 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -37,7 +37,8 @@ Scalar <- R6Class("Scalar", Scalar__as_vector(self), error = handle_embedded_nul_error ) - }, as_array = function() MakeArrayFromScalar(self), + }, + as_array = function() MakeArrayFromScalar(self), Equals = function(other, ...) { inherits(other, "Scalar") && Scalar__Equals(self, other) }, diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 96abceb2994..0c99d2234e3 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -471,3 +471,31 @@ test_that("record_batch() with different length arrays", { expect_error(record_batch(a=1:5, b = 42), msg) expect_error(record_batch(a=1:5, b = 1:6), msg) }) + +test_that("Handling string data with embedded nuls", { + raws <- structure(list( + as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)), + as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)), + as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00 + as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)), + as.raw(c(0x74, 0x76))), + class = c("arrow_binary", "vctrs_vctr", "list")) + batch_with_nul <- record_batch(a = 1:5, b = raws) + batch_with_nul$b <- batch_with_nul$b$cast(utf8()) + expect_error( + as.data.frame(batch_with_nul), + "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)", + fixed = TRUE + ) + + options(arrow.skip_nul = TRUE) + on.exit(options(arrow.skip_nul = NULL)) + expect_warning( + expect_equivalent( + as.data.frame(batch_with_nul)$b, + c("person", "woman", "man", "camera", "tv") + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) +}) \ No newline at end of file From 45ec3091ffcdbb378023993b9340ef394bd684d8 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Apr 2021 15:47:47 -0700 Subject: [PATCH 3/5] Simplify wrapping for ArrowDatum types; use withr --- r/DESCRIPTION | 3 ++- r/R/array.R | 7 +------ r/R/arrow-datum.R | 7 ++++++- r/R/chunked-array.R | 7 +------ r/R/scalar.R | 7 +------ r/tests/testthat/test-Array.R | 20 +++++++++--------- r/tests/testthat/test-RecordBatch.R | 20 +++++++++--------- r/tests/testthat/test-chunked-array.R | 29 +++++++++++---------------- r/tests/testthat/test-scalar.R | 20 +++++++++--------- 9 files changed, 53 insertions(+), 67 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 3de40f6f9a7..a355e790a2d 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -49,7 +49,8 @@ Suggests: rmarkdown, stringr, testthat, - tibble + tibble, + withr LinkingTo: cpp11 (>= 0.2.0) Collate: 'enums.R' diff --git a/r/R/array.R b/r/R/array.R index be431c2d391..1d63c5735a7 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -105,12 +105,7 @@ Array <- R6Class("Array", Array__Diff(self, other) }, data = function() Array__data(self), - as_vector = function() { - tryCatch( - Array__as_vector(self), - error = handle_embedded_nul_error - ) - }, + as_vector = function() Array__as_vector(self), ToString = function() { typ <- paste0("<", self$type$ToString(), ">") paste(typ, Array__ToString(self), sep = "\n") diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 99940e74cbd..dd43307c9cc 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -39,7 +39,12 @@ is.na.ArrowDatum <- function(x) call_function("is_null", x) is.nan.ArrowDatum <- function(x) call_function("is_nan", x) #' @export -as.vector.ArrowDatum <- function(x, mode) x$as_vector() +as.vector.ArrowDatum <- function(x, mode) { + tryCatch( + x$as_vector(), + error = handle_embedded_nul_error + ) +} filter_rows <- function(x, i, keep_na = TRUE, ...) { # General purpose function for [ row subsetting with R semantics diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index 615fca50039..a7f9c8f790c 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -62,12 +62,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowDatum, public = list( length = function() ChunkedArray__length(self), chunk = function(i) Array$create(ChunkedArray__chunk(self, i)), - as_vector = function() { - tryCatch( - ChunkedArray__as_vector(self), - error = handle_embedded_nul_error - ) - }, + as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { ChunkedArray__Slice1(self, offset) diff --git a/r/R/scalar.R b/r/R/scalar.R index 6afa7255650..cbda5964a2c 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -32,12 +32,7 @@ Scalar <- R6Class("Scalar", # TODO: document the methods public = list( ToString = function() Scalar__ToString(self), - as_vector = function() { - tryCatch( - Scalar__as_vector(self), - error = handle_embedded_nul_error - ) - }, + as_vector = function() Scalar__as_vector(self), as_array = function() MakeArrayFromScalar(self), Equals = function(other, ...) { inherits(other, "Scalar") && Scalar__Equals(self, other) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 8c06166e93c..b6280a096af 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -646,16 +646,16 @@ test_that("Handling string data with embedded nuls", { fixed = TRUE ) - options(arrow.skip_nul = TRUE) - on.exit(options(arrow.skip_nul = NULL)) - expect_warning( - expect_identical( - as.vector(array_with_nul), - c("person", "woman", "man", "fan", "camera", "tv") - ), - "Stripping '\\0' (nul) from character vector", - fixed = TRUE - ) + withr::with_options(list(arrow.skip_nul = TRUE), { + expect_warning( + expect_identical( + as.vector(array_with_nul), + c("person", "woman", "man", "fan", "camera", "tv") + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) + }) }) test_that("Array$create() should have helpful error", { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 0c99d2234e3..b71c07b78c2 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -488,14 +488,14 @@ test_that("Handling string data with embedded nuls", { fixed = TRUE ) - options(arrow.skip_nul = TRUE) - on.exit(options(arrow.skip_nul = NULL)) - expect_warning( - expect_equivalent( - as.data.frame(batch_with_nul)$b, - c("person", "woman", "man", "camera", "tv") - ), - "Stripping '\\0' (nul) from character vector", - fixed = TRUE - ) + withr::with_options(list(arrow.skip_nul = TRUE), { + expect_warning( + expect_equivalent( + as.data.frame(batch_with_nul)$b, + c("person", "woman", "man", "camera", "tv") + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) + }) }) \ No newline at end of file diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index f006d6d8fb9..17a82de810f 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -393,26 +393,21 @@ test_that("Handling string data with embedded nuls", { as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)), as.raw(c(0x74, 0x76))), class = c("arrow_binary", "vctrs_vctr", "list")) + chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) expect_error( - rawToChar(raws[[3]]), - "embedded nul in string: 'ma\\0n'", # See? - fixed = TRUE - ) - array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) - expect_error( - as.vector(array_with_nul), + as.vector(chunked_array_with_nul), "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul = TRUE)", fixed = TRUE ) - options(arrow.skip_nul = TRUE) - on.exit(options(arrow.skip_nul = NULL)) - expect_warning( - expect_identical( - as.vector(array_with_nul), - c("person", "woman", "man", "fan", "camera", "tv") - ), - "Stripping '\\0' (nul) from character vector", - fixed = TRUE - ) + withr::with_options(list(arrow.skip_nul = TRUE), { + expect_warning( + expect_identical( + as.vector(chunked_array_with_nul), + c("person", "woman", "man", "fan", "camera", "tv") + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) + }) }) \ No newline at end of file diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index a24fb4f6b29..501298a8021 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -91,14 +91,14 @@ test_that("Handling string data with embedded nuls", { fixed = TRUE ) - options(arrow.skip_nul = TRUE) - on.exit(options(arrow.skip_nul = NULL)) - expect_warning( - expect_identical( - as.vector(scalar_with_nul), - "man" - ), - "Stripping '\\0' (nul) from character vector", - fixed = TRUE - ) + withr::with_options(list(arrow.skip_nul = TRUE), { + expect_warning( + expect_identical( + as.vector(scalar_with_nul), + "man" + ), + "Stripping '\\0' (nul) from character vector", + fixed = TRUE + ) + }) }) \ No newline at end of file From 04f3e2f7736b9ebbdd20f4a7e4c43189fc0b040c Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 5 Apr 2021 15:51:47 -0700 Subject: [PATCH 4/5] withr all the things --- r/tests/testthat/helper-arrow.R | 4 +-- r/tests/testthat/test-Array.R | 37 +++++++++++++-------------- r/tests/testthat/test-install-arrow.R | 18 ++++++------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R index a20ec23961e..89d9bf07ee6 100644 --- a/r/tests/testthat/helper-arrow.R +++ b/r/tests/testthat/helper-arrow.R @@ -65,7 +65,5 @@ test_that <- function(what, code) { # Wrapper to run tests that only touch R code even when the C++ library isn't # available (so that at least some tests are run on those platforms) r_only <- function(code) { - old <- options(..skip.tests = FALSE) - on.exit(options(old)) - code + withr::with_options(list(..skip.tests = FALSE), code) } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index b6280a096af..1b92732368d 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -262,17 +262,16 @@ test_that("array supports POSIXct (ARROW-3340)", { test_that("array supports POSIXct without timezone", { # Make sure timezone is not set - tz <- Sys.getenv("TZ") - Sys.setenv(TZ = "") - on.exit(Sys.setenv(TZ = tz)) - times <- strptime("2019-02-03 12:34:56", format="%Y-%m-%d %H:%M:%S") + 1:10 - expect_array_roundtrip(times, timestamp("us", "")) - - # Also test the INTSXP code path - skip("Ingest_POSIXct only implemented for REALSXP") - times_int <- as.integer(times) - attributes(times_int) <- attributes(times) - expect_array_roundtrip(times_int, timestamp("us", "")) + withr::with_envvar(c(TZ = ""), { + times <- strptime("2019-02-03 12:34:56", format="%Y-%m-%d %H:%M:%S") + 1:10 + expect_array_roundtrip(times, timestamp("us", "")) + + # Also test the INTSXP code path + skip("Ingest_POSIXct only implemented for REALSXP") + times_int <- as.integer(times) + attributes(times_int) <- attributes(times) + expect_array_roundtrip(times_int, timestamp("us", "")) + }) }) test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", { @@ -803,14 +802,14 @@ test_that("Array$ApproxEquals", { }) test_that("auto int64 conversion to int can be disabled (ARROW-10093)", { - op <- options(arrow.int64_downcast = FALSE); on.exit(options(op)) + withr::with_options(list(arrow.int64_downcast = FALSE), { + a <- Array$create(1:10, int64()) + expect_true(inherits(a$as_vector(), "integer64")) - a <- Array$create(1:10, int64()) - expect_true(inherits(a$as_vector(), "integer64")) + batch <- RecordBatch$create(x = a) + expect_true(inherits(as.data.frame(batch)$x, "integer64")) - batch <- RecordBatch$create(x = a) - expect_true(inherits(as.data.frame(batch)$x, "integer64")) - - tab <- Table$create(x = a) - expect_true(inherits(as.data.frame(batch)$x, "integer64")) + tab <- Table$create(x = a) + expect_true(inherits(as.data.frame(batch)$x, "integer64")) + }) }) diff --git a/r/tests/testthat/test-install-arrow.R b/r/tests/testthat/test-install-arrow.R index e2b1d771aa3..d9d1cc74b02 100644 --- a/r/tests/testthat/test-install-arrow.R +++ b/r/tests/testthat/test-install-arrow.R @@ -23,17 +23,17 @@ r_only({ ours <- "https://dl.example.com/ursalabs/fake_repo" other <- "https://cran.fiocruz.br/" - old <- options( + opts <- list( repos=c(CRAN = "@CRAN@"), # Restore defaul arrow.dev_repo = ours ) - on.exit(options(old)) - - expect_identical(arrow_repos(), cran) - expect_identical(arrow_repos(c(cran, ours)), cran) - expect_identical(arrow_repos(c(ours, other)), other) - expect_identical(arrow_repos(nightly = TRUE), c(ours, cran)) - expect_identical(arrow_repos(c(cran, ours), nightly = TRUE), c(ours, cran)) - expect_identical(arrow_repos(c(ours, other), nightly = TRUE), c(ours, other)) + withr::with_options(opts, { + expect_identical(arrow_repos(), cran) + expect_identical(arrow_repos(c(cran, ours)), cran) + expect_identical(arrow_repos(c(ours, other)), other) + expect_identical(arrow_repos(nightly = TRUE), c(ours, cran)) + expect_identical(arrow_repos(c(cran, ours), nightly = TRUE), c(ours, cran)) + expect_identical(arrow_repos(c(ours, other), nightly = TRUE), c(ours, other)) + }) }) }) From 17aa3c3c159b9479f747c200dbf159f8658c214d Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 6 Apr 2021 07:47:25 -0700 Subject: [PATCH 5/5] Include patch from @westonpace --- r/src/array_to_vector.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index ddcb7494697..d5fae295181 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -88,11 +88,14 @@ class Converter { // for each array, add a task to the task group // // The task group is Finish() in the caller - void IngestParallel(SEXP data, const std::shared_ptr& tg) { + // The converter itself is passed as `self` so that if one of the parallel ops + // hits `stop()`, we don't bail before `tg` is destroyed, which would cause a crash + void IngestParallel(SEXP data, const std::shared_ptr& tg, + std::shared_ptr self) { R_xlen_t k = 0, i = 0; for (const auto& array : arrays_) { auto n_chunk = array->length(); - tg->Append([=] { return IngestOne(data, array, k, n_chunk, i); }); + tg->Append([=] { return self->IngestOne(data, array, k, n_chunk, i); }); k += n_chunk; i++; } @@ -1242,7 +1245,7 @@ cpp11::writable::list to_dataframe_parallel( // add a task to ingest data of that column if that can be done in parallel if (converters[i]->Parallel()) { - converters[i]->IngestParallel(column, tg); + converters[i]->IngestParallel(column, tg, converters[i]); } }