From 3d0ad954217a5d2f0f1b98df08439a14294264cc Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Dec 2021 13:37:05 -0400 Subject: [PATCH 01/12] add duration type --- r/NAMESPACE | 1 + r/R/arrowExports.R | 8 ++++++++ r/R/type.R | 25 ++++++++++++++++++++++++ r/man/data-type.Rd | 3 +++ r/src/arrowExports.cpp | 32 +++++++++++++++++++++++++++++++ r/src/datatype.cpp | 12 ++++++++++++ r/tests/testthat/test-data-type.R | 31 ++++++++++++++++++++++++++++++ 7 files changed, 112 insertions(+) diff --git a/r/NAMESPACE b/r/NAMESPACE index df4f36408e3..6b392240d30 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -217,6 +217,7 @@ export(decimal) export(decimal128) export(default_memory_pool) export(dictionary) +export(duration) export(ends_with) export(everything) export(field) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index c1e0fca788d..dc8afbff42a 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -712,6 +712,10 @@ Time64__initialize <- function(unit) { .Call(`_arrow_Time64__initialize`, unit) } +Duration__initialize <- function(unit) { + .Call(`_arrow_Duration__initialize`, unit) +} + list__ <- function(x) { .Call(`_arrow_list__`, x) } @@ -768,6 +772,10 @@ TimeType__unit <- function(type) { .Call(`_arrow_TimeType__unit`, type) } +DurationType__unit <- function(type) { + .Call(`_arrow_DurationType__unit`, type) +} + DecimalType__precision <- function(type) { .Call(`_arrow_DecimalType__precision`, type) } diff --git a/r/R/type.R b/r/R/type.R index 60f0045e514..9fe5f15d21b 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -133,6 +133,13 @@ TimeType <- R6Class("TimeType", Time32 <- R6Class("Time32", inherit = TimeType) Time64 <- R6Class("Time64", inherit = TimeType) +DurationType <- R6Class("DurationType", + inherit = FixedWidthType, + public = list( + unit = function() DurationType__unit(self) + ) +) + Null <- R6Class("Null", inherit = DataType) Timestamp <- R6Class("Timestamp", @@ -334,6 +341,13 @@ valid_time64_units <- c( "us" = TimeUnit$MICRO ) +valid_duration_units <- c( + "s" = TimeUnit$SECOND, + "ms" = TimeUnit$MILLI, + "us" = TimeUnit$MICRO, + "ns" = TimeUnit$NANO +) + make_valid_time_unit <- function(unit, valid_units) { if (is.character(unit)) { unit <- valid_units[match.arg(unit, choices = names(valid_units))] @@ -360,6 +374,16 @@ time64 <- function(unit = c("ns", "us")) { Time64__initialize(unit) } +#' @rdname data-type +#' @export +duration <- function(unit = c("s", "ms", "us", "ns")) { + if (is.character(unit)) { + unit <- match.arg(unit) + } + unit <- make_valid_time_unit(unit, valid_duration_units) + Duration__initialize(unit) +} + #' @rdname data-type #' @export null <- function() Null__initialize() @@ -503,6 +527,7 @@ canonical_type_str <- function(type_str) { large_list = "large_list", fixed_size_list_of = "fixed_size_list", fixed_size_list = "fixed_size_list", + duration = "duration", stop("Unrecognized string representation of data type", call. = FALSE) ) } diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 2b1c4bbff25..8761d5237a3 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -27,6 +27,7 @@ \alias{date64} \alias{time32} \alias{time64} +\alias{duration} \alias{null} \alias{timestamp} \alias{decimal128} @@ -88,6 +89,8 @@ time32(unit = c("ms", "s")) time64(unit = c("ns", "us")) +duration(unit = c("s", "ms", "us", "ns")) + null() timestamp(unit = c("s", "ms", "us", "ns"), timezone = "") diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index ef388b09208..cb0dc5097d4 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2791,6 +2791,21 @@ extern "C" SEXP _arrow_Time64__initialize(SEXP unit_sexp){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Duration__initialize(arrow::TimeUnit::type unit); +extern "C" SEXP _arrow_Duration__initialize(SEXP unit_sexp){ +BEGIN_CPP11 + arrow::r::Input::type unit(unit_sexp); + return cpp11::as_sexp(Duration__initialize(unit)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Duration__initialize(SEXP unit_sexp){ + Rf_error("Cannot call Duration__initialize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr list__(SEXP x); @@ -3003,6 +3018,21 @@ extern "C" SEXP _arrow_TimeType__unit(SEXP type_sexp){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +arrow::TimeUnit::type DurationType__unit(const std::shared_ptr& type); +extern "C" SEXP _arrow_DurationType__unit(SEXP type_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type type(type_sexp); + return cpp11::as_sexp(DurationType__unit(type)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_DurationType__unit(SEXP type_sexp){ + Rf_error("Cannot call DurationType__unit(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) int32_t DecimalType__precision(const std::shared_ptr& type); @@ -7349,6 +7379,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, + { "_arrow_Duration__initialize", (DL_FUNC) &_arrow_Duration__initialize, 1}, { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, @@ -7363,6 +7394,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, + { "_arrow_DurationType__unit", (DL_FUNC) &_arrow_DurationType__unit, 1}, { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index bfc6687ed5b..17e4eafe824 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -79,6 +79,8 @@ const char* r6_class_name::get( return "Time32"; case Type::TIME64: return "Time64"; + case Type::DURATION: + return "DurationType"; case Type::DECIMAL128: return "Decimal128Type"; @@ -207,6 +209,11 @@ std::shared_ptr Time64__initialize(arrow::TimeUnit::type unit) return arrow::time64(unit); } +// [[arrow::export]] +std::shared_ptr Duration__initialize(arrow::TimeUnit::type unit) { + return arrow::duration(unit); +} + // [[arrow::export]] std::shared_ptr list__(SEXP x) { if (Rf_inherits(x, "Field")) { @@ -309,6 +316,11 @@ arrow::TimeUnit::type TimeType__unit(const std::shared_ptr& typ return type->unit(); } +// [[arrow::export]] +arrow::TimeUnit::type DurationType__unit(const std::shared_ptr& type) { + return type->unit(); +} + // [[arrow::export]] int32_t DecimalType__precision(const std::shared_ptr& type) { return type->precision(); diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index d2795722928..6a88d72be75 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -287,6 +287,30 @@ test_that("time64 types work as expected", { expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) +test_that("duration types work as expected", { + x <- duration(TimeUnit$MICRO) + expect_equal(x$id, Type$DURATION) + expect_equal(x$name, "duration") + expect_equal(x$ToString(), "duration[us]") + expect_true(x == x) + expect_false(x == null()) + expect_equal(x$num_fields, 0L) + expect_equal(x$fields(), list()) + expect_equal(x$bit_width, 64L) + expect_equal(x$unit(), unclass(TimeUnit$MICRO)) + + x <- duration(TimeUnit$NANO) + expect_equal(x$id, Type$DURATION) + expect_equal(x$name, "duration") + expect_equal(x$ToString(), "duration[ns]") + expect_true(x == x) + expect_false(x == null()) + expect_equal(x$num_fields, 0L) + expect_equal(x$fields(), list()) + expect_equal(x$bit_width, 64L) + expect_equal(x$unit(), unclass(TimeUnit$NANO)) +}) + test_that("time type unit validation", { expect_equal(time32(TimeUnit$SECOND), time32("s")) expect_equal(time32(TimeUnit$MILLI), time32("ms")) @@ -301,6 +325,13 @@ test_that("time type unit validation", { expect_error(time64(4), '"unit" should be one of 3 or 2') expect_error(time64(NULL), '"unit" should be one of "ns" or "us"') expect_match_arg_error(time64("years")) + + expect_equal(duration(TimeUnit$NANO), duration("n")) + expect_equal(duration(TimeUnit$MICRO), duration("us")) + expect_equal(duration(), duration(TimeUnit$SECOND)) + expect_error(duration(4), '"unit" should be one of 0, 1, 2, or 3') + expect_error(duration(NULL), '"unit" should be one of "s", "ms", "us", or "ns"') + expect_match_arg_error(duration("years")) }) test_that("timestamp type input validation", { From 59bdaa1ae90cc92f8475f461e5a0b659b5fdc181 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Dec 2021 14:07:56 -0400 Subject: [PATCH 02/12] test more common case for duration unit --- r/tests/testthat/test-data-type.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 6a88d72be75..0ebadd1d92a 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -299,16 +299,16 @@ test_that("duration types work as expected", { expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$MICRO)) - x <- duration(TimeUnit$NANO) + x <- duration(TimeUnit$SECOND) expect_equal(x$id, Type$DURATION) expect_equal(x$name, "duration") - expect_equal(x$ToString(), "duration[ns]") + expect_equal(x$ToString(), "duration[s]") expect_true(x == x) expect_false(x == null()) expect_equal(x$num_fields, 0L) expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) - expect_equal(x$unit(), unclass(TimeUnit$NANO)) + expect_equal(x$unit(), unclass(TimeUnit$SECOND)) }) test_that("time type unit validation", { From 8f5739de2994c6c3fb07cdc3e6b4c25bfabfe94c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Dec 2021 14:25:02 -0400 Subject: [PATCH 03/12] add some C++ infrastructue for conversion --- r/src/array_to_vector.cpp | 58 +++++++++++++++++++++++++++++++++++++++ r/src/r_to_arrow.cpp | 9 ++++-- r/src/type_infer.cpp | 5 +++- 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 3ac32d7c395..69d20eb31e2 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -886,6 +886,61 @@ class Converter_Time : public Converter { } }; +template +class Converter_Duration : public Converter { +public: + explicit Converter_Duration(const std::shared_ptr& chunked_array) + : Converter(chunked_array) {} + + SEXP Allocate(R_xlen_t n) const { + cpp11::writable::doubles data(n); + data.attr("class") = "difftime"; + + // difftime is always stored as "seconds" + data.attr("units") = cpp11::writable::strings({"secs"}); + return data; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + std::fill_n(REAL(data) + start, n, NA_REAL); + return Status::OK(); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n, size_t chunk_index) const { + int multiplier = TimeUnit_multiplier(array); + + auto p_data = REAL(data) + start; + auto p_values = array->data()->GetValues(1); + auto ingest_one = [&](R_xlen_t i) { + p_data[i] = static_cast(p_values[i]) / multiplier; + return Status::OK(); + }; + auto null_one = [&](R_xlen_t i) { + p_data[i] = NA_REAL; + return Status::OK(); + }; + return IngestSome(array, n, ingest_one, null_one); + } + +private: + int TimeUnit_multiplier(const std::shared_ptr& array) const { + // difftime is always "seconds", so multiply based on the Array's TimeUnit + switch (static_cast(array->type().get())->unit()) { + case TimeUnit::SECOND: + return 1; + case TimeUnit::MILLI: + return 1000; + case TimeUnit::MICRO: + return 1000000; + case TimeUnit::NANO: + return 1000000000; + default: + return 0; + } + } +}; + template class Converter_Timestamp : public Converter_Time { public: @@ -1204,6 +1259,9 @@ std::shared_ptr Converter::Make( case Type::TIME64: return std::make_shared>(chunked_array); + case Type::DURATION: + return std::make_shared>(chunked_array); + case Type::TIMESTAMP: return std::make_shared>(chunked_array); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 0a600501c7c..489cab80f60 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -71,6 +71,7 @@ enum RVectorType { DATE_INT, DATE_DBL, TIME, + DURATION, POSIXCT, POSIXLT, BINARY, @@ -107,9 +108,11 @@ RVectorType GetVectorType(SEXP x) { return INT64; } else if (Rf_inherits(x, "POSIXct")) { return POSIXCT; - } else if (Rf_inherits(x, "difftime")) { + } else if (Rf_inherits(x, "hms")) { return TIME; - } else { + } else if (Rf_inherits(x, "difftime")) { + return DURATION; + } else { return FLOAT64; } } @@ -587,7 +590,7 @@ class RPrimitiveConverter::value>> Status Extend(SEXP x, int64_t size, int64_t offset = 0) override { RETURN_NOT_OK(this->Reserve(size - offset)); auto rtype = GetVectorType(x); - if (rtype != TIME) { + if (rtype != TIME && rtype != DURATION) { return Status::Invalid("Invalid conversion to time"); } diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp index 1471a000d57..2568757aa2d 100644 --- a/r/src/type_infer.cpp +++ b/r/src/type_infer.cpp @@ -96,9 +96,12 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { if (Rf_inherits(x, "integer64")) { return int64(); } - if (Rf_inherits(x, "difftime")) { + if (Rf_inherits(x, "hms")) { return time32(TimeUnit::SECOND); } + if (Rf_inherits(x, "difftime")) { + return duration(TimeUnit::SECOND); + } return float64(); } From e49c6abe725b4413ce9411d9b8a8ba843f18f8cb Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Dec 2021 14:25:22 -0400 Subject: [PATCH 04/12] add failing test for difftime roundtrip --- r/tests/testthat/test-Array.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index ce23c260908..0db40b81920 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -303,12 +303,18 @@ test_that("array supports integer64", { expect_true(as.vector(is.na(all_na))) }) -test_that("array supports difftime", { +test_that("array supports hms difftime", { time <- hms::hms(56, 34, 12) expect_array_roundtrip(c(time, time), time32("s")) expect_array_roundtrip(vctrs::vec_c(NA, time), time32("s")) }) +test_that("array supports difftime", { + time <- as.difftime(1234, units = "secs") + expect_array_roundtrip(c(time, time), duration("s")) + expect_array_roundtrip(vctrs::vec_c(NA, time), duration("s")) +}) + test_that("support for NaN (ARROW-3615)", { x <- c(1, NA, NaN, -1) y <- Array$create(x) From 642306bcac41ba782c429aef17acf4d0a20c5d44 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 3 Dec 2021 14:25:40 -0400 Subject: [PATCH 05/12] fix table to update type for difftime and add hms --- r/vignettes/arrow.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index 79f3a47fa14..7a7d40a15b3 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -135,7 +135,8 @@ In the tables, entries with a `-` are not currently implemented. | data.frame | struct | | list^3^ | list | | bit64::integer64 | int64 | -| difftime | time32 | +| hms | time32 | +| difftime | duration | | vctrs::vctrs_unspecified | null | From 5d54c6ea057059f14e744eff4d842107eb046fac Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 9 Dec 2021 09:07:33 -0400 Subject: [PATCH 06/12] add hms package name to hms class --- r/vignettes/arrow.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index 7a7d40a15b3..d1bfd0b0eed 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -135,7 +135,7 @@ In the tables, entries with a `-` are not currently implemented. | data.frame | struct | | list^3^ | list | | bit64::integer64 | int64 | -| hms | time32 | +| hms::hms | time32 | | difftime | duration | | vctrs::vctrs_unspecified | null | From 279bc78a79f6220e3d87ada89edb0fdcfe9d204b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 8 Dec 2021 14:27:57 +0100 Subject: [PATCH 07/12] RPrimitiveConverter::Extend() --- r/src/r_to_arrow.cpp | 65 +++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 489cab80f60..8fd224836b3 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -583,6 +583,23 @@ int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { } } +Result get_difftime_unit_multiplier(SEXP x) { + std::string unit(CHAR(STRING_ELT(Rf_getAttrib(x, symbols::units), 0))); + if (unit == "secs") { + return 1; + } else if (unit == "mins") { + return 60; + } else if (unit == "hours") { + return 3600; + } else if (unit == "days") { + return 86400; + } else if (unit == "weeks") { + return 604800; + } else { + return Status::Invalid("unknown difftime unit"); + } +} + template class RPrimitiveConverter::value>> : public PrimitiveConverter { @@ -595,21 +612,7 @@ class RPrimitiveConverter::value>> } // multiplier to get the number of seconds from the value stored in the R vector - int difftime_multiplier; - std::string unit(CHAR(STRING_ELT(Rf_getAttrib(x, symbols::units), 0))); - if (unit == "secs") { - difftime_multiplier = 1; - } else if (unit == "mins") { - difftime_multiplier = 60; - } else if (unit == "hours") { - difftime_multiplier = 3600; - } else if (unit == "days") { - difftime_multiplier = 86400; - } else if (unit == "weeks") { - difftime_multiplier = 604800; - } else { - return Status::Invalid("unknown difftime unit"); - } + ARROW_ASSIGN_OR_RAISE(int difftime_multiplier, get_difftime_unit_multiplier(x)); // then multiply the seconds by this to match the time unit auto multiplier = @@ -825,7 +828,37 @@ class RPrimitiveConverter::value>> : public PrimitiveConverter { public: Status Extend(SEXP x, int64_t size, int64_t offset = 0) override { - // TODO: look in lubridate + auto rtype = GetVectorType(x); + + // only handle R objects + if (rtype == DURATION) { + RETURN_NOT_OK(this->Reserve(size - offset)); + + ARROW_ASSIGN_OR_RAISE(int difftime_multiplier, get_difftime_unit_multiplier(x)); + + int64_t multiplier = get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier; + + auto append_value = [this, multiplier](double value) { + auto converted = static_cast(value * multiplier); + this->primitive_builder_->UnsafeAppend(converted); + return Status::OK(); + }; + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); + }; + + if (ALTREP(x)) { + return VisitVector(RVectorIterator_ALTREP(x, offset), size, append_null, + append_value); + } else { + return VisitVector(RVectorIterator(x, offset), size, append_null, + append_value); + } + + return Status::OK(); + } + return Status::NotImplemented("Extend"); } }; From bcb1ce9ca0d8a7792e4d61f4e35f1d6fd7967955 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 10 Dec 2021 11:20:12 +0100 Subject: [PATCH 08/12] comment --- r/src/r_to_arrow.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 8fd224836b3..51483cdf861 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -607,6 +607,7 @@ class RPrimitiveConverter::value>> Status Extend(SEXP x, int64_t size, int64_t offset = 0) override { RETURN_NOT_OK(this->Reserve(size - offset)); auto rtype = GetVectorType(x); + // This probably also not accept duration if (rtype != TIME && rtype != DURATION) { return Status::Invalid("Invalid conversion to time"); } From 3073ad1ea578ebd42ba493d9ac2a2ac8ad715710 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 10 Dec 2021 09:23:43 -0400 Subject: [PATCH 09/12] run clang-format --- r/src/array_to_vector.cpp | 26 +++++++++++++------------- r/src/r_to_arrow.cpp | 5 +++-- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 69d20eb31e2..7cf861a5448 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -888,9 +888,9 @@ class Converter_Time : public Converter { template class Converter_Duration : public Converter { -public: + public: explicit Converter_Duration(const std::shared_ptr& chunked_array) - : Converter(chunked_array) {} + : Converter(chunked_array) {} SEXP Allocate(R_xlen_t n) const { cpp11::writable::doubles data(n); @@ -923,20 +923,20 @@ class Converter_Duration : public Converter { return IngestSome(array, n, ingest_one, null_one); } -private: + private: int TimeUnit_multiplier(const std::shared_ptr& array) const { // difftime is always "seconds", so multiply based on the Array's TimeUnit switch (static_cast(array->type().get())->unit()) { - case TimeUnit::SECOND: - return 1; - case TimeUnit::MILLI: - return 1000; - case TimeUnit::MICRO: - return 1000000; - case TimeUnit::NANO: - return 1000000000; - default: - return 0; + case TimeUnit::SECOND: + return 1; + case TimeUnit::MILLI: + return 1000; + case TimeUnit::MICRO: + return 1000000; + case TimeUnit::NANO: + return 1000000000; + default: + return 0; } } }; diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 51483cdf861..21d7d7088f7 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -112,7 +112,7 @@ RVectorType GetVectorType(SEXP x) { return TIME; } else if (Rf_inherits(x, "difftime")) { return DURATION; - } else { + } else { return FLOAT64; } } @@ -837,7 +837,8 @@ class RPrimitiveConverter::value>> ARROW_ASSIGN_OR_RAISE(int difftime_multiplier, get_difftime_unit_multiplier(x)); - int64_t multiplier = get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier; + int64_t multiplier = + get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier; auto append_value = [this, multiplier](double value) { auto converted = static_cast(value * multiplier); From 32623e3a8ca019f43d0da2b96c4abdd3007f32fe Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 10 Dec 2021 11:23:45 -0400 Subject: [PATCH 10/12] Update r/src/r_to_arrow.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Romain François --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 21d7d7088f7..c05f16695fd 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -608,7 +608,7 @@ class RPrimitiveConverter::value>> RETURN_NOT_OK(this->Reserve(size - offset)); auto rtype = GetVectorType(x); // This probably also not accept duration - if (rtype != TIME && rtype != DURATION) { + if (rtype != TIME) { return Status::Invalid("Invalid conversion to time"); } From 6789f35830478171eff650a7589e62d4c55043e1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 10 Dec 2021 11:49:45 -0400 Subject: [PATCH 11/12] remove comment --- r/src/r_to_arrow.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index c05f16695fd..8139755ce42 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -607,7 +607,6 @@ class RPrimitiveConverter::value>> Status Extend(SEXP x, int64_t size, int64_t offset = 0) override { RETURN_NOT_OK(this->Reserve(size - offset)); auto rtype = GetVectorType(x); - // This probably also not accept duration if (rtype != TIME) { return Status::Invalid("Invalid conversion to time"); } From 45024c5158c93f7f6ad3607fe7d54a8233af669d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 10 Dec 2021 12:05:32 -0400 Subject: [PATCH 12/12] more tests --- r/tests/testthat/test-chunked-array.R | 7 ++++++- r/tests/testthat/test-type.R | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 73e536514a2..6ecca378037 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -196,11 +196,16 @@ test_that("ChunkedArray supports integer64 (ARROW-3716)", { expect_identical(as.vector(ca), c(bit64::as.integer64(0L), x)) }) -test_that("ChunkedArray supports difftime", { +test_that("ChunkedArray supports hms", { time <- hms::hms(56, 34, 12) expect_chunked_roundtrip(list(time, time), time32("s")) }) +test_that("ChunkedArray supports difftime", { + dur <- as.difftime(123, units = "secs") + expect_chunked_roundtrip(list(dur, dur), duration(unit = "s")) +}) + test_that("ChunkedArray supports empty arrays (ARROW-13761)", { types <- c( int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 8046ebbaa57..887c7c3e494 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -44,6 +44,10 @@ test_that("type() infers from R type", { type(hms::hms(56, 34, 12)), time32(unit = TimeUnit$SECOND) ) + expect_equal( + type(as.difftime(123, units = "days")), + duration(unit = TimeUnit$SECOND) + ) expect_equal( type(bit64::integer64()), int64()