Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ export(decimal)
export(decimal128)
export(default_memory_pool)
export(dictionary)
export(duration)
export(ends_with)
export(everything)
export(field)
Expand Down
8 changes: 8 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ TimeType <- R6Class("TimeType",
Time32 <- R6Class("Time32", inherit = TimeType)
Time64 <- R6Class("Time64", inherit = TimeType)

DurationType <- R6Class("DurationType",
inherit = FixedWidthType,
public = list(
unit = function() DurationType__unit(self)
)
)

Null <- R6Class("Null", inherit = DataType)

Timestamp <- R6Class("Timestamp",
Expand Down Expand Up @@ -334,6 +341,13 @@ valid_time64_units <- c(
"us" = TimeUnit$MICRO
)

valid_duration_units <- c(
"s" = TimeUnit$SECOND,
"ms" = TimeUnit$MILLI,
"us" = TimeUnit$MICRO,
"ns" = TimeUnit$NANO
)

make_valid_time_unit <- function(unit, valid_units) {
if (is.character(unit)) {
unit <- valid_units[match.arg(unit, choices = names(valid_units))]
Expand All @@ -360,6 +374,16 @@ time64 <- function(unit = c("ns", "us")) {
Time64__initialize(unit)
}

#' @rdname data-type
#' @export
duration <- function(unit = c("s", "ms", "us", "ns")) {
if (is.character(unit)) {
unit <- match.arg(unit)
}
unit <- make_valid_time_unit(unit, valid_duration_units)
Duration__initialize(unit)
}

#' @rdname data-type
#' @export
null <- function() Null__initialize()
Expand Down Expand Up @@ -503,6 +527,7 @@ canonical_type_str <- function(type_str) {
large_list = "large_list",
fixed_size_list_of = "fixed_size_list",
fixed_size_list = "fixed_size_list",
duration = "duration",
stop("Unrecognized string representation of data type", call. = FALSE)
)
}
Expand Down
3 changes: 3 additions & 0 deletions r/man/data-type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,61 @@ class Converter_Time : public Converter {
}
};

template <typename value_type, typename unit_type = TimeType>
class Converter_Duration : public Converter {
public:
explicit Converter_Duration(const std::shared_ptr<ChunkedArray>& chunked_array)
: Converter(chunked_array) {}

SEXP Allocate(R_xlen_t n) const {
cpp11::writable::doubles data(n);
data.attr("class") = "difftime";

// difftime is always stored as "seconds"
data.attr("units") = cpp11::writable::strings({"secs"});
return data;
}

Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const {
std::fill_n(REAL(data) + start, n, NA_REAL);
return Status::OK();
}

Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
int multiplier = TimeUnit_multiplier(array);

auto p_data = REAL(data) + start;
auto p_values = array->data()->GetValues<value_type>(1);
auto ingest_one = [&](R_xlen_t i) {
p_data[i] = static_cast<double>(p_values[i]) / multiplier;
return Status::OK();
};
auto null_one = [&](R_xlen_t i) {
p_data[i] = NA_REAL;
return Status::OK();
};
return IngestSome(array, n, ingest_one, null_one);
}

private:
int TimeUnit_multiplier(const std::shared_ptr<Array>& array) const {
// difftime is always "seconds", so multiply based on the Array's TimeUnit
switch (static_cast<unit_type*>(array->type().get())->unit()) {
case TimeUnit::SECOND:
return 1;
case TimeUnit::MILLI:
return 1000;
case TimeUnit::MICRO:
return 1000000;
case TimeUnit::NANO:
return 1000000000;
default:
return 0;
}
}
};

template <typename value_type>
class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
public:
Expand Down Expand Up @@ -1204,6 +1259,9 @@ std::shared_ptr<Converter> Converter::Make(
case Type::TIME64:
return std::make_shared<arrow::r::Converter_Time<int64_t>>(chunked_array);

case Type::DURATION:
return std::make_shared<arrow::r::Converter_Duration<int64_t>>(chunked_array);

case Type::TIMESTAMP:
return std::make_shared<arrow::r::Converter_Timestamp<int64_t>>(chunked_array);

Expand Down
32 changes: 32 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions r/src/datatype.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ const char* r6_class_name<arrow::DataType>::get(
return "Time32";
case Type::TIME64:
return "Time64";
case Type::DURATION:
return "DurationType";

case Type::DECIMAL128:
return "Decimal128Type";
Expand Down Expand Up @@ -207,6 +209,11 @@ std::shared_ptr<arrow::DataType> Time64__initialize(arrow::TimeUnit::type unit)
return arrow::time64(unit);
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> Duration__initialize(arrow::TimeUnit::type unit) {
return arrow::duration(unit);
}

// [[arrow::export]]
std::shared_ptr<arrow::DataType> list__(SEXP x) {
if (Rf_inherits(x, "Field")) {
Expand Down Expand Up @@ -309,6 +316,11 @@ arrow::TimeUnit::type TimeType__unit(const std::shared_ptr<arrow::TimeType>& typ
return type->unit();
}

// [[arrow::export]]
arrow::TimeUnit::type DurationType__unit(const std::shared_ptr<arrow::TimeType>& type) {
return type->unit();
}

// [[arrow::export]]
int32_t DecimalType__precision(const std::shared_ptr<arrow::DecimalType>& type) {
return type->precision();
Expand Down
71 changes: 54 additions & 17 deletions r/src/r_to_arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ enum RVectorType {
DATE_INT,
DATE_DBL,
TIME,
DURATION,
POSIXCT,
POSIXLT,
BINARY,
Expand Down Expand Up @@ -107,8 +108,10 @@ RVectorType GetVectorType(SEXP x) {
return INT64;
} else if (Rf_inherits(x, "POSIXct")) {
return POSIXCT;
} else if (Rf_inherits(x, "difftime")) {
} else if (Rf_inherits(x, "hms")) {
return TIME;
} else if (Rf_inherits(x, "difftime")) {
return DURATION;
} else {
return FLOAT64;
}
Expand Down Expand Up @@ -580,6 +583,23 @@ int64_t get_TimeUnit_multiplier(TimeUnit::type unit) {
}
}

Result<int> get_difftime_unit_multiplier(SEXP x) {
std::string unit(CHAR(STRING_ELT(Rf_getAttrib(x, symbols::units), 0)));
if (unit == "secs") {
return 1;
} else if (unit == "mins") {
return 60;
} else if (unit == "hours") {
return 3600;
} else if (unit == "days") {
return 86400;
} else if (unit == "weeks") {
return 604800;
} else {
return Status::Invalid("unknown difftime unit");
}
}

template <typename T>
class RPrimitiveConverter<T, enable_if_t<is_time_type<T>::value>>
: public PrimitiveConverter<T, RConverter> {
Expand All @@ -592,21 +612,7 @@ class RPrimitiveConverter<T, enable_if_t<is_time_type<T>::value>>
}

// multiplier to get the number of seconds from the value stored in the R vector
int difftime_multiplier;
std::string unit(CHAR(STRING_ELT(Rf_getAttrib(x, symbols::units), 0)));
if (unit == "secs") {
difftime_multiplier = 1;
} else if (unit == "mins") {
difftime_multiplier = 60;
} else if (unit == "hours") {
difftime_multiplier = 3600;
} else if (unit == "days") {
difftime_multiplier = 86400;
} else if (unit == "weeks") {
difftime_multiplier = 604800;
} else {
return Status::Invalid("unknown difftime unit");
}
ARROW_ASSIGN_OR_RAISE(int difftime_multiplier, get_difftime_unit_multiplier(x));

// then multiply the seconds by this to match the time unit
auto multiplier =
Expand Down Expand Up @@ -822,7 +828,38 @@ class RPrimitiveConverter<T, enable_if_t<is_duration_type<T>::value>>
: public PrimitiveConverter<T, RConverter> {
public:
Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
// TODO: look in lubridate
auto rtype = GetVectorType(x);

// only handle <difftime> R objects
if (rtype == DURATION) {
RETURN_NOT_OK(this->Reserve(size - offset));

ARROW_ASSIGN_OR_RAISE(int difftime_multiplier, get_difftime_unit_multiplier(x));

int64_t multiplier =
get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier;

auto append_value = [this, multiplier](double value) {
auto converted = static_cast<typename T::c_type>(value * multiplier);
this->primitive_builder_->UnsafeAppend(converted);
return Status::OK();
};
auto append_null = [this]() {
this->primitive_builder_->UnsafeAppendNull();
return Status::OK();
};

if (ALTREP(x)) {
return VisitVector(RVectorIterator_ALTREP<double>(x, offset), size, append_null,
append_value);
} else {
return VisitVector(RVectorIterator<double>(x, offset), size, append_null,
append_value);
}

return Status::OK();
}

return Status::NotImplemented("Extend");
}
};
Expand Down
5 changes: 4 additions & 1 deletion r/src/type_infer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,12 @@ std::shared_ptr<arrow::DataType> InferArrowTypeFromVector<REALSXP>(SEXP x) {
if (Rf_inherits(x, "integer64")) {
return int64();
}
if (Rf_inherits(x, "difftime")) {
if (Rf_inherits(x, "hms")) {
return time32(TimeUnit::SECOND);
}
if (Rf_inherits(x, "difftime")) {
return duration(TimeUnit::SECOND);
}
return float64();
}

Expand Down
Loading