From 74645cd5f4ea1525f5fddfd8219f8fff569c8470 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 26 Aug 2019 14:33:49 -0700 Subject: [PATCH] Make time/timestamp type factories more human friendly --- r/R/type.R | 76 +++++++++++++++++++++++++++++-- r/man/data-type.Rd | 20 ++++++-- r/tests/testthat/test-data-type.R | 38 ++++++++++++++++ 3 files changed, 124 insertions(+), 10 deletions(-) diff --git a/r/R/type.R b/r/R/type.R index b763ed80ebc..86b888d1cce 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -267,20 +267,28 @@ type.default <- function(x) { #' * Called from `schema()` or `struct()`, `double()` also is supported as a #' way of creating a `float64()` #' -#' @param unit For date/time types, the time unit (day, second, millisecond, etc.) -#' @param timezone For `timestamp()`, an optional time zone. +#' `date32()` creates a datetime type with a "day" unit, like the R `Date` +#' class. `date64()` has a "ms" unit. +#' +#' @param unit For time/timestamp types, the time unit. `time32()` can take +#' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can +#' take any of those four values. +#' @param timezone For `timestamp()`, an optional time zone string. #' @param precision For `decimal()`, precision #' @param scale For `decimal()`, scale #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns #' #' @name data-type +#' @return An Arrow type object inheriting from `arrow::DataType`. #' @export #' @seealso [dictionary()] for creating a dictionary (factor-like) type. #' @examples #' \donttest{ #' bool() #' struct(a = int32(), b = double()) +#' timestamp("ms", timezone = "CEST") +#' time64("ns") #' } int8 <- function() shared_ptr(`arrow::Int8`, Int8__initialize()) @@ -358,11 +366,64 @@ date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize()) #' @rdname data-type #' @export -time32 <- function(unit) shared_ptr(`arrow::Time32`, Time32__initialize(unit)) +time32 <- function(unit = c("ms", "s")) { + if (is.character(unit)) { + unit <- match.arg(unit) + } + unit <- make_valid_time_unit(unit, valid_time32_units) + shared_ptr(`arrow::Time32`, Time32__initialize(unit)) +} + +valid_time32_units <- c( + "ms" = TimeUnit$MILLI, + "s" = TimeUnit$SECOND +) + +valid_time64_units <- c( + "ns" = TimeUnit$NANO, + "us" = TimeUnit$MICRO +) + +make_valid_time_unit <- function(unit, valid_units) { + if (is.character(unit)) { + unit <- valid_units[match.arg(unit, choices = names(valid_units))] + } + if (is.numeric(unit)) { + # Allow non-integer input for convenience + unit <- as.integer(unit) + } else { + stop('"unit" should be one of ', oxford_paste(names(valid_units), "or"), call.=FALSE) + } + if (!(unit %in% valid_units)) { + stop('"unit" should be one of ', oxford_paste(valid_units, "or"), call.=FALSE) + } + unit +} + +oxford_paste <- function(x, conjunction = "and") { + if (is.character(x)) { + x <- paste0('"', x, '"') + } + if (length(x) < 2) { + return(x) + } + x[length(x)] <- paste(conjunction, x[length(x)]) + if (length(x) > 2) { + return(paste(x, collapse = ", ")) + } else { + return(paste(x, collapse = " ")) + } +} #' @rdname data-type #' @export -time64 <- function(unit) shared_ptr(`arrow::Time64`, Time64__initialize(unit)) +time64 <- function(unit = c("ns", "us")) { + if (is.character(unit)) { + unit <- match.arg(unit) + } + unit <- make_valid_time_unit(unit, valid_time64_units) + shared_ptr(`arrow::Time64`, Time64__initialize(unit)) +} #' @rdname data-type #' @export @@ -370,10 +431,15 @@ null <- function() shared_ptr(`arrow::Null`, Null__initialize()) #' @rdname data-type #' @export -timestamp <- function(unit, timezone) { +timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone) { + if (is.character(unit)) { + unit <- match.arg(unit) + } + unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) if (missing(timezone)) { shared_ptr(`arrow::Timestamp`, Timestamp__initialize1(unit)) } else { + assert_that(is.character(timezone), length(timezone) == 1) shared_ptr(`arrow::Timestamp`, Timestamp__initialize2(unit, timezone)) } } diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 316ff495f2e..a0355cdb1fa 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -68,13 +68,13 @@ date32() date64() -time32(unit) +time32(unit = c("ms", "s")) -time64(unit) +time64(unit = c("ns", "us")) null() -timestamp(unit, timezone) +timestamp(unit = c("s", "ms", "us", "ns"), timezone) decimal(precision, scale) @@ -83,9 +83,11 @@ list_of(type) struct(...) } \arguments{ -\item{unit}{For date/time types, the time unit (day, second, millisecond, etc.)} +\item{unit}{For time/timestamp types, the time unit. \code{time32()} can take +either "s" or "ms", while \code{time64()} can be "us" or "ns". \code{timestamp()} can +take any of those four values.} -\item{timezone}{For \code{timestamp()}, an optional time zone.} +\item{timezone}{For \code{timestamp()}, an optional time zone string.} \item{precision}{For \code{decimal()}, precision} @@ -95,6 +97,9 @@ struct(...) \item{...}{For \code{struct()}, a named list of types to define the struct columns} } +\value{ +An Arrow type object inheriting from \code{arrow::DataType}. +} \description{ These functions create type objects corresponding to Arrow types. Use them when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most @@ -110,11 +115,16 @@ A few functions have aliases: \item Called from \code{schema()} or \code{struct()}, \code{double()} also is supported as a way of creating a \code{float64()} } + +\code{date32()} creates a datetime type with a "day" unit, like the R \code{Date} +class. \code{date64()} has a "ms" unit. } \examples{ \donttest{ bool() struct(a = int32(), b = double()) +timestamp("ms", timezone = "CEST") +time64("ns") } } \seealso{ diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index dfc0d53f40d..b16da2cca1b 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -237,6 +237,10 @@ test_that("timestamp type works as expected", { expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) +test_that("timestamp with timezone", { + expect_equal(timestamp(timezone = "EST")$ToString(), "timestamp[s, tz=EST]") +}) + test_that("time32 types work as expected", { x <- time32(TimeUnit$SECOND) expect_equal(x$id, 19L) @@ -285,6 +289,40 @@ test_that("time64 types work as expected", { expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) +test_that("time type unit validation", { + expect_equal(time32(TimeUnit$SECOND), time32("s")) + expect_equal(time32(TimeUnit$MILLI), time32("ms")) + expect_equal(time32(), time32(TimeUnit$MILLI)) + expect_error(time32(4), '"unit" should be one of 1 or 0') + expect_error(time32(NULL), '"unit" should be one of "ms" or "s"') + expect_error(time32("years"), "'arg' should be one of") + + expect_equal(time64(TimeUnit$NANO), time64("n")) + expect_equal(time64(TimeUnit$MICRO), time64("us")) + expect_equal(time64(), time64(TimeUnit$NANO)) + expect_error(time64(4), '"unit" should be one of 3 or 2') + expect_error(time64(NULL), '"unit" should be one of "ns" or "us"') + expect_error(time64("years"), "'arg' should be one of") +}) + +test_that("timestamp type input validation", { + expect_equal(timestamp("ms"), timestamp(TimeUnit$MILLI)) + expect_equal(timestamp(), timestamp(TimeUnit$SECOND)) + expect_error( + timestamp(NULL), + '"unit" should be one of "ns", "us", "ms", or "s"' + ) + expect_error( + timestamp(timezone = 1231231), + "timezone is not a character vector" + ) + expect_error( + timestamp(timezone = c("not", "a", "timezone")), + "length(timezone) not equal to 1", + fixed = TRUE + ) +}) + test_that("list type works as expected", { x <- list_of(int32()) expect_equal(x$id, 23L)