From 7c05a64b61f178b10394db1280d99610ddddf229 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 24 Jun 2021 23:56:40 +0200 Subject: [PATCH 01/15] Adding TemporalOptions. --- .../arrow/compute/kernels/scalar_temporal.cc | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 1694d22ffae..0a8591cae79 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -35,6 +35,7 @@ using arrow_vendored::date::days; using arrow_vendored::date::floor; using arrow_vendored::date::hh_mm_ss; using arrow_vendored::date::sys_time; +using arrow_vendored::date::time_zone; using arrow_vendored::date::trunc; using arrow_vendored::date::weekday; using arrow_vendored::date::weeks; @@ -80,6 +81,39 @@ struct TemporalComponentExtract { } }; +/// \addtogroup compute-concrete-options +/// @{ + +/// \brief Control behavior of temporal kernels +/// +/// Used to control timestamp localization and handling ambiguous/nonexistent times. +struct ARROW_EXPORT TemporalOptions : public FunctionOptions { + /// How to interpret ambiguous local times that can be interpreted as + /// multiple instants due to DST shifts. + enum Ambiguous { RAISE_AMBIGUOUS = 0, INFER, IGNORE_AMBIGUOUS }; + + /// How to handle local times that do not exists due to DST shifts. + enum Nonexistent { + RAISE_NONEXISTENT = 0, + SHIFT_FORWARD, + SHIFT_BACKWARD, + IGNORE_NONEXISTENT + }; + + explicit TemporalOptions(const int64_t index_of_monday = 1, + const time_zone* tz = nullptr, + const Ambiguous ambiguous = RAISE_AMBIGUOUS, + const Nonexistent nonexistent = RAISE_NONEXISTENT) + : tz(tz), ambiguous(ambiguous), nonexistent(nonexistent) {} + + static TemporalOptions Defaults() { return TemporalOptions{}; } + + const int64_t index_of_monday = 0; + const time_zone* tz; + const enum Ambiguous ambiguous; + const enum Nonexistent nonexistent; +}; + // ---------------------------------------------------------------------- // Extract year from timestamp From 4bc1fdc13d4e6f41b341d17cf12b52234a054d09 Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 28 Jun 2021 17:01:00 +0200 Subject: [PATCH 02/15] Renaming to TemporalComponentExtractionOptions. --- cpp/src/arrow/compute/api_scalar.cc | 6 +- cpp/src/arrow/compute/api_scalar.h | 17 ++- .../arrow/compute/kernels/scalar_temporal.cc | 106 ++++++++++++------ .../compute/kernels/scalar_temporal_test.cc | 7 ++ docs/source/cpp/compute.rst | 73 ++++++------ 5 files changed, 135 insertions(+), 74 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 2021c8a30c6..6f320843629 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -458,7 +458,6 @@ Result IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa SCALAR_EAGER_UNARY(Year, "year") SCALAR_EAGER_UNARY(Month, "month") SCALAR_EAGER_UNARY(Day, "day") -SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week") SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(ISOYear, "iso_year") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") @@ -472,5 +471,10 @@ SCALAR_EAGER_UNARY(Microsecond, "microsecond") SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") SCALAR_EAGER_UNARY(Subsecond, "subsecond") +Result DayOfWeek(const Datum& arg, TemporalComponentExtractionOptions options, + ExecContext* ctx) { + return CallFunction("day_of_week", {arg}, &options, ctx); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 89b4faca940..4799ab3aa33 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -244,6 +244,18 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions { std::vector> field_metadata; }; +struct ARROW_EXPORT TemporalComponentExtractionOptions : public FunctionOptions { + explicit TemporalComponentExtractionOptions(int64_t start_index = 0) + : start_index(start_index) {} + + static TemporalComponentExtractionOptions Defaults() { + return TemporalComponentExtractionOptions{}; + } + + /// Index of the first day of the week. + int64_t start_index; +}; + /// @} /// \brief Get the absolute value of a value. Array values can be of arbitrary @@ -721,7 +733,10 @@ Result Day(const Datum& values, ExecContext* ctx = NULLPTR); /// /// \since 5.0.0 /// \note API not yet finalized -ARROW_EXPORT Result DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); +ARROW_EXPORT Result DayOfWeek(const Datum& values, + TemporalComponentExtractionOptions options = + TemporalComponentExtractionOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief DayOfYear returns number of day of the year for each element of `values`. /// January 1st maps to day number 1, February 1st to 32, etc. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 0a8591cae79..935091fa8f7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/builder.h" +#include "arrow/compute/api_scalar.h" #include "arrow/compute/kernels/common.h" #include "arrow/util/checked_cast.h" #include "arrow/util/time.h" @@ -49,6 +50,8 @@ using arrow_vendored::date::literals::thu; using internal::applicator::ScalarUnaryNotNull; using internal::applicator::SimpleUnary; +using TemporalComponentExtractState = OptionsWrapper; + const std::string& GetInputTimezone(const Datum& datum) { return checked_cast(*datum.type()).timezone(); } @@ -81,37 +84,18 @@ struct TemporalComponentExtract { } }; -/// \addtogroup compute-concrete-options -/// @{ - -/// \brief Control behavior of temporal kernels -/// -/// Used to control timestamp localization and handling ambiguous/nonexistent times. -struct ARROW_EXPORT TemporalOptions : public FunctionOptions { - /// How to interpret ambiguous local times that can be interpreted as - /// multiple instants due to DST shifts. - enum Ambiguous { RAISE_AMBIGUOUS = 0, INFER, IGNORE_AMBIGUOUS }; - - /// How to handle local times that do not exists due to DST shifts. - enum Nonexistent { - RAISE_NONEXISTENT = 0, - SHIFT_FORWARD, - SHIFT_BACKWARD, - IGNORE_NONEXISTENT - }; - - explicit TemporalOptions(const int64_t index_of_monday = 1, - const time_zone* tz = nullptr, - const Ambiguous ambiguous = RAISE_AMBIGUOUS, - const Nonexistent nonexistent = RAISE_NONEXISTENT) - : tz(tz), ambiguous(ambiguous), nonexistent(nonexistent) {} - - static TemporalOptions Defaults() { return TemporalOptions{}; } - - const int64_t index_of_monday = 0; - const time_zone* tz; - const enum Ambiguous ambiguous; - const enum Nonexistent nonexistent; +template +struct TemporalComponentExtractWithOptions { + using OutValue = typename internal::GetOutputType::T; + + static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + TemporalComponentExtractionOptions options = TemporalComponentExtractState::Get(ctx); + + RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0])); + applicator::ScalarUnaryNotNullStateful kernel{ + Op(options)}; + return kernel.Exec(ctx, batch, out); + } }; // ---------------------------------------------------------------------- @@ -155,13 +139,16 @@ struct Day { template struct DayOfWeek { + explicit DayOfWeek(TemporalComponentExtractionOptions options) : options(options) {} + template - static T Call(KernelContext*, Arg0 arg, Status*) { + T Call(KernelContext*, Arg0 arg, Status*) const { return static_cast( weekday(year_month_day(floor(sys_time(Duration{arg})))) .iso_encoding() - - 1); + 1 + options.start_index); } + TemporalComponentExtractionOptions options; }; // ---------------------------------------------------------------------- @@ -432,6 +419,46 @@ std::shared_ptr MakeTemporal(std::string name, const FunctionDoc return func; } +template