diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 2021c8a30c6..1aae7988a67 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -158,6 +158,9 @@ static auto kProjectOptionsType = GetFunctionOptionsType( DataMember("field_names", &ProjectOptions::field_names), DataMember("field_nullability", &ProjectOptions::field_nullability), DataMember("field_metadata", &ProjectOptions::field_metadata)); +static auto kDayOfWeekOptionsType = GetFunctionOptionsType( + DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering), + DataMember("week_start", &DayOfWeekOptions::week_start)); } // namespace } // namespace internal @@ -278,6 +281,12 @@ ProjectOptions::ProjectOptions(std::vector n) ProjectOptions::ProjectOptions() : ProjectOptions(std::vector()) {} constexpr char ProjectOptions::kTypeName[]; +DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start) + : FunctionOptions(internal::kDayOfWeekOptionsType), + one_based_numbering(one_based_numbering), + week_start(week_start) {} +constexpr char DayOfWeekOptions::kTypeName[]; + namespace internal { void RegisterScalarOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType)); @@ -296,6 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType)); } } // namespace internal @@ -458,7 +468,6 @@ Result IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa SCALAR_EAGER_UNARY(Year, "year") SCALAR_EAGER_UNARY(Month, "month") SCALAR_EAGER_UNARY(Day, "day") -SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week") SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(ISOYear, "iso_year") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") @@ -472,5 +481,9 @@ SCALAR_EAGER_UNARY(Microsecond, "microsecond") SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") SCALAR_EAGER_UNARY(Subsecond, "subsecond") +Result DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) { + return CallFunction("day_of_week", {arg}, &options, ctx); +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 89b4faca940..07a8d706f89 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -244,6 +244,18 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions { std::vector> field_metadata; }; +struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions { + public: + explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1); + constexpr static char const kTypeName[] = "DayOfWeekOptions"; + static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; } + + /// Number days from 1 if true and from 0 if false + bool one_based_numbering; + /// What day does the week start with (Monday=1, Sunday=7) + uint32_t week_start; +}; + /// @} /// \brief Get the absolute value of a value. Array values can be of arbitrary @@ -713,15 +725,22 @@ ARROW_EXPORT Result Day(const Datum& values, ExecContext* ctx = NULLPTR); /// \brief DayOfWeek returns number of the day of the week value for each element of -/// `values`. Week starts on Monday denoted by 0 and ends on Sunday denoted by 6. +/// `values`. +/// +/// By default week starts on Monday denoted by 0 and ends on Sunday denoted +/// by 6. Start day of the week (Monday=1, Sunday=7) and numbering base (0 or 1) can be +/// set using DayOfWeekOptions /// /// \param[in] values input to extract number of the day of the week from +/// \param[in] options for setting start of the week and day numbering /// \param[in] ctx the function execution context, optional /// \return the resulting datum /// /// \since 5.0.0 /// \note API not yet finalized -ARROW_EXPORT Result DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); +ARROW_EXPORT Result DayOfWeek(const Datum& values, + DayOfWeekOptions options = DayOfWeekOptions(), + ExecContext* ctx = NULLPTR); /// \brief DayOfYear returns number of day of the year for each element of `values`. /// January 1st maps to day number 1, February 1st to 32, etc. diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index bbe514af09a..752ade284b7 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -90,6 +90,7 @@ TEST(FunctionOptions, Equality) { options.emplace_back(new ProjectOptions({"col1"}, {false}, {})); options.emplace_back( new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})})); + options.emplace_back(new DayOfWeekOptions(false, 1)); options.emplace_back(new CastOptions(CastOptions::Safe(boolean()))); options.emplace_back(new CastOptions(CastOptions::Unsafe(int64()))); options.emplace_back(new FilterOptions()); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 1694d22ffae..f0257772d4a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/builder.h" +#include "arrow/compute/api_scalar.h" #include "arrow/compute/kernels/common.h" #include "arrow/util/checked_cast.h" #include "arrow/util/time.h" @@ -48,6 +49,8 @@ using arrow_vendored::date::literals::thu; using internal::applicator::ScalarUnaryNotNull; using internal::applicator::SimpleUnary; +using DayOfWeekState = OptionsWrapper; + const std::string& GetInputTimezone(const Datum& datum) { return checked_cast(*datum.type()).timezone(); } @@ -80,6 +83,25 @@ struct TemporalComponentExtract { } }; +template +struct DayOfWeekExec { + using OutValue = typename internal::GetOutputType::T; + + static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + const DayOfWeekOptions& options = DayOfWeekState::Get(ctx); + if (options.week_start < 1 || 7 < options.week_start) { + return Status::Invalid( + "week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=", + options.week_start); + } + + RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0])); + applicator::ScalarUnaryNotNullStateful kernel{ + Op(options)}; + return kernel.Exec(ctx, batch, out); + } +}; + // ---------------------------------------------------------------------- // Extract year from timestamp @@ -118,16 +140,30 @@ struct Day { // ---------------------------------------------------------------------- // Extract day of week from timestamp +// +// By default week starts on Monday represented by 0 and ends on Sunday represented +// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 1) can be +// set using DayOfWeekOptions template struct DayOfWeek { + explicit DayOfWeek(const DayOfWeekOptions& options) { + for (int i = 0; i < 7; i++) { + lookup_table[i] = i + 8 - options.week_start; + lookup_table[i] = (lookup_table[i] > 6) ? lookup_table[i] - 7 : lookup_table[i]; + lookup_table[i] += options.one_based_numbering; + } + } + template - static T Call(KernelContext*, Arg0 arg, Status*) { - return static_cast( - weekday(year_month_day(floor(sys_time(Duration{arg})))) - .iso_encoding() - - 1); + T Call(KernelContext*, Arg0 arg, Status*) const { + const auto wd = arrow_vendored::date::year_month_weekday( + floor(sys_time(Duration{arg}))) + .weekday() + .iso_encoding(); + return lookup_table[wd - 1]; } + std::array lookup_table; }; // ---------------------------------------------------------------------- @@ -398,6 +434,42 @@ std::shared_ptr MakeTemporal(std::string name, const FunctionDoc return func; } +template