Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ static auto kProjectOptionsType = GetFunctionOptionsType<ProjectOptions>(
DataMember("field_names", &ProjectOptions::field_names),
DataMember("field_nullability", &ProjectOptions::field_nullability),
DataMember("field_metadata", &ProjectOptions::field_metadata));
// FunctionOptionsType descriptor for DayOfWeekOptions. It lists the two data
// members ("one_based_numbering" and "week_start") by name and by
// pointer-to-member.
static auto kDayOfWeekOptionsType = GetFunctionOptionsType<DayOfWeekOptions>(
DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering),
DataMember("week_start", &DayOfWeekOptions::week_start));
} // namespace
} // namespace internal

Expand Down Expand Up @@ -278,6 +281,12 @@ ProjectOptions::ProjectOptions(std::vector<std::string> n)
// Default constructor: delegates to the constructor taking a vector of field
// names, passing an empty vector.
ProjectOptions::ProjectOptions() : ProjectOptions(std::vector<std::string>()) {}
// Out-of-class definition of the static constexpr kTypeName member.
constexpr char ProjectOptions::kTypeName[];

DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
: FunctionOptions(internal::kDayOfWeekOptionsType),
one_based_numbering(one_based_numbering),
week_start(week_start) {}
constexpr char DayOfWeekOptions::kTypeName[];

namespace internal {
void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kArithmeticOptionsType));
Expand All @@ -296,6 +305,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kCompareOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kProjectOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType));
}
} // namespace internal

Expand Down Expand Up @@ -458,7 +468,6 @@ Result<Datum> IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa
SCALAR_EAGER_UNARY(Year, "year")
SCALAR_EAGER_UNARY(Month, "month")
SCALAR_EAGER_UNARY(Day, "day")
SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week")
SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
SCALAR_EAGER_UNARY(ISOYear, "iso_year")
SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
Expand All @@ -472,5 +481,9 @@ SCALAR_EAGER_UNARY(Microsecond, "microsecond")
SCALAR_EAGER_UNARY(Nanosecond, "nanosecond")
SCALAR_EAGER_UNARY(Subsecond, "subsecond")

// Eager wrapper: invokes the registered "day_of_week" compute function on a
// single argument, forwarding the caller's options and execution context.
Result<Datum> DayOfWeek(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) {
  // `options` is a by-value copy owned by this call, so its address is valid
  // for the whole duration of CallFunction.
  auto* options_ptr = &options;
  return CallFunction("day_of_week", {arg}, options_ptr, ctx);
}

} // namespace compute
} // namespace arrow
23 changes: 21 additions & 2 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,18 @@ class ARROW_EXPORT ProjectOptions : public FunctionOptions {
std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
};

/// \brief Options for the "day_of_week" function.
///
/// The default-constructed value uses zero-based numbering with the week
/// starting on Monday (week_start = 1).
struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
public:
explicit DayOfWeekOptions(bool one_based_numbering = false, uint32_t week_start = 1);
constexpr static char const kTypeName[] = "DayOfWeekOptions";
/// Return a default-constructed DayOfWeekOptions.
static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; }

/// If true, days are numbered starting from 1; if false, starting from 0.
bool one_based_numbering;
/// Day on which the week starts, in ISO numbering (Monday=1, Sunday=7).
/// The day_of_week kernel returns Status::Invalid for values outside [1, 7].
uint32_t week_start;
};

/// @}

/// \brief Get the absolute value of a value. Array values can be of arbitrary
Expand Down Expand Up @@ -713,15 +725,22 @@ ARROW_EXPORT
Result<Datum> Day(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief DayOfWeek returns number of the day of the week value for each element of
/// `values`. Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.
/// `values`.
///
/// By default week starts on Monday denoted by 0 and ends on Sunday denoted
/// by 6. Start day of the week (Monday=1, Sunday=7) and numbering base (0 or 1) can be
/// set using DayOfWeekOptions
///
/// \param[in] values input to extract number of the day of the week from
/// \param[in] options for setting start of the week and day numbering
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 5.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR);
ARROW_EXPORT Result<Datum> DayOfWeek(const Datum& values,
DayOfWeekOptions options = DayOfWeekOptions(),
ExecContext* ctx = NULLPTR);

/// \brief DayOfYear returns number of day of the year for each element of `values`.
/// January 1st maps to day number 1, February 1st to 32, etc.
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/function_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ TEST(FunctionOptions, Equality) {
options.emplace_back(new ProjectOptions({"col1"}, {false}, {}));
options.emplace_back(
new ProjectOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})}));
options.emplace_back(new DayOfWeekOptions(false, 1));
options.emplace_back(new CastOptions(CastOptions::Safe(boolean())));
options.emplace_back(new CastOptions(CastOptions::Unsafe(int64())));
options.emplace_back(new FilterOptions());
Expand Down
97 changes: 88 additions & 9 deletions cpp/src/arrow/compute/kernels/scalar_temporal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

#include "arrow/builder.h"
#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/time.h"
Expand Down Expand Up @@ -48,6 +49,8 @@ using arrow_vendored::date::literals::thu;
using internal::applicator::ScalarUnaryNotNull;
using internal::applicator::SimpleUnary;

// Kernel state type for day_of_week: DayOfWeekState::Init is passed as the
// kernel init function at registration, and DayOfWeekState::Get(ctx) yields
// the DayOfWeekOptions inside the kernel exec.
using DayOfWeekState = OptionsWrapper<DayOfWeekOptions>;

// Return the timezone string of the timestamp type carried by `datum`.
// The datum's type is converted with checked_cast to TimestampType, so this
// is only meaningful for timestamp inputs.
// NOTE(review): the result is a reference obtained through datum.type();
// presumably it stays valid only while the datum keeps its type object alive
// -- confirm against Datum::type().
const std::string& GetInputTimezone(const Datum& datum) {
return checked_cast<const TimestampType&>(*datum.type()).timezone();
}
Expand Down Expand Up @@ -80,6 +83,25 @@ struct TemporalComponentExtract {
}
};

// Kernel exec adapter for day_of_week.
//
// Op      - functor type constructed from the DayOfWeekOptions and applied by
//           ScalarUnaryNotNullStateful.
// OutType - Arrow type of the produced values.
template <typename Op, typename OutType>
struct DayOfWeekExec {
  using OutValue = typename internal::GetOutputType<OutType>::T;

  // Validates the options stored in the kernel state, runs the timezone check
  // on the first batch value, then delegates to the stateful unary applicator.
  // Returns Status::Invalid when week_start is outside [1, 7]; a failing
  // timezone check is propagated unchanged.
  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    const DayOfWeekOptions& opts = DayOfWeekState::Get(ctx);

    const bool week_start_in_range = 1 <= opts.week_start && opts.week_start <= 7;
    if (!week_start_in_range) {
      return Status::Invalid(
          "week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=",
          opts.week_start);
    }

    RETURN_NOT_OK(TemporalComponentExtractCheckTimezone(batch.values[0]));

    using Kernel = applicator::ScalarUnaryNotNullStateful<OutType, TimestampType, Op>;
    Kernel kernel{Op(opts)};
    return kernel.Exec(ctx, batch, out);
  }
};

// ----------------------------------------------------------------------
// Extract year from timestamp

Expand Down Expand Up @@ -118,16 +140,30 @@ struct Day {

// ----------------------------------------------------------------------
// Extract day of week from timestamp
//
// By default week starts on Monday represented by 0 and ends on Sunday represented
// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 1) can be
// set using DayOfWeekOptions

template <typename Duration>
struct DayOfWeek {
explicit DayOfWeek(const DayOfWeekOptions& options) {
for (int i = 0; i < 7; i++) {
lookup_table[i] = i + 8 - options.week_start;
lookup_table[i] = (lookup_table[i] > 6) ? lookup_table[i] - 7 : lookup_table[i];
lookup_table[i] += options.one_based_numbering;
}
}

template <typename T, typename Arg0>
static T Call(KernelContext*, Arg0 arg, Status*) {
return static_cast<T>(
weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
.iso_encoding() -
1);
T Call(KernelContext*, Arg0 arg, Status*) const {
const auto wd = arrow_vendored::date::year_month_weekday(
floor<days>(sys_time<Duration>(Duration{arg})))
.weekday()
.iso_encoding();
return lookup_table[wd - 1];
}
std::array<int64_t, 7> lookup_table;
};

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -398,6 +434,42 @@ std::shared_ptr<ScalarFunction> MakeTemporal(std::string name, const FunctionDoc
return func;
}

template <template <typename...> class Op, typename OutType>
std::shared_ptr<ScalarFunction> MakeTemporalWithOptions(
std::string name, const FunctionDoc* doc, const DayOfWeekOptions& default_options,
KernelInit init) {
const auto& out_type = TypeTraits<OutType>::type_singleton();
auto func =
std::make_shared<ScalarFunction>(name, Arity::Unary(), doc, &default_options);

for (auto unit : internal::AllTimeUnits()) {
InputType in_type{match::TimestampTypeUnit(unit)};
switch (unit) {
case TimeUnit::SECOND: {
auto exec = DayOfWeekExec<Op<std::chrono::seconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
break;
}
case TimeUnit::MILLI: {
auto exec = DayOfWeekExec<Op<std::chrono::milliseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
break;
}
case TimeUnit::MICRO: {
auto exec = DayOfWeekExec<Op<std::chrono::microseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
break;
}
case TimeUnit::NANO: {
auto exec = DayOfWeekExec<Op<std::chrono::nanoseconds>, OutType>::Exec;
DCHECK_OK(func->AddKernel({in_type}, out_type, std::move(exec), init));
break;
}
}
}
return func;
}

template <template <typename...> class Op>
std::shared_ptr<ScalarFunction> MakeStructTemporal(std::string name,
const FunctionDoc* doc) {
Expand Down Expand Up @@ -451,9 +523,14 @@ const FunctionDoc day_doc{

const FunctionDoc day_of_week_doc{
"Extract day of the week number",
("Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.\n"
("By default, the week starts on Monday represented by 0 and ends on Sunday "
"represented by 6.\n"
"DayOfWeekOptions.week_start can be used to set another starting day using ISO "
"convention (Monday=1, Sunday=7). Day numbering can start with 0 or 1 using "
"DayOfWeekOptions.one_based_numbering parameter.\n"
"Returns an error if timestamp has a defined timezone. Null values return null."),
{"values"}};
{"values"},
"DayOfWeekOptions"};

const FunctionDoc day_of_year_doc{
"Extract number of day of year",
Expand Down Expand Up @@ -537,7 +614,9 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {
// Register "day" with its own documentation object (day_doc). Passing
// year_doc here would make "day" report the summary and description of "year".
auto day = MakeTemporal<Day, Int64Type>("day", &day_doc);
DCHECK_OK(registry->AddFunction(std::move(day)));

auto day_of_week = MakeTemporal<DayOfWeek, Int64Type>("day_of_week", &day_of_week_doc);
static auto default_day_of_week_options = DayOfWeekOptions::Defaults();
auto day_of_week = MakeTemporalWithOptions<DayOfWeek, Int64Type>(
"day_of_week", &day_of_week_doc, default_day_of_week_options, DayOfWeekState::Init);
DCHECK_OK(registry->AddFunction(std::move(day_of_week)));

auto day_of_year = MakeTemporal<DayOfYear, Int64Type>("day_of_year", &day_of_year_doc);
Expand All @@ -561,7 +640,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) {
auto minute = MakeTemporal<Minute, Int64Type>("minute", &minute_doc);
DCHECK_OK(registry->AddFunction(std::move(minute)));

auto second = MakeTemporal<Second, DoubleType>("second", &second_doc);
auto second = MakeTemporal<Second, Int64Type>("second", &second_doc);
DCHECK_OK(registry->AddFunction(std::move(second)));

auto millisecond =
Expand Down
Loading