From ac122d458bd7eb1848c1ce3e2455e2b034778e35 Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 27 Apr 2021 19:59:26 +0200 Subject: [PATCH 01/17] First commit. --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/compute/api_scalar.cc | 15 + cpp/src/arrow/compute/api_scalar.h | 107 ++++++ cpp/src/arrow/compute/kernels/CMakeLists.txt | 1 + .../arrow/compute/kernels/scalar_temporal.cc | 351 ++++++++++++++++++ .../compute/kernels/scalar_temporal_test.cc | 74 ++++ cpp/src/arrow/compute/registry.cc | 1 + cpp/src/arrow/compute/registry_internal.h | 1 + 8 files changed, 551 insertions(+) create mode 100644 cpp/src/arrow/compute/kernels/scalar_temporal.cc create mode 100644 cpp/src/arrow/compute/kernels/scalar_temporal_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index f6d5a540c98..8e411898a34 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -397,6 +397,7 @@ if(ARROW_COMPUTE) compute/kernels/scalar_nested.cc compute/kernels/scalar_set_lookup.cc compute/kernels/scalar_string.cc + compute/kernels/scalar_temporal.cc compute/kernels/scalar_validity.cc compute/kernels/scalar_fill_null.cc compute/kernels/scalar_if_else.cc diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 6f77d6f9785..8bcd2b8e54b 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -172,5 +172,20 @@ Result IfElse(const Datum& cond, const Datum& if_true, const Datum& if_fa return CallFunction("if_else", {cond, if_true, if_false}, ctx); } +// ---------------------------------------------------------------------- +// Temporal functions + +SCALAR_EAGER_UNARY(Year, "year") +SCALAR_EAGER_UNARY(Month, "month") +SCALAR_EAGER_UNARY(Day, "day") +SCALAR_EAGER_UNARY(Week, "week") +SCALAR_EAGER_UNARY(Quarter, "quarter") +SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") +SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week") +SCALAR_EAGER_UNARY(Hour, "hour") +SCALAR_EAGER_UNARY(Minute, "minute") 
+SCALAR_EAGER_UNARY(Second, "second") +>>>>>>> e10c71612... First commit. + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 21d5c5324d4..53167dcadea 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -521,5 +521,112 @@ ARROW_EXPORT Result IfElse(const Datum& cond, const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR); +/// \brief Year returns year value for each element of `values` +/// +/// \param[in] values input to extract year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Year(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Month returns month value for each element of `values` +/// +/// \param[in] values input to extract month from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Month(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Day returns day value for each element of `values` +/// +/// \param[in] values input to extract day from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Day(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Week returns week of year value for each element of `values` +/// +/// \param[in] values input to extract week of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result Week(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Quarter returns quarter of year value for each element of `values` +/// +/// \param[in] values input to extract 
quarter of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result Quarter(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfYear returns day of year value for each element of `values` +/// +/// \param[in] values input to extract day of year from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result DayOfYear(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief DayOfWeek returns day of the week value for each element of `values` +/// +/// \param[in] values input to extract dat of the week from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result DayOfWeek(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Hour returns hour value for each element of `values` +/// +/// \param[in] values input to extract hour from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Hour(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Minute returns minutes value for each element of `values` +/// +/// \param[in] values input to extract minutes from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Minute(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Second returns seconds value for each element of `values` +/// +/// \param[in] values input to extract seconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized 
+ARROW_EXPORT +Result Second(const Datum& values, ExecContext* ctx = NULLPTR); + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index fc11d144105..326578588a7 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -27,6 +27,7 @@ add_arrow_compute_test(scalar_test scalar_nested_test.cc scalar_set_lookup_test.cc scalar_string_test.cc + scalar_temporal_test.cc scalar_validity_test.cc scalar_fill_null_test.cc scalar_if_else_test.cc diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc new file mode 100644 index 00000000000..0a76ca4d67d --- /dev/null +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -0,0 +1,351 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/kernels/common.h" +#include "arrow/util/time.h" +#include "arrow/vendored/datetime.h" + +namespace arrow { + +namespace compute { +namespace { + +using TimePoint = + std::chrono::time_point; + +inline arrow_vendored::date::year_month_day get_year_month_day(const int64_t in_data) { + std::chrono::seconds since_epoch{in_data}; + return arrow_vendored::date::sys_days{ + arrow_vendored::date::floor(since_epoch)}; +} + +template +inline arrow_vendored::date::hh_mm_ss get_time_of_day(const int64_t in_data) { + std::chrono::seconds since_epoch{in_data}; + arrow_vendored::date::sys_days timepoint_days{ + arrow_vendored::date::floor(since_epoch)}; + std::chrono::seconds since_midnight = since_epoch - timepoint_days.time_since_epoch(); + return arrow_vendored::date::make_time(since_midnight); +} + +inline unsigned day_of_year(const int64_t in_data) { + // Based on + // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1021 + const auto t2 = arrow_vendored::date::sys_days{ + arrow_vendored::date::floor( + std::chrono::seconds{in_data})}; + const auto t1 = arrow_vendored::date::year_month_day(t2).year() / + arrow_vendored::date::month(1) / arrow_vendored::date::day(1); + const auto since_new_year = t2 - arrow_vendored::date::sys_days(t1); + return static_cast(since_new_year.count()); +} + +inline unsigned week(const int64_t in_data) { + // Based on + // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 + using namespace arrow_vendored::date; + const auto dp = sys_days{floor(std::chrono::seconds{in_data})}; + auto y = year_month_day{dp + days{3}}.year(); + auto start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); + if (dp < start) { + --y; + start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); + } + return static_cast(trunc(dp - start).count() + 1); +} + +// 
---------------------------------------------------------------------- +// Extract year from timestamp + +template +struct Year { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = + static_cast(get_year_month_day(in_data).year()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast(get_year_month_day(in_data[i]).year()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract month from timestamp + +template +struct Month { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = + static_cast(get_year_month_day(in_data).month()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast(get_year_month_day(in_data[i]).month()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract day from timestamp + +template +struct Day { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = + static_cast(get_year_month_day(in_data).day()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = 
static_cast(get_year_month_day(in_data[i]).day()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract week from timestamp + +template +struct Week { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = week(in_data); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = week(in_data[i]); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract quarter from timestamp + +template +struct Quarter { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = + static_cast(get_year_month_day(in_data).month()) / 3 + 1; + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast(get_year_month_day(in_data[i]).month()) / 3 + 1; + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of year from timestamp + +template +struct DayOfYear { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = day_of_year(in_data); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = 
day_of_year(in_data[i]); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract day of week from timestamp + +template +struct DayOfWeek { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = static_cast( + arrow_vendored::date::weekday(get_year_month_day(in_data)).iso_encoding()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast( + arrow_vendored::date::weekday(get_year_month_day(in_data[i])).iso_encoding()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract hour from timestamp + +template +struct Hour { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = static_cast( + get_time_of_day(in_data).hours().count()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast( + get_time_of_day(in_data[i]).hours().count()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract minute from timestamp + +template +struct Minute { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = static_cast( + get_time_of_day(in_data).minutes().count()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* 
out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast( + get_time_of_day(in_data[i]).minutes().count()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract second from timestamp + +template +struct Second { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + checked_cast(out)->value = static_cast( + get_time_of_day(in_data).seconds().count()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast( + get_time_of_day(in_data[i]).seconds().count()); + } + return Status::OK(); + } +}; + +void MakeFunction(std::string name, ArrayKernelExec exec, const FunctionDoc* doc, + OutputType out_type, FunctionRegistry* registry, + bool can_write_into_slices = true, + NullHandling::type null_handling = NullHandling::INTERSECTION) { + auto func = std::make_shared(name, Arity(1), doc); + + std::vector in_types(1, InputType(Type::TIMESTAMP)); + ScalarKernel kernel(std::move(in_types), out_type, exec); + kernel.null_handling = null_handling; + kernel.can_write_into_slices = can_write_into_slices; + + DCHECK_OK(func->AddKernel(kernel)); + DCHECK_OK(registry->AddFunction(std::move(func))); +} + +const FunctionDoc year_doc{"Extract year values", "", {"values"}}; +const FunctionDoc month_doc{"Extract month values", "", {"values"}}; +const FunctionDoc day_doc{"Extract day values", "", {"values"}}; +const FunctionDoc day_of_year_doc{"Extract day of year values", "", {"values"}}; +const FunctionDoc week_doc{"Extract week values", "", {"values"}}; +const FunctionDoc quarter_doc{"Extract quarter values", "", {"values"}}; +const FunctionDoc 
day_of_week_doc{"Extract day of week values", "", {"values"}}; +const FunctionDoc hour_doc{"Extract hour values", "", {"values"}}; +const FunctionDoc minute_doc{"Extract minute values", "", {"values"}}; +const FunctionDoc second_doc{"Extract second values", "", {"values"}}; + +} // namespace +namespace internal { + +void RegisterScalarTemporal(FunctionRegistry* registry) { + MakeFunction("year", applicator::SimpleUnary>, &year_doc, int64(), + registry); + MakeFunction("month", applicator::SimpleUnary>, &month_doc, int64(), + registry); + MakeFunction("day", applicator::SimpleUnary>, &day_doc, int64(), registry); + MakeFunction("week", applicator::SimpleUnary>, &week_doc, int64(), + registry); + MakeFunction("quarter", applicator::SimpleUnary>, &quarter_doc, + int64(), registry); + MakeFunction("day_of_year", applicator::SimpleUnary>, + &day_of_year_doc, int64(), registry); + MakeFunction("day_of_week", applicator::SimpleUnary>, + &day_of_week_doc, int64(), registry); + MakeFunction("hour", applicator::SimpleUnary>, &hour_doc, int64(), + registry); + MakeFunction("minute", applicator::SimpleUnary>, &minute_doc, int64(), + registry); + MakeFunction("second", applicator::SimpleUnary>, &second_doc, int64(), + registry); + + // TODO + // millisecond + // microsecond + // nanosecond +} + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc new file mode 100644 index 00000000000..0b110b10bbe --- /dev/null +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "arrow/compute/kernels/test_util.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/formatting.h" + +namespace arrow { + +using internal::StringFormatter; + +class TestArray : public ::testing::Test { + public: + void SetUp() { pool_ = default_memory_pool(); } + + protected: + MemoryPool* pool_; +}; + +namespace compute { + +TEST(TestArray, TestTemporalcomponentExtraction) { + const char* json = + R"(["1970-01-01T10:10:59","2000-02-29T23:23:23","3989-07-14T18:04:01","1900-02-28T07:59:20"])"; + const char* year = "[1970, 2000, 3989, 1900]"; + const char* month = "[1, 2, 7, 2]"; + const char* day = "[1, 29, 14, 28]"; + const char* day_of_year = "[0, 59, 194, 58]"; + const char* week = "[1, 9, 28, 9]"; + const char* quarter = "[1, 1, 3, 1]"; + const char* day_of_week = "[4, 2, 5, 3]"; + const char* hour = "[10, 23, 18, 7]"; + const char* minute = "[10, 23, 4, 59]"; + const char* second = "[59, 23, 1, 20]"; + + CheckScalarUnary("year", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), year)); + CheckScalarUnary("month", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), month)); + CheckScalarUnary("day", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), day)); + CheckScalarUnary("day_of_year", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), day_of_year)); + 
CheckScalarUnary("week", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), week)); + CheckScalarUnary("quarter", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), quarter)); + CheckScalarUnary("day_of_week", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), day_of_week)); + CheckScalarUnary("hour", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), hour)); + CheckScalarUnary("minute", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), minute)); + CheckScalarUnary("second", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), + ArrayFromJSON(int64(), second)); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index 1d713b96e1e..673802f99b0 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -126,6 +126,7 @@ static std::unique_ptr CreateBuiltInRegistry() { RegisterScalarValidity(registry.get()); RegisterScalarFillNull(registry.get()); RegisterScalarIfElse(registry.get()); + RegisterScalarTemporal(registry.get()); // Vector functions RegisterVectorHash(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index f97553af4b1..68e0f2207f1 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -35,6 +35,7 @@ void RegisterScalarStringAscii(FunctionRegistry* registry); void RegisterScalarValidity(FunctionRegistry* registry); void RegisterScalarFillNull(FunctionRegistry* registry); void RegisterScalarIfElse(FunctionRegistry* registry); +void RegisterScalarTemporal(FunctionRegistry* registry); // Vector functions void RegisterVectorHash(FunctionRegistry* registry); From 29819131d329f8e75d6aa40b0c4394b4c1142ecf Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 5 May 2021 16:03:13 +0200 Subject: [PATCH 02/17] Make kernels timezone 
aware. --- cpp/src/arrow/compute/api_scalar.cc | 4 +- cpp/src/arrow/compute/api_scalar.h | 33 ++ .../arrow/compute/kernels/scalar_temporal.cc | 401 +++++++++++++++--- .../compute/kernels/scalar_temporal_test.cc | 245 +++++++++-- 4 files changed, 582 insertions(+), 101 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 8bcd2b8e54b..bd60639e3c6 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -185,7 +185,9 @@ SCALAR_EAGER_UNARY(DayOfWeek, "day_of_week") SCALAR_EAGER_UNARY(Hour, "hour") SCALAR_EAGER_UNARY(Minute, "minute") SCALAR_EAGER_UNARY(Second, "second") ->>>>>>> e10c71612... First commit. +SCALAR_EAGER_UNARY(Millisecond, "millisecond") +SCALAR_EAGER_UNARY(Microsecond, "microsecond") +SCALAR_EAGER_UNARY(Nanosecond, "nanosecond") } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 53167dcadea..54b94011f96 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -628,5 +628,38 @@ Result Minute(const Datum& values, ExecContext* ctx = NULLPTR); ARROW_EXPORT Result Second(const Datum& values, ExecContext* ctx = NULLPTR); +/// \brief Millisecond returns milliseconds value for each element of `values` +/// +/// \param[in] values input to extract milliseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Millisecond(const Datum& values, ExecContext* ctx = NULLPTR); + +/// \brief Microsecond returns microseconds value for each element of `values` +/// +/// \param[in] values input to extract microseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Microsecond(const Datum& values, ExecContext* ctx = 
NULLPTR); + +/// \brief Nanosecond returns nanoseconds value for each element of `values` +/// +/// \param[in] values input to extract nanoseconds from +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 4.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Result Nanosecond(const Datum& values, ExecContext* ctx = NULLPTR); + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 0a76ca4d67d..a45c7043795 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -22,43 +22,181 @@ namespace arrow { namespace compute { -namespace { +namespace internal { -using TimePoint = - std::chrono::time_point; +using arrow_vendored::date::days; +using arrow_vendored::date::floor; +using arrow_vendored::date::hh_mm_ss; +using arrow_vendored::date::local_days; +using arrow_vendored::date::locate_zone; +using arrow_vendored::date::sys_days; +using arrow_vendored::date::sys_time; +using arrow_vendored::date::trunc; +using arrow_vendored::date::weeks; +using arrow_vendored::date::year_month_day; +using arrow_vendored::date::years; +using arrow_vendored::date::literals::dec; +using arrow_vendored::date::literals::jan; +using arrow_vendored::date::literals::last; +using arrow_vendored::date::literals::mon; +using arrow_vendored::date::literals::thu; -inline arrow_vendored::date::year_month_day get_year_month_day(const int64_t in_data) { - std::chrono::seconds since_epoch{in_data}; - return arrow_vendored::date::sys_days{ - arrow_vendored::date::floor(since_epoch)}; +template +inline year_month_day ymd_caster_template(const int64_t data) { + return year_month_day(floor(sys_time(Duration{data}))); } template -inline arrow_vendored::date::hh_mm_ss get_time_of_day(const int64_t in_data) { - std::chrono::seconds since_epoch{in_data}; - arrow_vendored::date::sys_days 
timepoint_days{ - arrow_vendored::date::floor(since_epoch)}; - std::chrono::seconds since_midnight = since_epoch - timepoint_days.time_since_epoch(); - return arrow_vendored::date::make_time(since_midnight); +inline std::function ymd_caster_zoned_template( + const std::string timezone) { + static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); + return [](const int64_t data) { + return year_month_day(floor(tz->to_local(sys_time(Duration{data})))); + }; } -inline unsigned day_of_year(const int64_t in_data) { - // Based on - // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1021 - const auto t2 = arrow_vendored::date::sys_days{ - arrow_vendored::date::floor( - std::chrono::seconds{in_data})}; - const auto t1 = arrow_vendored::date::year_month_day(t2).year() / - arrow_vendored::date::month(1) / arrow_vendored::date::day(1); - const auto since_new_year = t2 - arrow_vendored::date::sys_days(t1); - return static_cast(since_new_year.count()); +inline std::function make_ymd_caster( + const std::shared_ptr type) { + const auto ts_type = std::static_pointer_cast(type); + const TimeUnit::type unit = ts_type->unit(); + const std::string timezone = ts_type->timezone(); + + if (timezone.empty()) { + switch (unit) { + case TimeUnit::SECOND: + return ymd_caster_template; + case TimeUnit::MILLI: + return ymd_caster_template; + case TimeUnit::MICRO: + return ymd_caster_template; + case TimeUnit::NANO: + return ymd_caster_template; + } + } else { + switch (unit) { + case TimeUnit::SECOND: + return ymd_caster_zoned_template(timezone); + case TimeUnit::MILLI: + return ymd_caster_zoned_template(timezone); + case TimeUnit::MICRO: + return ymd_caster_zoned_template(timezone); + case TimeUnit::NANO: + return ymd_caster_zoned_template(timezone); + } + } + return ymd_caster_template; } -inline unsigned week(const int64_t in_data) { +template +inline hh_mm_ss hhmmss_caster_template(const int64_t data) { + 
DurationIn t = DurationIn{data}; + return hh_mm_ss( + std::chrono::duration_cast(t - floor(t))); +} + +template +inline std::function(const int64_t)> hhmmss_caster_zoned_template( + const std::string timezone) { + static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); + return [](const int64_t data) { + const auto z = sys_time(DurationIn{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return hh_mm_ss( + std::chrono::duration_cast(l - floor(l))); + }; +} + +template +inline std::function(const int64_t)> make_hhmmss_caster( + const std::shared_ptr type) { + const auto ts_type = std::static_pointer_cast(type); + const TimeUnit::type unit = ts_type->unit(); + const std::string timezone = ts_type->timezone(); + + if (timezone.empty()) { + switch (unit) { + case TimeUnit::SECOND: + return hhmmss_caster_template; + case TimeUnit::MILLI: + return hhmmss_caster_template; + case TimeUnit::MICRO: + return hhmmss_caster_template; + case TimeUnit::NANO: + return hhmmss_caster_template; + } + } else { + switch (unit) { + case TimeUnit::SECOND: + return hhmmss_caster_zoned_template(timezone); + case TimeUnit::MILLI: + return hhmmss_caster_zoned_template( + timezone); + case TimeUnit::MICRO: + return hhmmss_caster_zoned_template( + timezone); + case TimeUnit::NANO: + return hhmmss_caster_zoned_template(timezone); + } + } + return hhmmss_caster_template; +} + +template +inline unsigned day_of_year_caster_template(const int64_t data) { + const auto sd = sys_days{floor(Duration{data})}; + const auto y = year_month_day(sd).year(); + return static_cast((sd - sys_days(y / jan / 0)).count()); +} + +template +inline std::function day_of_year_zoned_caster_template( + const std::string timezone) { + static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); + return [](const int64_t data) { + auto ld = + year_month_day(floor(tz->to_local(sys_time(Duration{data})))); + return static_cast( + (local_days(ld) - local_days(ld.year() / jan / 1) + 
days{1}).count()); + }; +} + +inline std::function get_day_of_year_caster( + const std::shared_ptr type) { + const auto ts_type = std::static_pointer_cast(type); + const TimeUnit::type unit = ts_type->unit(); + const std::string timezone = ts_type->timezone(); + + if (timezone.empty()) { + switch (unit) { + case TimeUnit::SECOND: + return day_of_year_caster_template; + case TimeUnit::MILLI: + return day_of_year_caster_template; + case TimeUnit::MICRO: + return day_of_year_caster_template; + case TimeUnit::NANO: + return day_of_year_caster_template; + } + } else { + switch (unit) { + case TimeUnit::SECOND: + return day_of_year_zoned_caster_template(timezone); + case TimeUnit::MILLI: + return day_of_year_zoned_caster_template(timezone); + case TimeUnit::MICRO: + return day_of_year_zoned_caster_template(timezone); + case TimeUnit::NANO: + return day_of_year_zoned_caster_template(timezone); + } + } + return day_of_year_caster_template; +} + +template +inline unsigned week_caster_template(const int64_t data) { // Based on // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 - using namespace arrow_vendored::date; - const auto dp = sys_days{floor(std::chrono::seconds{in_data})}; + const auto dp = sys_days{floor(Duration{data})}; auto y = year_month_day{dp + days{3}}.year(); auto start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); if (dp < start) { @@ -68,6 +206,54 @@ inline unsigned week(const int64_t in_data) { return static_cast(trunc(dp - start).count() + 1); } +template +inline std::function week_zoned_caster_template( + const std::string timezone) { + static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); + return [](const int64_t data) { + const auto ld = floor(tz->to_local(sys_time(Duration{data}))); + auto y = year_month_day{ld + days{3}}.year(); + auto start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu); + if (ld < start) { + --y; + start = 
local_days((y - years{1}) / dec / thu[last]) + (mon - thu); + } + return static_cast(trunc(local_days(ld) - start).count() + 1); + }; +} + +inline std::function make_week_caster( + const std::shared_ptr type) { + const auto ts_type = std::static_pointer_cast(type); + const TimeUnit::type unit = ts_type->unit(); + const std::string timezone = ts_type->timezone(); + + if (timezone.empty()) { + switch (unit) { + case TimeUnit::SECOND: + return week_caster_template; + case TimeUnit::MILLI: + return week_caster_template; + case TimeUnit::MICRO: + return week_caster_template; + case TimeUnit::NANO: + return week_caster_template; + } + } else { + switch (unit) { + case TimeUnit::SECOND: + return week_zoned_caster_template(timezone); + case TimeUnit::MILLI: + return week_zoned_caster_template(timezone); + case TimeUnit::MICRO: + return week_zoned_caster_template(timezone); + case TimeUnit::NANO: + return week_zoned_caster_template(timezone); + } + } + return day_of_year_caster_template; +} + // ---------------------------------------------------------------------- // Extract year from timestamp @@ -75,16 +261,17 @@ template struct Year { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = - static_cast(get_year_month_day(in_data).year()); + auto ymd_caster = make_ymd_caster(in.type); + checked_cast(out)->value = static_cast(ymd_caster(in_data).year()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto ymd_caster = make_ymd_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(get_year_month_day(in_data[i]).year()); + out_data[i] = static_cast(ymd_caster(in_data[i]).year()); } return Status::OK(); } @@ -97,16 +284,18 @@ template struct Month { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* 
out) { const auto& in_data = internal::UnboxScalar::Unbox(in); + auto ymd_caster = make_ymd_caster(in.type); checked_cast(out)->value = - static_cast(get_year_month_day(in_data).month()); + static_cast(ymd_caster(in_data).month()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto ymd_caster = make_ymd_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(get_year_month_day(in_data[i]).month()); + out_data[i] = static_cast(ymd_caster(in_data[i]).month()); } return Status::OK(); } @@ -118,17 +307,19 @@ struct Month { template struct Day { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); + const int64_t& in_data = internal::UnboxScalar::Unbox(in); + auto ymd_caster = make_ymd_caster(in.type); checked_cast(out)->value = - static_cast(get_year_month_day(in_data).day()); + static_cast(ymd_caster(in_data).day()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto ymd_caster = make_ymd_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(get_year_month_day(in_data[i]).day()); + out_data[i] = static_cast(ymd_caster(in_data[i]).day()); } return Status::OK(); } @@ -141,15 +332,17 @@ template struct Week { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = week(in_data); + auto week_caster = make_week_caster(in.type); + checked_cast(out)->value = week_caster(in_data); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto week_caster = 
make_week_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = week(in_data[i]); + out_data[i] = week_caster(in_data[i]); } return Status::OK(); } @@ -162,16 +355,18 @@ template struct Quarter { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); + auto ymd_caster = make_ymd_caster(in.type); checked_cast(out)->value = - static_cast(get_year_month_day(in_data).month()) / 3 + 1; + (static_cast(ymd_caster(in_data).month()) - 1) / 3 + 1; return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto ymd_caster = make_ymd_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(get_year_month_day(in_data[i]).month()) / 3 + 1; + out_data[i] = (static_cast(ymd_caster(in_data[i]).month()) - 1) / 3 + 1; } return Status::OK(); } @@ -184,15 +379,17 @@ template struct DayOfYear { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = day_of_year(in_data); + auto day_of_year_caster = get_day_of_year_caster(in.type); + checked_cast(out)->value = day_of_year_caster(in_data); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto day_of_year_caster = get_day_of_year_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = day_of_year(in_data[i]); + out_data[i] = day_of_year_caster(in_data[i]); } return Status::OK(); } @@ -205,17 +402,19 @@ template struct DayOfWeek { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); + auto ymd_caster = make_ymd_caster(in.type); checked_cast(out)->value = static_cast( - 
arrow_vendored::date::weekday(get_year_month_day(in_data)).iso_encoding()); + arrow_vendored::date::weekday(ymd_caster(in_data)).iso_encoding()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto ymd_caster = make_ymd_caster(in.type); for (int64_t i = 0; i < in.length; i++) { out_data[i] = static_cast( - arrow_vendored::date::weekday(get_year_month_day(in_data[i])).iso_encoding()); + arrow_vendored::date::weekday(ymd_caster(in_data[i])).iso_encoding()); } return Status::OK(); } @@ -228,17 +427,18 @@ template struct Hour { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = static_cast( - get_time_of_day(in_data).hours().count()); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).hours().count()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast( - get_time_of_day(in_data[i]).hours().count()); + out_data[i] = static_cast(hhmmss_caster(in_data[i]).hours().count()); } return Status::OK(); } @@ -251,17 +451,18 @@ template struct Minute { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = static_cast( - get_time_of_day(in_data).minutes().count()); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).minutes().count()); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto 
out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast( - get_time_of_day(in_data[i]).minutes().count()); + out_data[i] = static_cast(hhmmss_caster(in_data[i]).minutes().count()); } return Status::OK(); } @@ -274,17 +475,93 @@ template struct Second { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { const auto& in_data = internal::UnboxScalar::Unbox(in); - checked_cast(out)->value = static_cast( - get_time_of_day(in_data).seconds().count()); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).seconds().count()); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = static_cast(hhmmss_caster(in_data[i]).seconds().count()); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract milliseconds from timestamp + +template +struct Millisecond { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).subseconds().count() % 1000); return Status::OK(); } static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { auto in_data = in.GetValues(1); auto out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast( - get_time_of_day(in_data[i]).seconds().count()); + out_data[i] = + static_cast(hhmmss_caster(in_data[i]).subseconds().count() % 1000); + } + return Status::OK(); + } 
+}; + +// ---------------------------------------------------------------------- +// Extract microseconds from timestamp + +template +struct Microsecond { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).subseconds().count() % 1000); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = + static_cast(hhmmss_caster(in_data[i]).subseconds().count() % 1000); + } + return Status::OK(); + } +}; + +// ---------------------------------------------------------------------- +// Extract nanoseconds from timestamp + +template +struct Nanosecond { + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + const auto& in_data = internal::UnboxScalar::Unbox(in); + auto hhmmss_caster = make_hhmmss_caster(in.type); + checked_cast(out)->value = + static_cast(hhmmss_caster(in_data).subseconds().count() % 1000); + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + auto in_data = in.GetValues(1); + auto out_data = out->GetMutableValues(1); + auto hhmmss_caster = make_hhmmss_caster(in.type); + for (int64_t i = 0; i < in.length; i++) { + out_data[i] = + static_cast(hhmmss_caster(in_data[i]).subseconds().count() % 1000); } return Status::OK(); } @@ -315,8 +592,11 @@ const FunctionDoc day_of_week_doc{"Extract day of week values", "", {"values"}}; const FunctionDoc hour_doc{"Extract hour values", "", {"values"}}; const FunctionDoc minute_doc{"Extract minute values", "", {"values"}}; const FunctionDoc second_doc{"Extract second values", "", {"values"}}; +const FunctionDoc 
millisecond_doc{"Extract millisecond values", "", {"values"}}; +const FunctionDoc microsecond_doc{"Extract microsecond values", "", {"values"}}; +const FunctionDoc nanosecond_doc{"Extract nanosecond values", "", {"values"}}; -} // namespace +} // namespace internal namespace internal { void RegisterScalarTemporal(FunctionRegistry* registry) { @@ -339,11 +619,12 @@ void RegisterScalarTemporal(FunctionRegistry* registry) { registry); MakeFunction("second", applicator::SimpleUnary>, &second_doc, int64(), registry); - - // TODO - // millisecond - // microsecond - // nanosecond + MakeFunction("millisecond", applicator::SimpleUnary>, + &millisecond_doc, int64(), registry); + MakeFunction("microsecond", applicator::SimpleUnary>, + &microsecond_doc, int64(), registry); + MakeFunction("nanosecond", applicator::SimpleUnary>, + &nanosecond_doc, int64(), registry); } } // namespace internal diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 0b110b10bbe..b6d643880cc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -16,6 +16,7 @@ // under the License. 
#include +#include "arrow/compute/api_scalar.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/formatting.h" @@ -24,51 +25,215 @@ namespace arrow { using internal::StringFormatter; -class TestArray : public ::testing::Test { - public: - void SetUp() { pool_ = default_memory_pool(); } - - protected: - MemoryPool* pool_; -}; +class ScalarTemporalTest : public ::testing::Test {}; namespace compute { -TEST(TestArray, TestTemporalcomponentExtraction) { +TEST(ScalarTemporalTest, TestSimpleTemporalComponentExtraction) { const char* json = - R"(["1970-01-01T10:10:59","2000-02-29T23:23:23","3989-07-14T18:04:01","1900-02-28T07:59:20"])"; - const char* year = "[1970, 2000, 3989, 1900]"; - const char* month = "[1, 2, 7, 2]"; - const char* day = "[1, 29, 14, 28]"; - const char* day_of_year = "[0, 59, 194, 58]"; - const char* week = "[1, 9, 28, 9]"; - const char* quarter = "[1, 1, 3, 1]"; - const char* day_of_week = "[4, 2, 5, 3]"; - const char* hour = "[10, 23, 18, 7]"; - const char* minute = "[10, 23, 4, 59]"; - const char* second = "[59, 23, 1, 20]"; - - CheckScalarUnary("year", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), year)); - CheckScalarUnary("month", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), month)); - CheckScalarUnary("day", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), day)); - CheckScalarUnary("day_of_year", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), day_of_year)); - CheckScalarUnary("week", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), week)); - CheckScalarUnary("quarter", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), quarter)); - CheckScalarUnary("day_of_week", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), day_of_week)); - CheckScalarUnary("hour", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - 
ArrayFromJSON(int64(), hour)); - CheckScalarUnary("minute", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), minute)); - CheckScalarUnary("second", ArrayFromJSON(timestamp(TimeUnit::SECOND), json), - ArrayFromJSON(int64(), second)); + R"(["1970-01-01T00:00:59","2000-02-29T23:23:23", + "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])"; + auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND), json); + + auto year = ArrayFromJSON(int64(), "[1970, 2000, 3989, 1900, 2033]"); + auto month = ArrayFromJSON(int64(), "[1, 2, 7, 1, 5]"); + auto day = ArrayFromJSON(int64(), "[1, 29, 14, 1, 18]"); + auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 195, 1, 138]"); + auto week = ArrayFromJSON(int64(), "[1, 9, 28, 1, 20]"); + auto quarter = ArrayFromJSON(int64(), "[1, 1, 3, 1, 2]"); + auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 5, 1, 3]"); + auto hour = ArrayFromJSON(int64(), "[0, 23, 18, 1, 3]"); + auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]"); + auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]"); + auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + + ASSERT_OK_AND_ASSIGN(Datum actual_year, Year(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_month, Month(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_day, Day(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_day_of_year, DayOfYear(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_week, Week(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_quarter, Quarter(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_day_of_week, DayOfWeek(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_hour, Hour(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_minute, Minute(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_second, Second(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_millisecond, 
Millisecond(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_microsecond, Microsecond(time_points)); + ASSERT_OK_AND_ASSIGN(Datum actual_nanosecond, Nanosecond(time_points)); + + ASSERT_EQ(actual_year, year); + ASSERT_EQ(actual_month, month); + ASSERT_EQ(actual_day, day); + ASSERT_EQ(actual_day_of_year, day_of_year); + ASSERT_EQ(actual_week, week); + ASSERT_EQ(actual_quarter, quarter); + ASSERT_EQ(actual_day_of_week, day_of_week); + ASSERT_EQ(actual_hour, hour); + ASSERT_EQ(actual_minute, minute); + ASSERT_EQ(actual_second, second); + ASSERT_EQ(actual_millisecond, millisecond); + ASSERT_EQ(actual_microsecond, microsecond); + ASSERT_EQ(actual_nanosecond, nanosecond); + + CheckScalarUnary("year", time_points, year); + CheckScalarUnary("month", time_points, month); + CheckScalarUnary("day", time_points, day); + CheckScalarUnary("day_of_year", time_points, day_of_year); + CheckScalarUnary("week", time_points, week); + CheckScalarUnary("quarter", time_points, quarter); + CheckScalarUnary("day_of_week", time_points, day_of_week); + CheckScalarUnary("hour", time_points, hour); + CheckScalarUnary("minute", time_points, minute); + CheckScalarUnary("second", time_points, second); + CheckScalarUnary("millisecond", time_points, millisecond); + CheckScalarUnary("microsecond", time_points, microsecond); + CheckScalarUnary("nanosecond", time_points, nanosecond); } +TEST(ScalarTemporalTest, TestTemporalComponentExtraction) { + const char* json_second = "[59, 951866603, -2208981640, 2000000000]"; + const char* json_milli = "[59000, 951866603000, -2208981640000, 2000000000000]"; + const char* json_micro = + "[59000000, 951866603000000, -2208981640000000, 2000000000000000]"; + const char* json_nano = + "[59000000000, 951866603000000000, -2208981640000000000, 2000000000000000000]"; + + auto time_points_second = ArrayFromJSON(timestamp(TimeUnit::SECOND), json_second); + auto time_points_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), json_milli); + auto time_points_micro = 
ArrayFromJSON(timestamp(TimeUnit::MICRO), json_micro); + auto time_points_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), json_nano); + + auto year = ArrayFromJSON(int64(), "[1970, 2000, 1900, 2033]"); + auto month = ArrayFromJSON(int64(), "[1, 2, 1, 5]"); + auto day = ArrayFromJSON(int64(), "[1, 29, 1, 18]"); + auto day_of_year = ArrayFromJSON(int64(), "[1, 60, 1, 138]"); + auto week = ArrayFromJSON(int64(), "[1, 9, 1, 20]"); + auto quarter = ArrayFromJSON(int64(), "[1, 1, 1, 2]"); + auto day_of_week = ArrayFromJSON(int64(), "[4, 2, 1, 3]"); + auto hour = ArrayFromJSON(int64(), "[0, 23, 1, 3]"); + auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]"); + auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]"); + auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + + for (auto time_points : + {time_points_second, time_points_milli, time_points_micro, time_points_nano}) { + CheckScalarUnary("year", time_points, year); + CheckScalarUnary("month", time_points, month); + CheckScalarUnary("day", time_points, day); + CheckScalarUnary("day_of_year", time_points, day_of_year); + CheckScalarUnary("week", time_points, week); + CheckScalarUnary("quarter", time_points, quarter); + CheckScalarUnary("day_of_week", time_points, day_of_week); + CheckScalarUnary("hour", time_points, hour); + CheckScalarUnary("minute", time_points, minute); + CheckScalarUnary("second", time_points, second); + CheckScalarUnary("millisecond", time_points, millisecond); + CheckScalarUnary("microsecond", time_points, microsecond); + CheckScalarUnary("nanosecond", time_points, nanosecond); + } + + std::string in = "[123, 999, 1, 31231000]"; + auto out = ArrayFromJSON(int64(), "[123, 999, 1, 0]"); + + auto tp_milli = ArrayFromJSON(timestamp(TimeUnit::MILLI), in); + auto tp_milli_zoned = ArrayFromJSON(timestamp(TimeUnit::MILLI, "Etc/GMT+2"), in); + 
CheckScalarUnary("millisecond", tp_milli, out); + CheckScalarUnary("millisecond", tp_milli, out); + + auto tp_micro = ArrayFromJSON(timestamp(TimeUnit::MICRO), in); + auto tp_micro_zoned = ArrayFromJSON(timestamp(TimeUnit::MICRO, "Etc/GMT+2"), in); + CheckScalarUnary("microsecond", tp_micro, out); + CheckScalarUnary("microsecond", tp_micro_zoned, out); + + auto tp_nano = ArrayFromJSON(timestamp(TimeUnit::NANO), in); + auto tp_nano_zoned = ArrayFromJSON(timestamp(TimeUnit::NANO, "Etc/GMT+2"), in); + CheckScalarUnary("nanosecond", tp_nano, out); + CheckScalarUnary("nanosecond", tp_nano_zoned, out); +} + +TEST(ScalarTemporalTest, TestSimpleZonedTemporalComponentExtraction) { + const char* json = + R"(["1970-01-01T00:00:59","2000-02-29T23:23:23", + "3989-07-14T18:04:01","1900-01-01T01:59:20","2033-05-18T03:33:20"])"; + auto time_points = ArrayFromJSON(timestamp(TimeUnit::SECOND, "Etc/GMT+2"), json); + + auto year = ArrayFromJSON(int64(), "[1969, 2000, 3989, 1899, 2033]"); + auto month = ArrayFromJSON(int64(), "[12, 2, 7, 12, 5]"); + auto day = ArrayFromJSON(int64(), "[31, 29, 14, 31, 18]"); + auto day_of_year = ArrayFromJSON(int64(), "[365, 60, 195, 365, 138]"); + auto week = ArrayFromJSON(int64(), "[1, 9, 28, 52, 20]"); + auto quarter = ArrayFromJSON(int64(), "[4, 1, 3, 4, 2]"); + auto day_of_week = ArrayFromJSON(int64(), "[3, 2, 5, 7, 3]"); + auto hour = ArrayFromJSON(int64(), "[22, 21, 16, 23, 1]"); + auto minute = ArrayFromJSON(int64(), "[0, 23, 4, 59, 33]"); + auto second = ArrayFromJSON(int64(), "[59, 23, 1, 20, 20]"); + auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0, 0]"); + + CheckScalarUnary("year", time_points, year); + CheckScalarUnary("month", time_points, month); + CheckScalarUnary("day", time_points, day); + CheckScalarUnary("day_of_year", time_points, day_of_year); + CheckScalarUnary("week", time_points, week); + 
CheckScalarUnary("quarter", time_points, quarter); + CheckScalarUnary("day_of_week", time_points, day_of_week); + CheckScalarUnary("hour", time_points, hour); + CheckScalarUnary("minute", time_points, minute); + CheckScalarUnary("second", time_points, second); + CheckScalarUnary("millisecond", time_points, millisecond); + CheckScalarUnary("microsecond", time_points, microsecond); + CheckScalarUnary("nanosecond", time_points, nanosecond); +} + +TEST(ScalarTemporalTest, TestZonedTemporalComponentExtraction) { + std::string timezone = "Etc/GMT+2"; + const char* json_second = "[59, 951866603, -2208981640, 2000000000]"; + const char* json_milli = "[59000, 951866603000, -2208981640000, 2000000000000]"; + const char* json_micro = + "[59000000, 951866603000000, -2208981640000000, 2000000000000000]"; + const char* json_nano = + "[59000000000, 951866603000000000, -2208981640000000000, 2000000000000000000]"; + + auto year = ArrayFromJSON(int64(), "[1969, 2000, 1899, 2033]"); + auto month = ArrayFromJSON(int64(), "[12, 2, 12, 5]"); + auto day = ArrayFromJSON(int64(), "[31, 29, 31, 18]"); + auto day_of_year = ArrayFromJSON(int64(), "[365, 60, 365, 138]"); + auto week = ArrayFromJSON(int64(), "[1, 9, 52, 20]"); + auto quarter = ArrayFromJSON(int64(), "[4, 1, 4, 2]"); + auto day_of_week = ArrayFromJSON(int64(), "[3, 2, 7, 3]"); + auto hour = ArrayFromJSON(int64(), "[22, 21, 23, 1]"); + auto minute = ArrayFromJSON(int64(), "[0, 23, 59, 33]"); + auto second = ArrayFromJSON(int64(), "[59, 23, 20, 20]"); + auto millisecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + auto microsecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + auto nanosecond = ArrayFromJSON(int64(), "[0, 0, 0, 0]"); + + auto all_time_points = { + ArrayFromJSON(timestamp(TimeUnit::SECOND, timezone), json_second), + ArrayFromJSON(timestamp(TimeUnit::MILLI, timezone), json_milli), + ArrayFromJSON(timestamp(TimeUnit::MICRO, timezone), json_micro), + ArrayFromJSON(timestamp(TimeUnit::NANO, timezone), json_nano)}; + + for 
(auto time_points : all_time_points) { + CheckScalarUnary("year", time_points, year); + CheckScalarUnary("month", time_points, month); + CheckScalarUnary("day", time_points, day); + CheckScalarUnary("day_of_year", time_points, day_of_year); + CheckScalarUnary("week", time_points, week); + CheckScalarUnary("quarter", time_points, quarter); + CheckScalarUnary("day_of_week", time_points, day_of_week); + CheckScalarUnary("hour", time_points, hour); + CheckScalarUnary("minute", time_points, minute); + CheckScalarUnary("second", time_points, second); + CheckScalarUnary("millisecond", time_points, millisecond); + CheckScalarUnary("microsecond", time_points, microsecond); + CheckScalarUnary("nanosecond", time_points, nanosecond); + } +} } // namespace compute } // namespace arrow From 77e5a033e97f10c1f99c3a8531d6f18943b4eaff Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 12 May 2021 23:21:39 +0200 Subject: [PATCH 03/17] Drying. --- .../arrow/compute/kernels/scalar_temporal.cc | 713 +++++++----------- .../compute/kernels/scalar_temporal_test.cc | 329 ++++---- 2 files changed, 397 insertions(+), 645 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index a45c7043795..9849574814d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -29,9 +29,12 @@ using arrow_vendored::date::floor; using arrow_vendored::date::hh_mm_ss; using arrow_vendored::date::local_days; using arrow_vendored::date::locate_zone; +using arrow_vendored::date::make_zoned; using arrow_vendored::date::sys_days; using arrow_vendored::date::sys_time; +using arrow_vendored::date::time_zone; using arrow_vendored::date::trunc; +using arrow_vendored::date::weekday; using arrow_vendored::date::weeks; using arrow_vendored::date::year_month_day; using arrow_vendored::date::years; @@ -41,539 +44,335 @@ using arrow_vendored::date::literals::last; using 
arrow_vendored::date::literals::mon; using arrow_vendored::date::literals::thu; -template -inline year_month_day ymd_caster_template(const int64_t data) { - return year_month_day(floor(sys_time(Duration{data}))); -} - -template -inline std::function ymd_caster_zoned_template( - const std::string timezone) { - static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); - return [](const int64_t data) { - return year_month_day(floor(tz->to_local(sys_time(Duration{data})))); - }; -} - -inline std::function make_ymd_caster( - const std::shared_ptr type) { - const auto ts_type = std::static_pointer_cast(type); - const TimeUnit::type unit = ts_type->unit(); - const std::string timezone = ts_type->timezone(); - - if (timezone.empty()) { - switch (unit) { - case TimeUnit::SECOND: - return ymd_caster_template; - case TimeUnit::MILLI: - return ymd_caster_template; - case TimeUnit::MICRO: - return ymd_caster_template; - case TimeUnit::NANO: - return ymd_caster_template; - } - } else { - switch (unit) { - case TimeUnit::SECOND: - return ymd_caster_zoned_template(timezone); - case TimeUnit::MILLI: - return ymd_caster_zoned_template(timezone); - case TimeUnit::MICRO: - return ymd_caster_zoned_template(timezone); - case TimeUnit::NANO: - return ymd_caster_zoned_template(timezone); - } - } - return ymd_caster_template; -} - -template -inline hh_mm_ss hhmmss_caster_template(const int64_t data) { - DurationIn t = DurationIn{data}; - return hh_mm_ss( - std::chrono::duration_cast(t - floor(t))); -} - -template -inline std::function(const int64_t)> hhmmss_caster_zoned_template( - const std::string timezone) { - static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); - return [](const int64_t data) { - const auto z = sys_time(DurationIn{data}); - const auto l = make_zoned(tz, z).get_local_time(); - return hh_mm_ss( - std::chrono::duration_cast(l - floor(l))); - }; -} - -template -inline std::function(const int64_t)> make_hhmmss_caster( - const 
std::shared_ptr type) { - const auto ts_type = std::static_pointer_cast(type); - const TimeUnit::type unit = ts_type->unit(); - const std::string timezone = ts_type->timezone(); - - if (timezone.empty()) { - switch (unit) { - case TimeUnit::SECOND: - return hhmmss_caster_template; - case TimeUnit::MILLI: - return hhmmss_caster_template; - case TimeUnit::MICRO: - return hhmmss_caster_template; - case TimeUnit::NANO: - return hhmmss_caster_template; - } - } else { - switch (unit) { - case TimeUnit::SECOND: - return hhmmss_caster_zoned_template(timezone); - case TimeUnit::MILLI: - return hhmmss_caster_zoned_template( - timezone); - case TimeUnit::MICRO: - return hhmmss_caster_zoned_template( - timezone); - case TimeUnit::NANO: - return hhmmss_caster_zoned_template(timezone); - } - } - return hhmmss_caster_template; -} - -template -inline unsigned day_of_year_caster_template(const int64_t data) { - const auto sd = sys_days{floor(Duration{data})}; - const auto y = year_month_day(sd).year(); - return static_cast((sd - sys_days(y / jan / 0)).count()); -} - -template -inline std::function day_of_year_zoned_caster_template( - const std::string timezone) { - static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); - return [](const int64_t data) { - auto ld = - year_month_day(floor(tz->to_local(sys_time(Duration{data})))); - return static_cast( - (local_days(ld) - local_days(ld.year() / jan / 1) + days{1}).count()); - }; -} - -inline std::function get_day_of_year_caster( - const std::shared_ptr type) { - const auto ts_type = std::static_pointer_cast(type); - const TimeUnit::type unit = ts_type->unit(); - const std::string timezone = ts_type->timezone(); - - if (timezone.empty()) { - switch (unit) { - case TimeUnit::SECOND: - return day_of_year_caster_template; - case TimeUnit::MILLI: - return day_of_year_caster_template; - case TimeUnit::MICRO: - return day_of_year_caster_template; - case TimeUnit::NANO: - return day_of_year_caster_template; - } - } else { - 
switch (unit) { - case TimeUnit::SECOND: - return day_of_year_zoned_caster_template(timezone); - case TimeUnit::MILLI: - return day_of_year_zoned_caster_template(timezone); - case TimeUnit::MICRO: - return day_of_year_zoned_caster_template(timezone); - case TimeUnit::NANO: - return day_of_year_zoned_caster_template(timezone); - } - } - return day_of_year_caster_template; -} - -template -inline unsigned week_caster_template(const int64_t data) { - // Based on - // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 - const auto dp = sys_days{floor(Duration{data})}; - auto y = year_month_day{dp + days{3}}.year(); - auto start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); - if (dp < start) { - --y; - start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); - } - return static_cast(trunc(dp - start).count() + 1); -} - -template -inline std::function week_zoned_caster_template( - const std::string timezone) { - static const arrow_vendored::date::time_zone* tz = locate_zone(timezone); - return [](const int64_t data) { - const auto ld = floor(tz->to_local(sys_time(Duration{data}))); - auto y = year_month_day{ld + days{3}}.year(); - auto start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu); - if (ld < start) { - --y; - start = local_days((y - years{1}) / dec / thu[last]) + (mon - thu); - } - return static_cast(trunc(local_days(ld) - start).count() + 1); - }; -} - -inline std::function make_week_caster( - const std::shared_ptr type) { - const auto ts_type = std::static_pointer_cast(type); - const TimeUnit::type unit = ts_type->unit(); - const std::string timezone = ts_type->timezone(); - - if (timezone.empty()) { - switch (unit) { - case TimeUnit::SECOND: - return week_caster_template; - case TimeUnit::MILLI: - return week_caster_template; - case TimeUnit::MICRO: - return week_caster_template; - case TimeUnit::NANO: - return week_caster_template; - } - } else { - switch (unit) 
{ - case TimeUnit::SECOND: - return week_zoned_caster_template(timezone); - case TimeUnit::MILLI: - return week_zoned_caster_template(timezone); - case TimeUnit::MICRO: - return week_zoned_caster_template(timezone); - case TimeUnit::NANO: - return week_zoned_caster_template(timezone); - } - } - return day_of_year_caster_template; -} - // ---------------------------------------------------------------------- // Extract year from timestamp -template -struct Year { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto ymd_caster = make_ymd_caster(in.type); - checked_cast(out)->value = static_cast(ymd_caster(in_data).year()); - return Status::OK(); +template +struct year { + inline const int32_t operator()(const int64_t data) const { + return static_cast( + year_month_day(floor(sys_time(Duration{data}))).year()); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto ymd_caster = make_ymd_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(ymd_caster(in_data[i]).year()); - } - return Status::OK(); + inline const int32_t operator()(const int64_t data, const time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + return static_cast(year_month_day(floor(zt)).year()); } }; // ---------------------------------------------------------------------- // Extract month from timestamp -template -struct Month { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto ymd_caster = make_ymd_caster(in.type); - checked_cast(out)->value = - static_cast(ymd_caster(in_data).month()); - return Status::OK(); +template +struct month { + inline const uint32_t operator()(const int64_t data) const { + return static_cast( + 
year_month_day(floor(sys_time(Duration{data}))).month()); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto ymd_caster = make_ymd_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(ymd_caster(in_data[i]).month()); - } - return Status::OK(); + inline const uint32_t operator()(const int64_t data, const time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + return static_cast(year_month_day(floor(zt)).month()); } }; // ---------------------------------------------------------------------- // Extract day from timestamp -template -struct Day { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const int64_t& in_data = internal::UnboxScalar::Unbox(in); - auto ymd_caster = make_ymd_caster(in.type); - checked_cast(out)->value = - static_cast(ymd_caster(in_data).day()); - return Status::OK(); +template +struct day { + inline const uint32_t operator()(const int64_t data) const { + return static_cast( + year_month_day(floor(sys_time(Duration{data}))).day()); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto ymd_caster = make_ymd_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(ymd_caster(in_data[i]).day()); - } - return Status::OK(); + inline const uint32_t operator()(const int64_t data, const time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + return static_cast(year_month_day(floor(zt)).day()); } }; // ---------------------------------------------------------------------- -// Extract week from timestamp +// Extract day of week from timestamp -template -struct Week { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data 
= internal::UnboxScalar::Unbox(in); - auto week_caster = make_week_caster(in.type); - checked_cast(out)->value = week_caster(in_data); - return Status::OK(); +template +struct day_of_week { + inline const uint8_t operator()(const int64_t data) const { + return weekday(year_month_day(floor(sys_time(Duration{data})))) + .iso_encoding(); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto week_caster = make_week_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = week_caster(in_data[i]); - } - return Status::OK(); + inline const uint8_t operator()(const int64_t data, const time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + return weekday(year_month_day(floor(zt))).iso_encoding(); } }; // ---------------------------------------------------------------------- -// Extract quarter from timestamp +// Extract day of year from timestamp -template -struct Quarter { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto ymd_caster = make_ymd_caster(in.type); - checked_cast(out)->value = - (static_cast(ymd_caster(in_data).month()) - 1) / 3 + 1; - return Status::OK(); +template +struct day_of_year { + inline const uint16_t operator()(const int64_t data) const { + const auto sd = sys_days{floor(Duration{data})}; + const auto y = year_month_day(sd).year(); + return (sd - sys_days(y / jan / 0)).count(); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto ymd_caster = make_ymd_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = (static_cast(ymd_caster(in_data[i]).month()) - 1) / 3 + 1; - } - return Status::OK(); + inline const uint16_t operator()(const int64_t data, const 
time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + auto ld = year_month_day(floor(zt)); + return (local_days(ld) - local_days(ld.year() / jan / 1) + days{1}).count(); } }; // ---------------------------------------------------------------------- -// Extract day of year from timestamp +// Extract week from timestamp -template -struct DayOfYear { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto day_of_year_caster = get_day_of_year_caster(in.type); - checked_cast(out)->value = day_of_year_caster(in_data); - return Status::OK(); +template +struct week { + // Based on + // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 + inline const uint8_t operator()(const int64_t data) const { + const auto dp = sys_days{floor(Duration{data})}; + auto y = year_month_day{dp + days{3}}.year(); + auto start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); + if (dp < start) { + --y; + start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); + } + return trunc(dp - start).count() + 1; } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto day_of_year_caster = get_day_of_year_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = day_of_year_caster(in_data[i]); + inline const uint8_t operator()(const int64_t data, const time_zone* tz) const { + const auto dp = sys_days{ + floor(make_zoned(tz, sys_time(Duration{data})).get_sys_time())}; + auto y = year_month_day{dp + days{3}}.year(); + auto start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); + if (dp < start) { + --y; + start = sys_days((y - years{1}) / dec / thu[last]) + (mon - thu); } - return Status::OK(); + return trunc(dp - start).count() + 1; } }; // 
---------------------------------------------------------------------- -// Extract day of week from timestamp +// Extract day of quarter from timestamp -template -struct DayOfWeek { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto ymd_caster = make_ymd_caster(in.type); - checked_cast(out)->value = static_cast( - arrow_vendored::date::weekday(ymd_caster(in_data)).iso_encoding()); - return Status::OK(); +template +struct quarter { + inline const uint32_t operator()(const int64_t data) const { + const auto ymd = year_month_day(floor(sys_time(Duration{data}))); + return (static_cast(ymd.month()) - 1) / 3 + 1; } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto ymd_caster = make_ymd_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast( - arrow_vendored::date::weekday(ymd_caster(in_data[i])).iso_encoding()); - } - return Status::OK(); + inline const uint32_t operator()(const int64_t data, const time_zone* tz) const { + auto zt = make_zoned(tz, sys_time(Duration{data})).get_sys_time(); + const auto ymd = year_month_day(floor(zt)); + return (static_cast(ymd.month()) - 1) / 3 + 1; } }; // ---------------------------------------------------------------------- // Extract hour from timestamp -template -struct Hour { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto hhmmss_caster = make_hhmmss_caster(in.type); - checked_cast(out)->value = - static_cast(hhmmss_caster(in_data).hours().count()); - return Status::OK(); +template +struct hour { + inline uint8_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return hh_mm_ss(t - floor(t)).hours().count(); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* 
out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto hhmmss_caster = make_hhmmss_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(hhmmss_caster(in_data[i]).hours().count()); - } - return Status::OK(); + inline uint8_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return hh_mm_ss(std::chrono::duration_cast(l - floor(l))) + .hours() + .count(); } }; // ---------------------------------------------------------------------- // Extract minute from timestamp -template -struct Minute { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto hhmmss_caster = make_hhmmss_caster(in.type); - checked_cast(out)->value = - static_cast(hhmmss_caster(in_data).minutes().count()); - return Status::OK(); +template +struct minute { + inline uint8_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return hh_mm_ss(t - floor(t)).minutes().count(); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto hhmmss_caster = make_hhmmss_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(hhmmss_caster(in_data[i]).minutes().count()); - } - return Status::OK(); + inline uint8_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return hh_mm_ss(std::chrono::duration_cast(l - floor(l))) + .minutes() + .count(); } }; // ---------------------------------------------------------------------- // Extract second from timestamp -template -struct Second { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = 
internal::UnboxScalar::Unbox(in); - auto hhmmss_caster = make_hhmmss_caster(in.type); - checked_cast(out)->value = - static_cast(hhmmss_caster(in_data).seconds().count()); - return Status::OK(); +template +struct second { + inline uint8_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return hh_mm_ss(t - floor(t)).seconds().count(); } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto hhmmss_caster = make_hhmmss_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = static_cast(hhmmss_caster(in_data[i]).seconds().count()); - } - return Status::OK(); + inline uint8_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return hh_mm_ss(std::chrono::duration_cast(l - floor(l))) + .seconds() + .count(); } }; // ---------------------------------------------------------------------- // Extract milliseconds from timestamp -template -struct Millisecond { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto hhmmss_caster = make_hhmmss_caster(in.type); - checked_cast(out)->value = - static_cast(hhmmss_caster(in_data).subseconds().count() % 1000); - return Status::OK(); +template +struct millisecond { + inline uint16_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return std::chrono::duration_cast(t - floor(t)) + .count() % + 1000; } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto hhmmss_caster = make_hhmmss_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = - static_cast(hhmmss_caster(in_data[i]).subseconds().count() % 1000); - } - return Status::OK(); + 
inline uint16_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return std::chrono::duration_cast(l - floor(l)) + .count() % + 1000; } }; // ---------------------------------------------------------------------- // Extract microseconds from timestamp -template -struct Microsecond { - static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { - const auto& in_data = internal::UnboxScalar::Unbox(in); - auto hhmmss_caster = make_hhmmss_caster(in.type); - checked_cast(out)->value = - static_cast(hhmmss_caster(in_data).subseconds().count() % 1000); - return Status::OK(); +template +struct microsecond { + inline uint16_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return std::chrono::duration_cast(t - floor(t)) + .count() % + 1000; } - - static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { - auto in_data = in.GetValues(1); - auto out_data = out->GetMutableValues(1); - auto hhmmss_caster = make_hhmmss_caster(in.type); - for (int64_t i = 0; i < in.length; i++) { - out_data[i] = - static_cast(hhmmss_caster(in_data[i]).subseconds().count() % 1000); - } - return Status::OK(); + inline uint16_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, z).get_local_time(); + return std::chrono::duration_cast(l - floor(l)) + .count() % + 1000; } }; // ---------------------------------------------------------------------- // Extract nanoseconds from timestamp -template -struct Nanosecond { +template +struct nanosecond { + inline uint16_t operator()(const int64_t data) const { + Duration t = Duration{data}; + return std::chrono::duration_cast(t - floor(t)) + .count() % + 1000; + } + inline uint16_t operator()(const int64_t data, const time_zone* tz) const { + const auto z = sys_time(Duration{data}); + const auto l = make_zoned(tz, 
z).get_local_time(); + return std::chrono::duration_cast(l - floor(l)) + .count() % + 1000; + } +}; + +template