From bbb9c9aa57930dc0ecfdef85d8fb1ae3e7ec0af7 Mon Sep 17 00:00:00 2001 From: Rok Date: Sun, 29 Aug 2021 00:06:31 +0200 Subject: [PATCH 01/11] ISOWeek kernel to Week kernel. --- cpp/src/arrow/compute/api_scalar.cc | 4 ++ cpp/src/arrow/compute/api_scalar.h | 18 ++++++- .../arrow/compute/kernels/scalar_temporal.cc | 53 ++++++++++++++++++- .../compute/kernels/scalar_temporal_test.cc | 34 ++++++++++++ python/pyarrow/tests/test_compute.py | 5 ++ r/src/compute.cpp | 13 +++++ 6 files changed, 125 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 83aaee5f0fe..4a8835e8958 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -648,6 +648,10 @@ Result AssumeTimezone(const Datum& arg, AssumeTimezoneOptions options, return CallFunction("assume_timezone", {arg}, &options, ctx); } +Result Week(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) { + return CallFunction("week", {arg}, &options, ctx); +} + Result Strftime(const Datum& arg, StrftimeOptions options, ExecContext* ctx) { return CallFunction("strftime", {arg}, &options, ctx); } diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 05e2ff30f5f..01a3fbc11b1 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -321,7 +321,7 @@ struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions { constexpr static char const kTypeName[] = "DayOfWeekOptions"; static DayOfWeekOptions Defaults() { return DayOfWeekOptions(); } - /// Number days from 1 if true and from 0 if false + /// Number from 1 if true and from 0 if false bool one_based_numbering; /// What day does the week start with (Monday=1, Sunday=7) uint32_t week_start; @@ -1018,6 +1018,22 @@ Result ISOYear(const Datum& values, ExecContext* ctx = NULLPTR); /// \note API not yet finalized ARROW_EXPORT Result ISOWeek(const Datum& values, ExecContext* ctx = NULLPTR); +/// \brief Week returns week of year number for each element of `values`. +/// First ISO week has the majority (4 or more) of its days in January. +/// Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 +/// depending on DayOfWeekOptions.one_based_numbering. +/// +/// \param[in] values input to extract week of year from +/// \param[in] options for setting numbering start +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 6.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result Week(const Datum& values, + DayOfWeekOptions options = DayOfWeekOptions(), + ExecContext* ctx = NULLPTR); + /// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for /// each element of `values`. /// ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 396eec842ae..99137601c76 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -56,6 +56,8 @@ using arrow_vendored::date::literals::jan; using arrow_vendored::date::literals::last; using arrow_vendored::date::literals::mon; using arrow_vendored::date::literals::thu; +using arrow_vendored::date::literals::wed; +using arrow_vendored::date::literals::sun; using internal::applicator::ScalarUnaryNotNull; using internal::applicator::SimpleUnary; @@ -364,7 +366,7 @@ struct ISOYear { // ---------------------------------------------------------------------- // Extract ISO week from temporal types // -// First week of an ISO year has the majority (4 or more) of it's days in January. +// First week of an ISO year has the majority (4 or more) of its days in January. // Last week of an ISO year has the year's last Thursday in it. // Based on // https://github.com/HowardHinnant/date/blob/6e921e1b1d21e84a5c82416ba7ecd98e33a436d0/include/date/iso_week.h#L1503 @@ -389,6 +391,41 @@ struct ISOWeek { Localizer localizer_; }; +template +struct Week { + explicit Week(const DayOfWeekOptions* options, Localizer&& localizer) + : localizer_(std::move(localizer)), count_offset_(options->one_based_numbering) { + if (options->week_start == 7) { + start_week_ = sun; + mid_week_ = wed; + + } else { + start_week_ = mon; + mid_week_ = thu; + } + } + + template + T Call(KernelContext*, Arg0 arg, Status*) const { + const auto t = floor(localizer_.template ConvertTimePoint(arg)); + auto y = year_month_day{t + days{3}}.year(); + + auto start = localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + + (start_week_ - mid_week_); + if (t < start) { + --y; + start = localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + + (start_week_ - mid_week_); + } + return static_cast(trunc(t - start).count() + count_offset_); + } + + Localizer localizer_; + const int count_offset_; + arrow_vendored::date::weekday start_week_; + arrow_vendored::date::weekday mid_week_; +}; + // ---------------------------------------------------------------------- // Extract quarter from temporal types @@ -1011,6 +1048,15 @@ const FunctionDoc iso_week_doc{ "cannot be found in the timezone database."), {"values"}}; +const FunctionDoc week_doc{ + "Extract week of year number", + ("First week has the majority (4 or more) of its days in January.\n" + "Year can have 52 or 53 weeks. Week numbering can start with 0 or 1 using " + "DayOfWeekOptions.one_based_numbering.\n" + "Returns an error if timestamp has a defined timezone. Null values return null."), + {"values"}, + "DayOfWeekOptions"}; + const FunctionDoc iso_calendar_doc{ "Extract (ISO year, ISO week, ISO day of week) struct", ("ISO week starts on Monday denoted by 1 and ends on Sunday denoted by 7.\n" @@ -1144,6 +1190,11 @@ void RegisterScalarTemporal(FunctionRegistry* registry) { "iso_week", {WithDates, WithTimestamps}, int64(), &iso_week_doc); DCHECK_OK(registry->AddFunction(std::move(iso_week))); + static auto default_week_options = DayOfWeekOptions(false, 1); + auto week = MakeTemporal( + "week", int64(), &week_doc, &default_week_options, DayOfWeekState::Init); + DCHECK_OK(registry->AddFunction(std::move(week))); + auto iso_calendar = MakeSimpleUnaryTemporal( "iso_calendar", {WithDates, WithTimestamps}, IsoCalendarType(), &iso_calendar_doc); DCHECK_OK(registry->AddFunction(std::move(iso_calendar))); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 85bfc203589..e482114c1d4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -122,6 +122,7 @@ class ScalarTemporalTest : public ::testing::Test { "2005, 2008, 2009, 2011, null]"; std::string iso_week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; + std::string week = "[0, 8, 51, 19, 0, 0, 0, 52, 52, 52, 0, 51, 51, 51, 0, 51, null]"; std::string quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]"; std::string hour = "[0, 23, 0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 1, null]"; @@ -178,6 +179,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) { CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); + CheckScalarUnary("week", unit, times_seconds_precision, int64(), week); CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision), iso_calendar); CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter); @@ -206,6 +208,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { auto day_of_year = "[263, 103]"; auto iso_year = "[1677, 2262]"; auto iso_week = "[38, 15]"; + auto week = "[37, 14]"; auto iso_calendar = ArrayFromJSON(iso_calendar_type, R"([{"iso_year": 1677, "iso_week": 38, "iso_day_of_week": 1}, @@ -226,6 +229,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); + CheckScalarUnary("week", unit, times, int64(), week); CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar); CheckScalarUnary("quarter", unit, times, int64(), quarter); CheckScalarUnary("hour", unit, times, int64(), hour); @@ -256,6 +260,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { "[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 2005, 2005, " "2008, 2008, 2011, null]"; auto iso_week = "[1, 9, 52, 20, 1, 1, 52, 53, 53, 53, 53, 52, 52, 52, 52, 52, null]"; + auto week = "[0, 8, 51, 19, 0, 0, 51, 52, 52, 52, 52, 51, 51, 51, 51, 51, null]"; auto iso_calendar = ArrayFromJSON(iso_calendar_type, R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 3}, @@ -285,6 +290,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { CheckScalarUnary("day_of_year", unit, times, int64(), day_of_year); CheckScalarUnary("iso_year", unit, times, int64(), iso_year); CheckScalarUnary("iso_week", unit, times, int64(), iso_week); + CheckScalarUnary("week", unit, times, int64(), week); CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times), iso_calendar); CheckScalarUnary("quarter", unit, times, int64(), quarter); CheckScalarUnary("hour", unit, times, int64(), hour); @@ -343,6 +349,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { CheckScalarUnary("day_of_year", unit, times_seconds_precision, int64(), day_of_year); CheckScalarUnary("iso_year", unit, times_seconds_precision, int64(), iso_year); CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), iso_week); + CheckScalarUnary("week", unit, times_seconds_precision, int64(), week); CheckScalarUnary("iso_calendar", ArrayFromJSON(unit, times_seconds_precision), iso_calendar); CheckScalarUnary("quarter", unit, times_seconds_precision, int64(), quarter); @@ -371,6 +378,7 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { ASSERT_RAISES(Invalid, DayOfYear(timestamp_array)); ASSERT_RAISES(Invalid, ISOYear(timestamp_array)); ASSERT_RAISES(Invalid, ISOWeek(timestamp_array)); + ASSERT_RAISES(Invalid, Week(timestamp_array)); ASSERT_RAISES(Invalid, ISOCalendar(timestamp_array)); ASSERT_RAISES(Invalid, Quarter(timestamp_array)); ASSERT_RAISES(Invalid, Hour(timestamp_array)); @@ -384,6 +392,32 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { } #endif +TEST_F(ScalarTemporalTest, Week) { + std::string week = "[0, 8, 51, 19, 0, 0, 0, 52, 52, 52, 0, 51, 51, 51, 0, 51, null]"; + std::string week_11 = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; + std::string week_01 = "[1, 9, 0, 20, 1, 53, 53, 53, 0, 0, 1, 0, 52, 52, 53, 0, null]"; + std::string week_07 = "[0, 9, 1, 20, 1, 53, 53, 52, 0, 1, 1, 1, 52, 53, 53, 1, null]"; + std::string week_17 = "[53, 9, 1, 20, 1, 1, 1, 52, 52, 1, 1, 1, 52, 53, 53, 1, null]"; + + for (auto u : internal::AllTimeUnits()) { + auto unit = timestamp(u); + auto timestamps = ArrayFromJSON(unit, times_seconds_precision); + auto options_01 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 1); + auto options_11 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 1); + auto options_07 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 7); + auto options_17 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 7); + + CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), week_11); + CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_11, + &options_11); + // CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_01, + // &options_01); CheckScalarUnary("week", unit, times_seconds_precision, int64(), + // week_07, &options_07); + CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_17, + &options_17); + } +} + TEST_F(ScalarTemporalTest, DayOfWeek) { auto unit = timestamp(TimeUnit::NANO); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 579b56b64e7..af3a53da22b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1693,6 +1693,11 @@ def _check_datetime_components(timestamps, timezone=None): assert pc.day_of_week(tsa, options=day_of_week_options).equals( pa.array(ts.dt.dayofweek + 1)) + assert pc.week(tsa, options=pc.DayOfWeekOptions( + one_based_numbering=False)).equals(pa.array(iso_week - 1)) + assert pc.week(tsa, options=pc.DayOfWeekOptions( + one_based_numbering=True)).equals(pa.array(iso_week)) + @pytest.mark.pandas def test_extract_datetime_components(): diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 952a7cc3a9a..f189ac50f4e 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -339,6 +339,19 @@ std::shared_ptr make_compute_options( cpp11::as_cpp(options["week_start"])); } + if (func_name == "week") { + using Options = arrow::compute::DayOfWeekOptions; + bool one_based_numbering = true; + int week_start = 1; + if (!Rf_isNull(options["one_based_numbering"])) { + one_based_numbering = cpp11::as_cpp(options["one_based_numbering"]); + } + if (!Rf_isNull(options["week_start"])) { + week_start = cpp11::as_cpp(options["week_start"]); + } + return std::make_shared(one_based_numbering, week_start); + } + if (func_name == "strptime") { using Options = arrow::compute::StrptimeOptions; return std::make_shared( From 9fbd48803b51b066a16a4f450b6f96dc78ada1e4 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 1 Sep 2021 17:32:53 +0200 Subject: [PATCH 02/11] Matching MySQL. --- cpp/src/arrow/compute/api_scalar.h | 2 +- .../arrow/compute/kernels/scalar_temporal.cc | 36 +++++++------ .../compute/kernels/scalar_temporal_test.cc | 52 ++++++------------- python/pyarrow/tests/test_compute.py | 2 - 4 files changed, 36 insertions(+), 56 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 01a3fbc11b1..15a968f4c0a 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1031,7 +1031,7 @@ ARROW_EXPORT Result ISOWeek(const Datum& values, ExecContext* ctx = NULLP /// \since 6.0.0 /// \note API not yet finalized ARROW_EXPORT Result Week(const Datum& values, - DayOfWeekOptions options = DayOfWeekOptions(), + DayOfWeekOptions options = DayOfWeekOptions(true, 0), ExecContext* ctx = NULLPTR); /// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 99137601c76..5150fc6c17a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -57,7 +57,6 @@ using arrow_vendored::date::literals::last; using arrow_vendored::date::literals::mon; using arrow_vendored::date::literals::thu; using arrow_vendored::date::literals::wed; -using arrow_vendored::date::literals::sun; using internal::applicator::ScalarUnaryNotNull; using internal::applicator::SimpleUnary; @@ -394,13 +393,11 @@ struct ISOWeek { template struct Week { explicit Week(const DayOfWeekOptions* options, Localizer&& localizer) - : localizer_(std::move(localizer)), count_offset_(options->one_based_numbering) { + : localizer_(std::move(localizer)), + one_based_numbering_(options->one_based_numbering) { if (options->week_start == 7) { - start_week_ = sun; mid_week_ = wed; - } else { - start_week_ = mon; mid_week_ = thu; } } @@ -408,22 +405,27 @@ struct Week { template T Call(KernelContext*, Arg0 arg, Status*) const { const auto t = floor(localizer_.template ConvertTimePoint(arg)); - auto y = year_month_day{t + days{3}}.year(); - - auto start = localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + - (start_week_ - mid_week_); - if (t < start) { - --y; - start = localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + - (start_week_ - mid_week_); + if (one_based_numbering_) { + auto y = year_month_day{t + days{3}}.year(); + auto start = + localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + (mon - thu); + if (t < start) { + --y; + start = + localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + (mon - thu); + } + return static_cast(trunc(t - start).count() + 1); + } else { + auto y = year_month_day{t}.year(); + auto start = + localizer_.ConvertDays((y - years{1}) / dec / mid_week_[last]) + (mon - thu); + return static_cast(floor(t - start).count() + 1); } - return static_cast(trunc(t - start).count() + count_offset_); } Localizer localizer_; - const int count_offset_; - arrow_vendored::date::weekday start_week_; arrow_vendored::date::weekday mid_week_; + bool one_based_numbering_; }; // ---------------------------------------------------------------------- @@ -1190,7 +1192,7 @@ void RegisterScalarTemporal(FunctionRegistry* registry) { "iso_week", {WithDates, WithTimestamps}, int64(), &iso_week_doc); DCHECK_OK(registry->AddFunction(std::move(iso_week))); - static auto default_week_options = DayOfWeekOptions(false, 1); + static auto default_week_options = DayOfWeekOptions(true, 1); auto week = MakeTemporal( "week", int64(), &week_doc, &default_week_options, DayOfWeekState::Init); DCHECK_OK(registry->AddFunction(std::move(week))); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index e482114c1d4..8ce9f80c688 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -122,7 +122,7 @@ class ScalarTemporalTest : public ::testing::Test { "2005, 2008, 2009, 2011, null]"; std::string iso_week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; - std::string week = "[0, 8, 51, 19, 0, 0, 0, 52, 52, 52, 0, 51, 51, 51, 0, 51, null]"; + std::string week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; std::string quarter = "[1, 1, 1, 2, 1, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 1, null]"; std::string hour = "[0, 23, 0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 1, null]"; @@ -195,12 +195,7 @@ TEST_F(ScalarTemporalTest, TestTemporalComponentExtractionWithDifferentUnits) { TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { const char* times = R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])"; - auto unit = timestamp(TimeUnit::MICRO); - auto iso_calendar_type = - struct_({field("iso_year", int64()), field("iso_week", int64()), - field("iso_day_of_week", int64())}); - auto year = "[1677, 2262]"; auto month = "[9, 4]"; auto day = "[20, 13]"; @@ -208,7 +203,7 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { auto day_of_year = "[263, 103]"; auto iso_year = "[1677, 2262]"; auto iso_week = "[38, 15]"; - auto week = "[37, 14]"; + auto week = "[38, 15]"; auto iso_calendar = ArrayFromJSON(iso_calendar_type, R"([{"iso_year": 1677, "iso_week": 38, "iso_day_of_week": 1}, @@ -245,9 +240,6 @@ TEST_F(ScalarTemporalTest, TestOutsideNanosecondRange) { // TODO: We should test on windows once ARROW-13168 is resolved. TEST_F(ScalarTemporalTest, TestZoned1) { auto unit = timestamp(TimeUnit::NANO, "Pacific/Marquesas"); - auto iso_calendar_type = - struct_({field("iso_year", int64()), field("iso_week", int64()), - field("iso_day_of_week", int64())}); auto year = "[1969, 2000, 1898, 2033, 2019, 2019, 2019, 2009, 2009, 2010, 2010, 2005, 2005, " "2008, 2008, 2011, null]"; @@ -260,7 +252,7 @@ TEST_F(ScalarTemporalTest, TestZoned1) { "[1970, 2000, 1898, 2033, 2020, 2020, 2019, 2009, 2009, 2009, 2009, 2005, 2005, " "2008, 2008, 2011, null]"; auto iso_week = "[1, 9, 52, 20, 1, 1, 52, 53, 53, 53, 53, 52, 52, 52, 52, 52, null]"; - auto week = "[0, 8, 51, 19, 0, 0, 51, 52, 52, 52, 52, 51, 51, 51, 51, 51, null]"; + auto week = "[1, 9, 52, 20, 1, 1, 52, 53, 53, 53, 53, 52, 52, 52, 52, 52, null]"; auto iso_calendar = ArrayFromJSON(iso_calendar_type, R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 3}, @@ -305,12 +297,6 @@ TEST_F(ScalarTemporalTest, TestZoned1) { TEST_F(ScalarTemporalTest, TestZoned2) { for (auto u : TimeUnit::values()) { auto unit = timestamp(u, "Australia/Broken_Hill"); - auto iso_calendar_type = - struct_({field("iso_year", int64()), field("iso_week", int64()), - field("iso_day_of_week", int64())}); - auto year = - "[1970, 2000, 1899, 2033, 2020, 2019, 2019, 2009, 2010, 2010, 2010, 2006, 2005, " - "2008, 2008, 2012, null]"; auto month = "[1, 3, 1, 5, 1, 12, 12, 12, 1, 1, 1, 1, 12, 12, 12, 1, null]"; auto day = "[1, 1, 1, 18, 1, 31, 30, 31, 1, 3, 4, 1, 31, 28, 29, 1, null]"; auto day_of_week = "[3, 2, 6, 2, 2, 1, 0, 3, 4, 6, 0, 6, 5, 6, 0, 6, null]"; @@ -320,6 +306,7 @@ TEST_F(ScalarTemporalTest, TestZoned2) { "[1970, 2000, 1898, 2033, 2020, 2020, 2020, 2009, 2009, 2009, 2010, 2005, 2005, " "2008, 2009, 2011, null]"; auto iso_week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; + auto week = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; auto iso_calendar = ArrayFromJSON(iso_calendar_type, R"([{"iso_year": 1970, "iso_week": 1, "iso_day_of_week": 4}, @@ -393,29 +380,22 @@ TEST_F(ScalarTemporalTest, TestNonexistentTimezone) { #endif TEST_F(ScalarTemporalTest, Week) { - std::string week = "[0, 8, 51, 19, 0, 0, 0, 52, 52, 52, 0, 51, 51, 51, 0, 51, null]"; std::string week_11 = "[1, 9, 52, 20, 1, 1, 1, 53, 53, 53, 1, 52, 52, 52, 1, 52, null]"; + std::string week_17 = "[53, 9, 1, 20, 1, 1, 1, 52, 52, 1, 1, 1, 52, 53, 53, 1, null]"; std::string week_01 = "[1, 9, 0, 20, 1, 53, 53, 53, 0, 0, 1, 0, 52, 52, 53, 0, null]"; std::string week_07 = "[0, 9, 1, 20, 1, 53, 53, 52, 0, 1, 1, 1, 52, 53, 53, 1, null]"; - std::string week_17 = "[53, 9, 1, 20, 1, 1, 1, 52, 52, 1, 1, 1, 52, 53, 53, 1, null]"; + auto unit = timestamp(TimeUnit::NANO); - for (auto u : internal::AllTimeUnits()) { - auto unit = timestamp(u); - auto timestamps = ArrayFromJSON(unit, times_seconds_precision); - auto options_01 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 1); - auto options_11 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 1); - auto options_07 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 7); - auto options_17 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 7); - - CheckScalarUnary("iso_week", unit, times_seconds_precision, int64(), week_11); - CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_11, - &options_11); - // CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_01, - // &options_01); CheckScalarUnary("week", unit, times_seconds_precision, int64(), - // week_07, &options_07); - CheckScalarUnary("week", unit, times_seconds_precision, int64(), week_17, - &options_17); - } + auto options_01 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 1); + auto options_11 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 1); + auto options_07 = DayOfWeekOptions(/*one_based_numbering=*/false, /*week_start*/ 7); + auto options_17 = DayOfWeekOptions(/*one_based_numbering=*/true, /*week_start*/ 7); + + CheckScalarUnary("iso_week", unit, times, int64(), week_11); + CheckScalarUnary("week", unit, times, int64(), week_01, &options_01); + CheckScalarUnary("week", unit, times, int64(), week_11, &options_11); + CheckScalarUnary("week", unit, times, int64(), week_07, &options_07); + CheckScalarUnary("week", unit, times, int64(), week_17, &options_17); } TEST_F(ScalarTemporalTest, DayOfWeek) { diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index af3a53da22b..3ebf30b52b6 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1693,8 +1693,6 @@ def _check_datetime_components(timestamps, timezone=None): assert pc.day_of_week(tsa, options=day_of_week_options).equals( pa.array(ts.dt.dayofweek + 1)) - assert pc.week(tsa, options=pc.DayOfWeekOptions( - one_based_numbering=False)).equals(pa.array(iso_week - 1)) assert pc.week(tsa, options=pc.DayOfWeekOptions( one_based_numbering=True)).equals(pa.array(iso_week)) From 4c3ed3044eceb6095bc888f7e8c8897d7a598463 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 2 Sep 2021 00:52:44 +0200 Subject: [PATCH 03/11] Moving to WeekOptions. --- cpp/src/arrow/compute/api_scalar.cc | 15 +++- cpp/src/arrow/compute/api_scalar.h | 18 ++++- cpp/src/arrow/compute/function_test.cc | 1 + .../arrow/compute/kernels/scalar_temporal.cc | 69 ++++++++++++++----- .../compute/kernels/scalar_temporal_test.cc | 53 ++++++++++---- python/pyarrow/_compute.pyx | 16 +++++ python/pyarrow/compute.py | 1 + python/pyarrow/includes/libarrow.pxd | 8 +++ python/pyarrow/tests/test_compute.py | 8 ++- r/src/compute.cpp | 21 +++--- 10 files changed, 163 insertions(+), 47 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 4a8835e8958..245b19345ec 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -247,6 +247,10 @@ static auto kMakeStructOptionsType = GetFunctionOptionsType( static auto kDayOfWeekOptionsType = GetFunctionOptionsType( DataMember("one_based_numbering", &DayOfWeekOptions::one_based_numbering), DataMember("week_start", &DayOfWeekOptions::week_start)); +static auto kWeekOptionsType = GetFunctionOptionsType( + DataMember("week_starts_monday", &WeekOptions::week_starts_monday), + DataMember("count_from_zero", &WeekOptions::count_from_zero), + DataMember("first_week_in_year", &WeekOptions::first_week_in_year)); static auto kNullOptionsType = GetFunctionOptionsType( DataMember("nan_is_null", &NullOptions::nan_is_null)); } // namespace @@ -412,6 +416,14 @@ DayOfWeekOptions::DayOfWeekOptions(bool one_based_numbering, uint32_t week_start week_start(week_start) {} constexpr char DayOfWeekOptions::kTypeName[]; +WeekOptions::WeekOptions(bool week_starts_monday, bool count_from_zero, + bool first_week_in_year) + : FunctionOptions(internal::kWeekOptionsType), + week_starts_monday(week_starts_monday), + count_from_zero(count_from_zero), + first_week_in_year(first_week_in_year) {} +constexpr char WeekOptions::kTypeName[]; + NullOptions::NullOptions(bool nan_is_null) : FunctionOptions(internal::kNullOptionsType), nan_is_null(nan_is_null) {} constexpr char NullOptions::kTypeName[]; @@ -438,6 +450,7 @@ void RegisterScalarOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kSliceOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kMakeStructOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kDayOfWeekOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kWeekOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kNullOptionsType)); } } // namespace internal @@ -648,7 +661,7 @@ Result AssumeTimezone(const Datum& arg, AssumeTimezoneOptions options, return CallFunction("assume_timezone", {arg}, &options, ctx); } -Result Week(const Datum& arg, DayOfWeekOptions options, ExecContext* ctx) { +Result Week(const Datum& arg, WeekOptions options, ExecContext* ctx) { return CallFunction("week", {arg}, &options, ctx); } diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 15a968f4c0a..739b2172f51 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -361,6 +361,21 @@ struct ARROW_EXPORT AssumeTimezoneOptions : public FunctionOptions { Nonexistent nonexistent; }; +struct ARROW_EXPORT WeekOptions : public FunctionOptions { + public: + explicit WeekOptions(bool week_starts_monday = true, bool count_from_zero = false, + bool first_week_in_year = false); + constexpr static char const kTypeName[] = "WeekOptions"; + static WeekOptions Defaults() { return WeekOptions{}; } + + /// What day does the week start with (Monday=true, Sunday=false) + bool week_starts_monday; + /// Days in current year that fall into last years ISO week return week 0 if true + bool count_from_zero; + /// Is the first week fully in the the year or only its 4 or more days + bool first_week_in_year; +}; + /// @} /// \brief Get the absolute value of a value. @@ -1030,8 +1045,7 @@ ARROW_EXPORT Result ISOWeek(const Datum& values, ExecContext* ctx = NULLP /// /// \since 6.0.0 /// \note API not yet finalized -ARROW_EXPORT Result Week(const Datum& values, - DayOfWeekOptions options = DayOfWeekOptions(true, 0), +ARROW_EXPORT Result Week(const Datum& values, WeekOptions options = WeekOptions(), ExecContext* ctx = NULLPTR); /// \brief ISOCalendar returns a (ISO year, ISO week, ISO day of week) struct for diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index 183167490b6..626824d73ec 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -104,6 +104,7 @@ TEST(FunctionOptions, Equality) { options.emplace_back( new MakeStructOptions({"col1"}, {false}, {key_value_metadata({{"key", "val"}})})); options.emplace_back(new DayOfWeekOptions(false, 1)); + options.emplace_back(new WeekOptions(true, false, false)); options.emplace_back(new CastOptions(CastOptions::Safe(boolean()))); options.emplace_back(new CastOptions(CastOptions::Unsafe(int64()))); options.emplace_back(new FilterOptions()); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc index 5150fc6c17a..1d42e775e9f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc @@ -55,12 +55,14 @@ using arrow_vendored::date::literals::dec; using arrow_vendored::date::literals::jan; using arrow_vendored::date::literals::last; using arrow_vendored::date::literals::mon; +using arrow_vendored::date::literals::sun; using arrow_vendored::date::literals::thu; using arrow_vendored::date::literals::wed; using internal::applicator::ScalarUnaryNotNull; using internal::applicator::SimpleUnary; using DayOfWeekState = OptionsWrapper; +using WeekState = OptionsWrapper; using StrftimeState = OptionsWrapper; using AssumeTimezoneState = OptionsWrapper; @@ -231,6 +233,17 @@ struct AssumeTimezoneExtractor } }; +template