From 7f4ec7bcc0a8322dc5ff9a104cba0b0e7e97b8fd Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 17 Mar 2022 18:33:52 +0100 Subject: [PATCH 01/19] Add change_on_boundary to RoundTemporalOptions --- cpp/src/arrow/compute/api_scalar.cc | 12 +- cpp/src/arrow/compute/api_scalar.h | 11 +- .../compute/kernels/scalar_temporal_test.cc | 608 ++++++++++++++++++ .../compute/kernels/scalar_temporal_unary.cc | 256 +++++--- python/pyarrow/_compute.pyx | 20 +- python/pyarrow/includes/libarrow.pxd | 6 +- python/pyarrow/tests/test_compute.py | 19 +- 7 files changed, 832 insertions(+), 100 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index a9e2565a3ea..bcc0837367f 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -332,7 +332,9 @@ static auto kRoundOptionsType = GetFunctionOptionsType( static auto kRoundTemporalOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundTemporalOptions::multiple), DataMember("unit", &RoundTemporalOptions::unit), - DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday)); + DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday), + DataMember("change_on_boundary", &RoundTemporalOptions::change_on_boundary), + DataMember("calendar_based_origin", &RoundTemporalOptions::calendar_based_origin)); static auto kRoundToMultipleOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundToMultipleOptions::multiple), DataMember("round_mode", &RoundToMultipleOptions::round_mode)); @@ -491,11 +493,15 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode) constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, - bool week_starts_monday) + bool week_starts_monday, + bool change_on_boundary, + bool calendar_based_origin) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), - 
week_starts_monday(week_starts_monday) {} + week_starts_monday(week_starts_monday), + change_on_boundary(change_on_boundary), + calendar_based_origin(calendar_based_origin) {} constexpr char RoundTemporalOptions::kTypeName[]; RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 0af591acfa5..368e7ecb557 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -107,7 +107,9 @@ enum class CalendarUnit : int8_t { class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, - bool week_starts_monday = true); + bool week_starts_monday = true, + bool change_on_boundary = false, + bool calendar_based_origin = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -117,6 +119,13 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { CalendarUnit unit; /// What day does the week start with (Monday=true, Sunday=false) bool week_starts_monday; + /// If true, times exactly on a unit multiple boundary are rounded up by one multiple. + /// This applies to ceiling only. + bool change_on_boundary; + /// By default the origin is 1970-01-01T00:00:00. If true, the rounding origin is the + /// beginning of a less precise calendar unit, e.g. rounding to hours uses the + /// beginning of the day as the origin. 
+ bool calendar_based_origin; }; class ARROW_EXPORT RoundToMultipleOptions : public FunctionOptions { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index a52d69c36c7..6aa2649acd1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2229,6 +2229,367 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) { CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); } +TEST_F(ScalarTemporalTest, TestCeilTemporalChangeOnBoundary) { + std::string op = "ceil_temporal"; + RoundTemporalOptions round_to_1_nanoseconds = + RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false); + RoundTemporalOptions round_to_1_microseconds = + RoundTemporalOptions(1, CalendarUnit::MICROSECOND, true, true, false); + RoundTemporalOptions round_to_1_milliseconds = + RoundTemporalOptions(1, CalendarUnit::MILLISECOND, true, true, false); + RoundTemporalOptions round_to_1_seconds = + RoundTemporalOptions(1, CalendarUnit::SECOND, true, true, false); + RoundTemporalOptions round_to_1_minutes = + RoundTemporalOptions(1, CalendarUnit::MINUTE, true, true, false); + RoundTemporalOptions round_to_1_hours = + RoundTemporalOptions(1, CalendarUnit::HOUR, true, true, false); + RoundTemporalOptions round_to_1_days = + RoundTemporalOptions(1, CalendarUnit::DAY, true, true, false); + RoundTemporalOptions round_to_1_weeks = + RoundTemporalOptions(1, CalendarUnit::WEEK, true, true, false); + RoundTemporalOptions round_to_1_weeks_sunday = + RoundTemporalOptions(1, CalendarUnit::WEEK, false, true, false); + RoundTemporalOptions round_to_1_months = + RoundTemporalOptions(1, CalendarUnit::MONTH, true, true, false); + RoundTemporalOptions round_to_1_quarters = + RoundTemporalOptions(1, CalendarUnit::QUARTER, true, true, false); + RoundTemporalOptions round_to_1_years = + RoundTemporalOptions(1, CalendarUnit::YEAR, true, true, false); + + 
RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, false); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, false); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, false); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, false); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, false); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, false); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, false); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, false); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, false); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, false); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, false); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, false); + + const char* ceil_1_nanosecond = + R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000", + "1899-01-01 00:59:20.001001002", "2033-05-18 03:33:20.000000001", + "2020-01-01 01:05:05.001000001", "2019-12-31 02:10:10.002000001", + "2019-12-30 03:15:15.003000001", "2009-12-31 04:20:20.004132001", + "2010-01-01 05:25:25.005321001", "2010-01-03 06:30:30.006163001", + "2010-01-04 07:35:35.000000001", "2006-01-01 08:40:40.000000001", + "2005-12-31 09:45:45.000000001", "2008-12-28 00:00:00.000000001", + "2008-12-29 00:00:00.000000001", "2012-01-01 01:02:03.000000001", null])"; + const char* 
ceil_1_microsecond = + R"(["1970-01-01 00:00:59.123457", "2000-02-29 23:23:24.000000", + "1899-01-01 00:59:20.001002", "2033-05-18 03:33:20.000001", + "2020-01-01 01:05:05.001001", "2019-12-31 02:10:10.002001", + "2019-12-30 03:15:15.003001", "2009-12-31 04:20:20.004133", + "2010-01-01 05:25:25.005322", "2010-01-03 06:30:30.006164", + "2010-01-04 07:35:35.000001", "2006-01-01 08:40:40.000001", + "2005-12-31 09:45:45.000001", "2008-12-28 00:00:00.000001", + "2008-12-29 00:00:00.000001", "2012-01-01 01:02:03.000001", null])"; + const char* ceil_1_millisecond = + R"(["1970-01-01 00:00:59.124", "2000-02-29 23:23:24.000", + "1899-01-01 00:59:20.002", "2033-05-18 03:33:20.001", + "2020-01-01 01:05:05.002", "2019-12-31 02:10:10.003", + "2019-12-30 03:15:15.004", "2009-12-31 04:20:20.005", + "2010-01-01 05:25:25.006", "2010-01-03 06:30:30.007", + "2010-01-04 07:35:35.001", "2006-01-01 08:40:40.001", + "2005-12-31 09:45:45.001", "2008-12-28 00:00:00.001", + "2008-12-29 00:00:00.001", "2012-01-01 01:02:03.001", null])"; + const char* ceil_1_second = + R"(["1970-01-01 00:01:00", "2000-02-29 23:23:24", "1899-01-01 00:59:21", + "2033-05-18 03:33:21", "2020-01-01 01:05:06", "2019-12-31 02:10:11", + "2019-12-30 03:15:16", "2009-12-31 04:20:21", "2010-01-01 05:25:26", + "2010-01-03 06:30:31", "2010-01-04 07:35:36", "2006-01-01 08:40:41", + "2005-12-31 09:45:46", "2008-12-28 00:00:01", "2008-12-29 00:00:01", + "2012-01-01 01:02:04", null])"; + const char* ceil_1_minute = + R"(["1970-01-01 00:01:00", "2000-02-29 23:24:00", "1899-01-01 01:00:00", + "2033-05-18 03:34:00", "2020-01-01 01:06:00", "2019-12-31 02:11:00", + "2019-12-30 03:16:00", "2009-12-31 04:21:00", "2010-01-01 05:26:00", + "2010-01-03 06:31:00", "2010-01-04 07:36:00", "2006-01-01 08:41:00", + "2005-12-31 09:46:00", "2008-12-28 00:01:00", "2008-12-29 00:01:00", + "2012-01-01 01:03:00", null])"; + const char* ceil_1_hour = + R"(["1970-01-01 01:00:00", "2000-03-01 00:00:00", "1899-01-01 01:00:00", + "2033-05-18 04:00:00", 
"2020-01-01 02:00:00", "2019-12-31 03:00:00", + "2019-12-30 04:00:00", "2009-12-31 05:00:00", "2010-01-01 06:00:00", + "2010-01-03 07:00:00", "2010-01-04 08:00:00", "2006-01-01 09:00:00", + "2005-12-31 10:00:00", "2008-12-28 01:00:00", "2008-12-29 01:00:00", + "2012-01-01 02:00:00", null])"; + const char* ceil_1_day = + R"(["1970-01-02", "2000-03-01", "1899-01-02", "2033-05-19", + "2020-01-02", "2020-01-01", "2019-12-31", "2010-01-01", + "2010-01-02", "2010-01-04", "2010-01-05", "2006-01-02", + "2006-01-01", "2008-12-29", "2008-12-30", "2012-01-02", null])"; + const char* ceil_1_weeks = + R"(["1970-01-05", "2000-03-06", "1899-01-02", "2033-05-23", + "2020-01-06", "2020-01-06", "2020-01-06", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-11", "2006-01-02", + "2006-01-02", "2008-12-29", "2009-01-05", "2012-01-02", null])"; + const char* ceil_1_weeks_sunday = + R"(["1970-01-04", "2000-03-05", "1899-01-08", "2033-05-22", + "2020-01-05", "2020-01-05", "2020-01-05", "2010-01-03", + "2010-01-03", "2010-01-10", "2010-01-10", "2006-01-08", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-08", null])"; + const char* ceil_1_months = + R"(["1970-02-01", "2000-03-01", "1899-02-01", "2033-06-01", + "2020-02-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2010-02-01", "2010-02-01", "2010-02-01", "2006-02-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2012-02-01", null])"; + const char* ceil_1_quarters = + R"(["1970-04-01", "2000-04-01", "1899-04-01", "2033-07-01", + "2020-04-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2010-04-01", "2010-04-01", "2010-04-01", "2006-04-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2012-04-01", null])"; + const char* ceil_1_years = + R"(["1971-01-01", "2001-01-01", "1900-01-01", "2034-01-01", + "2021-01-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2011-01-01", "2011-01-01", "2011-01-01", "2007-01-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2013-01-01", null])"; + + const char* ceil_15_nanosecond = + 
R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000", + "1899-01-01 00:59:20.001001005", "2033-05-18 03:33:20.000000010", + "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015", + "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015", + "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163005", + "2010-01-04 07:35:35.000000010", "2006-01-01 08:40:40.000000005", + "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015", + "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])"; + const char* ceil_15_microsecond = + R"(["1970-01-01 00:00:59.123460", "2000-02-29 23:23:24.000000", + "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000010", + "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015", + "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000010", "2006-01-01 08:40:40.000005", + "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015", + "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])"; + const char* ceil_15_millisecond = + R"(["1970-01-01 00:00:59.130", "2000-02-29 23:23:24.000", + "1899-01-01 00:59:20.010", "2033-05-18 03:33:20.010", + "2020-01-01 01:05:05.010", "2019-12-31 02:10:10.005", + "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.010", + "2010-01-01 05:25:25.020", "2010-01-03 06:30:30.015", + "2010-01-04 07:35:35.010", "2006-01-01 08:40:40.005", + "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015", + "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])"; + const char* ceil_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:24", "1899-01-01 00:59:29", + "2033-05-18 03:33:22", "2020-01-01 01:05:06", "2019-12-31 02:10:21", + "2019-12-30 03:15:23", "2009-12-31 04:20:32", "2010-01-01 05:25:30", + "2010-01-03 06:30:39", "2010-01-04 07:35:37", "2006-01-01 08:40:53", + "2005-12-31 09:45:55", "2008-12-28 00:00:01", "2008-12-29 00:00:12", + 
"2012-01-01 01:02:11", null])"; + const char* ceil_13_minute = + R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:01:00", + "2033-05-18 03:39:00", "2020-01-01 01:09:00", "2019-12-31 02:11:00", + "2019-12-30 03:26:00", "2009-12-31 04:24:00", "2010-01-01 05:32:00", + "2010-01-03 06:43:00", "2010-01-04 07:38:00", "2006-01-01 08:45:00", + "2005-12-31 09:47:00", "2008-12-28 00:05:00", "2008-12-29 00:08:00", + "2012-01-01 01:05:00", null])"; + const char* ceil_15_hour = + R"(["1970-01-01 15:00:00", "2000-03-01 12:00:00", "1899-01-01 03:00:00", + "2033-05-18 18:00:00", "2020-01-01 12:00:00", "2019-12-31 06:00:00", + "2019-12-30 15:00:00", "2009-12-31 09:00:00", "2010-01-01 15:00:00", + "2010-01-03 12:00:00", "2010-01-04 18:00:00", "2006-01-01 09:00:00", + "2005-12-31 18:00:00", "2008-12-28 06:00:00", "2008-12-29 12:00:00", + "2012-01-01 15:00:00", null])"; + const char* ceil_15_day = + R"(["1970-01-16", "2000-03-09", "1899-01-13", "2033-05-30", "2020-01-09", + "2020-01-09", "2020-01-09", "2010-01-01", "2010-01-16", "2010-01-16", + "2010-01-16", "2006-01-07", "2006-01-07", "2009-01-06", "2009-01-06", + "2012-01-06", null])"; + const char* ceil_15_weeks = + R"(["1970-04-13", "2000-03-06", "1899-04-10", "2033-07-11", "2020-01-06", + "2020-01-06", "2020-01-06", "2010-03-29", "2010-03-29", "2010-03-29", + "2010-03-29", "2006-03-20", "2006-03-20", "2009-02-02", "2009-02-02", + "2012-04-02", null])"; + const char* ceil_15_weeks_sunday = + R"(["1970-04-12", "2000-03-05", "1899-04-09", "2033-07-10", "2020-01-05", + "2020-01-05", "2020-01-05", "2010-03-28", "2010-03-28", "2010-03-28", + "2010-03-28", "2006-03-19", "2006-03-19", "2009-02-01", "2009-02-01", + "2012-04-01", null])"; + const char* ceil_15_months = + R"(["1971-04-01", "2001-04-01", "1900-01-01", "2033-10-01", "2021-04-01", + "2020-01-01", "2020-01-01", "2010-01-01", "2011-04-01", "2011-04-01", + "2011-04-01", "2006-04-01", "2006-04-01", "2010-01-01", "2010-01-01", + "2012-07-01", null])"; + const char* 
ceil_15_quarters = + R"(["1973-10-01", "2003-10-01", "1902-07-01", "2033-10-01", "2022-07-01", + "2022-07-01", "2022-07-01", "2011-04-01", "2011-04-01", "2011-04-01", + "2011-04-01", "2007-07-01", "2007-07-01", "2011-04-01", "2011-04-01", + "2015-01-01", null])"; + const char* ceil_15_years = + R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01", "2025-01-01", + "2025-01-01", "2025-01-01", "2010-01-01", "2025-01-01", "2025-01-01", + "2025-01-01", "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", + "2025-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, ceil_1_nanosecond, &round_to_1_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_microsecond, &round_to_1_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_millisecond, &round_to_1_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_second, &round_to_1_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_minute, &round_to_1_minutes); + CheckScalarUnary(op, unit, times, unit, ceil_1_hour, &round_to_1_hours); + CheckScalarUnary(op, unit, times, unit, ceil_1_day, &round_to_1_days); + CheckScalarUnary(op, unit, times, unit, ceil_1_weeks, &round_to_1_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_1_weeks_sunday, &round_to_1_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_1_months, &round_to_1_months); + CheckScalarUnary(op, unit, times, unit, ceil_1_quarters, &round_to_1_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_1_years, &round_to_1_years); + + CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); + 
CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); +} + +TEST_F(ScalarTemporalTest, TestCeilTemporalCalendarBasedOrigin) { + std::string op = "ceil_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + 
RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + const char* ceil_15_nanosecond = + R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", + "1899-01-01 00:59:20.001001015", "2033-05-18 03:33:20.000000015", + "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015", + "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015", + "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163015", + "2010-01-04 07:35:35.000000015", "2006-01-01 08:40:40.000000015", + "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015", + "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])"; + const char* ceil_15_microsecond = + R"(["1970-01-01 00:00:59.123465", "2000-02-29 23:23:24.000005", + "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000015", + "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015", + "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000015", "2006-01-01 08:40:40.000015", + "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015", + "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])"; + const char* ceil_15_millisecond = + R"(["1970-01-01 00:00:59.135", "2000-02-29 23:23:24.005", + "1899-01-01 00:59:20.015", "2033-05-18 03:33:20.015", + "2020-01-01 01:05:05.015", "2019-12-31 02:10:10.015", + "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.015", + "2010-01-01 05:25:25.015", "2010-01-03 06:30:30.015", + "2010-01-04 07:35:35.015", "2006-01-01 08:40:40.015", + "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015", + "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])"; + const char* ceil_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26", + "2033-05-18 03:33:26", "2020-01-01 01:05:13", "2019-12-31 02:10:13", + "2019-12-30 03:15:26", "2009-12-31 04:20:26", "2010-01-01 05:25:26", + "2010-01-03 06:30:39", "2010-01-04 07:35:39", 
"2006-01-01 08:40:52", + "2005-12-31 09:45:52", "2008-12-28 00:00:13", "2008-12-29 00:00:13", + "2012-01-01 01:02:13", null])"; + const char* ceil_13_minute = + R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00", + "2033-05-18 03:39:00", "2020-01-01 01:13:00", "2019-12-31 02:13:00", + "2019-12-30 03:26:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00", + "2010-01-03 06:39:00", "2010-01-04 07:39:00", "2006-01-01 08:52:00", + "2005-12-31 09:52:00", "2008-12-28 00:13:00", "2008-12-29 00:13:00", + "2012-01-01 01:13:00", null])"; + const char* ceil_15_hour = + R"(["1970-01-01 15:00:00", "2000-03-01 06:00:00", "1899-01-01 15:00:00", + "2033-05-18 15:00:00", "2020-01-01 15:00:00", "2019-12-31 15:00:00", + "2019-12-30 15:00:00", "2009-12-31 15:00:00", "2010-01-01 15:00:00", + "2010-01-03 15:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00", + "2005-12-31 15:00:00", "2008-12-28 15:00:00", "2008-12-29 15:00:00", + "2012-01-01 15:00:00", null])"; + const char* ceil_15_day = + R"(["1970-01-16", "2000-03-02", "1899-01-16", "2033-05-31", + "2020-01-16", "2020-01-15", "2019-12-31", "2010-01-15", + "2010-01-16", "2010-01-16", "2010-01-16", "2006-01-16", + "2006-01-15", "2008-12-31", "2008-12-31", "2012-01-16", null])"; + const char* ceil_15_weeks = + R"(["1970-04-13", "2000-04-17", "1899-04-17", "2033-08-01", "2020-04-13", + "2020-04-13", "2020-04-13", "2010-04-19", "2010-04-19", "2010-04-19", + "2010-04-19", "2006-04-17", "2006-04-17", "2009-02-23", "2009-04-13", + "2012-04-16", null])"; + const char* ceil_15_weeks_sunday = + R"(["1970-04-19", "2000-04-16", "1899-04-16", "2033-07-31", "2020-04-12", + "2020-04-12", "2020-04-12", "2010-04-18", "2010-04-18", "2010-04-18", + "2010-04-18", "2006-04-16", "2006-04-16", "2009-04-19", "2009-04-19", + "2012-04-15", null])"; + const char* ceil_15_months = + R"(["1971-04-01", "2001-04-01", "1900-04-01", "2034-04-01", + "2021-04-01", "2020-04-01", "2020-04-01", "2010-04-01", + "2011-04-01", "2011-04-01", "2011-04-01", 
"2007-04-01", + "2006-04-01", "2009-04-01", "2009-04-01", "2013-04-01", null])"; + const char* ceil_15_quarters = + R"(["1973-10-01", "2003-10-01", "1902-10-01", "2036-10-01", + "2023-10-01", "2022-10-01", "2022-10-01", "2012-10-01", + "2013-10-01", "2013-10-01", "2013-10-01", "2009-10-01", + "2008-10-01", "2011-10-01", "2011-10-01", "2015-10-01", null])"; + const char* ceil_15_years = + R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2025-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestFloorTemporal) { std::string op = "floor_temporal"; const char* floor_1_nanosecond = @@ -2420,6 +2781,130 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) { CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); } +TEST_F(ScalarTemporalTest, 
TestFloorTemporalCalendarBasedOrigin) { + std::string op = "floor_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + const char* floor_15_nanosecond = + R"(["1970-01-01 00:00:59.123456780", "2000-02-29 23:23:23.999999990", + "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", + "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000", + "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000", + "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000", + "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000", + "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000", + "2008-12-29 00:00:00.000000000", 
"2012-01-01 01:02:03.000000000", null])"; + const char* floor_15_microsecond = + R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:23.999990", + "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000", + "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000", + "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004120", + "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006150", + "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000", + "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000", + "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])"; + const char* floor_15_millisecond = + R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:23.990", + "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000", + "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000", + "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000", + "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000", + "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000", + "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000", + "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])"; + const char* floor_13_second = + R"(["1970-01-01 00:00:52", "2000-02-29 23:23:13", "1899-01-01 00:59:13", + "2033-05-18 03:33:13", "2020-01-01 01:05:00", "2019-12-31 02:10:00", + "2019-12-30 03:15:13", "2009-12-31 04:20:13", "2010-01-01 05:25:13", + "2010-01-03 06:30:26", "2010-01-04 07:35:26", "2006-01-01 08:40:39", + "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:02:00", null])"; + const char* floor_13_minute = + R"(["1970-01-01 00:00:00", "2000-02-29 23:13:00", "1899-01-01 00:52:00", + "2033-05-18 03:26:00", "2020-01-01 01:00:00", "2019-12-31 02:00:00", + "2019-12-30 03:13:00", "2009-12-31 04:13:00", "2010-01-01 05:13:00", + "2010-01-03 06:26:00", "2010-01-04 07:26:00", "2006-01-01 08:39:00", + "2005-12-31 09:39:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:00:00", null])"; + const char* floor_15_hour = + R"(["1970-01-01 00:00:00", 
"2000-02-29 15:00:00", "1899-01-01 00:00:00", + "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00", + "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00", + "2010-01-03 00:00:00", "2010-01-04 00:00:00", "2006-01-01 00:00:00", + "2005-12-31 00:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 00:00:00", null])"; + const char* floor_15_day = + R"(["1970-01-01", "2000-02-16", "1899-01-01", "2033-05-16", + "2020-01-01", "2019-12-31", "2019-12-16", "2009-12-31", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-12-31", "2008-12-16", "2008-12-16", "2012-01-01", null])"; + const char* floor_15_weeks = + R"(["1969-12-29", "2000-01-03", "1899-01-02", "2033-04-18", + "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02", + "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])"; + const char* floor_15_weeks_sunday = + R"(["1970-01-04", "2000-01-02", "1899-01-01", "2033-04-17", + "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03", + "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])"; + const char* floor_15_months = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])"; + const char* floor_15_quarters = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])"; + const char* floor_15_years = + R"(["1965-01-01", "1995-01-01", "1890-01-01", "2025-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01", + "1995-01-01", "1995-01-01", 
"1995-01-01", "2010-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, floor_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, floor_15_microsecond, + &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, floor_15_millisecond, + &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, floor_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, floor_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, unit, times, unit, floor_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, floor_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, floor_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, floor_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, floor_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, floor_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestRoundTemporal) { std::string op = "round_temporal"; const char* round_1_nanoseconds = @@ -2632,6 +3117,129 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) { CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours); } +TEST_F(ScalarTemporalTest, TestRoundTemporalCalendarBasedOrigin) { + std::string op = "round_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + 
RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + const char* round_15_nanosecond = + R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", + "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", + "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000", + "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000", + "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000", + "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000", + "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000", + "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])"; + const char* round_15_microsecond = + R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005", + "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000", + "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000", + "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000", + "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000", + "2008-12-29 00:00:00.000000", 
"2012-01-01 01:02:03.000000", null])"; + const char* round_15_millisecond = + R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005", + "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000", + "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000", + "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000", + "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000", + "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000", + "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000", + "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])"; + const char* round_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26", + "2033-05-18 03:33:26", "2020-01-01 01:05:00", "2019-12-31 02:10:13", + "2019-12-30 03:15:13", "2009-12-31 04:20:26", "2010-01-01 05:25:26", + "2010-01-03 06:30:26", "2010-01-04 07:35:39", "2006-01-01 08:40:39", + "2005-12-31 09:45:39", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:02:00", null])"; + const char* round_13_minute = + R"(["1970-01-01 00:00:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00", + "2033-05-18 03:39:00", "2020-01-01 01:00:00", "2019-12-31 02:13:00", + "2019-12-30 03:13:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00", + "2010-01-03 06:26:00", "2010-01-04 07:39:00", "2006-01-01 08:39:00", + "2005-12-31 09:52:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:00:00", null])"; + const char* round_15_hour = + R"(["1970-01-01 00:00:00", "2000-03-01 06:00:00", "1899-01-01 00:00:00", + "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00", + "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00", + "2010-01-03 00:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00", + "2005-12-31 15:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 00:00:00", null])"; + const char* round_15_day = + R"(["1970-01-01", "2000-03-02", "1899-01-01", "2033-05-16", + "2020-01-01", "2019-12-31", "2019-12-31", "2009-12-31", + "2010-01-01", "2010-01-01", 
"2010-01-01", "2006-01-01", + "2005-12-31", "2008-12-31", "2008-12-31", "2012-01-01", null])"; + const char* round_15_weeks = + R"(["1969-12-29", "2000-04-17", "1899-01-02", "2033-04-18", + "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02", + "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])"; + const char* round_15_weeks_sunday = + R"(["1970-01-04", "2000-04-16", "1899-01-01", "2033-04-17", + "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03", + "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])"; + const char* round_15_months = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2020-04-01", "2020-04-01", "2010-04-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2006-04-01", "2009-04-01", "2009-04-01", "2012-01-01", null])"; + const char* round_15_quarters = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])"; + const char* round_15_years = + R"(["1965-01-01", "1995-01-01", "1905-01-01", "2040-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", null])"; + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, round_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, round_15_microsecond, + &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, round_15_millisecond, + &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, round_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, round_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, 
unit, times, unit, round_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, round_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, round_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, round_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, round_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, round_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalKolkata) { // Kolkata timezone was defined as UTC+5:21:10 from 1871 to 1906 when it changed to // IST (UTC+05:30) without DST. This test is to check rounding is done in historical diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 7484de2a005..ab1b03bb9a8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -689,11 +689,27 @@ struct IsDaylightSavings { // Round temporal values to given frequency template -year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) { +year_month_day GetFlooredYmd(int64_t arg, const int multiple, + const RoundTemporalOptions options, Localizer localizer_) { year_month_day ymd{floor(localizer_.template ConvertTimePoint(arg))}; if (multiple == 1) { return year_month_day(ymd.year() / ymd.month() / 1); + } else if (options.calendar_based_origin) { + switch (options.unit) { + case compute::CalendarUnit::MONTH: { + const auto m = + static_cast(ymd.month()) / options.multiple * options.multiple; + return year_month_day(ymd.year() / 1 / 1) + months{m}; + } + case compute::CalendarUnit::QUARTER: { + const auto m = static_cast(ymd.month()) / (options.multiple * 3) * + (options.multiple * 3); + return year_month_day(ymd.year() / 1 / 1) + months{m}; + } + default: + return 
ymd; + } } else { int32_t total_months_origin = 1970 * 12; int32_t total_months = static_cast(ymd.year()) * 12 + @@ -705,21 +721,75 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) { } else { total_months = (total_months - multiple + 1) / multiple * multiple; } - return year_month_day(year{1970} / jan / 0) + months{total_months}; + return year{1970} / jan / 1 + months{total_months}; } } template -const Duration FloorTimePoint(const int64_t arg, const int64_t multiple, +const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions options, Localizer localizer_, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg); - const Unit d = floor(t).time_since_epoch(); - if (multiple == 1) { + if (options.multiple == 1) { + const Unit d = floor(t).time_since_epoch(); return localizer_.template ConvertLocalToSys(duration_cast(d), st); + } else if (options.calendar_based_origin) { + const Unit unit = Unit{options.multiple}; + + switch (options.unit) { + case compute::CalendarUnit::DAY: { + const auto origin = + localizer_.ConvertDays(year_month_day(floor(t)).year() / + year_month_day(floor(t)).month() / 1); + const auto m = (floor(t) - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::HOUR: { + const auto origin = localizer_.ConvertDays(year_month_day(floor(t))); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::MINUTE: { + const auto origin = floor(t); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::SECOND: { + const auto origin = floor(t); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + 
duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::MILLISECOND: { + const auto origin = floor(t); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::MICROSECOND: { + const auto origin = floor(t); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + case compute::CalendarUnit::NANOSECOND: { + const auto origin = floor(t); + const auto m = (t - origin) / unit * unit + origin; + return localizer_.template ConvertLocalToSys( + duration_cast(m.time_since_epoch()), st); + } + default: { + *st = Status::Invalid("Cannot floor to ", &options.unit); + return Duration{0}; + } + } } else { - const Unit unit = Unit{multiple}; + const Unit d = floor(t).time_since_epoch(); + const Unit unit = Unit{options.multiple}; const Unit m = (d.count() >= 0) ? d / unit * unit : (d - unit + Unit{1}) / unit * unit; return localizer_.template ConvertLocalToSys(duration_cast(m), @@ -728,18 +798,31 @@ const Duration FloorTimePoint(const int64_t arg, const int64_t multiple, } template -const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple, +const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, Localizer localizer_, const Duration weekday_offset, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg) + weekday_offset; const weeks d = floor(t).time_since_epoch(); - if (multiple == 1) { + if (options.multiple == 1) { return localizer_.template ConvertLocalToSys(duration_cast(d), st) - weekday_offset; + } else if (options.calendar_based_origin) { + weekday wd_; + if (options.week_starts_monday) { + wd_ = thu; + } else { + wd_ = wed; + } + const auto y = year_month_day{floor(t)}.year(); + const auto start = + localizer_.ConvertDays((y - years{1}) / dec / wd_[last]) + (mon - thu); + 
const weeks unit = weeks{options.multiple}; + const auto m = (t - start) / unit * unit + start; + return localizer_.template ConvertLocalToSys(m.time_since_epoch(), st); } else { - const weeks unit = weeks{multiple}; + const weeks unit = weeks{options.multiple}; const weeks m = (d.count() >= 0) ? d / unit * unit : (d - unit + weeks{1}) / unit * unit; return localizer_.template ConvertLocalToSys(duration_cast(m), @@ -749,55 +832,58 @@ const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple, } template -Duration CeilTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_, - Status* st) { +Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions options, + Localizer localizer_, Status* st) { const Duration f = - FloorTimePoint(arg, multiple, localizer_, st); + FloorTimePoint(arg, options, localizer_, st); const auto cl = localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (cs >= Duration{arg}) { - return cs; + + if (options.change_on_boundary || cs < Duration{arg}) { + return localizer_.template ConvertLocalToSys( + duration_cast(cl + duration_cast(Unit{options.multiple})), + st); } - return localizer_.template ConvertLocalToSys( - duration_cast(cl + duration_cast(Unit{multiple})), st); + return cs; } template -Duration CeilWeekTimePoint(const int64_t arg, const int64_t multiple, +Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, Localizer localizer_, const Duration weekday_offset, Status* st) { - const Duration f = FloorWeekTimePoint(arg, multiple, localizer_, + const Duration f = FloorWeekTimePoint(arg, options, localizer_, weekday_offset, st); const auto cl = localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (cs >= Duration{arg}) { - return cs; + if (options.change_on_boundary || 
cs < Duration{arg}) { + return localizer_.template ConvertLocalToSys( + duration_cast(cl + duration_cast(weeks{options.multiple})), + st); } - return localizer_.template ConvertLocalToSys( - duration_cast(cl + duration_cast(weeks{multiple})), st); + return cs; } template -Duration RoundTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_, - Status* st) { +Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions options, + Localizer localizer_, Status* st) { const Duration f = - FloorTimePoint(arg, multiple, localizer_, st); + FloorTimePoint(arg, options, localizer_, st); const Duration c = - CeilTimePoint(arg, multiple, localizer_, st); + CeilTimePoint(arg, options, localizer_, st); return (Duration{arg} - f >= c - Duration{arg}) ? c : f; } template -Duration RoundWeekTimePoint(const int64_t arg, const int64_t multiple, +Duration RoundWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, Localizer localizer_, const Duration weekday_offset, Status* st) { - const Duration f = FloorWeekTimePoint(arg, multiple, localizer_, + const Duration f = FloorWeekTimePoint(arg, options, localizer_, weekday_offset, st); - const Duration c = CeilWeekTimePoint(arg, multiple, localizer_, + const Duration c = CeilWeekTimePoint(arg, options, localizer_, weekday_offset, st); return (Duration{arg} - f >= c - Duration{arg}) ? 
c : f; } @@ -812,52 +898,50 @@ struct CeilTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MILLISECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::SECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = CeilTimePoint(arg, options.multiple, localizer_, - st); + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = CeilTimePoint(arg, options.multiple, + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = CeilTimePoint(arg, options.multiple, localizer_, - st); + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = CeilWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = CeilWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = CeilWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = CeilWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); ymd += months{options.multiple}; t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; } case compute::CalendarUnit::QUARTER: { - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + 
year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); ymd += months{3 * options.multiple}; t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; @@ -890,51 +974,49 @@ struct FloorTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = FloorTimePoint( - arg, options.multiple, localizer_, st); + t = FloorTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: t = FloorTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::MILLISECOND: t = FloorTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::SECOND: - t = FloorTimePoint( - arg, options.multiple, localizer_, st); + t = FloorTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = FloorTimePoint(arg, options.multiple, - localizer_, st); + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = FloorTimePoint(arg, options.multiple, + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = FloorTimePoint(arg, options.multiple, localizer_, - st); + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = FloorWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = FloorWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = FloorWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = FloorWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); 
break; } case compute::CalendarUnit::QUARTER: { - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; } @@ -965,46 +1047,44 @@ struct RoundTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = RoundTimePoint( - arg, options.multiple, localizer_, st); + t = RoundTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: t = RoundTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::MILLISECOND: t = RoundTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::SECOND: - t = RoundTimePoint( - arg, options.multiple, localizer_, st); + t = RoundTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = RoundTimePoint(arg, options.multiple, - localizer_, st); + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = RoundTimePoint(arg, options.multiple, + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = RoundTimePoint(arg, options.multiple, localizer_, - st); + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = RoundWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = RoundWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = RoundWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = RoundWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { auto t0 = localizer_.template ConvertTimePoint(arg); - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day 
ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1); ymd += months{options.multiple}; @@ -1015,8 +1095,8 @@ struct RoundTemporal { } case compute::CalendarUnit::QUARTER: { auto t0 = localizer_.template ConvertTimePoint(arg); - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1); ymd += months{3 * options.multiple}; diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 96da505f763..c3988844e58 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -882,11 +882,13 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): - def _set_options(self, multiple, unit, week_starts_monday): + def _set_options(self, multiple, unit, week_starts_monday, + change_on_boundary, calendar_based_origin): self.wrapped.reset( new CRoundTemporalOptions( multiple, unwrap_round_temporal_unit(unit), - week_starts_monday) + week_starts_monday, change_on_boundary, + calendar_based_origin) ) @@ -905,10 +907,20 @@ class RoundTemporalOptions(_RoundTemporalOptions): "nanosecond". week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. + change_on_boundary : bool, default False + If True times exactly on unit multiple boundary will be rounded + one unit multiple up. This applies for ceiling only. + calendar_based_origin : bool, default False + By default origin is 1970-01-01T00:00:00. By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. 
+ """ - def __init__(self, multiple=1, unit="day", week_starts_monday=True): - self._set_options(multiple, unit, week_starts_monday) + def __init__(self, multiple=1, unit="day", week_starts_monday=True, + change_on_boundary=False, calendar_based_origin=False): + self._set_options(multiple, unit, week_starts_monday, + change_on_boundary, calendar_based_origin) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 2e51864b860..6c47f177840 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1971,10 +1971,14 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: cdef cppclass CRoundTemporalOptions \ "arrow::compute::RoundTemporalOptions"(CFunctionOptions): CRoundTemporalOptions(int multiple, CCalendarUnit unit, - c_bool week_starts_monday) + c_bool week_starts_monday, + c_bool change_on_boundary, + c_bool calendar_based_origin) int multiple CCalendarUnit unit c_bool week_starts_monday + c_bool change_on_boundary + c_bool calendar_based_origin cdef cppclass CRoundToMultipleOptions \ "arrow::compute::RoundToMultipleOptions"(CFunctionOptions): diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 45282a28678..1d9464b0e09 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2085,6 +2085,19 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + # TODO: should work for day + if ta.type.tz is None and unit != "day": + options = pc.RoundTemporalOptions( + value, unit, change_on_boundary=True) + result = pc.ceil_temporal(ta, options=options) + expected = ts.dt.ceil(frequency) + + expected = np.where( + expected == ts, + expected + pd.Timedelta(value, unit_shorthand[unit]), + expected) + np.testing.assert_array_equal(result, expected) + # TODO: We should test 
on windows once ARROW-13168 is resolved. @pytest.mark.skipif(sys.platform == 'win32', @@ -2095,9 +2108,8 @@ def _check_temporal_rounding(ts, values, unit): def test_round_temporal(unit): from pyarrow.vendored.version import Version - if Version(pd.__version__) < Version('1.0.0') and \ - unit in ("nanosecond", "microsecond"): - pytest.skip('Pandas < 1.0 rounds zoned small units differently.') + if Version(pd.__version__) < Version('1.0.0'): + pytest.skip('Pandas < 1.0 rounds differently.') values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750) timestamps = [ @@ -2111,6 +2123,7 @@ def test_round_temporal(unit): "1967-02-26 05:56:46.922376960", "1975-11-01 10:55:37.016146432", "1982-01-21 18:43:44.517366784", + "1992-01-01T00:00:00.100000000", "1999-12-04 05:55:34.794991104", "2026-10-26 08:39:00.316686848"] ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps]) From c4ab6e23251bb315d9b9c4af3cfa2a3b3959d2b6 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 21 Apr 2022 02:08:02 +0200 Subject: [PATCH 02/19] Apply suggestions from code review Co-authored-by: Antoine Pitrou --- cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc | 2 +- python/pyarrow/_compute.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index ab1b03bb9a8..291c2759b5b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -690,7 +690,7 @@ struct IsDaylightSavings { template year_month_day GetFlooredYmd(int64_t arg, const int multiple, - const RoundTemporalOptions options, Localizer localizer_) { + const RoundTemporalOptions& options, Localizer localizer_) { year_month_day ymd{floor(localizer_.template ConvertTimePoint(arg))}; if (multiple == 1) { diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index c3988844e58..e53545830fc 100644 --- 
a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -917,7 +917,7 @@ class RoundTemporalOptions(_RoundTemporalOptions): """ - def __init__(self, multiple=1, unit="day", week_starts_monday=True, + def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, change_on_boundary=False, calendar_based_origin=False): self._set_options(multiple, unit, week_starts_monday, change_on_boundary, calendar_based_origin) From 88c8646371f95c6e3fa87f2db16a36032a5177cd Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 21 Apr 2022 15:14:12 +0200 Subject: [PATCH 03/19] Review feedback. --- cpp/src/arrow/compute/api_scalar.cc | 7 +- cpp/src/arrow/compute/api_scalar.h | 5 +- .../compute/kernels/scalar_temporal_unary.cc | 114 ++++++++++-------- python/pyarrow/_compute.pyx | 10 +- python/pyarrow/includes/libarrow.pxd | 4 +- python/pyarrow/tests/test_compute.py | 7 +- 6 files changed, 76 insertions(+), 71 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index bcc0837367f..f9adf95ac66 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -333,7 +333,7 @@ static auto kRoundTemporalOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundToMultipleOptions::multiple), @@ -493,14 +493,13 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode) constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, - bool week_starts_monday, - bool change_on_boundary, + bool week_starts_monday, bool strict_ceil, bool calendar_based_origin) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), week_starts_monday(week_starts_monday), - change_on_boundary(change_on_boundary), + strict_ceil(strict_ceil), calendar_based_origin(calendar_based_origin) {} constexpr char RoundTemporalOptions::kTypeName[]; diff --git a/cpp/src/arrow/compute/api_scalar.h 
b/cpp/src/arrow/compute/api_scalar.h index 368e7ecb557..c2ead9ad9cf 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -107,8 +107,7 @@ enum class CalendarUnit : int8_t { class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, - bool week_starts_monday = true, - bool change_on_boundary = false, + bool week_starts_monday = true, bool strict_ceil = false, bool calendar_based_origin = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -121,7 +120,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { bool week_starts_monday; /// Times exactly on unit multiple boundary will be rounded one unit multiple up. /// This applies for ceiling only. - bool change_on_boundary; + bool strict_ceil; /// By default origin is 1970-01-01T00:00:00. By setting this to true, rounding origin /// will be beginning of one less precise calendar unit. E.g. rounding to hours will use /// beginning of day as origin. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 291c2759b5b..fe3baf5911f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -694,8 +694,15 @@ year_month_day GetFlooredYmd(int64_t arg, const int multiple, year_month_day ymd{floor(localizer_.template ConvertTimePoint(arg))}; if (multiple == 1) { + // Round to a multiple of months since epoch start (1970-01-01 00:00:00). return year_month_day(ymd.year() / ymd.month() / 1); } else if (options.calendar_based_origin) { + // Round to a multiple of months since the last year. + // + // Note: compute::CalendarUnit::YEAR is the greatest unit so there is no logical time + // point to use as origin. 
compute::CalendarUnit::DAY is covered by FloorTimePoint. + // Therefore compute::CalendarUnit::YEAR and compute::CalendarUnit::DAY are not + // covered here. switch (options.unit) { case compute::CalendarUnit::MONTH: { const auto m = @@ -711,6 +718,7 @@ year_month_day GetFlooredYmd(int64_t arg, const int multiple, return ymd; } } else { + // Round to month * options.multiple since epoch start (1970-01-01 00:00:00). int32_t total_months_origin = 1970 * 12; int32_t total_months = static_cast(ymd.year()) * 12 + static_cast(static_cast(ymd.month())) - 1 - @@ -721,73 +729,69 @@ year_month_day GetFlooredYmd(int64_t arg, const int multiple, } else { total_months = (total_months - multiple + 1) / multiple * multiple; } - return year{1970} / jan / 1 + months{total_months}; + return year_month_day(year{1970} / jan / 1) + months{total_months}; } } template -const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions options, +const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg); if (options.multiple == 1) { + // Round to a multiple of unit since epoch start (1970-01-01 00:00:00). const Unit d = floor(t).time_since_epoch(); return localizer_.template ConvertLocalToSys(duration_cast(d), st); } else if (options.calendar_based_origin) { + // Round to a multiple of units since the last greater unit. + // For example: round to multiple of days since the beginning of the month or + // to hours since the beginning of the day. 
const Unit unit = Unit{options.multiple}; + Duration origin; switch (options.unit) { - case compute::CalendarUnit::DAY: { - const auto origin = - localizer_.ConvertDays(year_month_day(floor(t)).year() / - year_month_day(floor(t)).month() / 1); - const auto m = (floor(t) - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::HOUR: { - const auto origin = localizer_.ConvertDays(year_month_day(floor(t))); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::MINUTE: { - const auto origin = floor(t); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::SECOND: { - const auto origin = floor(t); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::MILLISECOND: { - const auto origin = floor(t); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::MICROSECOND: { - const auto origin = floor(t); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } - case compute::CalendarUnit::NANOSECOND: { - const auto origin = floor(t); - const auto m = (t - origin) / unit * unit + origin; - return localizer_.template ConvertLocalToSys( - duration_cast(m.time_since_epoch()), st); - } + case compute::CalendarUnit::DAY: + origin = duration_cast( + localizer_ + .ConvertDays(year_month_day(floor(t)).year() / + year_month_day(floor(t)).month() / 1) + .time_since_epoch()); + break; + case 
compute::CalendarUnit::HOUR: + origin = duration_cast( + localizer_.ConvertDays(year_month_day(floor(t))).time_since_epoch()); + break; + case compute::CalendarUnit::MINUTE: + origin = duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::SECOND: + origin = + duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::MILLISECOND: + origin = + duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::MICROSECOND: + origin = duration_cast( + floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::NANOSECOND: + origin = duration_cast( + floor(t).time_since_epoch()); + break; default: { *st = Status::Invalid("Cannot floor to ", &options.unit); return Duration{0}; } } + const Duration m = + duration_cast(((t - origin).time_since_epoch() / unit * unit + origin)); + return localizer_.template ConvertLocalToSys(m, st); } else { + // Round to a multiple of units * options.multiple since epoch start + // (1970-01-01 00:00:00). const Unit d = floor(t).time_since_epoch(); const Unit unit = Unit{options.multiple}; const Unit m = @@ -798,17 +802,19 @@ const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions opti } template -const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, +const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg) + weekday_offset; const weeks d = floor(t).time_since_epoch(); if (options.multiple == 1) { + // Round to a multiple of weeks since epoch start (1970-01-01 00:00:00). return localizer_.template ConvertLocalToSys(duration_cast(d), st) - weekday_offset; } else if (options.calendar_based_origin) { + // Round to a multiple of weeks since year prior. 
weekday wd_; if (options.week_starts_monday) { wd_ = thu; @@ -822,6 +828,8 @@ const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions const auto m = (t - start) / unit * unit + start; return localizer_.template ConvertLocalToSys(m.time_since_epoch(), st); } else { + // Round to a multiple of weeks * options.multiple since epoch start + // (1970-01-01 00:00:00). const weeks unit = weeks{options.multiple}; const weeks m = (d.count() >= 0) ? d / unit * unit : (d - unit + weeks{1}) / unit * unit; @@ -832,7 +840,7 @@ const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions } template -Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions options, +Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, Status* st) { const Duration f = FloorTimePoint(arg, options, localizer_, st); @@ -841,7 +849,7 @@ Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions options, const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.change_on_boundary || cs < Duration{arg}) { + if (options.strict_ceil || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(Unit{options.multiple})), st); @@ -850,7 +858,7 @@ Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions options, } template -Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, +Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { const Duration f = FloorWeekTimePoint(arg, options, localizer_, @@ -859,7 +867,7 @@ Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions options localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.change_on_boundary || cs < 
Duration{arg}) { + if (options.strict_ceil || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(weeks{options.multiple})), st); @@ -868,7 +876,7 @@ Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions options } template -Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions options, +Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, Status* st) { const Duration f = FloorTimePoint(arg, options, localizer_, st); @@ -878,7 +886,7 @@ Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions options, } template -Duration RoundWeekTimePoint(const int64_t arg, const RoundTemporalOptions options, +Duration RoundWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { const Duration f = FloorWeekTimePoint(arg, options, localizer_, diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index e53545830fc..42085d8a109 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -883,11 +883,11 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): def _set_options(self, multiple, unit, week_starts_monday, - change_on_boundary, calendar_based_origin): + strict_ceil, calendar_based_origin): self.wrapped.reset( new CRoundTemporalOptions( multiple, unwrap_round_temporal_unit(unit), - week_starts_monday, change_on_boundary, + week_starts_monday, strict_ceil, calendar_based_origin) ) @@ -907,7 +907,7 @@ class RoundTemporalOptions(_RoundTemporalOptions): "nanosecond". week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. - change_on_boundary : bool, default False + strict_ceil : bool, default False If True times exactly on unit multiple boundary will be rounded one unit multiple up. This applies for ceiling only. 
calendar_based_origin : bool, default False @@ -918,9 +918,9 @@ class RoundTemporalOptions(_RoundTemporalOptions): """ def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, - change_on_boundary=False, calendar_based_origin=False): + strict_ceil=False, calendar_based_origin=False): self._set_options(multiple, unit, week_starts_monday, - change_on_boundary, calendar_based_origin) + strict_ceil, calendar_based_origin) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6c47f177840..615e4646428 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1972,12 +1972,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: "arrow::compute::RoundTemporalOptions"(CFunctionOptions): CRoundTemporalOptions(int multiple, CCalendarUnit unit, c_bool week_starts_monday, - c_bool change_on_boundary, + c_bool strict_ceil, c_bool calendar_based_origin) int multiple CCalendarUnit unit c_bool week_starts_monday - c_bool change_on_boundary + c_bool strict_ceil c_bool calendar_based_origin cdef cppclass CRoundToMultipleOptions \ diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 1d9464b0e09..66abebf4641 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -152,7 +152,7 @@ def test_option_class_equality(): pc.ReplaceSliceOptions(0, 1, "a"), pc.ReplaceSubstringOptions("a", "b"), pc.RoundOptions(2, "towards_infinity"), - pc.RoundTemporalOptions(1, "second", True), + pc.RoundTemporalOptions(1, "second", week_starts_monday=True), pc.RoundToMultipleOptions(100, "towards_infinity"), pc.ScalarAggregateOptions(), pc.SelectKOptions(0, sort_keys=[("b", "ascending")]), @@ -2085,10 +2085,9 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) - # TODO: should work 
for day if ta.type.tz is None and unit != "day": options = pc.RoundTemporalOptions( - value, unit, change_on_boundary=True) + value, unit, strict_ceil=True) result = pc.ceil_temporal(ta, options=options) expected = ts.dt.ceil(frequency) @@ -2123,7 +2122,7 @@ def test_round_temporal(unit): "1967-02-26 05:56:46.922376960", "1975-11-01 10:55:37.016146432", "1982-01-21 18:43:44.517366784", - "1992-01-01T00:00:00.100000000", + "1992-01-01 00:00:00.100000000", "1999-12-04 05:55:34.794991104", "2026-10-26 08:39:00.316686848"] ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps]) From 29fdbf2d1c40206643cd866b95527893ba2cec79 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 28 Apr 2022 16:50:43 +0200 Subject: [PATCH 04/19] Review feedback. --- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 6aa2649acd1..47ec2cf932d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2229,7 +2229,7 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) { CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); } -TEST_F(ScalarTemporalTest, TestCeilTemporalChangeOnBoundary) { +TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) { std::string op = "ceil_temporal"; RoundTemporalOptions round_to_1_nanoseconds = RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false); From c240ebb5bb0147dabee6cfd4a5487f8b00f78368 Mon Sep 17 00:00:00 2001 From: Rok Date: Fri, 6 May 2022 14:28:29 +0200 Subject: [PATCH 05/19] Review feedback --- cpp/src/arrow/compute/api_scalar.cc | 7 ++++--- cpp/src/arrow/compute/api_scalar.h | 14 +++++++++----- .../compute/kernels/scalar_temporal_test.cc | 6 +++--- .../compute/kernels/scalar_temporal_unary.cc | 6 +++--- python/pyarrow/_compute.pyx | 16 +++++++++++++--- 
python/pyarrow/includes/libarrow.pxd | 4 ++-- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index f9adf95ac66..5fc2afa3b72 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -334,7 +334,8 @@ static auto kRoundTemporalOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundToMultipleOptions::multiple), DataMember("round_mode", &RoundToMultipleOptions::round_mode)); @@ -494,13 +495,13 @@ constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, bool week_starts_monday, bool strict_ceil, - bool calendar_based_origin) + bool multiple_since_greater_unit) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), week_starts_monday(week_starts_monday), strict_ceil(strict_ceil), - calendar_based_origin(calendar_based_origin) {} + multiple_since_greater_unit(multiple_since_greater_unit) {} constexpr char RoundTemporalOptions::kTypeName[]; RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index c2ead9ad9cf..f2ff09b3237 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -108,7 +108,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, bool week_starts_monday = true, bool strict_ceil = false, - bool calendar_based_origin = false); + bool multiple_since_greater_unit = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -121,10 +121,14 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// Times exactly on unit multiple boundary will be rounded one 
unit multiple up. /// This applies for ceiling only. bool strict_ceil; - /// By default origin is 1970-01-01T00:00:00. By setting this to true, rounding origin - /// will be beginning of one less precise calendar unit. E.g. rounding to hours will use - /// beginning of day as origin. - bool calendar_based_origin; + /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. + /// By setting multiple_since_greater_unit to true, time will be rounded to number + /// of units since the last greater calendar unit. + /// For example: rounding to multiple of days since the beginning of the month or + /// to hours since the beginning of the day. + /// Please note: week and quarter are not used as greater units, therefor days will + /// will be rounded to the beginning of the month not week. + bool multiple_since_greater_unit; }; class ARROW_EXPORT RoundToMultipleOptions : public FunctionOptions { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 47ec2cf932d..38f47a558ce 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2468,7 +2468,7 @@ TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) { CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); } -TEST_F(ScalarTemporalTest, TestCeilTemporalCalendarBasedOrigin) { +TEST_F(ScalarTemporalTest, TestCeilTemporalMultipleSinceGreaterUnit) { std::string op = "ceil_temporal"; RoundTemporalOptions round_to_15_nanoseconds = RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); @@ -2781,7 +2781,7 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) { CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); } -TEST_F(ScalarTemporalTest, TestFloorTemporalCalendarBasedOrigin) { +TEST_F(ScalarTemporalTest, TestFloorTemporalMultipleSinceGreaterUnit) { std::string op = "floor_temporal"; RoundTemporalOptions 
round_to_15_nanoseconds = RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); @@ -3117,7 +3117,7 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) { CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours); } -TEST_F(ScalarTemporalTest, TestRoundTemporalCalendarBasedOrigin) { +TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) { std::string op = "round_temporal"; RoundTemporalOptions round_to_15_nanoseconds = RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index fe3baf5911f..44cbb9f5e32 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -696,7 +696,7 @@ year_month_day GetFlooredYmd(int64_t arg, const int multiple, if (multiple == 1) { // Round to a multiple of months since epoch start (1970-01-01 00:00:00). return year_month_day(ymd.year() / ymd.month() / 1); - } else if (options.calendar_based_origin) { + } else if (options.multiple_since_greater_unit) { // Round to a multiple of months since the last year. // // Note: compute::CalendarUnit::YEAR is the greatest unit so there is no logical time @@ -743,7 +743,7 @@ const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions& opt const Unit d = floor(t).time_since_epoch(); return localizer_.template ConvertLocalToSys(duration_cast(d), st); - } else if (options.calendar_based_origin) { + } else if (options.multiple_since_greater_unit) { // Round to a multiple of units since the last greater unit. // For example: round to multiple of days since the beginning of the month or // to hours since the beginning of the day. 
@@ -813,7 +813,7 @@ const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions& return localizer_.template ConvertLocalToSys(duration_cast(d), st) - weekday_offset; - } else if (options.calendar_based_origin) { + } else if (options.multiple_since_greater_unit) { // Round to a multiple of weeks since year prior. weekday wd_; if (options.week_starts_monday) { diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 42085d8a109..6bbf546b809 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -910,17 +910,27 @@ class RoundTemporalOptions(_RoundTemporalOptions): strict_ceil : bool, default False If True times exactly on unit multiple boundary will be rounded one unit multiple up. This applies for ceiling only. - calendar_based_origin : bool, default False + multiple_since_greater_unit : bool, default False By default origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit. E.g.: rounding to hours will use beginning of day as origin. + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting multiple_since_greater_unit to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Please note: week and quarter are not used as greater units, + therefor days will will be rounded to the beginning of the month not + week. 
+ """ def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, - strict_ceil=False, calendar_based_origin=False): + strict_ceil=False, multiple_since_greater_unit=False): self._set_options(multiple, unit, week_starts_monday, - strict_ceil, calendar_based_origin) + strict_ceil, multiple_since_greater_unit) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 615e4646428..7bb5c1280d6 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1973,12 +1973,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CRoundTemporalOptions(int multiple, CCalendarUnit unit, c_bool week_starts_monday, c_bool strict_ceil, - c_bool calendar_based_origin) + c_bool multiple_since_greater_unit) int multiple CCalendarUnit unit c_bool week_starts_monday c_bool strict_ceil - c_bool calendar_based_origin + c_bool multiple_since_greater_unit cdef cppclass CRoundToMultipleOptions \ "arrow::compute::RoundToMultipleOptions"(CFunctionOptions): From 4d816b764d176eb962c7daa696303c8ee38049de Mon Sep 17 00:00:00 2001 From: Rok Date: Fri, 6 May 2022 19:49:18 +0200 Subject: [PATCH 06/19] strict_ceil to ceil_on_boundary --- cpp/src/arrow/compute/api_scalar.cc | 6 +++--- cpp/src/arrow/compute/api_scalar.h | 12 +++++++----- .../compute/kernels/scalar_temporal_unary.cc | 4 ++-- python/pyarrow/_compute.pyx | 16 ++++++++-------- python/pyarrow/includes/libarrow.pxd | 4 ++-- python/pyarrow/tests/test_compute.py | 2 +- 6 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 5fc2afa3b72..6166bf23698 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -333,7 +333,7 @@ static auto kRoundTemporalOptionsType = GetFunctionOptionsType( @@ -494,13 +494,13 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode 
round_mode) constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, - bool week_starts_monday, bool strict_ceil, + bool week_starts_monday, bool ceil_on_boundary, bool multiple_since_greater_unit) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), week_starts_monday(week_starts_monday), - strict_ceil(strict_ceil), + ceil_on_boundary(ceil_on_boundary), multiple_since_greater_unit(multiple_since_greater_unit) {} constexpr char RoundTemporalOptions::kTypeName[]; diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index f2ff09b3237..134ab0a8f46 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -107,7 +107,8 @@ enum class CalendarUnit : int8_t { class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, - bool week_starts_monday = true, bool strict_ceil = false, + bool week_starts_monday = true, + bool ceil_on_boundary = false, bool multiple_since_greater_unit = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -118,16 +119,17 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { CalendarUnit unit; /// What day does the week start with (Monday=true, Sunday=false) bool week_starts_monday; - /// Times exactly on unit multiple boundary will be rounded one unit multiple up. + /// If True times exactly on unit multiple boundary will be rounded up one unit. /// This applies for ceiling only. - bool strict_ceil; + bool ceil_on_boundary; /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. /// By setting multiple_since_greater_unit to true, time will be rounded to number /// of units since the last greater calendar unit. 
/// For example: rounding to multiple of days since the beginning of the month or /// to hours since the beginning of the day. - /// Please note: week and quarter are not used as greater units, therefor days will - /// will be rounded to the beginning of the month not week. + /// Exceptions: week and quarter are not used as greater units, therefor days will + /// will be rounded to the beginning of the month not week. Greater unit of week + /// is year. bool multiple_since_greater_unit; }; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 44cbb9f5e32..69955b1b24b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -849,7 +849,7 @@ Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions& options, const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.strict_ceil || cs < Duration{arg}) { + if (options.ceil_on_boundary || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(Unit{options.multiple})), st); @@ -867,7 +867,7 @@ Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions& option localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.strict_ceil || cs < Duration{arg}) { + if (options.ceil_on_boundary || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(weeks{options.multiple})), st); diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 6bbf546b809..0dd35fa82b4 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -883,11 +883,11 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): def _set_options(self, multiple, unit, 
week_starts_monday, - strict_ceil, calendar_based_origin): + ceil_on_boundary, calendar_based_origin): self.wrapped.reset( new CRoundTemporalOptions( multiple, unwrap_round_temporal_unit(unit), - week_starts_monday, strict_ceil, + week_starts_monday, ceil_on_boundary, calendar_based_origin) ) @@ -907,9 +907,9 @@ class RoundTemporalOptions(_RoundTemporalOptions): "nanosecond". week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. - strict_ceil : bool, default False + ceil_on_boundary : bool, default False If True times exactly on unit multiple boundary will be rounded - one unit multiple up. This applies for ceiling only. + up one unit. This applies for ceiling only. multiple_since_greater_unit : bool, default False By default origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit. @@ -921,16 +921,16 @@ class RoundTemporalOptions(_RoundTemporalOptions): calendar unit. For example: rounding to multiple of days since the beginning of the month or to hours since the beginning of the day. - Please note: week and quarter are not used as greater units, + Exceptions: week and quarter are not used as greater units, therefor days will will be rounded to the beginning of the month not - week. + week. Greater unit of week is a year. 
""" def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, - strict_ceil=False, multiple_since_greater_unit=False): + ceil_on_boundary=False, multiple_since_greater_unit=False): self._set_options(multiple, unit, week_starts_monday, - strict_ceil, multiple_since_greater_unit) + ceil_on_boundary, multiple_since_greater_unit) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 7bb5c1280d6..6ed21df3d04 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1972,12 +1972,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: "arrow::compute::RoundTemporalOptions"(CFunctionOptions): CRoundTemporalOptions(int multiple, CCalendarUnit unit, c_bool week_starts_monday, - c_bool strict_ceil, + c_bool ceil_on_boundary, c_bool multiple_since_greater_unit) int multiple CCalendarUnit unit c_bool week_starts_monday - c_bool strict_ceil + c_bool ceil_on_boundary c_bool multiple_since_greater_unit cdef cppclass CRoundToMultipleOptions \ diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 66abebf4641..d9cd2ed170b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2087,7 +2087,7 @@ def _check_temporal_rounding(ts, values, unit): if ta.type.tz is None and unit != "day": options = pc.RoundTemporalOptions( - value, unit, strict_ceil=True) + value, unit, ceil_on_boundary=True) result = pc.ceil_temporal(ta, options=options) expected = ts.dt.ceil(frequency) From 4160a40fbab9d400e69f454f06a2450b3345be67 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 10 May 2022 15:32:51 +0200 Subject: [PATCH 07/19] Apply suggestions from code review Co-authored-by: Joris Van den Bossche --- cpp/src/arrow/compute/api_scalar.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.h 
b/cpp/src/arrow/compute/api_scalar.h index 134ab0a8f46..8b886d97b3c 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -125,9 +125,9 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. /// By setting multiple_since_greater_unit to true, time will be rounded to number /// of units since the last greater calendar unit. - /// For example: rounding to multiple of days since the beginning of the month or + /// For example: rounding to a multiple of days since the beginning of the month or /// to hours since the beginning of the day. - /// Exceptions: week and quarter are not used as greater units, therefor days will + /// Exceptions: week and quarter are not used as greater units, therefore days will /// will be rounded to the beginning of the month not week. Greater unit of week /// is year. bool multiple_since_greater_unit; From 177b233dd72ae6af6b5b66e10dbd389c93a1d1fc Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 10 May 2022 15:33:09 +0200 Subject: [PATCH 08/19] Apply suggestions from code review Co-authored-by: Joris Van den Bossche --- cpp/src/arrow/compute/api_scalar.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 8b886d97b3c..8e3d0d373b5 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -123,7 +123,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// This applies for ceiling only. bool ceil_on_boundary; /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. - /// By setting multiple_since_greater_unit to true, time will be rounded to number + /// By setting multiple_since_greater_unit to true, time will be rounded to a number /// of units since the last greater calendar unit. 
/// For example: rounding to a multiple of days since the beginning of the month or /// to hours since the beginning of the day. From 9fe7a06c84c0a4b79d2ed2b1033cfdf8a4360799 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 10 May 2022 15:33:09 +0200 Subject: [PATCH 09/19] Apply suggestions from code review Co-authored-by: Joris Van den Bossche --- cpp/src/arrow/compute/api_scalar.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 8e3d0d373b5..271395cafc0 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -119,7 +119,9 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { CalendarUnit unit; /// What day does the week start with (Monday=true, Sunday=false) bool week_starts_monday; - /// If True times exactly on unit multiple boundary will be rounded up one unit. + /// Enable this flag to return a rounded value that is strictly greater than the input. + /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00 + /// if set to true and 1970-01-01T00:00:00 if set to false. /// This applies for ceiling only. bool ceil_on_boundary; /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. 
From e9f36fb1da80869c4e4eb4a24ce0745ad2ca15e9 Mon Sep 17 00:00:00 2001 From: Rok Date: Wed, 11 May 2022 20:05:15 +0200 Subject: [PATCH 10/19] ceil_on_boundary -> ceil_is_strictly_greater --- cpp/src/arrow/compute/api_scalar.cc | 8 +++++--- cpp/src/arrow/compute/api_scalar.h | 4 ++-- .../arrow/compute/kernels/scalar_temporal_unary.cc | 4 ++-- python/pyarrow/_compute.pyx | 12 +++++++----- python/pyarrow/includes/libarrow.pxd | 4 ++-- python/pyarrow/tests/test_compute.py | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 6166bf23698..4b27ca06a31 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -333,7 +333,8 @@ static auto kRoundTemporalOptionsType = GetFunctionOptionsType( @@ -494,13 +495,14 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode) constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, - bool week_starts_monday, bool ceil_on_boundary, + bool week_starts_monday, + bool ceil_is_strictly_greater, bool multiple_since_greater_unit) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), week_starts_monday(week_starts_monday), - ceil_on_boundary(ceil_on_boundary), + ceil_is_strictly_greater(ceil_is_strictly_greater), multiple_since_greater_unit(multiple_since_greater_unit) {} constexpr char RoundTemporalOptions::kTypeName[]; diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 271395cafc0..beebdd2a410 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -108,7 +108,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, bool week_starts_monday = true, - bool ceil_on_boundary = false, + bool ceil_is_strictly_greater 
= false, bool multiple_since_greater_unit = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -123,7 +123,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00 /// if set to true and 1970-01-01T00:00:00 if set to false. /// This applies for ceiling only. - bool ceil_on_boundary; + bool ceil_is_strictly_greater; /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. /// By setting multiple_since_greater_unit to true, time will be rounded to a number /// of units since the last greater calendar unit. diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 69955b1b24b..919e30f3ccd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -849,7 +849,7 @@ Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions& options, const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.ceil_on_boundary || cs < Duration{arg}) { + if (options.ceil_is_strictly_greater || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(Unit{options.multiple})), st); @@ -867,7 +867,7 @@ Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions& option localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (options.ceil_on_boundary || cs < Duration{arg}) { + if (options.ceil_is_strictly_greater || cs < Duration{arg}) { return localizer_.template ConvertLocalToSys( duration_cast(cl + duration_cast(weeks{options.multiple})), st); diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 
0dd35fa82b4..d15cf385200 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -883,11 +883,11 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): def _set_options(self, multiple, unit, week_starts_monday, - ceil_on_boundary, calendar_based_origin): + ceil_is_strictly_greater, calendar_based_origin): self.wrapped.reset( new CRoundTemporalOptions( multiple, unwrap_round_temporal_unit(unit), - week_starts_monday, ceil_on_boundary, + week_starts_monday, ceil_is_strictly_greater, calendar_based_origin) ) @@ -907,7 +907,7 @@ class RoundTemporalOptions(_RoundTemporalOptions): "nanosecond". week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. - ceil_on_boundary : bool, default False + ceil_is_strictly_greater : bool, default False If True times exactly on unit multiple boundary will be rounded up one unit. This applies for ceiling only. multiple_since_greater_unit : bool, default False @@ -928,9 +928,11 @@ class RoundTemporalOptions(_RoundTemporalOptions): """ def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, - ceil_on_boundary=False, multiple_since_greater_unit=False): + ceil_is_strictly_greater=False, + multiple_since_greater_unit=False): self._set_options(multiple, unit, week_starts_monday, - ceil_on_boundary, multiple_since_greater_unit) + ceil_is_strictly_greater, + multiple_since_greater_unit) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6ed21df3d04..92fb51aabd1 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1972,12 +1972,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: "arrow::compute::RoundTemporalOptions"(CFunctionOptions): CRoundTemporalOptions(int multiple, CCalendarUnit unit, c_bool week_starts_monday, - c_bool ceil_on_boundary, + 
c_bool ceil_is_strictly_greater, c_bool multiple_since_greater_unit) int multiple CCalendarUnit unit c_bool week_starts_monday - c_bool ceil_on_boundary + c_bool ceil_is_strictly_greater c_bool multiple_since_greater_unit cdef cppclass CRoundToMultipleOptions \ diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index d9cd2ed170b..2e2efb0a3c1 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2087,7 +2087,7 @@ def _check_temporal_rounding(ts, values, unit): if ta.type.tz is None and unit != "day": options = pc.RoundTemporalOptions( - value, unit, ceil_on_boundary=True) + value, unit, ceil_is_strictly_greater=True) result = pc.ceil_temporal(ta, options=options) expected = ts.dt.ceil(frequency) From af2a5e396c043cfac03078925802ff15850a4824 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 12 May 2022 14:55:40 +0200 Subject: [PATCH 11/19] Review feedback. --- python/pyarrow/_compute.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index d15cf385200..4ee6b4d20ba 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -908,8 +908,11 @@ class RoundTemporalOptions(_RoundTemporalOptions): week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. ceil_is_strictly_greater : bool, default False - If True times exactly on unit multiple boundary will be rounded - up one unit. This applies for ceiling only. + If True return a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies for ceiling only. multiple_since_greater_unit : bool, default False By default origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit. 
From c5c7cc0dbed3dafbfdb20de35783ef2308669aff Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 16 May 2022 18:30:04 +0200 Subject: [PATCH 12/19] Update python/pyarrow/_compute.pyx Co-authored-by: Antoine Pitrou --- python/pyarrow/_compute.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 4ee6b4d20ba..0ae68215b01 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -925,7 +925,7 @@ class RoundTemporalOptions(_RoundTemporalOptions): For example: rounding to multiple of days since the beginning of the month or to hours since the beginning of the day. Exceptions: week and quarter are not used as greater units, - therefor days will will be rounded to the beginning of the month not + therefore days will be rounded to the beginning of the month not week. Greater unit of week is a year. """ From b2ff7a2597e732b5e0caecba3ca5624643557f41 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 16 May 2022 18:30:04 +0200 Subject: [PATCH 13/19] Update python/pyarrow/_compute.pyx Co-authored-by: Antoine Pitrou --- .../compute/kernels/scalar_temporal_test.cc | 14 ++--- python/pyarrow/tests/test_compute.py | 53 ++++++++++++++----- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 38f47a558ce..4cf9a1b04c9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -3137,8 +3137,8 @@ TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) { RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); RoundTemporalOptions round_to_15_weeks_sunday = RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); - RoundTemporalOptions round_to_15_months = - RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_5_months = 
+ RoundTemporalOptions(5, CalendarUnit::MONTH, true, true, true); RoundTemporalOptions round_to_15_quarters = RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); RoundTemporalOptions round_to_15_years = @@ -3207,11 +3207,11 @@ TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) { "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03", "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01", "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])"; - const char* round_15_months = - R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", - "2020-01-01", "2020-04-01", "2020-04-01", "2010-04-01", + const char* round_5_months = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-06-01", + "2020-01-01", "2019-11-01", "2019-11-01", "2009-11-01", "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", - "2006-04-01", "2009-04-01", "2009-04-01", "2012-01-01", null])"; + "2005-11-01", "2008-11-01", "2008-11-01", "2012-01-01", null])"; const char* round_15_quarters = R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", @@ -3235,7 +3235,7 @@ TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) { CheckScalarUnary(op, unit, times, unit, round_15_weeks, &round_to_15_weeks); CheckScalarUnary(op, unit, times, unit, round_15_weeks_sunday, &round_to_15_weeks_sunday); - CheckScalarUnary(op, unit, times, unit, round_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, round_5_months, &round_to_5_months); CheckScalarUnary(op, unit, times, unit, round_15_quarters, &round_to_15_quarters); CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years); } diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 2e2efb0a3c1..1e5676e2dc8 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2037,6 +2037,14 @@ def _check_temporal_rounding(ts, values, unit): 
"hour": "H", "day": "D" } + greater_unit = { + "nanosecond": "us", + "microsecond": "ms", + "millisecond": "s", + "second": "min", + "minute": "H", + "hour": "d", + } ta = pa.array(ts) for value in values: @@ -2055,6 +2063,27 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + # Check rounding with multiple_since_greater_unit=True. + # Note: rounding to month is not supported in Pandas so we can't + # approximate this functionallity and exclude unit == "day". + if unit != "day": + options = pc.RoundTemporalOptions( + value, unit, multiple_since_greater_unit=True) + origin = ts.dt.floor(greater_unit[unit]) + + if ta.type.tz is None: + result = pc.ceil_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.ceil(frequency) + origin + np.testing.assert_array_equal(result, expected) + + result = pc.floor_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.floor(frequency) + origin + np.testing.assert_array_equal(result, expected) + + result = pc.round_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.round(frequency) + origin + np.testing.assert_array_equal(result, expected) + # Check RoundTemporalOptions partial defaults if unit == "day": result = pc.ceil_temporal(ta, multiple=value).to_pandas() @@ -2069,6 +2098,18 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + if ta.type.tz is None: + options = pc.RoundTemporalOptions( + value, unit, ceil_is_strictly_greater=True) + result = pc.ceil_temporal(ta, options=options) + expected = ts.dt.ceil(frequency) + + expected = np.where( + expected == ts, + expected + pd.Timedelta(value, unit_shorthand[unit]), + expected) + np.testing.assert_array_equal(result, expected) + # Check RoundTemporalOptions defaults if unit == "day": frequency = "1D" @@ -2085,18 +2126,6 @@ def _check_temporal_rounding(ts, values, 
unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) - if ta.type.tz is None and unit != "day": - options = pc.RoundTemporalOptions( - value, unit, ceil_is_strictly_greater=True) - result = pc.ceil_temporal(ta, options=options) - expected = ts.dt.ceil(frequency) - - expected = np.where( - expected == ts, - expected + pd.Timedelta(value, unit_shorthand[unit]), - expected) - np.testing.assert_array_equal(result, expected) - # TODO: We should test on windows once ARROW-13168 is resolved. @pytest.mark.skipif(sys.platform == 'win32', From 65807b76075bd3eaa4ef2c2603e96337785e40da Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 17 May 2022 16:04:35 +0200 Subject: [PATCH 14/19] review feedback --- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 4cf9a1b04c9..66d37c172fb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -3144,6 +3144,9 @@ TEST_F(ScalarTemporalTest, TestRoundTemporalMultipleSinceGreaterUnit) { RoundTemporalOptions round_to_15_years = RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week.
const char* round_15_nanosecond = R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", From 167236db4ae45e287e7fd113ecfa6558632eeac0 Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 17 May 2022 16:53:26 +0200 Subject: [PATCH 15/19] multiple_since_greater_unit->calendar_based_origin --- cpp/src/arrow/compute/api_scalar.cc | 7 +++---- cpp/src/arrow/compute/api_scalar.h | 6 +++--- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 6 ++++++ cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc | 6 +++--- python/pyarrow/_compute.pyx | 8 ++++---- python/pyarrow/includes/libarrow.pxd | 4 ++-- python/pyarrow/tests/test_compute.py | 4 ++-- 7 files changed, 23 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 4b27ca06a31..cd5b4ce7997 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -335,8 +335,7 @@ static auto kRoundTemporalOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundToMultipleOptions::multiple), DataMember("round_mode", &RoundToMultipleOptions::round_mode)); @@ -497,13 +496,13 @@ constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, bool week_starts_monday, bool ceil_is_strictly_greater, - bool multiple_since_greater_unit) + bool calendar_based_origin) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), week_starts_monday(week_starts_monday), ceil_is_strictly_greater(ceil_is_strictly_greater), - multiple_since_greater_unit(multiple_since_greater_unit) {} + calendar_based_origin(calendar_based_origin) {} constexpr char RoundTemporalOptions::kTypeName[]; RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 
beebdd2a410..8c2aa039226 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -109,7 +109,7 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, bool week_starts_monday = true, bool ceil_is_strictly_greater = false, - bool multiple_since_greater_unit = false); + bool calendar_based_origin = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -125,14 +125,14 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// This applies for ceiling only. bool ceil_is_strictly_greater; /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. - /// By setting multiple_since_greater_unit to true, time will be rounded to a number + /// By setting calendar_based_origin to true, time will be rounded to a number /// of units since the last greater calendar unit. /// For example: rounding to a multiple of days since the beginning of the month or /// to hours since the beginning of the day. /// Exceptions: week and quarter are not used as greater units, therefore days will /// will be rounded to the beginning of the month not week. Greater unit of week /// is year. 
- bool multiple_since_greater_unit; + bool calendar_based_origin; }; class ARROW_EXPORT RoundToMultipleOptions : public FunctionOptions { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 66d37c172fb..45bd7819c4e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2495,6 +2495,9 @@ TEST_F(ScalarTemporalTest, TestCeilTemporalMultipleSinceGreaterUnit) { RoundTemporalOptions round_to_15_years = RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week. const char* ceil_15_nanosecond = R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", "1899-01-01 00:59:20.001001015", "2033-05-18 03:33:20.000000015", @@ -2808,6 +2811,9 @@ TEST_F(ScalarTemporalTest, TestFloorTemporalMultipleSinceGreaterUnit) { RoundTemporalOptions round_to_15_years = RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week. const char* floor_15_nanosecond = R"(["1970-01-01 00:00:59.123456780", "2000-02-29 23:23:23.999999990", "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 919e30f3ccd..6275de94818 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -696,7 +696,7 @@ year_month_day GetFlooredYmd(int64_t arg, const int multiple, if (multiple == 1) { // Round to a multiple of months since epoch start (1970-01-01 00:00:00).
return year_month_day(ymd.year() / ymd.month() / 1); - } else if (options.multiple_since_greater_unit) { + } else if (options.calendar_based_origin) { // Round to a multiple of months since the last year. // // Note: compute::CalendarUnit::YEAR is the greatest unit so there is no logical time @@ -743,7 +743,7 @@ const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions& opt const Unit d = floor(t).time_since_epoch(); return localizer_.template ConvertLocalToSys(duration_cast(d), st); - } else if (options.multiple_since_greater_unit) { + } else if (options.calendar_based_origin) { // Round to a multiple of units since the last greater unit. // For example: round to multiple of days since the beginning of the month or // to hours since the beginning of the day. @@ -813,7 +813,7 @@ const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions& return localizer_.template ConvertLocalToSys(duration_cast(d), st) - weekday_offset; - } else if (options.multiple_since_greater_unit) { + } else if (options.calendar_based_origin) { // Round to a multiple of weeks since year prior. weekday wd_; if (options.week_starts_monday) { diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 0ae68215b01..fefdc319ea3 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -913,13 +913,13 @@ class RoundTemporalOptions(_RoundTemporalOptions): yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 if set to False. This applies for ceiling only. - multiple_since_greater_unit : bool, default False + calendar_based_origin : bool, default False By default origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit. E.g.: rounding to hours will use beginning of day as origin. By default time is rounded to a multiple of units since - 1970-01-01T00:00:00. By setting multiple_since_greater_unit to true, + 1970-01-01T00:00:00. 
By setting calendar_based_origin to true, time will be rounded to number of units since the last greater calendar unit. For example: rounding to multiple of days since the beginning of the @@ -932,10 +932,10 @@ class RoundTemporalOptions(_RoundTemporalOptions): def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, ceil_is_strictly_greater=False, - multiple_since_greater_unit=False): + calendar_based_origin=False): self._set_options(multiple, unit, week_starts_monday, ceil_is_strictly_greater, - multiple_since_greater_unit) + calendar_based_origin) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 92fb51aabd1..9d023e78212 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1973,12 +1973,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CRoundTemporalOptions(int multiple, CCalendarUnit unit, c_bool week_starts_monday, c_bool ceil_is_strictly_greater, - c_bool multiple_since_greater_unit) + c_bool calendar_based_origin) int multiple CCalendarUnit unit c_bool week_starts_monday c_bool ceil_is_strictly_greater - c_bool multiple_since_greater_unit + c_bool calendar_based_origin cdef cppclass CRoundToMultipleOptions \ "arrow::compute::RoundToMultipleOptions"(CFunctionOptions): diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 1e5676e2dc8..c1a73fb2fce 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2063,12 +2063,12 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) - # Check rounding with multiple_since_greater_unit=True. + # Check rounding with calendar_based_origin=True. # Note: rounding to month is not supported in Pandas so we can't # approximate this functionallity and exclude unit == "day". 
if unit != "day": options = pc.RoundTemporalOptions( - value, unit, multiple_since_greater_unit=True) + value, unit, calendar_based_origin=True) origin = ts.dt.floor(greater_unit[unit]) if ta.type.tz is None: From b2f92269338054100b704a93fd5ea60060cc1f96 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 17 May 2022 17:04:22 +0200 Subject: [PATCH 16/19] Update python/pyarrow/_compute.pyx Co-authored-by: Joris Van den Bossche --- python/pyarrow/_compute.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index fefdc319ea3..94e94df68ab 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -914,7 +914,7 @@ class RoundTemporalOptions(_RoundTemporalOptions): if set to False. This applies for ceiling only. calendar_based_origin : bool, default False - By default origin is 1970-01-01T00:00:00. By setting this to True, + By default, the origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit. E.g.: rounding to hours will use beginning of day as origin. From 38696a6b893a3fc6ff0e5623652ea7ede3b1cddd Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 17 May 2022 17:04:52 +0200 Subject: [PATCH 17/19] review feedback --- python/pyarrow/tests/test_compute.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c1a73fb2fce..6b733794ee8 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2098,6 +2098,10 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + # We naively test ceil_is_strictly_greater by adding time unit multiple + # to regular ceiled timestamp if it is equal to the original timestamp. + # This does not work if timestamp is zoned since our logic will not + # account for DST jumps. 
if ta.type.tz is None: options = pc.RoundTemporalOptions( value, unit, ceil_is_strictly_greater=True) From 422b0229004d7c9b6659c7c15f5645ed2188b9b9 Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 30 May 2022 21:58:36 +0200 Subject: [PATCH 18/19] Comments for RoundTemporalOptions.calendar_based_origin --- cpp/src/arrow/compute/api_scalar.h | 5 +++++ python/pyarrow/_compute.pyx | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 8c2aa039226..9fb7a942105 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -132,6 +132,11 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { /// Exceptions: week and quarter are not used as greater units, therefore days will /// will be rounded to the beginning of the month not week. Greater unit of week /// is year. + /// Note that ceiling and rounding might change sorting order of an array near greater + /// unit change. For example rounding YYYY-mm-dd 23:00:00 to 5 hours will ceil and + /// round to YYYY-mm-dd+1 01:00:00 and floor to YYYY-mm-dd 20:00:00. On the other hand + /// YYYY-mm-dd+1 00:00:00 will ceil, round and floor to YYYY-mm-dd+1 00:00:00. This + /// can break the order of an already ordered array. bool calendar_based_origin; }; diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 94e94df68ab..3623d22c6b1 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -927,6 +927,12 @@ class RoundTemporalOptions(_RoundTemporalOptions): Exceptions: week and quarter are not used as greater units, therefore days will be rounded to the beginning of the month not week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. 
On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. """ From de82528c370c3c0055cda648f94201df3c2b9694 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 31 May 2022 14:13:10 +0200 Subject: [PATCH 19/19] Apply suggestions from code review Co-authored-by: Antoine Pitrou --- python/pyarrow/_compute.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 3623d22c6b1..78bb31b5f9c 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -908,11 +908,11 @@ class RoundTemporalOptions(_RoundTemporalOptions): week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. ceil_is_strictly_greater : bool, default False - If True return a rounded value that is strictly greater than the + If True, ceil returns a rounded value that is strictly greater than the input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 if set to False. - This applies for ceiling only. + This applies to the ceil_temporal function only. calendar_based_origin : bool, default False By default, the origin is 1970-01-01T00:00:00. By setting this to True, rounding origin will be beginning of one less precise calendar unit.