diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index a9e2565a3ea..cd5b4ce7997 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -332,7 +332,10 @@ static auto kRoundOptionsType = GetFunctionOptionsType( static auto kRoundTemporalOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundTemporalOptions::multiple), DataMember("unit", &RoundTemporalOptions::unit), - DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday)); + DataMember("week_starts_monday", &RoundTemporalOptions::week_starts_monday), + DataMember("ceil_is_strictly_greater", + &RoundTemporalOptions::ceil_is_strictly_greater), + DataMember("calendar_based_origin", &RoundTemporalOptions::calendar_based_origin)); static auto kRoundToMultipleOptionsType = GetFunctionOptionsType( DataMember("multiple", &RoundToMultipleOptions::multiple), DataMember("round_mode", &RoundToMultipleOptions::round_mode)); @@ -491,11 +494,15 @@ RoundOptions::RoundOptions(int64_t ndigits, RoundMode round_mode) constexpr char RoundOptions::kTypeName[]; RoundTemporalOptions::RoundTemporalOptions(int multiple, CalendarUnit unit, - bool week_starts_monday) + bool week_starts_monday, + bool ceil_is_strictly_greater, + bool calendar_based_origin) : FunctionOptions(internal::kRoundTemporalOptionsType), multiple(std::move(multiple)), unit(unit), - week_starts_monday(week_starts_monday) {} + week_starts_monday(week_starts_monday), + ceil_is_strictly_greater(ceil_is_strictly_greater), + calendar_based_origin(calendar_based_origin) {} constexpr char RoundTemporalOptions::kTypeName[]; RoundToMultipleOptions::RoundToMultipleOptions(double multiple, RoundMode round_mode) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 0af591acfa5..9fb7a942105 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -107,7 +107,9 @@ enum class CalendarUnit : int8_t { class 
ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { public: explicit RoundTemporalOptions(int multiple = 1, CalendarUnit unit = CalendarUnit::DAY, - bool week_starts_monday = true); + bool week_starts_monday = true, + bool ceil_is_strictly_greater = false, + bool calendar_based_origin = false); static constexpr char const kTypeName[] = "RoundTemporalOptions"; static RoundTemporalOptions Defaults() { return RoundTemporalOptions(); } @@ -117,6 +119,25 @@ class ARROW_EXPORT RoundTemporalOptions : public FunctionOptions { CalendarUnit unit; /// What day does the week start with (Monday=true, Sunday=false) bool week_starts_monday; + /// Enable this flag to return a rounded value that is strictly greater than the input. + /// For example: ceiling 1970-01-01T00:00:00 to 3 hours would yield 1970-01-01T03:00:00 + /// if set to true and 1970-01-01T00:00:00 if set to false. + /// This applies for ceiling only. + bool ceil_is_strictly_greater; + /// By default time is rounded to a multiple of units since 1970-01-01T00:00:00. + /// By setting calendar_based_origin to true, time will be rounded to a number + /// of units since the last greater calendar unit. + /// For example: rounding to a multiple of days since the beginning of the month or + /// to hours since the beginning of the day. + /// Exceptions: week and quarter are not used as greater units, therefore days will + /// be rounded to the beginning of the month not week. Greater unit of week + /// is year. + /// Note that ceiling and rounding might change sorting order of an array near greater + /// unit change. For example rounding YYYY-mm-dd 23:00:00 to 5 hours will ceil and + /// round to YYYY-mm-dd+1 01:00:00 and floor to YYYY-mm-dd 20:00:00. On the other hand + /// YYYY-mm-dd+1 00:00:00 will ceil, round and floor to YYYY-mm-dd+1 00:00:00. This + /// can break the order of an already ordered array. 
+ bool calendar_based_origin; }; class ARROW_EXPORT RoundToMultipleOptions : public FunctionOptions { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index a52d69c36c7..45bd7819c4e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2229,6 +2229,370 @@ TEST_F(ScalarTemporalTest, TestCeilTemporal) { CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); } +TEST_F(ScalarTemporalTest, TestCeilTemporalStrictCeil) { + std::string op = "ceil_temporal"; + RoundTemporalOptions round_to_1_nanoseconds = + RoundTemporalOptions(1, CalendarUnit::NANOSECOND, true, true, false); + RoundTemporalOptions round_to_1_microseconds = + RoundTemporalOptions(1, CalendarUnit::MICROSECOND, true, true, false); + RoundTemporalOptions round_to_1_milliseconds = + RoundTemporalOptions(1, CalendarUnit::MILLISECOND, true, true, false); + RoundTemporalOptions round_to_1_seconds = + RoundTemporalOptions(1, CalendarUnit::SECOND, true, true, false); + RoundTemporalOptions round_to_1_minutes = + RoundTemporalOptions(1, CalendarUnit::MINUTE, true, true, false); + RoundTemporalOptions round_to_1_hours = + RoundTemporalOptions(1, CalendarUnit::HOUR, true, true, false); + RoundTemporalOptions round_to_1_days = + RoundTemporalOptions(1, CalendarUnit::DAY, true, true, false); + RoundTemporalOptions round_to_1_weeks = + RoundTemporalOptions(1, CalendarUnit::WEEK, true, true, false); + RoundTemporalOptions round_to_1_weeks_sunday = + RoundTemporalOptions(1, CalendarUnit::WEEK, false, true, false); + RoundTemporalOptions round_to_1_months = + RoundTemporalOptions(1, CalendarUnit::MONTH, true, true, false); + RoundTemporalOptions round_to_1_quarters = + RoundTemporalOptions(1, CalendarUnit::QUARTER, true, true, false); + RoundTemporalOptions round_to_1_years = + RoundTemporalOptions(1, CalendarUnit::YEAR, true, true, false); + + 
RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, false); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, false); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, false); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, false); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, false); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, false); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, false); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, false); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, false); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, false); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, false); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, false); + + const char* ceil_1_nanosecond = + R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000", + "1899-01-01 00:59:20.001001002", "2033-05-18 03:33:20.000000001", + "2020-01-01 01:05:05.001000001", "2019-12-31 02:10:10.002000001", + "2019-12-30 03:15:15.003000001", "2009-12-31 04:20:20.004132001", + "2010-01-01 05:25:25.005321001", "2010-01-03 06:30:30.006163001", + "2010-01-04 07:35:35.000000001", "2006-01-01 08:40:40.000000001", + "2005-12-31 09:45:45.000000001", "2008-12-28 00:00:00.000000001", + "2008-12-29 00:00:00.000000001", "2012-01-01 01:02:03.000000001", null])"; + const char* 
ceil_1_microsecond = + R"(["1970-01-01 00:00:59.123457", "2000-02-29 23:23:24.000000", + "1899-01-01 00:59:20.001002", "2033-05-18 03:33:20.000001", + "2020-01-01 01:05:05.001001", "2019-12-31 02:10:10.002001", + "2019-12-30 03:15:15.003001", "2009-12-31 04:20:20.004133", + "2010-01-01 05:25:25.005322", "2010-01-03 06:30:30.006164", + "2010-01-04 07:35:35.000001", "2006-01-01 08:40:40.000001", + "2005-12-31 09:45:45.000001", "2008-12-28 00:00:00.000001", + "2008-12-29 00:00:00.000001", "2012-01-01 01:02:03.000001", null])"; + const char* ceil_1_millisecond = + R"(["1970-01-01 00:00:59.124", "2000-02-29 23:23:24.000", + "1899-01-01 00:59:20.002", "2033-05-18 03:33:20.001", + "2020-01-01 01:05:05.002", "2019-12-31 02:10:10.003", + "2019-12-30 03:15:15.004", "2009-12-31 04:20:20.005", + "2010-01-01 05:25:25.006", "2010-01-03 06:30:30.007", + "2010-01-04 07:35:35.001", "2006-01-01 08:40:40.001", + "2005-12-31 09:45:45.001", "2008-12-28 00:00:00.001", + "2008-12-29 00:00:00.001", "2012-01-01 01:02:03.001", null])"; + const char* ceil_1_second = + R"(["1970-01-01 00:01:00", "2000-02-29 23:23:24", "1899-01-01 00:59:21", + "2033-05-18 03:33:21", "2020-01-01 01:05:06", "2019-12-31 02:10:11", + "2019-12-30 03:15:16", "2009-12-31 04:20:21", "2010-01-01 05:25:26", + "2010-01-03 06:30:31", "2010-01-04 07:35:36", "2006-01-01 08:40:41", + "2005-12-31 09:45:46", "2008-12-28 00:00:01", "2008-12-29 00:00:01", + "2012-01-01 01:02:04", null])"; + const char* ceil_1_minute = + R"(["1970-01-01 00:01:00", "2000-02-29 23:24:00", "1899-01-01 01:00:00", + "2033-05-18 03:34:00", "2020-01-01 01:06:00", "2019-12-31 02:11:00", + "2019-12-30 03:16:00", "2009-12-31 04:21:00", "2010-01-01 05:26:00", + "2010-01-03 06:31:00", "2010-01-04 07:36:00", "2006-01-01 08:41:00", + "2005-12-31 09:46:00", "2008-12-28 00:01:00", "2008-12-29 00:01:00", + "2012-01-01 01:03:00", null])"; + const char* ceil_1_hour = + R"(["1970-01-01 01:00:00", "2000-03-01 00:00:00", "1899-01-01 01:00:00", + "2033-05-18 04:00:00", 
"2020-01-01 02:00:00", "2019-12-31 03:00:00", + "2019-12-30 04:00:00", "2009-12-31 05:00:00", "2010-01-01 06:00:00", + "2010-01-03 07:00:00", "2010-01-04 08:00:00", "2006-01-01 09:00:00", + "2005-12-31 10:00:00", "2008-12-28 01:00:00", "2008-12-29 01:00:00", + "2012-01-01 02:00:00", null])"; + const char* ceil_1_day = + R"(["1970-01-02", "2000-03-01", "1899-01-02", "2033-05-19", + "2020-01-02", "2020-01-01", "2019-12-31", "2010-01-01", + "2010-01-02", "2010-01-04", "2010-01-05", "2006-01-02", + "2006-01-01", "2008-12-29", "2008-12-30", "2012-01-02", null])"; + const char* ceil_1_weeks = + R"(["1970-01-05", "2000-03-06", "1899-01-02", "2033-05-23", + "2020-01-06", "2020-01-06", "2020-01-06", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-11", "2006-01-02", + "2006-01-02", "2008-12-29", "2009-01-05", "2012-01-02", null])"; + const char* ceil_1_weeks_sunday = + R"(["1970-01-04", "2000-03-05", "1899-01-08", "2033-05-22", + "2020-01-05", "2020-01-05", "2020-01-05", "2010-01-03", + "2010-01-03", "2010-01-10", "2010-01-10", "2006-01-08", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-08", null])"; + const char* ceil_1_months = + R"(["1970-02-01", "2000-03-01", "1899-02-01", "2033-06-01", + "2020-02-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2010-02-01", "2010-02-01", "2010-02-01", "2006-02-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2012-02-01", null])"; + const char* ceil_1_quarters = + R"(["1970-04-01", "2000-04-01", "1899-04-01", "2033-07-01", + "2020-04-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2010-04-01", "2010-04-01", "2010-04-01", "2006-04-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2012-04-01", null])"; + const char* ceil_1_years = + R"(["1971-01-01", "2001-01-01", "1900-01-01", "2034-01-01", + "2021-01-01", "2020-01-01", "2020-01-01", "2010-01-01", + "2011-01-01", "2011-01-01", "2011-01-01", "2007-01-01", + "2006-01-01", "2009-01-01", "2009-01-01", "2013-01-01", null])"; + + const char* ceil_15_nanosecond = + 
R"(["1970-01-01 00:00:59.123456790", "2000-02-29 23:23:24.000000000", + "1899-01-01 00:59:20.001001005", "2033-05-18 03:33:20.000000010", + "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015", + "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015", + "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163005", + "2010-01-04 07:35:35.000000010", "2006-01-01 08:40:40.000000005", + "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015", + "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])"; + const char* ceil_15_microsecond = + R"(["1970-01-01 00:00:59.123460", "2000-02-29 23:23:24.000000", + "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000010", + "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015", + "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000010", "2006-01-01 08:40:40.000005", + "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015", + "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])"; + const char* ceil_15_millisecond = + R"(["1970-01-01 00:00:59.130", "2000-02-29 23:23:24.000", + "1899-01-01 00:59:20.010", "2033-05-18 03:33:20.010", + "2020-01-01 01:05:05.010", "2019-12-31 02:10:10.005", + "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.010", + "2010-01-01 05:25:25.020", "2010-01-03 06:30:30.015", + "2010-01-04 07:35:35.010", "2006-01-01 08:40:40.005", + "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015", + "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])"; + const char* ceil_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:24", "1899-01-01 00:59:29", + "2033-05-18 03:33:22", "2020-01-01 01:05:06", "2019-12-31 02:10:21", + "2019-12-30 03:15:23", "2009-12-31 04:20:32", "2010-01-01 05:25:30", + "2010-01-03 06:30:39", "2010-01-04 07:35:37", "2006-01-01 08:40:53", + "2005-12-31 09:45:55", "2008-12-28 00:00:01", "2008-12-29 00:00:12", + 
"2012-01-01 01:02:11", null])"; + const char* ceil_13_minute = + R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:01:00", + "2033-05-18 03:39:00", "2020-01-01 01:09:00", "2019-12-31 02:11:00", + "2019-12-30 03:26:00", "2009-12-31 04:24:00", "2010-01-01 05:32:00", + "2010-01-03 06:43:00", "2010-01-04 07:38:00", "2006-01-01 08:45:00", + "2005-12-31 09:47:00", "2008-12-28 00:05:00", "2008-12-29 00:08:00", + "2012-01-01 01:05:00", null])"; + const char* ceil_15_hour = + R"(["1970-01-01 15:00:00", "2000-03-01 12:00:00", "1899-01-01 03:00:00", + "2033-05-18 18:00:00", "2020-01-01 12:00:00", "2019-12-31 06:00:00", + "2019-12-30 15:00:00", "2009-12-31 09:00:00", "2010-01-01 15:00:00", + "2010-01-03 12:00:00", "2010-01-04 18:00:00", "2006-01-01 09:00:00", + "2005-12-31 18:00:00", "2008-12-28 06:00:00", "2008-12-29 12:00:00", + "2012-01-01 15:00:00", null])"; + const char* ceil_15_day = + R"(["1970-01-16", "2000-03-09", "1899-01-13", "2033-05-30", "2020-01-09", + "2020-01-09", "2020-01-09", "2010-01-01", "2010-01-16", "2010-01-16", + "2010-01-16", "2006-01-07", "2006-01-07", "2009-01-06", "2009-01-06", + "2012-01-06", null])"; + const char* ceil_15_weeks = + R"(["1970-04-13", "2000-03-06", "1899-04-10", "2033-07-11", "2020-01-06", + "2020-01-06", "2020-01-06", "2010-03-29", "2010-03-29", "2010-03-29", + "2010-03-29", "2006-03-20", "2006-03-20", "2009-02-02", "2009-02-02", + "2012-04-02", null])"; + const char* ceil_15_weeks_sunday = + R"(["1970-04-12", "2000-03-05", "1899-04-09", "2033-07-10", "2020-01-05", + "2020-01-05", "2020-01-05", "2010-03-28", "2010-03-28", "2010-03-28", + "2010-03-28", "2006-03-19", "2006-03-19", "2009-02-01", "2009-02-01", + "2012-04-01", null])"; + const char* ceil_15_months = + R"(["1971-04-01", "2001-04-01", "1900-01-01", "2033-10-01", "2021-04-01", + "2020-01-01", "2020-01-01", "2010-01-01", "2011-04-01", "2011-04-01", + "2011-04-01", "2006-04-01", "2006-04-01", "2010-01-01", "2010-01-01", + "2012-07-01", null])"; + const char* 
ceil_15_quarters = + R"(["1973-10-01", "2003-10-01", "1902-07-01", "2033-10-01", "2022-07-01", + "2022-07-01", "2022-07-01", "2011-04-01", "2011-04-01", "2011-04-01", + "2011-04-01", "2007-07-01", "2007-07-01", "2011-04-01", "2011-04-01", + "2015-01-01", null])"; + const char* ceil_15_years = + R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01", "2025-01-01", + "2025-01-01", "2025-01-01", "2010-01-01", "2025-01-01", "2025-01-01", + "2025-01-01", "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", + "2025-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, ceil_1_nanosecond, &round_to_1_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_microsecond, &round_to_1_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_millisecond, &round_to_1_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_second, &round_to_1_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_1_minute, &round_to_1_minutes); + CheckScalarUnary(op, unit, times, unit, ceil_1_hour, &round_to_1_hours); + CheckScalarUnary(op, unit, times, unit, ceil_1_day, &round_to_1_days); + CheckScalarUnary(op, unit, times, unit, ceil_1_weeks, &round_to_1_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_1_weeks_sunday, &round_to_1_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_1_months, &round_to_1_months); + CheckScalarUnary(op, unit, times, unit, ceil_1_quarters, &round_to_1_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_1_years, &round_to_1_years); + + CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); + 
CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); +} + +TEST_F(ScalarTemporalTest, TestCeilTemporalMultipleSinceGreaterUnit) { + std::string op = "ceil_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + 
RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week. + const char* ceil_15_nanosecond = + R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", + "1899-01-01 00:59:20.001001015", "2033-05-18 03:33:20.000000015", + "2020-01-01 01:05:05.001000015", "2019-12-31 02:10:10.002000015", + "2019-12-30 03:15:15.003000015", "2009-12-31 04:20:20.004132015", + "2010-01-01 05:25:25.005321015", "2010-01-03 06:30:30.006163015", + "2010-01-04 07:35:35.000000015", "2006-01-01 08:40:40.000000015", + "2005-12-31 09:45:45.000000015", "2008-12-28 00:00:00.000000015", + "2008-12-29 00:00:00.000000015", "2012-01-01 01:02:03.000000015", null])"; + const char* ceil_15_microsecond = + R"(["1970-01-01 00:00:59.123465", "2000-02-29 23:23:24.000005", + "1899-01-01 00:59:20.001015", "2033-05-18 03:33:20.000015", + "2020-01-01 01:05:05.001015", "2019-12-31 02:10:10.002015", + "2019-12-30 03:15:15.003015", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005330", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000015", "2006-01-01 08:40:40.000015", + "2005-12-31 09:45:45.000015", "2008-12-28 00:00:00.000015", + "2008-12-29 00:00:00.000015", "2012-01-01 01:02:03.000015", null])"; + const char* ceil_15_millisecond = + R"(["1970-01-01 00:00:59.135", "2000-02-29 23:23:24.005", + "1899-01-01 00:59:20.015", "2033-05-18 03:33:20.015", + "2020-01-01 01:05:05.015", "2019-12-31 02:10:10.015", + "2019-12-30 03:15:15.015", "2009-12-31 04:20:20.015", + "2010-01-01 05:25:25.015", "2010-01-03 06:30:30.015", + "2010-01-04 07:35:35.015", "2006-01-01 08:40:40.015", + "2005-12-31 09:45:45.015", "2008-12-28 00:00:00.015", + "2008-12-29 00:00:00.015", "2012-01-01 01:02:03.015", null])"; + const char* ceil_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26", + "2033-05-18 
03:33:26", "2020-01-01 01:05:13", "2019-12-31 02:10:13", + "2019-12-30 03:15:26", "2009-12-31 04:20:26", "2010-01-01 05:25:26", + "2010-01-03 06:30:39", "2010-01-04 07:35:39", "2006-01-01 08:40:52", + "2005-12-31 09:45:52", "2008-12-28 00:00:13", "2008-12-29 00:00:13", + "2012-01-01 01:02:13", null])"; + const char* ceil_13_minute = + R"(["1970-01-01 00:13:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00", + "2033-05-18 03:39:00", "2020-01-01 01:13:00", "2019-12-31 02:13:00", + "2019-12-30 03:26:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00", + "2010-01-03 06:39:00", "2010-01-04 07:39:00", "2006-01-01 08:52:00", + "2005-12-31 09:52:00", "2008-12-28 00:13:00", "2008-12-29 00:13:00", + "2012-01-01 01:13:00", null])"; + const char* ceil_15_hour = + R"(["1970-01-01 15:00:00", "2000-03-01 06:00:00", "1899-01-01 15:00:00", + "2033-05-18 15:00:00", "2020-01-01 15:00:00", "2019-12-31 15:00:00", + "2019-12-30 15:00:00", "2009-12-31 15:00:00", "2010-01-01 15:00:00", + "2010-01-03 15:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00", + "2005-12-31 15:00:00", "2008-12-28 15:00:00", "2008-12-29 15:00:00", + "2012-01-01 15:00:00", null])"; + const char* ceil_15_day = + R"(["1970-01-16", "2000-03-02", "1899-01-16", "2033-05-31", + "2020-01-16", "2020-01-15", "2019-12-31", "2010-01-15", + "2010-01-16", "2010-01-16", "2010-01-16", "2006-01-16", + "2006-01-15", "2008-12-31", "2008-12-31", "2012-01-16", null])"; + const char* ceil_15_weeks = + R"(["1970-04-13", "2000-04-17", "1899-04-17", "2033-08-01", "2020-04-13", + "2020-04-13", "2020-04-13", "2010-04-19", "2010-04-19", "2010-04-19", + "2010-04-19", "2006-04-17", "2006-04-17", "2009-02-23", "2009-04-13", + "2012-04-16", null])"; + const char* ceil_15_weeks_sunday = + R"(["1970-04-19", "2000-04-16", "1899-04-16", "2033-07-31", "2020-04-12", + "2020-04-12", "2020-04-12", "2010-04-18", "2010-04-18", "2010-04-18", + "2010-04-18", "2006-04-16", "2006-04-16", "2009-04-19", "2009-04-19", + "2012-04-15", null])"; + const char* 
ceil_15_months = + R"(["1971-04-01", "2001-04-01", "1900-04-01", "2034-04-01", + "2021-04-01", "2020-04-01", "2020-04-01", "2010-04-01", + "2011-04-01", "2011-04-01", "2011-04-01", "2007-04-01", + "2006-04-01", "2009-04-01", "2009-04-01", "2013-04-01", null])"; + const char* ceil_15_quarters = + R"(["1973-10-01", "2003-10-01", "1902-10-01", "2036-10-01", + "2023-10-01", "2022-10-01", "2022-10-01", "2012-10-01", + "2013-10-01", "2013-10-01", "2013-10-01", "2009-10-01", + "2008-10-01", "2011-10-01", "2011-10-01", "2015-10-01", null])"; + const char* ceil_15_years = + R"(["1980-01-01", "2010-01-01", "1905-01-01", "2040-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2025-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, ceil_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_microsecond, &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, ceil_15_millisecond, &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, ceil_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, unit, times, unit, ceil_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, ceil_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, ceil_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, ceil_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, ceil_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, ceil_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestFloorTemporal) { std::string op = "floor_temporal"; const char* floor_1_nanosecond = @@ -2420,6 
+2784,133 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) { CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); } +TEST_F(ScalarTemporalTest, TestFloorTemporalMultipleSinceGreaterUnit) { + std::string op = "floor_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_15_months = + RoundTemporalOptions(15, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week. 
+ const char* floor_15_nanosecond = + R"(["1970-01-01 00:00:59.123456780", "2000-02-29 23:23:23.999999990", + "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", + "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000", + "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000", + "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000", + "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000", + "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000", + "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])"; + const char* floor_15_microsecond = + R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:23.999990", + "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000", + "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000", + "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004120", + "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006150", + "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000", + "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000", + "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])"; + const char* floor_15_millisecond = + R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:23.990", + "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000", + "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000", + "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000", + "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000", + "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000", + "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000", + "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])"; + const char* floor_13_second = + R"(["1970-01-01 00:00:52", "2000-02-29 23:23:13", "1899-01-01 00:59:13", + "2033-05-18 03:33:13", "2020-01-01 01:05:00", "2019-12-31 02:10:00", + "2019-12-30 03:15:13", "2009-12-31 04:20:13", "2010-01-01 05:25:13", + "2010-01-03 06:30:26", "2010-01-04 07:35:26", "2006-01-01 08:40:39", + "2005-12-31 09:45:39", "2008-12-28 
00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:02:00", null])"; + const char* floor_13_minute = + R"(["1970-01-01 00:00:00", "2000-02-29 23:13:00", "1899-01-01 00:52:00", + "2033-05-18 03:26:00", "2020-01-01 01:00:00", "2019-12-31 02:00:00", + "2019-12-30 03:13:00", "2009-12-31 04:13:00", "2010-01-01 05:13:00", + "2010-01-03 06:26:00", "2010-01-04 07:26:00", "2006-01-01 08:39:00", + "2005-12-31 09:39:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:00:00", null])"; + const char* floor_15_hour = + R"(["1970-01-01 00:00:00", "2000-02-29 15:00:00", "1899-01-01 00:00:00", + "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00", + "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00", + "2010-01-03 00:00:00", "2010-01-04 00:00:00", "2006-01-01 00:00:00", + "2005-12-31 00:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 00:00:00", null])"; + const char* floor_15_day = + R"(["1970-01-01", "2000-02-16", "1899-01-01", "2033-05-16", + "2020-01-01", "2019-12-31", "2019-12-16", "2009-12-31", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-12-31", "2008-12-16", "2008-12-16", "2012-01-01", null])"; + const char* floor_15_weeks = + R"(["1969-12-29", "2000-01-03", "1899-01-02", "2033-04-18", + "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02", + "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])"; + const char* floor_15_weeks_sunday = + R"(["1970-01-04", "2000-01-02", "1899-01-01", "2033-04-17", + "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03", + "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])"; + const char* floor_15_months = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", 
"2008-01-01", "2012-01-01", null])"; + const char* floor_15_quarters = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])"; + const char* floor_15_years = + R"(["1965-01-01", "1995-01-01", "1890-01-01", "2025-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "1995-01-01", + "1995-01-01", "1995-01-01", "1995-01-01", "2010-01-01", null])"; + + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, floor_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, floor_15_microsecond, + &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, floor_15_millisecond, + &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, floor_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, floor_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, unit, times, unit, floor_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, floor_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, floor_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, floor_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, floor_15_months, &round_to_15_months); + CheckScalarUnary(op, unit, times, unit, floor_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, floor_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestRoundTemporal) { std::string op = "round_temporal"; const char* round_1_nanoseconds = @@ -2632,6 +3123,132 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) { CheckScalarUnary("round_temporal", unit, times, unit, round_2_hours, &round_to_2_hours); } +TEST_F(ScalarTemporalTest, 
TestRoundTemporalMultipleSinceGreaterUnit) { + std::string op = "round_temporal"; + RoundTemporalOptions round_to_15_nanoseconds = + RoundTemporalOptions(15, CalendarUnit::NANOSECOND, true, true, true); + RoundTemporalOptions round_to_15_microseconds = + RoundTemporalOptions(15, CalendarUnit::MICROSECOND, true, true, true); + RoundTemporalOptions round_to_15_milliseconds = + RoundTemporalOptions(15, CalendarUnit::MILLISECOND, true, true, true); + RoundTemporalOptions round_to_13_seconds = + RoundTemporalOptions(13, CalendarUnit::SECOND, true, true, true); + RoundTemporalOptions round_to_13_minutes = + RoundTemporalOptions(13, CalendarUnit::MINUTE, true, true, true); + RoundTemporalOptions round_to_15_hours = + RoundTemporalOptions(15, CalendarUnit::HOUR, true, true, true); + RoundTemporalOptions round_to_15_days = + RoundTemporalOptions(15, CalendarUnit::DAY, true, true, true); + RoundTemporalOptions round_to_15_weeks = + RoundTemporalOptions(15, CalendarUnit::WEEK, true, true, true); + RoundTemporalOptions round_to_15_weeks_sunday = + RoundTemporalOptions(15, CalendarUnit::WEEK, false, true, true); + RoundTemporalOptions round_to_5_months = + RoundTemporalOptions(5, CalendarUnit::MONTH, true, true, true); + RoundTemporalOptions round_to_15_quarters = + RoundTemporalOptions(15, CalendarUnit::QUARTER, true, true, true); + RoundTemporalOptions round_to_15_years = + RoundTemporalOptions(15, CalendarUnit::YEAR, true, true, true); + + // Data for tests below was generated via lubridate with the exception + // of week data because lubridate currently does not support rounding to + // multiple of week.
+ const char* round_15_nanosecond = + R"(["1970-01-01 00:00:59.123456795", "2000-02-29 23:23:24.000000005", + "1899-01-01 00:59:20.001001000", "2033-05-18 03:33:20.000000000", + "2020-01-01 01:05:05.001000000", "2019-12-31 02:10:10.002000000", + "2019-12-30 03:15:15.003000000", "2009-12-31 04:20:20.004132000", + "2010-01-01 05:25:25.005321000", "2010-01-03 06:30:30.006163000", + "2010-01-04 07:35:35.000000000", "2006-01-01 08:40:40.000000000", + "2005-12-31 09:45:45.000000000", "2008-12-28 00:00:00.000000000", + "2008-12-29 00:00:00.000000000", "2012-01-01 01:02:03.000000000", null])"; + const char* round_15_microsecond = + R"(["1970-01-01 00:00:59.123450", "2000-02-29 23:23:24.000005", + "1899-01-01 00:59:20.001000", "2033-05-18 03:33:20.000000", + "2020-01-01 01:05:05.001000", "2019-12-31 02:10:10.002000", + "2019-12-30 03:15:15.003000", "2009-12-31 04:20:20.004135", + "2010-01-01 05:25:25.005315", "2010-01-03 06:30:30.006165", + "2010-01-04 07:35:35.000000", "2006-01-01 08:40:40.000000", + "2005-12-31 09:45:45.000000", "2008-12-28 00:00:00.000000", + "2008-12-29 00:00:00.000000", "2012-01-01 01:02:03.000000", null])"; + const char* round_15_millisecond = + R"(["1970-01-01 00:00:59.120", "2000-02-29 23:23:24.005", + "1899-01-01 00:59:20.000", "2033-05-18 03:33:20.000", + "2020-01-01 01:05:05.000", "2019-12-31 02:10:10.000", + "2019-12-30 03:15:15.000", "2009-12-31 04:20:20.000", + "2010-01-01 05:25:25.000", "2010-01-03 06:30:30.000", + "2010-01-04 07:35:35.000", "2006-01-01 08:40:40.000", + "2005-12-31 09:45:45.000", "2008-12-28 00:00:00.000", + "2008-12-29 00:00:00.000", "2012-01-01 01:02:03.000", null])"; + const char* round_13_second = + R"(["1970-01-01 00:01:05", "2000-02-29 23:23:26", "1899-01-01 00:59:26", + "2033-05-18 03:33:26", "2020-01-01 01:05:00", "2019-12-31 02:10:13", + "2019-12-30 03:15:13", "2009-12-31 04:20:26", "2010-01-01 05:25:26", + "2010-01-03 06:30:26", "2010-01-04 07:35:39", "2006-01-01 08:40:39", + "2005-12-31 09:45:39", "2008-12-28 
00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:02:00", null])"; + const char* round_13_minute = + R"(["1970-01-01 00:00:00", "2000-02-29 23:26:00", "1899-01-01 01:05:00", + "2033-05-18 03:39:00", "2020-01-01 01:00:00", "2019-12-31 02:13:00", + "2019-12-30 03:13:00", "2009-12-31 04:26:00", "2010-01-01 05:26:00", + "2010-01-03 06:26:00", "2010-01-04 07:39:00", "2006-01-01 08:39:00", + "2005-12-31 09:52:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 01:00:00", null])"; + const char* round_15_hour = + R"(["1970-01-01 00:00:00", "2000-03-01 06:00:00", "1899-01-01 00:00:00", + "2033-05-18 00:00:00", "2020-01-01 00:00:00", "2019-12-31 00:00:00", + "2019-12-30 00:00:00", "2009-12-31 00:00:00", "2010-01-01 00:00:00", + "2010-01-03 00:00:00", "2010-01-04 15:00:00", "2006-01-01 15:00:00", + "2005-12-31 15:00:00", "2008-12-28 00:00:00", "2008-12-29 00:00:00", + "2012-01-01 00:00:00", null])"; + const char* round_15_day = + R"(["1970-01-01", "2000-03-02", "1899-01-01", "2033-05-16", + "2020-01-01", "2019-12-31", "2019-12-31", "2009-12-31", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-12-31", "2008-12-31", "2008-12-31", "2012-01-01", null])"; + const char* round_15_weeks = + R"(["1969-12-29", "2000-04-17", "1899-01-02", "2033-04-18", + "2019-12-30", "2019-12-30", "2019-12-30", "2010-01-04", + "2010-01-04", "2010-01-04", "2010-01-04", "2006-01-02", + "2006-01-02", "2008-11-10", "2008-12-29", "2012-01-02", null])"; + const char* round_15_weeks_sunday = + R"(["1970-01-04", "2000-04-16", "1899-01-01", "2033-04-17", + "2019-12-29", "2019-12-29", "2019-12-29", "2010-01-03", + "2010-01-03", "2010-01-03", "2010-01-03", "2006-01-01", + "2006-01-01", "2009-01-04", "2009-01-04", "2012-01-01", null])"; + const char* round_5_months = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-06-01", + "2020-01-01", "2019-11-01", "2019-11-01", "2009-11-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-11-01", "2008-11-01", 
"2008-11-01", "2012-01-01", null])"; + const char* round_15_quarters = + R"(["1970-01-01", "2000-01-01", "1899-01-01", "2033-01-01", + "2020-01-01", "2019-01-01", "2019-01-01", "2009-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2006-01-01", + "2005-01-01", "2008-01-01", "2008-01-01", "2012-01-01", null])"; + const char* round_15_years = + R"(["1965-01-01", "1995-01-01", "1905-01-01", "2040-01-01", + "2025-01-01", "2025-01-01", "2025-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", + "2010-01-01", "2010-01-01", "2010-01-01", "2010-01-01", null])"; + auto unit = timestamp(TimeUnit::NANO, "UTC"); + CheckScalarUnary(op, unit, times, unit, round_15_nanosecond, &round_to_15_nanoseconds); + CheckScalarUnary(op, unit, times, unit, round_15_microsecond, + &round_to_15_microseconds); + CheckScalarUnary(op, unit, times, unit, round_15_millisecond, + &round_to_15_milliseconds); + CheckScalarUnary(op, unit, times, unit, round_13_second, &round_to_13_seconds); + CheckScalarUnary(op, unit, times, unit, round_13_minute, &round_to_13_minutes); + CheckScalarUnary(op, unit, times, unit, round_15_hour, &round_to_15_hours); + CheckScalarUnary(op, unit, times, unit, round_15_day, &round_to_15_days); + CheckScalarUnary(op, unit, times, unit, round_15_weeks, &round_to_15_weeks); + CheckScalarUnary(op, unit, times, unit, round_15_weeks_sunday, + &round_to_15_weeks_sunday); + CheckScalarUnary(op, unit, times, unit, round_5_months, &round_to_5_months); + CheckScalarUnary(op, unit, times, unit, round_15_quarters, &round_to_15_quarters); + CheckScalarUnary(op, unit, times, unit, round_15_years, &round_to_15_years); +} + TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalKolkata) { // Kolkata timezone was defined as UTC+5:21:10 from 1871 to 1906 when it changed to // IST (UTC+05:30) without DST. 
This test is to check rounding is done in historical diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 7484de2a005..6275de94818 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -689,12 +689,36 @@ struct IsDaylightSavings { // Round temporal values to given frequency template -year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) { +year_month_day GetFlooredYmd(int64_t arg, const int multiple, + const RoundTemporalOptions& options, Localizer localizer_) { year_month_day ymd{floor(localizer_.template ConvertTimePoint(arg))}; if (multiple == 1) { + // Round to a multiple of months since epoch start (1970-01-01 00:00:00). return year_month_day(ymd.year() / ymd.month() / 1); + } else if (options.calendar_based_origin) { + // Round to a multiple of months since the last year. + // + // Note: compute::CalendarUnit::YEAR is the greatest unit so there is no logical time + // point to use as origin. compute::CalendarUnit::DAY is covered by FloorTimePoint. + // Therefore compute::CalendarUnit::YEAR and compute::CalendarUnit::DAY are not + // covered here. + switch (options.unit) { + case compute::CalendarUnit::MONTH: { + const auto m = + static_cast(ymd.month()) / options.multiple * options.multiple; + return year_month_day(ymd.year() / 1 / 1) + months{m}; + } + case compute::CalendarUnit::QUARTER: { + const auto m = static_cast(ymd.month()) / (options.multiple * 3) * + (options.multiple * 3); + return year_month_day(ymd.year() / 1 / 1) + months{m}; + } + default: + return ymd; + } } else { + // Round to month * options.multiple since epoch start (1970-01-01 00:00:00). 
int32_t total_months_origin = 1970 * 12; int32_t total_months = static_cast(ymd.year()) * 12 + static_cast(static_cast(ymd.month())) - 1 - @@ -705,21 +729,71 @@ year_month_day GetFlooredYmd(int64_t arg, int multiple, Localizer localizer_) { } else { total_months = (total_months - multiple + 1) / multiple * multiple; } - return year_month_day(year{1970} / jan / 0) + months{total_months}; + return year_month_day(year{1970} / jan / 1) + months{total_months}; } } template -const Duration FloorTimePoint(const int64_t arg, const int64_t multiple, +const Duration FloorTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg); - const Unit d = floor(t).time_since_epoch(); - if (multiple == 1) { + if (options.multiple == 1) { + // Round to a multiple of unit since epoch start (1970-01-01 00:00:00). + const Unit d = floor(t).time_since_epoch(); return localizer_.template ConvertLocalToSys(duration_cast(d), st); + } else if (options.calendar_based_origin) { + // Round to a multiple of units since the last greater unit. + // For example: round to multiple of days since the beginning of the month or + // to hours since the beginning of the day. 
+ const Unit unit = Unit{options.multiple}; + Duration origin; + + switch (options.unit) { + case compute::CalendarUnit::DAY: + origin = duration_cast( + localizer_ + .ConvertDays(year_month_day(floor(t)).year() / + year_month_day(floor(t)).month() / 1) + .time_since_epoch()); + break; + case compute::CalendarUnit::HOUR: + origin = duration_cast( + localizer_.ConvertDays(year_month_day(floor(t))).time_since_epoch()); + break; + case compute::CalendarUnit::MINUTE: + origin = duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::SECOND: + origin = + duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::MILLISECOND: + origin = + duration_cast(floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::MICROSECOND: + origin = duration_cast( + floor(t).time_since_epoch()); + break; + case compute::CalendarUnit::NANOSECOND: + origin = duration_cast( + floor(t).time_since_epoch()); + break; + default: { + *st = Status::Invalid("Cannot floor to ", &options.unit); + return Duration{0}; + } + } + const Duration m = + duration_cast(((t - origin).time_since_epoch() / unit * unit + origin)); + return localizer_.template ConvertLocalToSys(m, st); } else { - const Unit unit = Unit{multiple}; + // Round to a multiple of units * options.multiple since epoch start + // (1970-01-01 00:00:00). + const Unit d = floor(t).time_since_epoch(); + const Unit unit = Unit{options.multiple}; const Unit m = (d.count() >= 0) ? 
d / unit * unit : (d - unit + Unit{1}) / unit * unit; return localizer_.template ConvertLocalToSys(duration_cast(m), @@ -728,18 +802,35 @@ const Duration FloorTimePoint(const int64_t arg, const int64_t multiple, } template -const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple, +const Duration FloorWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { const auto t = localizer_.template ConvertTimePoint(arg) + weekday_offset; const weeks d = floor(t).time_since_epoch(); - if (multiple == 1) { + if (options.multiple == 1) { + // Round to a multiple of weeks since epoch start (1970-01-01 00:00:00). return localizer_.template ConvertLocalToSys(duration_cast(d), st) - weekday_offset; + } else if (options.calendar_based_origin) { + // Round to a multiple of weeks since year prior. + weekday wd_; + if (options.week_starts_monday) { + wd_ = thu; + } else { + wd_ = wed; + } + const auto y = year_month_day{floor(t)}.year(); + const auto start = + localizer_.ConvertDays((y - years{1}) / dec / wd_[last]) + (mon - thu); + const weeks unit = weeks{options.multiple}; + const auto m = (t - start) / unit * unit + start; + return localizer_.template ConvertLocalToSys(m.time_since_epoch(), st); } else { - const weeks unit = weeks{multiple}; + // Round to a multiple of weeks * options.multiple since epoch start + // (1970-01-01 00:00:00). + const weeks unit = weeks{options.multiple}; const weeks m = (d.count() >= 0) ? 
d / unit * unit : (d - unit + weeks{1}) / unit * unit; return localizer_.template ConvertLocalToSys(duration_cast(m), @@ -749,55 +840,58 @@ const Duration FloorWeekTimePoint(const int64_t arg, const int64_t multiple, } template -Duration CeilTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_, - Status* st) { +Duration CeilTimePoint(const int64_t arg, const RoundTemporalOptions& options, + Localizer localizer_, Status* st) { const Duration f = - FloorTimePoint(arg, multiple, localizer_, st); + FloorTimePoint(arg, options, localizer_, st); const auto cl = localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (cs >= Duration{arg}) { - return cs; + + if (options.ceil_is_strictly_greater || cs < Duration{arg}) { + return localizer_.template ConvertLocalToSys( + duration_cast(cl + duration_cast(Unit{options.multiple})), + st); } - return localizer_.template ConvertLocalToSys( - duration_cast(cl + duration_cast(Unit{multiple})), st); + return cs; } template -Duration CeilWeekTimePoint(const int64_t arg, const int64_t multiple, +Duration CeilWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { - const Duration f = FloorWeekTimePoint(arg, multiple, localizer_, + const Duration f = FloorWeekTimePoint(arg, options, localizer_, weekday_offset, st); const auto cl = localizer_.template ConvertTimePoint(f.count()).time_since_epoch(); const Duration cs = localizer_.template ConvertLocalToSys(duration_cast(cl), st); - if (cs >= Duration{arg}) { - return cs; + if (options.ceil_is_strictly_greater || cs < Duration{arg}) { + return localizer_.template ConvertLocalToSys( + duration_cast(cl + duration_cast(weeks{options.multiple})), + st); } - return localizer_.template ConvertLocalToSys( - duration_cast(cl + duration_cast(weeks{multiple})), st); + return cs; } template -Duration 
RoundTimePoint(const int64_t arg, const int64_t multiple, Localizer localizer_, - Status* st) { +Duration RoundTimePoint(const int64_t arg, const RoundTemporalOptions& options, + Localizer localizer_, Status* st) { const Duration f = - FloorTimePoint(arg, multiple, localizer_, st); + FloorTimePoint(arg, options, localizer_, st); const Duration c = - CeilTimePoint(arg, multiple, localizer_, st); + CeilTimePoint(arg, options, localizer_, st); return (Duration{arg} - f >= c - Duration{arg}) ? c : f; } template -Duration RoundWeekTimePoint(const int64_t arg, const int64_t multiple, +Duration RoundWeekTimePoint(const int64_t arg, const RoundTemporalOptions& options, Localizer localizer_, const Duration weekday_offset, Status* st) { - const Duration f = FloorWeekTimePoint(arg, multiple, localizer_, + const Duration f = FloorWeekTimePoint(arg, options, localizer_, weekday_offset, st); - const Duration c = CeilWeekTimePoint(arg, multiple, localizer_, + const Duration c = CeilWeekTimePoint(arg, options, localizer_, weekday_offset, st); return (Duration{arg} - f >= c - Duration{arg}) ? 
c : f; } @@ -812,52 +906,50 @@ struct CeilTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MILLISECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::SECOND: - t = CeilTimePoint( - arg, options.multiple, localizer_, st); + t = CeilTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = CeilTimePoint(arg, options.multiple, localizer_, - st); + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = CeilTimePoint(arg, options.multiple, + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = CeilTimePoint(arg, options.multiple, localizer_, - st); + t = CeilTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = CeilWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = CeilWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = CeilWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = CeilWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); ymd += months{options.multiple}; t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; } case compute::CalendarUnit::QUARTER: { - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + 
year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); ymd += months{3 * options.multiple}; t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; @@ -890,51 +982,49 @@ struct FloorTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = FloorTimePoint( - arg, options.multiple, localizer_, st); + t = FloorTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: t = FloorTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::MILLISECOND: t = FloorTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::SECOND: - t = FloorTimePoint( - arg, options.multiple, localizer_, st); + t = FloorTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = FloorTimePoint(arg, options.multiple, - localizer_, st); + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = FloorTimePoint(arg, options.multiple, + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = FloorTimePoint(arg, options.multiple, localizer_, - st); + t = FloorTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = FloorWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = FloorWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = FloorWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = FloorWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); 
break; } case compute::CalendarUnit::QUARTER: { - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); t = localizer_.ConvertDays(ymd.year() / ymd.month() / 1).time_since_epoch(); break; } @@ -965,46 +1055,44 @@ struct RoundTemporal { Duration t; switch (options.unit) { case compute::CalendarUnit::NANOSECOND: - t = RoundTimePoint( - arg, options.multiple, localizer_, st); + t = RoundTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MICROSECOND: t = RoundTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::MILLISECOND: t = RoundTimePoint( - arg, options.multiple, localizer_, st); + arg, options, localizer_, st); break; case compute::CalendarUnit::SECOND: - t = RoundTimePoint( - arg, options.multiple, localizer_, st); + t = RoundTimePoint(arg, options, + localizer_, st); break; case compute::CalendarUnit::MINUTE: - t = RoundTimePoint(arg, options.multiple, - localizer_, st); + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::HOUR: - t = RoundTimePoint(arg, options.multiple, + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::DAY: - t = RoundTimePoint(arg, options.multiple, localizer_, - st); + t = RoundTimePoint(arg, options, localizer_, st); break; case compute::CalendarUnit::WEEK: if (options.week_starts_monday) { - t = RoundWeekTimePoint(arg, options.multiple, localizer_, - days{3}, st); + t = RoundWeekTimePoint(arg, options, localizer_, days{3}, + st); } else { - t = RoundWeekTimePoint(arg, options.multiple, localizer_, - days{4}, st); + t = RoundWeekTimePoint(arg, options, localizer_, days{4}, + st); } break; case compute::CalendarUnit::MONTH: { auto t0 = localizer_.template ConvertTimePoint(arg); - year_month_day ymd = - GetFlooredYmd(arg, options.multiple, localizer_); + year_month_day 
ymd = GetFlooredYmd(arg, options.multiple, + options, localizer_); auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1); ymd += months{options.multiple}; @@ -1015,8 +1103,8 @@ struct RoundTemporal { } case compute::CalendarUnit::QUARTER: { auto t0 = localizer_.template ConvertTimePoint(arg); - year_month_day ymd = - GetFlooredYmd(arg, 3 * options.multiple, localizer_); + year_month_day ymd = GetFlooredYmd(arg, 3 * options.multiple, + options, localizer_); auto f = localizer_.ConvertDays(ymd.year() / ymd.month() / 1); ymd += months{3 * options.multiple}; diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 96da505f763..78bb31b5f9c 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -882,11 +882,13 @@ cdef CCalendarUnit unwrap_round_temporal_unit(unit) except *: cdef class _RoundTemporalOptions(FunctionOptions): - def _set_options(self, multiple, unit, week_starts_monday): + def _set_options(self, multiple, unit, week_starts_monday, + ceil_is_strictly_greater, calendar_based_origin): self.wrapped.reset( new CRoundTemporalOptions( multiple, unwrap_round_temporal_unit(unit), - week_starts_monday) + week_starts_monday, ceil_is_strictly_greater, + calendar_based_origin) ) @@ -905,10 +907,41 @@ class RoundTemporalOptions(_RoundTemporalOptions): "nanosecond". week_starts_monday : bool, default True If True, weeks start on Monday; if False, on Sunday. - """ - - def __init__(self, multiple=1, unit="day", week_starts_monday=True): - self._set_options(multiple, unit, week_starts_monday) + ceil_is_strictly_greater : bool, default False + If True, ceil returns a rounded value that is strictly greater than the + input. For example: ceiling 1970-01-01T00:00:00 to 3 hours would + yield 1970-01-01T03:00:00 if set to True and 1970-01-01T00:00:00 + if set to False. + This applies to the ceil_temporal function only. + calendar_based_origin : bool, default False + By default, the origin is 1970-01-01T00:00:00. 
By setting this to True, + rounding origin will be beginning of one less precise calendar unit. + E.g.: rounding to hours will use beginning of day as origin. + + By default time is rounded to a multiple of units since + 1970-01-01T00:00:00. By setting calendar_based_origin to true, + time will be rounded to number of units since the last greater + calendar unit. + For example: rounding to multiple of days since the beginning of the + month or to hours since the beginning of the day. + Exceptions: week and quarter are not used as greater units, + therefore days will be rounded to the beginning of the month not + week. Greater unit of week is a year. + Note that ceiling and rounding might change sorting order of an array + near greater unit change. For example rounding YYYY-mm-dd 23:00:00 to + 5 hours will ceil and round to YYYY-mm-dd+1 01:00:00 and floor to + YYYY-mm-dd 20:00:00. On the other hand YYYY-mm-dd+1 00:00:00 will + ceil, round and floor to YYYY-mm-dd+1 00:00:00. This can break the + order of an already ordered array. 
+ + """ + + def __init__(self, multiple=1, unit="day", *, week_starts_monday=True, + ceil_is_strictly_greater=False, + calendar_based_origin=False): + self._set_options(multiple, unit, week_starts_monday, + ceil_is_strictly_greater, + calendar_based_origin) cdef class _RoundToMultipleOptions(FunctionOptions): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 2e51864b860..9d023e78212 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1971,10 +1971,14 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: cdef cppclass CRoundTemporalOptions \ "arrow::compute::RoundTemporalOptions"(CFunctionOptions): CRoundTemporalOptions(int multiple, CCalendarUnit unit, - c_bool week_starts_monday) + c_bool week_starts_monday, + c_bool ceil_is_strictly_greater, + c_bool calendar_based_origin) int multiple CCalendarUnit unit c_bool week_starts_monday + c_bool ceil_is_strictly_greater + c_bool calendar_based_origin cdef cppclass CRoundToMultipleOptions \ "arrow::compute::RoundToMultipleOptions"(CFunctionOptions): diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 45282a28678..6b733794ee8 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -152,7 +152,7 @@ def test_option_class_equality(): pc.ReplaceSliceOptions(0, 1, "a"), pc.ReplaceSubstringOptions("a", "b"), pc.RoundOptions(2, "towards_infinity"), - pc.RoundTemporalOptions(1, "second", True), + pc.RoundTemporalOptions(1, "second", week_starts_monday=True), pc.RoundToMultipleOptions(100, "towards_infinity"), pc.ScalarAggregateOptions(), pc.SelectKOptions(0, sort_keys=[("b", "ascending")]), @@ -2037,6 +2037,14 @@ def _check_temporal_rounding(ts, values, unit): "hour": "H", "day": "D" } + greater_unit = { + "nanosecond": "us", + "microsecond": "ms", + "millisecond": "s", + "second": "min", + "minute": "H", + "hour": "d", + } ta = 
pa.array(ts) for value in values: @@ -2055,6 +2063,27 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + # Check rounding with calendar_based_origin=True. + # Note: rounding to month is not supported in Pandas so we can't + # approximate this functionality and exclude unit == "day". + if unit != "day": + options = pc.RoundTemporalOptions( + value, unit, calendar_based_origin=True) + origin = ts.dt.floor(greater_unit[unit]) + + if ta.type.tz is None: + result = pc.ceil_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.ceil(frequency) + origin + np.testing.assert_array_equal(result, expected) + + result = pc.floor_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.floor(frequency) + origin + np.testing.assert_array_equal(result, expected) + + result = pc.round_temporal(ta, options=options).to_pandas() + expected = (ts - origin).dt.round(frequency) + origin + np.testing.assert_array_equal(result, expected) + # Check RoundTemporalOptions partial defaults if unit == "day": result = pc.ceil_temporal(ta, multiple=value).to_pandas() @@ -2069,6 +2098,22 @@ def _check_temporal_rounding(ts, values, unit): expected = ts.dt.round(frequency) np.testing.assert_array_equal(result, expected) + # We naively test ceil_is_strictly_greater by adding time unit multiple + # to regular ceiled timestamp if it is equal to the original timestamp. + # This does not work if timestamp is zoned since our logic will not + # account for DST jumps.
+ if ta.type.tz is None: + options = pc.RoundTemporalOptions( + value, unit, ceil_is_strictly_greater=True) + result = pc.ceil_temporal(ta, options=options) + expected = ts.dt.ceil(frequency) + + expected = np.where( + expected == ts, + expected + pd.Timedelta(value, unit_shorthand[unit]), + expected) + np.testing.assert_array_equal(result, expected) + # Check RoundTemporalOptions defaults if unit == "day": frequency = "1D" @@ -2095,9 +2140,8 @@ def _check_temporal_rounding(ts, values, unit): def test_round_temporal(unit): from pyarrow.vendored.version import Version - if Version(pd.__version__) < Version('1.0.0') and \ - unit in ("nanosecond", "microsecond"): - pytest.skip('Pandas < 1.0 rounds zoned small units differently.') + if Version(pd.__version__) < Version('1.0.0'): + pytest.skip('Pandas < 1.0 rounds differently.') values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750) timestamps = [ @@ -2111,6 +2155,7 @@ def test_round_temporal(unit): "1967-02-26 05:56:46.922376960", "1975-11-01 10:55:37.016146432", "1982-01-21 18:43:44.517366784", + "1992-01-01 00:00:00.100000000", "1999-12-04 05:55:34.794991104", "2026-10-26 08:39:00.316686848"] ts = pd.Series([pd.Timestamp(x, unit="ns") for x in timestamps])