From 8857c38ae00161a57ea4513a564e6840753aa668 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 15 Feb 2023 21:40:48 +0100 Subject: [PATCH 1/4] Adding local_time kernel --- cpp/src/arrow/compute/api_scalar.cc | 1 + cpp/src/arrow/compute/api_scalar.h | 11 +++++ .../compute/kernels/scalar_temporal_test.cc | 15 +++++++ .../compute/kernels/scalar_temporal_unary.cc | 40 ++++++++++++++++++- docs/source/cpp/compute.rst | 2 + 5 files changed, 67 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 2976e8ae4e8..ace0fcdb58e 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -834,6 +834,7 @@ SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(Hour, "hour") SCALAR_EAGER_UNARY(YearMonthDay, "year_month_day") SCALAR_EAGER_UNARY(IsDaylightSavings, "is_dst") +SCALAR_EAGER_UNARY(LocalTime, "local_time") SCALAR_EAGER_UNARY(IsLeapYear, "is_leap_year") SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 35c41dba9a9..f4cfb21de58 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1518,6 +1518,17 @@ ARROW_EXPORT Result AssumeTimezone(const Datum& values, ARROW_EXPORT Result IsDaylightSavings(const Datum& values, ExecContext* ctx = NULLPTR); +/// \brief LocalTime converts timestamps from timezone aware to timezone naive +/// local timestamps +/// +/// \param[in] values input to convert to local time +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 12.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result LocalTime(const Datum& values, ExecContext* ctx = NULLPTR); + /// \brief Years Between finds the number of years between two values /// /// \param[in] left input treated as the start time diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 597157eb6e0..84954b17454 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1811,6 +1811,21 @@ TEST_F(ScalarTemporalTest, TestTemporalDifferenceErrors) { CallFunction("weeks_between", {arr1, arr1}, &options)); } +TEST_F(ScalarTemporalTest, TestLocalTime) { + const char* expected_local_us_central = + R"(["1969-12-31 18:00:59", "2000-02-29 17:23:23", "1898-12-31 19:08:20", + "2033-05-17 22:33:20", "2019-12-31 19:05:05", "2019-12-30 20:10:10", + "2019-12-29 21:15:15", "2009-12-30 22:20:20", "2009-12-31 23:25:25", + "2010-01-03 00:30:30", "2010-01-04 01:35:35", "2006-01-01 02:40:40", + "2005-12-31 03:45:45", "2008-12-27 18:00:00", "2008-12-28 18:00:00", + "2011-12-31 19:02:03", null])"; + + for (auto u : TimeUnit::values()) { + CheckScalarUnary("local_time", timestamp(u, "US/Central"), times_seconds_precision, + timestamp(u), expected_local_us_central); + } +} + TEST_F(ScalarTemporalTest, TestAssumeTimezone) { std::string timezone_utc = "UTC"; std::string timezone_kolkata = "Asia/Kolkata"; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index bc533568c4d..beb7f2d2676 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -672,6 +672,23 @@ struct IsDaylightSavings { const time_zone* tz_; }; +// ---------------------------------------------------------------------- +// Extract timezone naive time of a given timestamp in it's timezone + +template +struct LocalTime { + explicit LocalTime(const FunctionOptions* options, Localizer&& localizer) + : localizer_(std::move(localizer)) {} + + template + T Call(KernelContext*, Arg0 arg, Status*) const { + const auto t = localizer_.template ConvertTimePoint(arg); + return static_cast(t.time_since_epoch().count()); + } + + Localizer localizer_; +}; + // ---------------------------------------------------------------------- // Round temporal values to given frequency @@ -1327,6 +1344,12 @@ Result ResolveAssumeTimezoneOutput(KernelContext* ctx, return timestamp(in_type.unit(), AssumeTimezoneState::Get(ctx).timezone); } +Result ResolveLocalTimeOutput(KernelContext* ctx, + const std::vector& args) { + const auto& in_type = checked_cast(*args[0]); + return timestamp(in_type.unit()); +} + template struct AssumeTimezone { explicit AssumeTimezone(const AssumeTimezoneOptions* options, const time_zone* tz) @@ -1784,6 +1807,12 @@ const FunctionDoc is_dst_doc{ "An error is returned if the values do not have a defined timezone."), {"values"}}; +const FunctionDoc local_time_doc{ + "Converts a timezone-aware timestamp to a timezone-naive timestamp", + ("LocalTime returns a timestamp without a timezone.\n" + "Null values emit null.\n" + "An error is returned if the values do not have a defined timezone."), + {"values"}}; const FunctionDoc floor_temporal_doc{ "Round temporal values down to nearest multiple of specified time unit", ("Null values emit null.\n" @@ -1801,8 +1830,9 @@ const FunctionDoc ceil_temporal_doc{ const FunctionDoc round_temporal_doc{ "Round temporal values to the nearest multiple of specified time unit", ("Null values emit null.\n" - "An error is returned if the values have a defined timezone but it\n" - "cannot be found in the timezone database."), + "If timezone is not given then timezone naive timestamp in UTC are\n" + "returned. An error is returned if the values have a defined timezone\n" + "but it cannot be found in the timezone database."), {"timestamps"}, "RoundTemporalOptions"}; @@ -1969,6 +1999,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) { is_dst_doc); DCHECK_OK(registry->AddFunction(std::move(is_dst))); + auto local_time = + UnaryTemporalFactory::Make< + WithTimestamps>("local_time", OutputType::Resolver(ResolveLocalTimeOutput), + local_time_doc); + DCHECK_OK(registry->AddFunction(std::move(local_time))); + // Temporal rounding functions // Note: UnaryTemporalFactory will not correctly resolve OutputType(FirstType) to // output type. See TemporalComponentExtractRound for more. diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 5bd3d659a3f..cfa38629994 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1445,6 +1445,8 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | is_leap_year | Unary | Timestamp, Date | Boolean | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ +| local_time | Unary | Timestamp | Timestamp | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ | microsecond | Unary | Timestamp, Time | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | millisecond | Unary | Timestamp, Time | Int64 | | | From a5185a2a7d29be2a3d55e3cb206dba276d26a803 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 16 Feb 2023 12:03:16 +0100 Subject: [PATCH 2/4] More test cases. Python tests. --- cpp/src/arrow/compute/api_scalar.h | 3 +- .../compute/kernels/scalar_temporal_test.cc | 36 ++++++++++++++----- .../compute/kernels/scalar_temporal_unary.cc | 11 +++--- python/pyarrow/tests/test_compute.py | 1 + 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index f4cfb21de58..3e55e2c8892 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1518,8 +1518,7 @@ ARROW_EXPORT Result AssumeTimezone(const Datum& values, ARROW_EXPORT Result IsDaylightSavings(const Datum& values, ExecContext* ctx = NULLPTR); -/// \brief LocalTime converts timestamps from timezone aware to timezone naive -/// local timestamps +/// \brief LocalTime converts timestamp to timezone naive local timestamp /// /// \param[in] values input to convert to local time /// \param[in] ctx the function execution context, optional diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 84954b17454..41408b94797 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1812,17 +1812,35 @@ TEST_F(ScalarTemporalTest, TestTemporalDifferenceErrors) { } TEST_F(ScalarTemporalTest, TestLocalTime) { - const char* expected_local_us_central = - R"(["1969-12-31 18:00:59", "2000-02-29 17:23:23", "1898-12-31 19:08:20", - "2033-05-17 22:33:20", "2019-12-31 19:05:05", "2019-12-30 20:10:10", - "2019-12-29 21:15:15", "2009-12-30 22:20:20", "2009-12-31 23:25:25", - "2010-01-03 00:30:30", "2010-01-04 01:35:35", "2006-01-01 02:40:40", - "2005-12-31 03:45:45", "2008-12-27 18:00:00", "2008-12-28 18:00:00", - "2011-12-31 19:02:03", null])"; + const char* times_seconds_precision = + R"(["1970-01-01T00:00:59", "2000-02-29T23:23:23", "2033-05-18T03:33:20", + "2020-01-01T01:05:05", "2019-12-31T02:10:10", "2019-12-30T03:15:15", + "2009-12-31T04:20:20", "2010-01-01T05:25:25", "2010-01-03T06:30:30", + "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", + "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])"; + + const char* expected_local_kolkata = + R"(["1970-01-01 05:30:59", "2000-03-01 04:53:23", "2033-05-18 09:03:20", + "2020-01-01 06:35:05", "2019-12-31 07:40:10", "2019-12-30 08:45:15", + "2009-12-31 09:50:20", "2010-01-01 10:55:25", "2010-01-03 12:00:30", + "2010-01-04 13:05:35", "2006-01-01 14:10:40", "2005-12-31 15:15:45", + "2008-12-28 05:30:00", "2008-12-29 05:30:00", "2012-01-01 06:32:03", null])"; + const char* expected_local_marquesas = + R"(["1969-12-31 14:30:59", "2000-02-29 13:53:23", "2033-05-17 18:03:20", + "2019-12-31 15:35:05", "2019-12-30 16:40:10", "2019-12-29 17:45:15", + "2009-12-30 18:50:20", "2009-12-31 19:55:25", "2010-01-02 21:00:30", + "2010-01-03 22:05:35", "2005-12-31 23:10:40", "2005-12-31 00:15:45", + "2008-12-27 14:30:00", "2008-12-28 14:30:00", "2011-12-31 15:32:03", null])"; for (auto u : TimeUnit::values()) { - CheckScalarUnary("local_time", timestamp(u, "US/Central"), times_seconds_precision, - timestamp(u), expected_local_us_central); + CheckScalarUnary("local_time", timestamp(u), times_seconds_precision, timestamp(u), + times_seconds_precision); + CheckScalarUnary("local_time", timestamp(u, "UTC"), times_seconds_precision, + timestamp(u), times_seconds_precision); + CheckScalarUnary("local_time", timestamp(u, "Asia/Kolkata"), times_seconds_precision, + timestamp(u), expected_local_kolkata); + CheckScalarUnary("local_time", timestamp(u, "Pacific/Marquesas"), + times_seconds_precision, timestamp(u), expected_local_marquesas); } } diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index beb7f2d2676..b69bdc2719e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -673,7 +673,7 @@ struct IsDaylightSavings { }; // ---------------------------------------------------------------------- -// Extract timezone naive time of a given timestamp in it's timezone +// Extract local time of a given timestamp given its timezone template struct LocalTime { @@ -1808,10 +1808,11 @@ const FunctionDoc is_dst_doc{ {"values"}}; const FunctionDoc local_time_doc{ - "Converts a timezone-aware timestamp to a timezone-naive timestamp", - ("LocalTime returns a timestamp without a timezone.\n" - "Null values emit null.\n" - "An error is returned if the values do not have a defined timezone."), + "Convert timestamp to a timezone-naive local time timestamp", + ("LocalTime converts a timestamp to a local time of timestamps timezone\n" + "and removes timezone metadata. If input is in UTC or doesn't have\n" + "timezone metadata, it is returned as is.\n" + "Null values emit null."), {"values"}}; const FunctionDoc floor_temporal_doc{ "Round temporal values down to nearest multiple of specified time unit", diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ed79b47104d..75bf70409a8 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1951,6 +1951,7 @@ def _check_datetime_components(timestamps, timezone=None): assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3)) assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond)) assert pc.subsecond(tsa).equals(pa.array(subseconds)) + assert pc.local_time(tsa).equals(pa.array(ts.dt.tz_localize(None))) if ts.dt.tz: if ts.dt.tz is datetime.timezone.utc: From 115347ee7cbc2efe4ab15477e34afa102e0a6a10 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 17 Feb 2023 12:53:06 +0100 Subject: [PATCH 3/4] Review feedback --- docs/source/cpp/compute.rst | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index cfa38629994..7942987e0ab 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1445,8 +1445,6 @@ For timestamps inputs with non-empty timezone, localized timestamp components wi +--------------------+------------+-------------------+---------------+----------------------------+-------+ | is_leap_year | Unary | Timestamp, Date | Boolean | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ -| local_time | Unary | Timestamp | Timestamp | | | -+--------------------+------------+-------------------+---------------+----------------------------+-------+ | microsecond | Unary | Timestamp, Time | Int64 | | | +--------------------+------------+-------------------+---------------+----------------------------+-------+ | millisecond | Unary | Timestamp, Time | Int64 | | | @@ -1547,7 +1545,7 @@ is the same, even though the UTC years would be different. Timezone handling ~~~~~~~~~~~~~~~~~ -This function is meant to be used when an external system produces +`assume_timezone` function is meant to be used when an external system produces "timezone-naive" timestamps which need to be converted to "timezone-aware" timestamps (see for example the `definition `__ @@ -1558,11 +1556,20 @@ Input timestamps are assumed to be relative to the timezone given in UTC-relative timestamps with the timezone metadata set to the above value. An error is returned if the timestamps already have the timezone metadata set. +`local_time` function converts UTC-relative timestamps to local "timezone-naive" +timestamp. The timezone is taken from the timezone metadata of the input +timestamps. This function is the inverse of `assume_timezone`. Please note: +**all temporal functions already operate on timestamps as if they were in local +time of metadata provided timezone**. Using `local_time` is only meant to be +used when an external system expects local timestamps. + +--------------------+------------+-------------------+---------------+----------------------------------+-------+ | Function name | Arity | Input types | Output type | Options class | Notes | +====================+============+===================+===============+==================================+=======+ | assume_timezone | Unary | Timestamp | Timestamp | :struct:`AssumeTimezoneOptions` | \(1) | +--------------------+------------+-------------------+---------------+----------------------------------+-------+ +| local_time | Unary | Timestamp | Timestamp | | \(2) | ++--------------------+------------+-------------------+---------------+----------------------------------+-------+ * \(1) In addition to the timezone value, :struct:`AssumeTimezoneOptions` allows choosing the behaviour when a timestamp is ambiguous or nonexistent From cf2eb81466fe9ea50911138aa6dd28e07010ba03 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 17 Feb 2023 18:39:13 +0100 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Weston Pace --- docs/source/cpp/compute.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 7942987e0ab..87411fbe77a 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1557,10 +1557,10 @@ UTC-relative timestamps with the timezone metadata set to the above value. An error is returned if the timestamps already have the timezone metadata set. `local_time` function converts UTC-relative timestamps to local "timezone-naive" -timestamp. The timezone is taken from the timezone metadata of the input +timestamps. The timezone is taken from the timezone metadata of the input timestamps. This function is the inverse of `assume_timezone`. Please note: **all temporal functions already operate on timestamps as if they were in local -time of metadata provided timezone**. Using `local_time` is only meant to be +time of the metadata provided timezone**. Using `local_time` is only meant to be used when an external system expects local timestamps. +--------------------+------------+-------------------+---------------+----------------------------------+-------+