diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 2976e8ae4e8..ace0fcdb58e 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -834,6 +834,7 @@ SCALAR_EAGER_UNARY(DayOfYear, "day_of_year") SCALAR_EAGER_UNARY(Hour, "hour") SCALAR_EAGER_UNARY(YearMonthDay, "year_month_day") SCALAR_EAGER_UNARY(IsDaylightSavings, "is_dst") +SCALAR_EAGER_UNARY(LocalTime, "local_time") SCALAR_EAGER_UNARY(IsLeapYear, "is_leap_year") SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar") SCALAR_EAGER_UNARY(ISOWeek, "iso_week") diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 35c41dba9a9..3e55e2c8892 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -1518,6 +1518,16 @@ ARROW_EXPORT Result AssumeTimezone(const Datum& values, ARROW_EXPORT Result IsDaylightSavings(const Datum& values, ExecContext* ctx = NULLPTR); +/// \brief LocalTime converts timestamp to timezone naive local timestamp +/// +/// \param[in] values input to convert to local time +/// \param[in] ctx the function execution context, optional +/// \return the resulting datum +/// +/// \since 12.0.0 +/// \note API not yet finalized +ARROW_EXPORT Result LocalTime(const Datum& values, ExecContext* ctx = NULLPTR); + /// \brief Years Between finds the number of years between two values /// /// \param[in] left input treated as the start time diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 597157eb6e0..41408b94797 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1811,6 +1811,39 @@ TEST_F(ScalarTemporalTest, TestTemporalDifferenceErrors) { CallFunction("weeks_between", {arr1, arr1}, &options)); } +TEST_F(ScalarTemporalTest, TestLocalTime) { + const char* times_seconds_precision = + R"(["1970-01-01T00:00:59", "2000-02-29T23:23:23", "2033-05-18T03:33:20", + "2020-01-01T01:05:05", "2019-12-31T02:10:10", "2019-12-30T03:15:15", + "2009-12-31T04:20:20", "2010-01-01T05:25:25", "2010-01-03T06:30:30", + "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", + "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])"; + + const char* expected_local_kolkata = + R"(["1970-01-01 05:30:59", "2000-03-01 04:53:23", "2033-05-18 09:03:20", + "2020-01-01 06:35:05", "2019-12-31 07:40:10", "2019-12-30 08:45:15", + "2009-12-31 09:50:20", "2010-01-01 10:55:25", "2010-01-03 12:00:30", + "2010-01-04 13:05:35", "2006-01-01 14:10:40", "2005-12-31 15:15:45", + "2008-12-28 05:30:00", "2008-12-29 05:30:00", "2012-01-01 06:32:03", null])"; + const char* expected_local_marquesas = + R"(["1969-12-31 14:30:59", "2000-02-29 13:53:23", "2033-05-17 18:03:20", + "2019-12-31 15:35:05", "2019-12-30 16:40:10", "2019-12-29 17:45:15", + "2009-12-30 18:50:20", "2009-12-31 19:55:25", "2010-01-02 21:00:30", + "2010-01-03 22:05:35", "2005-12-31 23:10:40", "2005-12-31 00:15:45", + "2008-12-27 14:30:00", "2008-12-28 14:30:00", "2011-12-31 15:32:03", null])"; + + for (auto u : TimeUnit::values()) { + CheckScalarUnary("local_time", timestamp(u), times_seconds_precision, timestamp(u), + times_seconds_precision); + CheckScalarUnary("local_time", timestamp(u, "UTC"), times_seconds_precision, + timestamp(u), times_seconds_precision); + CheckScalarUnary("local_time", timestamp(u, "Asia/Kolkata"), times_seconds_precision, + timestamp(u), expected_local_kolkata); + CheckScalarUnary("local_time", timestamp(u, "Pacific/Marquesas"), + times_seconds_precision, timestamp(u), expected_local_marquesas); + } +} + TEST_F(ScalarTemporalTest, TestAssumeTimezone) { std::string timezone_utc = "UTC"; std::string timezone_kolkata = "Asia/Kolkata"; diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index bc533568c4d..b69bdc2719e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -672,6 +672,23 @@ struct IsDaylightSavings { const time_zone* tz_; }; +// ---------------------------------------------------------------------- +// Extract local time of a given timestamp given its timezone + +template +struct LocalTime { + explicit LocalTime(const FunctionOptions* options, Localizer&& localizer) + : localizer_(std::move(localizer)) {} + + template + T Call(KernelContext*, Arg0 arg, Status*) const { + const auto t = localizer_.template ConvertTimePoint(arg); + return static_cast(t.time_since_epoch().count()); + } + + Localizer localizer_; +}; + // ---------------------------------------------------------------------- // Round temporal values to given frequency @@ -1327,6 +1344,12 @@ Result ResolveAssumeTimezoneOutput(KernelContext* ctx, return timestamp(in_type.unit(), AssumeTimezoneState::Get(ctx).timezone); } +Result ResolveLocalTimeOutput(KernelContext* ctx, + const std::vector& args) { + const auto& in_type = checked_cast(*args[0]); + return timestamp(in_type.unit()); +} + template struct AssumeTimezone { explicit AssumeTimezone(const AssumeTimezoneOptions* options, const time_zone* tz) @@ -1784,6 +1807,13 @@ const FunctionDoc is_dst_doc{ "An error is returned if the values do not have a defined timezone."), {"values"}}; +const FunctionDoc local_time_doc{ + "Convert timestamp to a timezone-naive local time timestamp", + ("LocalTime converts a timestamp to a local time of timestamps timezone\n" + "and removes timezone metadata. If input is in UTC or doesn't have\n" + "timezone metadata, it is returned as is.\n" + "Null values emit null."), + {"values"}}; const FunctionDoc floor_temporal_doc{ "Round temporal values down to nearest multiple of specified time unit", ("Null values emit null.\n" @@ -1801,8 +1831,9 @@ const FunctionDoc ceil_temporal_doc{ const FunctionDoc round_temporal_doc{ "Round temporal values to the nearest multiple of specified time unit", ("Null values emit null.\n" - "An error is returned if the values have a defined timezone but it\n" - "cannot be found in the timezone database."), + "If timezone is not given then timezone naive timestamp in UTC are\n" + "returned. An error is returned if the values have a defined timezone\n" + "but it cannot be found in the timezone database."), {"timestamps"}, "RoundTemporalOptions"}; @@ -1969,6 +2000,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) { is_dst_doc); DCHECK_OK(registry->AddFunction(std::move(is_dst))); + auto local_time = + UnaryTemporalFactory::Make< + WithTimestamps>("local_time", OutputType::Resolver(ResolveLocalTimeOutput), + local_time_doc); + DCHECK_OK(registry->AddFunction(std::move(local_time))); + // Temporal rounding functions // Note: UnaryTemporalFactory will not correctly resolve OutputType(FirstType) to // output type. See TemporalComponentExtractRound for more. diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 5bd3d659a3f..87411fbe77a 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1545,7 +1545,7 @@ is the same, even though the UTC years would be different. Timezone handling ~~~~~~~~~~~~~~~~~ -This function is meant to be used when an external system produces +`assume_timezone` function is meant to be used when an external system produces "timezone-naive" timestamps which need to be converted to "timezone-aware" timestamps (see for example the `definition `__ @@ -1556,11 +1556,20 @@ Input timestamps are assumed to be relative to the timezone given in UTC-relative timestamps with the timezone metadata set to the above value. An error is returned if the timestamps already have the timezone metadata set. +`local_time` function converts UTC-relative timestamps to local "timezone-naive" +timestamps. The timezone is taken from the timezone metadata of the input +timestamps. This function is the inverse of `assume_timezone`. Please note: +**all temporal functions already operate on timestamps as if they were in local +time of the metadata provided timezone**. Using `local_time` is only meant to be +used when an external system expects local timestamps. + +--------------------+------------+-------------------+---------------+----------------------------------+-------+ | Function name | Arity | Input types | Output type | Options class | Notes | +====================+============+===================+===============+==================================+=======+ | assume_timezone | Unary | Timestamp | Timestamp | :struct:`AssumeTimezoneOptions` | \(1) | +--------------------+------------+-------------------+---------------+----------------------------------+-------+ +| local_time | Unary | Timestamp | Timestamp | | \(2) | ++--------------------+------------+-------------------+---------------+----------------------------------+-------+ * \(1) In addition to the timezone value, :struct:`AssumeTimezoneOptions` allows choosing the behaviour when a timestamp is ambiguous or nonexistent diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ed79b47104d..75bf70409a8 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1951,6 +1951,7 @@ def _check_datetime_components(timestamps, timezone=None): assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3)) assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond)) assert pc.subsecond(tsa).equals(pa.array(subseconds)) + assert pc.local_time(tsa).equals(pa.array(ts.dt.tz_localize(None))) if ts.dt.tz: if ts.dt.tz is datetime.timezone.utc: