Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,7 @@ SCALAR_EAGER_UNARY(DayOfYear, "day_of_year")
SCALAR_EAGER_UNARY(Hour, "hour")
SCALAR_EAGER_UNARY(YearMonthDay, "year_month_day")
SCALAR_EAGER_UNARY(IsDaylightSavings, "is_dst")
SCALAR_EAGER_UNARY(LocalTime, "local_time")
SCALAR_EAGER_UNARY(IsLeapYear, "is_leap_year")
SCALAR_EAGER_UNARY(ISOCalendar, "iso_calendar")
SCALAR_EAGER_UNARY(ISOWeek, "iso_week")
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,16 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& values,
ARROW_EXPORT Result<Datum> IsDaylightSavings(const Datum& values,
ExecContext* ctx = NULLPTR);

/// \brief LocalTime converts a timestamp to a timezone-naive local timestamp
///
/// \param[in] values input to convert to local time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 12.0.0
/// \note API not yet finalized
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to suggest removing this but now I see this is everywhere. Hmm.... :( I'm not sure how true this is these days.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you could see it as an opportunity to sync the API with substrait :D

ARROW_EXPORT Result<Datum> LocalTime(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief Years Between finds the number of years between two values
///
/// \param[in] left input treated as the start time
Expand Down
33 changes: 33 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1811,6 +1811,39 @@ TEST_F(ScalarTemporalTest, TestTemporalDifferenceErrors) {
CallFunction("weeks_between", {arr1, arr1}, &options));
}

// Exercises the "local_time" kernel for every time unit: input that is
// timezone-naive or tagged "UTC" must come back with identical values, while
// input tagged with another zone must come back shifted to that zone's local
// time. In all cases the expected output type carries no timezone.
TEST_F(ScalarTemporalTest, TestLocalTime) {
  // Input instants; also the expected output for the naive and UTC cases.
  const char* utc_input =
      R"(["1970-01-01T00:00:59", "2000-02-29T23:23:23", "2033-05-18T03:33:20",
"2020-01-01T01:05:05", "2019-12-31T02:10:10", "2019-12-30T03:15:15",
"2009-12-31T04:20:20", "2010-01-01T05:25:25", "2010-01-03T06:30:30",
"2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
"2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";

  // Same instants as seen on a local clock in Asia/Kolkata (5h30 ahead).
  const char* kolkata_local =
      R"(["1970-01-01 05:30:59", "2000-03-01 04:53:23", "2033-05-18 09:03:20",
"2020-01-01 06:35:05", "2019-12-31 07:40:10", "2019-12-30 08:45:15",
"2009-12-31 09:50:20", "2010-01-01 10:55:25", "2010-01-03 12:00:30",
"2010-01-04 13:05:35", "2006-01-01 14:10:40", "2005-12-31 15:15:45",
"2008-12-28 05:30:00", "2008-12-29 05:30:00", "2012-01-01 06:32:03", null])";

  // Same instants as seen on a local clock in Pacific/Marquesas (9h30 behind).
  const char* marquesas_local =
      R"(["1969-12-31 14:30:59", "2000-02-29 13:53:23", "2033-05-17 18:03:20",
"2019-12-31 15:35:05", "2019-12-30 16:40:10", "2019-12-29 17:45:15",
"2009-12-30 18:50:20", "2009-12-31 19:55:25", "2010-01-02 21:00:30",
"2010-01-03 22:05:35", "2005-12-31 23:10:40", "2005-12-31 00:15:45",
"2008-12-27 14:30:00", "2008-12-28 14:30:00", "2011-12-31 15:32:03", null])";

  for (const auto unit : TimeUnit::values()) {
    // Every case is expected to produce this timezone-less type.
    const auto naive_type = timestamp(unit);

    CheckScalarUnary("local_time", naive_type, utc_input, naive_type, utc_input);
    CheckScalarUnary("local_time", timestamp(unit, "UTC"), utc_input, naive_type,
                     utc_input);
    CheckScalarUnary("local_time", timestamp(unit, "Asia/Kolkata"), utc_input,
                     naive_type, kolkata_local);
    CheckScalarUnary("local_time", timestamp(unit, "Pacific/Marquesas"), utc_input,
                     naive_type, marquesas_local);
  }
}

TEST_F(ScalarTemporalTest, TestAssumeTimezone) {
std::string timezone_utc = "UTC";
std::string timezone_kolkata = "Asia/Kolkata";
Expand Down
41 changes: 39 additions & 2 deletions cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,23 @@ struct IsDaylightSavings {
const time_zone* tz_;
};

// ----------------------------------------------------------------------
// Extract local time of a given timestamp given its timezone

// Element-wise functor behind the local-time extraction.
//
// Duration  - duration type handed to the localizer's ConvertTimePoint.
// Localizer - object providing `ConvertTimePoint<Duration>(arg)`; it is moved
//             into this functor at construction.
template <typename Duration, typename Localizer>
struct LocalTime {
  // `options` is accepted but not read by this functor.
  explicit LocalTime(const FunctionOptions* options, Localizer&& localizer)
      : localizer_(std::move(localizer)) {}

  // Converts one input value through the localizer and returns the resulting
  // time point's count since epoch, cast to T. The KernelContext and Status
  // arguments are unused.
  template <typename T, typename Arg0>
  T Call(KernelContext*, Arg0 arg, Status*) const {
    const auto local_point = localizer_.template ConvertTimePoint<Duration>(arg);
    const auto ticks = local_point.time_since_epoch().count();
    return static_cast<T>(ticks);
  }

  Localizer localizer_;
};

// ----------------------------------------------------------------------
// Round temporal values to given frequency

Expand Down Expand Up @@ -1327,6 +1344,12 @@ Result<TypeHolder> ResolveAssumeTimezoneOutput(KernelContext* ctx,
return timestamp(in_type.unit(), AssumeTimezoneState::Get(ctx).timezone);
}

// Output-type resolver used when registering "local_time".
//
// Reads the time unit of the first argument (cast to TimestampType) and
// returns a timestamp type built from that unit alone, i.e. without passing a
// timezone. `ctx` is not used.
Result<TypeHolder> ResolveLocalTimeOutput(KernelContext* ctx,
                                          const std::vector<TypeHolder>& args) {
  const auto unit = checked_cast<const TimestampType&>(*args[0]).unit();
  return timestamp(unit);
}

template <typename Duration>
struct AssumeTimezone {
explicit AssumeTimezone(const AssumeTimezoneOptions* options, const time_zone* tz)
Expand Down Expand Up @@ -1784,6 +1807,13 @@ const FunctionDoc is_dst_doc{
"An error is returned if the values do not have a defined timezone."),
{"values"}};

const FunctionDoc local_time_doc{
"Convert timestamp to a timezone-naive local time timestamp",
("LocalTime converts a timestamp to a local time of timestamps timezone\n"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Something is missing in this sentence?

"and removes timezone metadata. If input is in UTC or doesn't have\n"
"timezone metadata, it is returned as is.\n"
Comment on lines +1813 to +1814
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returned "as is" is not entirely correct for UTC, as I assume the "UTC" timezone is still removed?

"Null values emit null."),
{"values"}};
const FunctionDoc floor_temporal_doc{
"Round temporal values down to nearest multiple of specified time unit",
("Null values emit null.\n"
Expand All @@ -1801,8 +1831,9 @@ const FunctionDoc ceil_temporal_doc{
const FunctionDoc round_temporal_doc{
"Round temporal values to the nearest multiple of specified time unit",
("Null values emit null.\n"
"An error is returned if the values have a defined timezone but it\n"
"cannot be found in the timezone database."),
"If timezone is not given then timezone naive timestamp in UTC are\n"
"returned. An error is returned if the values have a defined timezone\n"
"but it cannot be found in the timezone database."),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was this change intentional? Or was it meant to be in local_time_doc?
(and if intentional, why only for round and not ceil/floor?)

{"timestamps"},
"RoundTemporalOptions"};

Expand Down Expand Up @@ -1969,6 +2000,12 @@ void RegisterScalarTemporalUnary(FunctionRegistry* registry) {
is_dst_doc);
DCHECK_OK(registry->AddFunction(std::move(is_dst)));

auto local_time =
UnaryTemporalFactory<LocalTime, TemporalComponentExtract, TimestampType>::Make<
WithTimestamps>("local_time", OutputType::Resolver(ResolveLocalTimeOutput),
local_time_doc);
DCHECK_OK(registry->AddFunction(std::move(local_time)));

// Temporal rounding functions
// Note: UnaryTemporalFactory will not correctly resolve OutputType(FirstType) to
// output type. See TemporalComponentExtractRound for more.
Expand Down
11 changes: 10 additions & 1 deletion docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1545,7 +1545,7 @@ is the same, even though the UTC years would be different.
Timezone handling
~~~~~~~~~~~~~~~~~

This function is meant to be used when an external system produces
The `assume_timezone` function is meant to be used when an external system produces
"timezone-naive" timestamps which need to be converted to "timezone-aware"
timestamps (see for example the `definition
<https://docs.python.org/3/library/datetime.html#aware-and-naive-objects>`__
Expand All @@ -1556,11 +1556,20 @@ Input timestamps are assumed to be relative to the timezone given in
UTC-relative timestamps with the timezone metadata set to the above value.
An error is returned if the timestamps already have the timezone metadata set.

The `local_time` function converts UTC-relative timestamps to local
"timezone-naive" timestamps. The timezone is taken from the timezone metadata
of the input timestamps. This function is the inverse of `assume_timezone`.
Please note: **all temporal functions already operate on timestamps as if they
were in the local time of the timezone given in their metadata**. `local_time`
is only meant to be used when an external system expects local timestamps.

+--------------------+------------+-------------------+---------------+----------------------------------+-------+
| Function name | Arity | Input types | Output type | Options class | Notes |
+====================+============+===================+===============+==================================+=======+
| assume_timezone | Unary | Timestamp | Timestamp | :struct:`AssumeTimezoneOptions` | \(1) |
+--------------------+------------+-------------------+---------------+----------------------------------+-------+
| local_time | Unary | Timestamp | Timestamp | | \(2) |
+--------------------+------------+-------------------+---------------+----------------------------------+-------+

* \(1) In addition to the timezone value, :struct:`AssumeTimezoneOptions`
allows choosing the behaviour when a timestamp is ambiguous or nonexistent
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -1951,6 +1951,7 @@ def _check_datetime_components(timestamps, timezone=None):
assert pc.microsecond(tsa).equals(pa.array(ts.dt.microsecond % 10 ** 3))
assert pc.nanosecond(tsa).equals(pa.array(ts.dt.nanosecond))
assert pc.subsecond(tsa).equals(pa.array(subseconds))
assert pc.local_time(tsa).equals(pa.array(ts.dt.tz_localize(None)))

if ts.dt.tz:
if ts.dt.tz is datetime.timezone.utc:
Expand Down