From 2ed07704cd5cc50e0cd373f4736e9c8e76a1731d Mon Sep 17 00:00:00 2001
From: David Li
Date: Fri, 13 Aug 2021 13:31:20 -0400
Subject: [PATCH 1/2] ARROW-13549: [C++] Add date/time extraction functions
---
.../compute/kernels/scalar_cast_temporal.cc | 220 ++++++++---
.../arrow/compute/kernels/scalar_cast_test.cc | 346 +++++++++++++++---
.../arrow/compute/kernels/scalar_temporal.cc | 176 +--------
.../arrow/compute/kernels/temporal_internal.h | 214 +++++++++++
docs/source/python/api/compute.rst | 23 ++
python/pyarrow/tests/test_pandas.py | 25 +-
6 files changed, 730 insertions(+), 274 deletions(-)
create mode 100644 cpp/src/arrow/compute/kernels/temporal_internal.h
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
index 1a58fce7c74..0fe537ebbaf 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_temporal.cc
@@ -22,6 +22,7 @@
#include "arrow/array/builder_time.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
+#include "arrow/compute/kernels/temporal_internal.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/time.h"
#include "arrow/util/value_parsing.h"
@@ -117,6 +118,28 @@ Status ShiftTime(KernelContext* ctx, const util::DivideOrMultiply factor_op,
return Status::OK();
}
+template class Op, typename OutType, typename... Args>
+Status ExtractTemporal(KernelContext* ctx, const ExecBatch& batch, Datum* out,
+ Args... args) {
+ const auto& ty = checked_cast(*batch[0].type());
+
+ switch (ty.unit()) {
+ case TimeUnit::SECOND:
+ return TemporalComponentExtract::Exec(ctx, batch, out, args...);
+ case TimeUnit::MILLI:
+ return TemporalComponentExtract::Exec(ctx, batch, out, args...);
+ case TimeUnit::MICRO:
+ return TemporalComponentExtract::Exec(ctx, batch, out, args...);
+ case TimeUnit::NANO:
+ return TemporalComponentExtract::Exec(ctx, batch, out, args...);
+ }
+ return Status::Invalid("Unknown timestamp unit: ", ty);
+}
+
// <TimestampType, TimestampType> and <DurationType, DurationType>
template
struct CastFunctor<
@@ -142,68 +165,175 @@ struct CastFunctor<
}
};
+// ----------------------------------------------------------------------
+// From timestamp to date32 or date64
+
template <>
struct CastFunctor {
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
-
- const ArrayData& input = *batch[0].array();
- ArrayData* output = out->mutable_array();
-
- const auto& in_type = checked_cast(*input.type);
+ template
+ struct Date32 {
+ Date32(const FunctionOptions* options, Localizer&& localizer)
+ : localizer_(std::move(localizer)) {}
+
+ template
+ T Call(KernelContext*, Arg0 arg, Status*) const {
+ return static_cast(static_cast(
+ floor(localizer_.template ConvertTimePoint(arg))
+ .time_since_epoch()
+ .count()));
+ }
- static const int64_t kTimestampToDateFactors[4] = {
- 86400LL, // SECOND
- 86400LL * 1000LL, // MILLI
- 86400LL * 1000LL * 1000LL, // MICRO
- 86400LL * 1000LL * 1000LL * 1000LL, // NANO
- };
+ Localizer localizer_;
+ };
- const int64_t factor = kTimestampToDateFactors[static_cast(in_type.unit())];
- return ShiftTime(ctx, util::DIVIDE, factor, input, output);
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ return ExtractTemporal(ctx, batch, out);
}
};
template <>
struct CastFunctor {
+ template
+ struct Date64 {
+ constexpr static int64_t kMillisPerDay = 86400000;
+ Date64(const FunctionOptions* options, Localizer&& localizer)
+ : localizer_(std::move(localizer)) {}
+
+ template
+ T Call(KernelContext*, Arg0 arg, Status*) const {
+ return static_cast(
+ kMillisPerDay *
+ static_cast(
+ floor(localizer_.template ConvertTimePoint(arg))
+ .time_since_epoch()
+ .count()));
+ }
+
+ Localizer localizer_;
+ };
+
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ return ExtractTemporal(ctx, batch, out);
+ }
+};
- const CastOptions& options = checked_cast(*ctx->state()).options;
- const ArrayData& input = *batch[0].array();
- ArrayData* output = out->mutable_array();
- const auto& in_type = checked_cast(*input.type);
+// ----------------------------------------------------------------------
+// From timestamp to time32 or time64
+
+template
+struct ExtractTimeDownscaled {
+ ExtractTimeDownscaled(const FunctionOptions* options, Localizer&& localizer,
+ const int64_t factor)
+ : localizer_(std::move(localizer)), factor_(factor) {}
+
+ template
+ T Call(KernelContext*, Arg0 arg, Status* st) const {
+ const auto t = localizer_.template ConvertTimePoint(arg);
+ const int64_t orig_value = (t - floor(t)).count();
+ const T scaled = static_cast(orig_value / factor_);
+ const int64_t unscaled = static_cast(scaled) * factor_;
+ if (unscaled != orig_value) {
+ *st = Status::Invalid("Cast would lose data: ", orig_value);
+ return 0;
+ }
+ return scaled;
+ }
+
+ Localizer localizer_;
+ const int64_t factor_;
+};
- auto conversion = util::GetTimestampConversion(in_type.unit(), TimeUnit::MILLI);
- RETURN_NOT_OK((ShiftTime(ctx, conversion.first, conversion.second,
- input, output)));
+template
+struct ExtractTimeUpscaledUnchecked {
+ ExtractTimeUpscaledUnchecked(const FunctionOptions* options, Localizer&& localizer,
+ const int64_t factor)
+ : localizer_(std::move(localizer)), factor_(factor) {}
+
+ template
+ T Call(KernelContext*, Arg0 arg, Status*) const {
+ const auto t = localizer_.template ConvertTimePoint(arg);
+ const int64_t orig_value = (t - floor(t)).count();
+ return static_cast(orig_value * factor_);
+ }
+
+ Localizer localizer_;
+ const int64_t factor_;
+};
- // Ensure that intraday milliseconds have been zeroed out
- auto out_data = output->GetMutableValues(1);
+template
+struct ExtractTimeDownscaledUnchecked {
+ ExtractTimeDownscaledUnchecked(const FunctionOptions* options, Localizer&& localizer,
+ const int64_t factor)
+ : localizer_(std::move(localizer)), factor_(factor) {}
+
+ template
+ T Call(KernelContext*, Arg0 arg, Status*) const {
+ const auto t = localizer_.template ConvertTimePoint(arg);
+ const int64_t orig_value = (t - floor(t)).count();
+ return static_cast(orig_value / factor_);
+ }
- if (input.null_count != 0) {
- BitmapReader bit_reader(input.buffers[0]->data(), input.offset, input.length);
+ Localizer localizer_;
+ const int64_t factor_;
+};
+
+template <>
+struct CastFunctor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ const auto& in_type = checked_cast(*batch[0].type());
+ const auto& out_type = checked_cast(*out->type());
+ const CastOptions& options = checked_cast(*ctx->state()).options;
- for (int64_t i = 0; i < input.length; ++i) {
- const int64_t remainder = out_data[i] % kMillisecondsInDay;
- if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && bit_reader.IsSet() &&
- remainder > 0)) {
- return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
+ // Shifting before extraction won't work since the timestamp may not fit
+ // even if the time itself fits
+ if (in_type.unit() != out_type.unit()) {
+ auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
+ if (conversion.first == util::MULTIPLY) {
+ return ExtractTemporal(
+ ctx, batch, out, conversion.second);
+ } else {
+ if (options.allow_time_truncate) {
+ return ExtractTemporal(
+ ctx, batch, out, conversion.second);
+ } else {
+ return ExtractTemporal(ctx, batch, out,
+ conversion.second);
}
- out_data[i] -= remainder;
- bit_reader.Next();
}
- } else {
- for (int64_t i = 0; i < input.length; ++i) {
- const int64_t remainder = out_data[i] % kMillisecondsInDay;
- if (ARROW_PREDICT_FALSE(!options.allow_time_truncate && remainder > 0)) {
- return Status::Invalid("Timestamp value had non-zero intraday milliseconds");
+ }
+ return ExtractTemporal(ctx, batch, out, 1);
+ }
+};
+
+template <>
+struct CastFunctor {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK_EQ(batch[0].kind(), Datum::ARRAY);
+ const auto& in_type = checked_cast(*batch[0].type());
+ const auto& out_type = checked_cast(*out->type());
+ const CastOptions& options = checked_cast(*ctx->state()).options;
+
+ // Shifting before extraction won't work since the timestamp may not fit
+ // even if the time itself fits
+ if (in_type.unit() != out_type.unit()) {
+ auto conversion = util::GetTimestampConversion(in_type.unit(), out_type.unit());
+ if (conversion.first == util::MULTIPLY) {
+ return ExtractTemporal(
+ ctx, batch, out, conversion.second);
+ } else {
+ if (options.allow_time_truncate) {
+ return ExtractTemporal(
+ ctx, batch, out, conversion.second);
+ } else {
+ return ExtractTemporal(ctx, batch, out,
+ conversion.second);
}
- out_data[i] -= remainder;
}
}
-
- return Status::OK();
+ return ExtractTemporal(ctx, batch, out, 1);
}
};
@@ -389,6 +519,10 @@ std::shared_ptr GetTime32Cast() {
// time32 -> time32
AddCrossUnitCast(func.get());
+ // timestamp -> time32
+ AddSimpleCast(InputType(Type::TIMESTAMP), kOutputTargetType,
+ func.get());
+
return func;
}
@@ -406,6 +540,10 @@ std::shared_ptr GetTime64Cast() {
// Between durations
AddCrossUnitCast(func.get());
+ // timestamp -> time64
+ AddSimpleCast(InputType(Type::TIMESTAMP), kOutputTargetType,
+ func.get());
+
return func;
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 656fa62be2b..399a4a0e5d4 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -45,6 +45,7 @@
#include "arrow/compute/api_vector.h"
#include "arrow/compute/cast.h"
#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
#include "arrow/compute/kernels/test_util.h"
namespace arrow {
@@ -92,8 +93,9 @@ static void CheckCast(std::shared_ptr input, std::shared_ptr expec
static void CheckCastFails(std::shared_ptr input, CastOptions options) {
ASSERT_RAISES(Invalid, Cast(input, options))
- << "\n to_type: " << options.to_type->ToString()
- << "\n input: " << input->ToString();
+ << "\n to_type: " << options.to_type->ToString()
+ << "\n from_type: " << input->type()->ToString()
+ << "\n input: " << input->ToString();
// For the scalars, check that at least one of the input fails (since many
// of the tests contains a mix of passing and failing values). In some
@@ -1100,47 +1102,307 @@ TEST(Cast, TimestampToTimestampMultiplyOverflow) {
options);
}
-TEST(Cast, TimestampToDate) {
- for (auto date : {
- // 2000-01-01, 2000-01-02, null
- ArrayFromJSON(date32(), "[10957, 10958, null]"),
- ArrayFromJSON(date64(), "[946684800000, 946771200000, null]"),
- }) {
- for (auto ts : {
- ArrayFromJSON(timestamp(TimeUnit::SECOND), "[946684800, 946771200, null]"),
- ArrayFromJSON(timestamp(TimeUnit::MILLI),
- "[946684800000, 946771200000, null]"),
- ArrayFromJSON(timestamp(TimeUnit::MICRO),
- "[946684800000000, 946771200000000, null]"),
- ArrayFromJSON(timestamp(TimeUnit::NANO),
- "[946684800000000000, 946771200000000000, null]"),
- }) {
- CheckCast(ts, date);
- }
+constexpr char kTimestampJson[] =
+ R"(["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999",
+ "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000",
+ "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+ "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+ "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+ "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+ "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])";
+constexpr char kTimestampSecondsJson[] =
+ R"(["1970-01-01T00:00:59","2000-02-29T23:23:23",
+ "1899-01-01T00:59:20","2033-05-18T03:33:20",
+ "2020-01-01T01:05:05", "2019-12-31T02:10:10",
+ "2019-12-30T03:15:15", "2009-12-31T04:20:20",
+ "2010-01-01T05:25:25", "2010-01-03T06:30:30",
+ "2010-01-04T07:35:35", "2006-01-01T08:40:40",
+ "2005-12-31T09:45:45", "2008-12-28", "2008-12-29",
+ "2012-01-01 01:02:03", null])";
+constexpr char kTimestampExtremeJson[] =
+ R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])";
- for (auto ts : {
- ArrayFromJSON(timestamp(TimeUnit::SECOND), "[946684801, 946771201, null]"),
- ArrayFromJSON(timestamp(TimeUnit::MILLI),
- "[946684800001, 946771200001, null]"),
- ArrayFromJSON(timestamp(TimeUnit::MICRO),
- "[946684800000001, 946771200000001, null]"),
- ArrayFromJSON(timestamp(TimeUnit::NANO),
- "[946684800000000001, 946771200000000001, null]"),
- }) {
- auto options = CastOptions::Safe(date->type());
- CheckCastFails(ts, options);
+TEST(Cast, TimestampToDate) {
+ // See scalar_temporal_test.cc
+ auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
+ auto date_32 = ArrayFromJSON(date32(),
+ R"([
+ 0, 11016, -25932, 23148,
+ 18262, 18261, 18260, 14609,
+ 14610, 14612, 14613, 13149,
+ 13148, 14241, 14242, 15340, null
+ ])");
+ auto date_64 = ArrayFromJSON(date64(),
+ R"([
+ 0, 951782400000, -2240524800000, 1999987200000,
+ 1577836800000, 1577750400000, 1577664000000, 1262217600000,
+ 1262304000000, 1262476800000, 1262563200000, 1136073600000,
+ 1135987200000, 1230422400000, 1230508800000, 1325376000000, null
+ ])");
+ // See TestOutsideNanosecondRange in scalar_temporal_test.cc
+ auto timestamps_extreme =
+ ArrayFromJSON(timestamp(TimeUnit::MICRO),
+ R"(["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"])");
+ auto date_32_extreme = ArrayFromJSON(date32(), "[-106753, 106753]");
+ auto date_64_extreme = ArrayFromJSON(date64(), "[-9223459200000, 9223459200000]");
+
+ CheckCast(timestamps, date_32);
+ CheckCast(timestamps, date_64);
+ CheckCast(timestamps_extreme, date_32_extreme);
+ CheckCast(timestamps_extreme, date_64_extreme);
+ for (auto u : TimeUnit::values()) {
+ auto unit = timestamp(u);
+ CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_32);
+ CheckCast(ArrayFromJSON(unit, kTimestampSecondsJson), date_64);
+ }
+}
+
+TEST(Cast, ZonedTimestampToDate) {
+#ifdef _WIN32
+ // TODO(ARROW-13168): we lack tzdb on Windows
+ GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
+#endif
- options.allow_time_truncate = true;
- CheckCast(ts, date, options);
+ {
+ // See TestZoned in scalar_temporal_test.cc
+ auto timestamps =
+ ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson);
+ auto date_32 = ArrayFromJSON(date32(),
+ R"([
+ -1, 11016, -25933, 23147,
+ 18261, 18260, 18259, 14608,
+ 14609, 14611, 14612, 13148,
+ 13148, 14240, 14241, 15339, null
+ ])");
+ auto date_64 = ArrayFromJSON(date64(), R"([
+ -86400000, 951782400000, -2240611200000, 1999900800000,
+ 1577750400000, 1577664000000, 1577577600000, 1262131200000,
+ 1262217600000, 1262390400000, 1262476800000, 1135987200000,
+ 1135987200000, 1230336000000, 1230422400000, 1325289600000, null
+ ])");
+ CheckCast(timestamps, date_32);
+ CheckCast(timestamps, date_64);
+ }
+
+ auto date_32 = ArrayFromJSON(date32(), R"([
+ 0, 11017, -25932, 23148,
+ 18262, 18261, 18260, 14609,
+ 14610, 14612, 14613, 13149,
+ 13148, 14241, 14242, 15340, null
+ ])");
+ auto date_64 = ArrayFromJSON(date64(), R"([
+ 0, 951868800000, -2240524800000, 1999987200000, 1577836800000,
+ 1577750400000, 1577664000000, 1262217600000, 1262304000000,
+ 1262476800000, 1262563200000, 1136073600000, 1135987200000,
+ 1230422400000, 1230508800000, 1325376000000, null
+ ])");
+
+ for (auto u : TimeUnit::values()) {
+ auto timestamps =
+ ArrayFromJSON(timestamp(u, "Australia/Broken_Hill"), kTimestampSecondsJson);
+ CheckCast(timestamps, date_32);
+ CheckCast(timestamps, date_64);
+ }
+
+ // Invalid timezone
+ for (auto u : TimeUnit::values()) {
+ auto timestamps =
+ ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
+ CheckCastFails(timestamps, CastOptions::Unsafe(date32()));
+ CheckCastFails(timestamps, CastOptions::Unsafe(date64()));
+ }
+}
+
+TEST(Cast, TimestampToTime) {
+ // See scalar_temporal_test.cc
+ auto timestamps = ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampJson);
+ // See TestOutsideNanosecondRange in scalar_temporal_test.cc
+ auto timestamps_extreme =
+ ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampExtremeJson);
+ auto timestamps_us = ArrayFromJSON(timestamp(TimeUnit::MICRO), R"([
+ "1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999",
+ "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000",
+ "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+ "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+ "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+ "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+ "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
+ auto timestamps_ms = ArrayFromJSON(timestamp(TimeUnit::MILLI), R"([
+ "1970-01-01T00:00:59.123","2000-02-29T23:23:23.999",
+ "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000",
+ "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+ "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004",
+ "2010-01-01T05:25:25.005", "2010-01-03T06:30:30.006",
+ "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+ "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null])");
+ auto timestamps_s = ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson);
+
+ auto times = ArrayFromJSON(time64(TimeUnit::NANO), R"([
+ 59123456789, 84203999999999, 3560001001001, 12800000000000,
+ 3905001000000, 7810002000000, 11715003000000, 15620004132000,
+ 19525005321000, 23430006163000, 27335000000000, 31240000000000,
+ 35145000000000, 0, 0, 3723000000000, null
+ ])");
+ auto times_ns_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
+ 59123456, 84203999999, 3560001001, 12800000000,
+ 3905001000, 7810002000, 11715003000, 15620004132,
+ 19525005321, 23430006163, 27335000000, 31240000000,
+ 35145000000, 0, 0, 3723000000, null
+ ])");
+ auto times_ns_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
+ 59123, 84203999, 3560001, 12800000,
+ 3905001, 7810002, 11715003, 15620004,
+ 19525005, 23430006, 27335000, 31240000,
+ 35145000, 0, 0, 3723000, null
+ ])");
+ auto times_us_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
+ 59123456000, 84203999999000, 3560001001000, 12800000000000,
+ 3905001000000, 7810002000000, 11715003000000, 15620004132000,
+ 19525005321000, 23430006163000, 27335000000000, 31240000000000,
+ 35145000000000, 0, 0, 3723000000000, null
+ ])");
+ auto times_ms_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
+ 59123000000, 84203999000000, 3560001000000, 12800000000000,
+ 3905001000000, 7810002000000, 11715003000000, 15620004000000,
+ 19525005000000, 23430006000000, 27335000000000, 31240000000000,
+ 35145000000000, 0, 0, 3723000000000, null
+ ])");
+ auto times_ms_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
+ 59123000, 84203999000, 3560001000, 12800000000,
+ 3905001000, 7810002000, 11715003000, 15620004000,
+ 19525005000, 23430006000, 27335000000, 31240000000,
+ 35145000000, 0, 0, 3723000000, null
+ ])");
+
+ auto times_extreme = ArrayFromJSON(time64(TimeUnit::MICRO), "[59123456, 84203999999]");
+ auto times_s = ArrayFromJSON(time32(TimeUnit::SECOND), R"([
+ 59, 84203, 3560, 12800,
+ 3905, 7810, 11715, 15620,
+ 19525, 23430, 27335, 31240,
+ 35145, 0, 0, 3723, null
+ ])");
+ auto times_ms = ArrayFromJSON(time32(TimeUnit::MILLI), R"([
+ 59000, 84203000, 3560000, 12800000,
+ 3905000, 7810000, 11715000, 15620000,
+ 19525000, 23430000, 27335000, 31240000,
+ 35145000, 0, 0, 3723000, null
+ ])");
+ auto times_us = ArrayFromJSON(time64(TimeUnit::MICRO), R"([
+ 59000000, 84203000000, 3560000000, 12800000000,
+ 3905000000, 7810000000, 11715000000, 15620000000,
+ 19525000000, 23430000000, 27335000000, 31240000000,
+ 35145000000, 0, 0, 3723000000, null
+ ])");
+ auto times_ns = ArrayFromJSON(time64(TimeUnit::NANO), R"([
+ 59000000000, 84203000000000, 3560000000000, 12800000000000,
+ 3905000000000, 7810000000000, 11715000000000, 15620000000000,
+ 19525000000000, 23430000000000, 27335000000000, 31240000000000,
+ 35145000000000, 0, 0, 3723000000000, null
+ ])");
+
+ CheckCast(timestamps, times);
+ CheckCastFails(timestamps, CastOptions::Safe(time64(TimeUnit::MICRO)));
+ CheckCast(timestamps_extreme, times_extreme);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson), times_s);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND), kTimestampSecondsJson), times_ms);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI), kTimestampSecondsJson), times_s);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI), kTimestampSecondsJson), times_ms);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_us);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_ns);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_ms);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO), kTimestampSecondsJson), times_s);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_ns);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_us);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_ms);
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO), kTimestampSecondsJson), times_s);
+
+ CastOptions truncate = CastOptions::Safe();
+ truncate.allow_time_truncate = true;
+
+ // Truncation tests
+ CheckCastFails(timestamps, CastOptions::Safe(time64(TimeUnit::MICRO)));
+ CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::MILLI)));
+ CheckCastFails(timestamps, CastOptions::Safe(time32(TimeUnit::SECOND)));
+ CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::MILLI)));
+ CheckCastFails(timestamps_us, CastOptions::Safe(time32(TimeUnit::SECOND)));
+ CheckCastFails(timestamps_ms, CastOptions::Safe(time32(TimeUnit::SECOND)));
+ CheckCast(timestamps, times_ns_us, truncate);
+ CheckCast(timestamps, times_ns_ms, truncate);
+ CheckCast(timestamps, times_s, truncate);
+ CheckCast(timestamps_us, times_ns_ms, truncate);
+ CheckCast(timestamps_us, times_s, truncate);
+ CheckCast(timestamps_ms, times_s, truncate);
+
+ // Upscaling tests
+ CheckCast(timestamps_us, times_us_ns);
+ CheckCast(timestamps_ms, times_ms_ns);
+ CheckCast(timestamps_ms, times_ms_us);
+ CheckCast(timestamps_s, times_ns);
+ CheckCast(timestamps_s, times_us);
+ CheckCast(timestamps_s, times_ms);
+
+ // Invalid timezone
+ for (auto u : TimeUnit::values()) {
+ auto timestamps =
+ ArrayFromJSON(timestamp(u, "Mars/Mariner_Valley"), kTimestampSecondsJson);
+ if (u == TimeUnit::SECOND || u == TimeUnit::MILLI) {
+ CheckCastFails(timestamps, CastOptions::Unsafe(time32(u)));
+ } else {
+ CheckCastFails(timestamps, CastOptions::Unsafe(time64(u)));
}
+ }
+}
- auto options = CastOptions::Safe(date->type());
- auto ts = ArrayFromJSON(timestamp(TimeUnit::SECOND), "[946684800, 946771200, 1]");
- CheckCastFails(ts, options);
+TEST(Cast, ZonedTimestampToTime) {
+#ifdef _WIN32
+ // TODO(ARROW-13168): we lack tzdb on Windows
+ GTEST_SKIP() << "ARROW-13168: no access to timezone database on Windows";
+#endif
- // Make sure that nulls are excluded from the truncation checks
- CheckCast(MaskArrayWithNullsAt(ts, {2}), date);
- }
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO, "Pacific/Marquesas"), kTimestampJson),
+ ArrayFromJSON(time64(TimeUnit::NANO), R"([
+ 52259123456789, 50003999999999, 56480001001001, 65000000000000,
+ 56105001000000, 60010002000000, 63915003000000, 67820004132000,
+ 71725005321000, 75630006163000, 79535000000000, 83440000000000,
+ 945000000000, 52200000000000, 52200000000000, 55923000000000, null
+ ])"));
+
+ auto time_s = R"([
+ 34259, 35603, 35960, 47000,
+ 41705, 45610, 49515, 53420,
+ 57325, 61230, 65135, 69040,
+ 72945, 37800, 37800, 41523, null
+ ])";
+ auto time_ms = R"([
+ 34259000, 35603000, 35960000, 47000000,
+ 41705000, 45610000, 49515000, 53420000,
+ 57325000, 61230000, 65135000, 69040000,
+ 72945000, 37800000, 37800000, 41523000, null
+ ])";
+ auto time_us = R"([
+ 34259000000, 35603000000, 35960000000, 47000000000,
+ 41705000000, 45610000000, 49515000000, 53420000000,
+ 57325000000, 61230000000, 65135000000, 69040000000,
+ 72945000000, 37800000000, 37800000000, 41523000000, null
+ ])";
+ auto time_ns = R"([
+ 34259000000000, 35603000000000, 35960000000000, 47000000000000,
+ 41705000000000, 45610000000000, 49515000000000, 53420000000000,
+ 57325000000000, 61230000000000, 65135000000000, 69040000000000,
+ 72945000000000, 37800000000000, 37800000000000, 41523000000000, null
+ ])";
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "Australia/Broken_Hill"),
+ kTimestampSecondsJson),
+ ArrayFromJSON(time32(TimeUnit::SECOND), time_s));
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI, "Australia/Broken_Hill"),
+ kTimestampSecondsJson),
+ ArrayFromJSON(time32(TimeUnit::MILLI), time_ms));
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MICRO, "Australia/Broken_Hill"),
+ kTimestampSecondsJson),
+ ArrayFromJSON(time64(TimeUnit::MICRO), time_us));
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::NANO, "Australia/Broken_Hill"),
+ kTimestampSecondsJson),
+ ArrayFromJSON(time64(TimeUnit::NANO), time_ns));
}
TEST(Cast, TimeToTime) {
@@ -1490,14 +1752,14 @@ TEST(Cast, StringToInt) {
ArrayFromJSON(signed_type, "[0, null, 127, -1, 0, 0, 127]"));
}
- CheckCast(ArrayFromJSON(string_type, R"(["2147483647", null, "-2147483648", "0",
+ CheckCast(ArrayFromJSON(string_type, R"(["2147483647", null, "-2147483648", "0",
"0X0", "0x7FFFFFFF", "0XFFFFfFfF", "0Xf0000000"])"),
ArrayFromJSON(
int32(),
"[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]"));
CheckCast(ArrayFromJSON(string_type,
- R"(["9223372036854775807", null, "-9223372036854775808", "0",
+ R"(["9223372036854775807", null, "-9223372036854775808", "0",
"0x0", "0x7FFFFFFFFFFFFFFf", "0XF000000000000001"])"),
ArrayFromJSON(int64(),
"[9223372036854775807, null, -9223372036854775808, 0, 0, "
@@ -1510,13 +1772,13 @@ TEST(Cast, StringToInt) {
}
CheckCast(
- ArrayFromJSON(string_type, R"(["2147483647", null, "4294967295", "0",
+ ArrayFromJSON(string_type, R"(["2147483647", null, "4294967295", "0",
"0x0", "0x7FFFFFFf", "0xFFFFFFFF"])"),
ArrayFromJSON(uint32(),
"[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]"));
CheckCast(ArrayFromJSON(string_type,
- R"(["9223372036854775807", null, "18446744073709551615", "0",
+ R"(["9223372036854775807", null, "18446744073709551615", "0",
"0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"])"),
ArrayFromJSON(uint64(),
"[9223372036854775807, null, 18446744073709551615, 0, 0, "
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
index 170c58f67ab..0ced435345f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -22,6 +22,7 @@
#include "arrow/builder.h"
#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/temporal_internal.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/time.h"
#include "arrow/vendored/datetime.h"
@@ -69,41 +70,6 @@ const std::shared_ptr& IsoCalendarType() {
return type;
}
-const std::string& GetInputTimezone(const DataType& type) {
- return checked_cast(type).timezone();
-}
-
-const std::string& GetInputTimezone(const Datum& datum) {
- return checked_cast(*datum.type()).timezone();
-}
-
-const std::string& GetInputTimezone(const Scalar& scalar) {
- return checked_cast(*scalar.type).timezone();
-}
-
-const std::string& GetInputTimezone(const ArrayData& array) {
- return checked_cast(*array.type).timezone();
-}
-
-template
-enable_if_timestamp GetInputTimezone(const DataType& type) {
- return GetInputTimezone(type);
-}
-
-template
-enable_if_t::value || is_date_type::value, const std::string>
-GetInputTimezone(const DataType& type) {
- return "";
-}
-
-Result LocateZone(const std::string& timezone) {
- try {
- return locate_zone(timezone);
- } catch (const std::runtime_error& ex) {
- return Status::Invalid("Cannot locate timezone '", timezone, "': ", ex.what());
- }
-}
-
Result GetLocale(const std::string& locale) {
try {
return std::locale(locale.c_str());
@@ -112,144 +78,6 @@ Result GetLocale(const std::string& locale) {
}
}
-struct NonZonedLocalizer {
- // No-op conversions: UTC -> UTC
- template
- sys_time ConvertTimePoint(int64_t t) const {
- return sys_time(Duration{t});
- }
-
- sys_days ConvertDays(sys_days d) const { return d; }
-};
-
-struct ZonedLocalizer {
- // Timezone-localizing conversions: UTC -> local time
- const time_zone* tz;
-
- template
- local_time ConvertTimePoint(int64_t t) const {
- return tz->to_local(sys_time(Duration{t}));
- }
-
- local_days ConvertDays(sys_days d) const { return local_days(year_month_day(d)); }
-};
-
-//
-// Executor class for temporal component extractors, i.e. scalar kernels
-// with the signature temporal type ->
-//
-// The `Op` parameter is templated on the Duration (which depends on the timestamp
-// unit) and a Localizer class (depending on whether the timestamp has a
-// timezone defined).
-//
-template class Op, typename Duration, typename InType,
- typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- const auto& timezone = GetInputTimezone(batch.values[0]);
- if (timezone.empty()) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{
- op};
- return kernel.Exec(ctx, batch, out);
- } else {
- ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, ZonedLocalizer{tz});
- applicator::ScalarUnaryNotNullStateful kernel{
- op};
- return kernel.Exec(ctx, batch, out);
- }
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename OutType>
-struct TemporalComponentExtractBase {
- template
- static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
- const ExecBatch& batch, Datum* out) {
- using ExecTemplate = Op;
- auto op = ExecTemplate(options, NonZonedLocalizer());
- applicator::ScalarUnaryNotNullStateful kernel{op};
- return kernel.Exec(ctx, batch, out);
- }
-};
-
-template class Op, typename Duration, typename InType,
- typename OutType>
-struct TemporalComponentExtract
- : public TemporalComponentExtractBase {
- using Base = TemporalComponentExtractBase;
-
- static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const FunctionOptions* options = nullptr;
- return Base::ExecWithOptions(ctx, options, batch, out);
- }
-};
-
template class Op, typename Duration, typename InType,
typename OutType>
struct TemporalComponentExtractDayOfWeek
@@ -899,7 +727,7 @@ enum EnabledTypes : uint8_t { WithDates, WithTimes, WithTimestamps };
template class Op,
template class OpExec, typename Duration,
- typename InType, typename OutType>
+ typename InType, typename OutType, typename... Args>
class ExecTemplate,
typename OutType>
std::shared_ptr MakeTemporal(
diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h
new file mode 100644
index 00000000000..d0904815605
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/temporal_internal.h
@@ -0,0 +1,214 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "arrow/vendored/datetime.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::local_days;
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using arrow_vendored::date::year_month_day;
+
+static inline Result LocateZone(const std::string& timezone) {
+ try {
+ return locate_zone(timezone);
+ } catch (const std::runtime_error& ex) {
+ return Status::Invalid("Cannot locate timezone '", timezone, "': ", ex.what());
+ }
+}
+
+static inline const std::string& GetInputTimezone(const DataType& type) {
+ return checked_cast(type).timezone();
+}
+
+static inline const std::string& GetInputTimezone(const Datum& datum) {
+ return checked_cast(*datum.type()).timezone();
+}
+
+static inline const std::string& GetInputTimezone(const Scalar& scalar) {
+ return checked_cast(*scalar.type).timezone();
+}
+
+static inline const std::string& GetInputTimezone(const ArrayData& array) {
+ return checked_cast(*array.type).timezone();
+}
+
+template
+enable_if_timestamp GetInputTimezone(const DataType& type) {
+ return GetInputTimezone(type);
+}
+
+template
+enable_if_t::value || is_date_type::value, const std::string>
+GetInputTimezone(const DataType& type) {
+ return "";
+}
+
+struct NonZonedLocalizer {
+ // No-op conversions: UTC -> UTC
+ template
+ sys_time ConvertTimePoint(int64_t t) const {
+ return sys_time(Duration{t});
+ }
+
+ sys_days ConvertDays(sys_days d) const { return d; }
+};
+
+struct ZonedLocalizer {
+ // Timezone-localizing conversions: UTC -> local time
+ const time_zone* tz;
+
+ template
+ local_time ConvertTimePoint(int64_t t) const {
+ return tz->to_local(sys_time(Duration{t}));
+ }
+
+ local_days ConvertDays(sys_days d) const { return local_days(year_month_day(d)); }
+};
+
+//
+// Executor class for temporal component extractors, i.e. scalar kernels
+// with the signature Timestamp -> <non-temporal scalar type `OutType`>
+//
+// The `Op` parameter is templated on the Duration (which depends on the timestamp
+// unit) and a Localizer class (depending on whether the timestamp has a
+// timezone defined).
+//
+template class Op, typename Duration, typename InType,
+ typename OutType, typename... Args>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out, Args... args) {
+ const auto& timezone = GetInputTimezone(batch.values[0]);
+ if (timezone.empty()) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer(), args...);
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ } else {
+ ARROW_ASSIGN_OR_RAISE(auto tz, LocateZone(timezone));
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, ZonedLocalizer{tz}, args...);
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename OutType>
+struct TemporalComponentExtractBase {
+ template
+ static Status ExecWithOptions(KernelContext* ctx, const OptionsType* options,
+ const ExecBatch& batch, Datum* out) {
+ using ExecTemplate = Op;
+ auto op = ExecTemplate(options, NonZonedLocalizer());
+ applicator::ScalarUnaryNotNullStateful kernel{op};
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+template class Op, typename Duration, typename InType,
+ typename OutType, typename... Args>
+struct TemporalComponentExtract
+ : public TemporalComponentExtractBase {
+ using Base = TemporalComponentExtractBase;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out,
+ Args... args) {
+ const FunctionOptions* options = nullptr;
+ return Base::ExecWithOptions(ctx, options, batch, out, args...);
+ }
+};
+
+} // namespace internal
+} // namespace compute
+} // namespace arrow
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index c3dffc836c1..4852e145d36 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -322,6 +322,29 @@ Conversions
cast
strptime
+Temporal component extraction
+-----------------------------
+
+.. autosummary::
+ :toctree: ../generated/
+
+ day
+ day_of_week
+ day_of_year
+ hour
+ iso_calendar
+ iso_week
+ iso_year
+ microsecond
+ millisecond
+ minute
+ month
+ nanosecond
+ quarter
+ second
+ subsecond
+ year
+
Replacements
------------
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index b6557875c2c..f25f161dd73 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1109,40 +1109,31 @@ def test_datetime64_to_date32(self):
@pytest.mark.parametrize('mask', [
None,
- np.array([True, False, False]),
+ np.array([True, False, False, True, False, False]),
])
def test_pandas_datetime_to_date64(self, mask):
s = pd.to_datetime([
'2018-05-10T00:00:00',
'2018-05-11T00:00:00',
'2018-05-12T00:00:00',
+ '2018-05-10T10:24:01',
+ '2018-05-11T10:24:01',
+ '2018-05-12T10:24:01',
])
arr = pa.Array.from_pandas(s, type=pa.date64(), mask=mask)
data = np.array([
date(2018, 5, 10),
date(2018, 5, 11),
- date(2018, 5, 12)
+ date(2018, 5, 12),
+ date(2018, 5, 10),
+ date(2018, 5, 11),
+ date(2018, 5, 12),
])
expected = pa.array(data, mask=mask, type=pa.date64())
assert arr.equals(expected)
- @pytest.mark.parametrize('mask', [
- None,
- np.array([True, False, False])
- ])
- def test_pandas_datetime_to_date64_failures(self, mask):
- s = pd.to_datetime([
- '2018-05-10T10:24:01',
- '2018-05-11T10:24:01',
- '2018-05-12T10:24:01',
- ])
-
- expected_msg = 'Timestamp value had non-zero intraday milliseconds'
- with pytest.raises(pa.ArrowInvalid, match=expected_msg):
- pa.Array.from_pandas(s, type=pa.date64(), mask=mask)
-
def test_array_types_date_as_object(self):
data = [date(2000, 1, 1),
None,
From 0424f190d02b6b4caf8f2b28323d8f296b9c33c3 Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 27 Sep 2021 10:21:14 -0400
Subject: [PATCH 2/2] ARROW-13549: [C++] Consolidate some overloads
---
.../arrow/compute/kernels/scalar_temporal.cc | 2 +-
.../arrow/compute/kernels/temporal_internal.h | 19 +++++++------------
2 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal.cc b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
index 0ced435345f..3df9b709887 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal.cc
@@ -420,7 +420,7 @@ struct Strftime {
if ((options.format.find("%c") != std::string::npos) && (options.locale != "C")) {
return Status::Invalid("%c flag is not supported in non-C locales.");
}
- auto timezone = GetInputTimezone(type);
+ auto timezone = GetInputTimezone(type);
if (timezone.empty()) {
if ((options.format.find("%z") != std::string::npos) ||
diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h
index d0904815605..06dce454818 100644
--- a/cpp/src/arrow/compute/kernels/temporal_internal.h
+++ b/cpp/src/arrow/compute/kernels/temporal_internal.h
@@ -46,7 +46,13 @@ static inline Result LocateZone(const std::string& timezone) {
}
static inline const std::string& GetInputTimezone(const DataType& type) {
- return checked_cast(type).timezone();
+ static const std::string no_timezone = "";
+ switch (type.id()) {
+ case Type::TIMESTAMP:
+ return checked_cast(type).timezone();
+ default:
+ return no_timezone;
+ }
}
static inline const std::string& GetInputTimezone(const Datum& datum) {
@@ -61,17 +67,6 @@ static inline const std::string& GetInputTimezone(const ArrayData& array) {
return checked_cast(*array.type).timezone();
}
-template
-enable_if_timestamp GetInputTimezone(const DataType& type) {
- return GetInputTimezone(type);
-}
-
-template
-enable_if_t::value || is_date_type::value, const std::string>
-GetInputTimezone(const DataType& type) {
- return "";
-}
-
struct NonZonedLocalizer {
// No-op conversions: UTC -> UTC
template