From 55ca5a91be1538614314a9910eccc2bbe0d90932 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 5 Oct 2021 13:40:16 -0400
Subject: [PATCH 1/4] ARROW-14231: [C++] Support casting timestamp with
timezone to string
---
.../compute/kernels/scalar_cast_string.cc | 81 ++++++++++++++++++-
.../arrow/compute/kernels/scalar_cast_test.cc | 56 +++++++++++++
.../compute/kernels/scalar_temporal_unary.cc | 39 ++-------
.../arrow/compute/kernels/temporal_internal.h | 39 ++++++++-
cpp/src/arrow/csv/writer_test.cc | 34 +++++---
5 files changed, 204 insertions(+), 45 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index eb2f9043955..2e0e11d32dc 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -21,6 +21,7 @@
#include "arrow/array/builder_binary.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/compute/kernels/scalar_cast_internal.h"
+#include "arrow/compute/kernels/temporal_internal.h"
#include "arrow/result.h"
#include "arrow/util/formatting.h"
#include "arrow/util/int_util.h"
@@ -105,6 +106,84 @@ struct TemporalToStringCastFunctor {
}
};
+template
+struct TemporalToStringCastFunctor {
+ using value_type = typename TypeTraits::CType;
+ using BuilderType = typename TypeTraits::BuilderType;
+ using FormatterType = StringFormatter;
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ DCHECK(out->is_array());
+ const ArrayData& input = *batch[0].array();
+ ArrayData* output = out->mutable_array();
+ return Convert(ctx, input, output);
+ }
+
+ static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData* output) {
+ const auto& timezone = GetInputTimezone(*input.type);
+ BuilderType builder(input.type, ctx->memory_pool());
+
+ if (timezone.empty() || timezone == "UTC") {
+ FormatterType formatter(input.type);
+ RETURN_NOT_OK(VisitArrayDataInline(
+ input,
+ [&](value_type v) {
+ return formatter(v, [&](util::string_view v) { return builder.Append(v); });
+ },
+ [&]() { return builder.AppendNull(); }));
+ } else {
+#ifdef _WIN32
+ // TODO(ARROW-13168):
+ return Status::NotImplemented(
+ "Casting a timestamp with time zone to string is not yet supported on "
+ "Windows.");
+#else
+ switch (checked_cast(*input.type).unit()) {
+ case TimeUnit::SECOND:
+ RETURN_NOT_OK(ConvertZoned(input, timezone, &builder));
+ break;
+ case TimeUnit::MILLI:
+ RETURN_NOT_OK(
+ ConvertZoned(input, timezone, &builder));
+ break;
+ case TimeUnit::MICRO:
+ RETURN_NOT_OK(
+ ConvertZoned(input, timezone, &builder));
+ break;
+ case TimeUnit::NANO:
+ RETURN_NOT_OK(
+ ConvertZoned(input, timezone, &builder));
+ break;
+ default:
+ DCHECK(false);
+ return Status::NotImplemented("Unimplemented time unit");
+ }
+#endif
+ }
+ std::shared_ptr output_array;
+ RETURN_NOT_OK(builder.Finish(&output_array));
+ *output = std::move(*output_array->data());
+ return Status::OK();
+ }
+
+ template
+ static Status ConvertZoned(const ArrayData& input, const std::string& timezone,
+ BuilderType* builder) {
+ static std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
+ ARROW_ASSIGN_OR_RAISE(const time_zone* tz,
+ LocateZone(timezone.empty() ? "UTC" : timezone));
+ ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale("C"));
+ TimestampFormatter formatter{kFormatString, tz, locale};
+ return VisitArrayDataInline(
+ input,
+ [&](value_type v) {
+ ARROW_ASSIGN_OR_RAISE(auto formatted, formatter(v));
+ return builder->Append(std::move(formatted));
+ },
+ [&]() { return builder->AppendNull(); });
+ }
+};
+
// ----------------------------------------------------------------------
// Binary-like to binary-like
//
@@ -304,7 +383,7 @@ void AddTemporalToStringCasts(CastFunction* func) {
auto out_ty = TypeTraits::type_singleton();
for (const std::shared_ptr& in_ty : TemporalTypes()) {
DCHECK_OK(func->AddKernel(
- in_ty->id(), {in_ty}, out_ty,
+ in_ty->id(), {InputType(in_ty->id())}, out_ty,
TrivialScalarUnaryAsArraysExec(
GenerateTemporal(*in_ty)),
NullHandling::COMPUTED_NO_PREALLOCATE));
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 5d516677669..afb64ddb043 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1553,8 +1553,64 @@ TEST(Cast, TimestampToString) {
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::SECOND), "[-30610224000, -5364662400]"),
ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])"));
+
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[-30610224000, -5364662400]"),
+ ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])"));
+
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI, "UTC"),
+ "[-30610224000000, -5364662400000]"),
+ ArrayFromJSON(string_type,
+ R"(["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"])"));
+ }
+}
+
+#ifndef _WIN32
+TEST(Cast, TimestampWithZoneToString) {
+ for (auto string_type : {utf8(), large_utf8()}) {
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
+ "[-34226955, 1456767743]"),
+ ArrayFromJSON(string_type,
+ R"(["1968-11-30 13:30:45-0700", "2016-02-29 10:42:23-0700"])"));
+ CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI, "America/Phoenix"),
+ "[-34226955877, 1456767743456]"),
+ ArrayFromJSON(
+ string_type,
+ R"(["1968-11-30 13:30:44.123-0700", "2016-02-29 10:42:23.456-0700"])"));
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::MICRO, "America/Phoenix"),
+ "[-34226955877000, 1456767743456789]"),
+ ArrayFromJSON(
+ string_type,
+ R"(["1968-11-30 13:30:44.123000-0700", "2016-02-29 10:42:23.456789-0700"])"));
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::NANO, "America/Phoenix"),
+ "[-34226955876543211, 1456767743456789246]"),
+ ArrayFromJSON(
+ string_type,
+ R"(["1968-11-30 13:30:44.123456789-0700", "2016-02-29 10:42:23.456789246-0700"])"));
}
}
+#else
+// TODO(ARROW-13168): we lack tzdb on Windows
+TEST(Cast, TimestampWithZoneToString) {
+ for (auto string_type : {utf8(), large_utf8()}) {
+ CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
+ "[-34226955, 1456767743]"),
+ CastOptions::Safe(string_type));
+ CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::MILLI, "America/Phoenix"),
+ "[-34226955877, 1456767743456]"),
+ CastOptions::Safe(string_type));
+ CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::MICRO, "America/Phoenix"),
+ "[-34226955877000, 1456767743456789]"),
+ CastOptions::Safe(string_type));
+ CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::NANO, "America/Phoenix"),
+ "[-34226955876543211, 1456767743456789246]"),
+ CastOptions::Safe(string_type));
+ }
+}
+#endif
TEST(Cast, DateToDate) {
auto day_32 = ArrayFromJSON(date32(), "[0, null, 100, 1, 10]");
diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
index d29ebca0ca8..d1c5855d2df 100644
--- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
@@ -45,7 +45,6 @@ using arrow_vendored::date::local_time;
using arrow_vendored::date::locate_zone;
using arrow_vendored::date::sys_days;
using arrow_vendored::date::sys_time;
-using arrow_vendored::date::time_zone;
using arrow_vendored::date::trunc;
using arrow_vendored::date::weekday;
using arrow_vendored::date::weeks;
@@ -479,7 +478,7 @@ struct Strftime {
if ((options.format.find("%c") != std::string::npos) && (options.locale != "C")) {
return Status::Invalid("%c flag is not supported in non-C locales.");
}
- auto timezone = GetInputTimezone(type);
+ const auto& timezone = GetInputTimezone(type);
if (timezone.empty()) {
if ((options.format.find("%z") != std::string::npos) ||
@@ -488,10 +487,10 @@ struct Strftime {
"Timezone not present, cannot convert to string with timezone: ",
options.format);
}
- timezone = "UTC";
}
- ARROW_ASSIGN_OR_RAISE(const time_zone* tz, LocateZone(timezone));
+ ARROW_ASSIGN_OR_RAISE(const time_zone* tz,
+ LocateZone(timezone.empty() ? "UTC" : timezone));
ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale(options.locale));
@@ -500,7 +499,7 @@ struct Strftime {
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
- TimestampFormatter formatter{self.options.format, self.tz, self.locale};
+ TimestampFormatter formatter{self.options.format, self.tz, self.locale};
if (in.is_valid) {
const int64_t in_val = internal::UnboxScalar::Unbox(in);
@@ -514,7 +513,7 @@ struct Strftime {
static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
- TimestampFormatter formatter{self.options.format, self.tz, self.locale};
+ TimestampFormatter formatter{self.options.format, self.tz, self.locale};
StringBuilder string_builder;
// Presize string data using a heuristic
@@ -539,35 +538,9 @@ struct Strftime {
return Status::OK();
}
-
- struct TimestampFormatter {
- const char* format;
- const time_zone* tz;
- std::ostringstream bufstream;
-
- explicit TimestampFormatter(const std::string& format, const time_zone* tz,
- const std::locale& locale)
- : format(format.c_str()), tz(tz) {
- bufstream.imbue(locale);
- // Propagate errors as C++ exceptions (to get an actual error message)
- bufstream.exceptions(std::ios::failbit | std::ios::badbit);
- }
-
- Result operator()(int64_t arg) {
- bufstream.str("");
- const auto zt = zoned_time{tz, sys_time(Duration{arg})};
- try {
- arrow_vendored::date::to_stream(bufstream, format, zt);
- } catch (const std::runtime_error& ex) {
- bufstream.clear();
- return Status::Invalid("Failed formatting timestamp: ", ex.what());
- }
- // XXX could return a view with std::ostringstream::view() (C++20)
- return std::move(bufstream).str();
- }
- };
};
#else
+// TODO(ARROW-13168): we lack tzdb on Windows
template
struct Strftime {
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h
index 45fa67a9b9b..3d2d9c5b9bd 100644
--- a/cpp/src/arrow/compute/kernels/temporal_internal.h
+++ b/cpp/src/arrow/compute/kernels/temporal_internal.h
@@ -37,6 +37,7 @@ using arrow_vendored::date::sys_days;
using arrow_vendored::date::sys_time;
using arrow_vendored::date::time_zone;
using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::zoned_time;
inline int64_t GetQuarter(const year_month_day& ymd) {
return static_cast((static_cast(ymd.month()) - 1) / 3);
@@ -72,7 +73,7 @@ static inline const std::string& GetInputTimezone(const ArrayData& array) {
return checked_cast(*array.type).timezone();
}
-inline Status ValidateDayOfWeekOptions(const DayOfWeekOptions& options) {
+static inline Status ValidateDayOfWeekOptions(const DayOfWeekOptions& options) {
if (options.week_start < 1 || 7 < options.week_start) {
return Status::Invalid(
"week_start must follow ISO convention (Monday=1, Sunday=7). Got week_start=",
@@ -81,6 +82,14 @@ inline Status ValidateDayOfWeekOptions(const DayOfWeekOptions& options) {
return Status::OK();
}
+static inline Result GetLocale(const std::string& locale) {
+ try {
+ return std::locale(locale.c_str());
+ } catch (const std::runtime_error& ex) {
+ return Status::Invalid("Cannot find locale '", locale, "': ", ex.what());
+ }
+}
+
struct NonZonedLocalizer {
using days_t = sys_days;
@@ -107,6 +116,34 @@ struct ZonedLocalizer {
local_days ConvertDays(sys_days d) const { return local_days(year_month_day(d)); }
};
+template
+struct TimestampFormatter {
+ const char* format;
+ const time_zone* tz;
+ std::ostringstream bufstream;
+
+ explicit TimestampFormatter(const std::string& format, const time_zone* tz,
+ const std::locale& locale)
+ : format(format.c_str()), tz(tz) {
+ bufstream.imbue(locale);
+ // Propagate errors as C++ exceptions (to get an actual error message)
+ bufstream.exceptions(std::ios::failbit | std::ios::badbit);
+ }
+
+ Result operator()(int64_t arg) {
+ bufstream.str("");
+ const auto zt = zoned_time{tz, sys_time(Duration{arg})};
+ try {
+ arrow_vendored::date::to_stream(bufstream, format, zt);
+ } catch (const std::runtime_error& ex) {
+ bufstream.clear();
+ return Status::Invalid("Failed formatting timestamp: ", ex.what());
+ }
+ // XXX could return a view with std::ostringstream::view() (C++20)
+ return std::move(bufstream).str();
+ }
+};
+
//
// Which types to generate a kernel for
//
diff --git a/cpp/src/arrow/csv/writer_test.cc b/cpp/src/arrow/csv/writer_test.cc
index 57b42c7f5a7..4fff8eac92f 100644
--- a/cpp/src/arrow/csv/writer_test.cc
+++ b/cpp/src/arrow/csv/writer_test.cc
@@ -59,6 +59,7 @@ std::vector GenerateTestCases() {
{field("c ", int32())},
{field("d", date32())},
{field("e", date64())},
+ {field("f", timestamp(TimeUnit::SECOND))},
});
auto populated_batch = R"([{"a": 1, "c ": -1},
{ "a": 1, "b\"": "abc\"efg", "c ": 2324},
@@ -67,16 +68,18 @@ std::vector GenerateTestCases() {
{ "a": 546, "b\"": "", "c ": 517 },
{ "a": 124, "b\"": "a\"\"b\"" },
{ "d": 0 },
- { "e": 86400000 }])";
- std::string expected_without_header = std::string("1,,-1,,") + "\n" + // line 1
- R"(1,"abc""efg",2324,,)" + "\n" + // line 2
- R"(,"abcd",5467,,)" + "\n" + // line 3
- R"(,,,,)" + "\n" + // line 4
- R"(546,"",517,,)" + "\n" + // line 5
- R"(124,"a""""b""",,,)" + "\n" + // line 6
- R"(,,,1970-01-01,)" + "\n" + // line 7
- R"(,,,,1970-01-02)" + "\n"; // line 8
- std::string expected_header = std::string(R"("a","b""","c ","d","e")") + "\n";
+ { "e": 86400000 },
+ { "f": 1078016523 }])";
+ std::string expected_without_header = std::string("1,,-1,,,") + "\n" + // line 1
+ R"(1,"abc""efg",2324,,,)" + "\n" + // line 2
+ R"(,"abcd",5467,,,)" + "\n" + // line 3
+ R"(,,,,,)" + "\n" + // line 4
+ R"(546,"",517,,,)" + "\n" + // line 5
+ R"(124,"a""""b""",,,,)" + "\n" + // line 6
+ R"(,,,1970-01-01,,)" + "\n" + // line 7
+ R"(,,,,1970-01-02,)" + "\n" + // line 8
+ R"(,,,,,2004-02-29 01:02:03)" + "\n"; // line 9
+ std::string expected_header = std::string(R"("a","b""","c ","d","e","f")") + "\n";
return std::vector{
{abc_schema, "[]", DefaultTestOptions(/*header=*/false), ""},
@@ -155,5 +158,16 @@ INSTANTIATE_TEST_SUITE_P(SingleColumnWriteCSVTest, TestWriteCSV,
R"("int64")"
"\n9999\n\n-15\n"}));
+#ifndef _WIN32
+// TODO(ARROW-13168): we lack tzdb on Windows
+INSTANTIATE_TEST_SUITE_P(
+ TimestampWithTimezoneWriteCSVTest, TestWriteCSV,
+ ::testing::Values(WriterTestParams{
+ schema({field("tz", timestamp(TimeUnit::SECOND, "America/Phoenix"))}),
+ R"([{ "tz": 1456767743 }])", WriteOptions(),
+ R"("tz")"
+ "\n2016-02-29 10:42:23-0700\n"}));
+#endif
+
} // namespace csv
} // namespace arrow
From d5fe9f22f80bab6b52b3366db782394950c1dd7f Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 5 Oct 2021 14:21:38 -0400
Subject: [PATCH 2/4] ARROW-14231: [C++] Fix Windows tests
---
.../arrow/compute/kernels/scalar_cast_test.cc | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index afb64ddb043..fc8a1fada3b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1596,18 +1596,10 @@ TEST(Cast, TimestampWithZoneToString) {
// TODO(ARROW-13168): we lack tzdb on Windows
TEST(Cast, TimestampWithZoneToString) {
for (auto string_type : {utf8(), large_utf8()}) {
- CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
- "[-34226955, 1456767743]"),
- CastOptions::Safe(string_type));
- CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::MILLI, "America/Phoenix"),
- "[-34226955877, 1456767743456]"),
- CastOptions::Safe(string_type));
- CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::MICRO, "America/Phoenix"),
- "[-34226955877000, 1456767743456789]"),
- CastOptions::Safe(string_type));
- CheckCastFails(ArrayFromJSON(timestamp(TimeUnit::NANO, "America/Phoenix"),
- "[-34226955876543211, 1456767743456789246]"),
- CastOptions::Safe(string_type));
+ ASSERT_RAISES(NotImplemented,
+ Cast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
+ "[-34226955, 1456767743]"),
+ CastOptions::Safe(string_type)));
}
}
#endif
From 20ad4d322770cd9af697bad1ea309235b48f854a Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 6 Oct 2021 09:20:36 -0400
Subject: [PATCH 3/4] ARROW-14231: [C++] Ensure UTC is not serialized to naive
timestamp
---
.../compute/kernels/scalar_cast_string.cc | 29 +++++++++++++---
.../arrow/compute/kernels/scalar_cast_test.cc | 33 +++++++++++++++----
2 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index 2e0e11d32dc..4eff4761347 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -121,16 +121,34 @@ struct TemporalToStringCastFunctor {
static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData* output) {
const auto& timezone = GetInputTimezone(*input.type);
+ const auto& ty = checked_cast(*input.type);
BuilderType builder(input.type, ctx->memory_pool());
- if (timezone.empty() || timezone == "UTC") {
+ // Preallocate
+ int64_t string_length = 19; // YYYY-MM-DD HH:MM:SS
+ if (ty.unit() == TimeUnit::MILLI) {
+ string_length += 4; // .SSS
+ } else if (ty.unit() == TimeUnit::MICRO) {
+ string_length += 7; // .SSSSSS
+ } else if (ty.unit() == TimeUnit::NANO) {
+ string_length += 10; // .SSSSSSSSS
+ }
+ if (!timezone.empty()) string_length += 5; // +0000
+ RETURN_NOT_OK(builder.Reserve(input.length));
+ RETURN_NOT_OK(
+ builder.ReserveData((input.length - input.GetNullCount()) * string_length));
+
+ if (timezone.empty()) {
FormatterType formatter(input.type);
RETURN_NOT_OK(VisitArrayDataInline(
input,
[&](value_type v) {
return formatter(v, [&](util::string_view v) { return builder.Append(v); });
},
- [&]() { return builder.AppendNull(); }));
+ [&]() {
+ builder.UnsafeAppendNull();
+ return Status::OK();
+ }));
} else {
#ifdef _WIN32
// TODO(ARROW-13168):
@@ -138,7 +156,7 @@ struct TemporalToStringCastFunctor {
"Casting a timestamp with time zone to string is not yet supported on "
"Windows.");
#else
- switch (checked_cast(*input.type).unit()) {
+ switch (ty.unit()) {
case TimeUnit::SECOND:
RETURN_NOT_OK(ConvertZoned(input, timezone, &builder));
break;
@@ -180,7 +198,10 @@ struct TemporalToStringCastFunctor {
ARROW_ASSIGN_OR_RAISE(auto formatted, formatter(v));
return builder->Append(std::move(formatted));
},
- [&]() { return builder->AppendNull(); });
+ [&]() {
+ builder->UnsafeAppendNull();
+ return Status::OK();
+ });
}
};
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index fc8a1fada3b..954f0166b21 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1555,35 +1555,52 @@ TEST(Cast, TimestampToString) {
ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])"));
CheckCast(
- ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[-30610224000, -5364662400]"),
- ArrayFromJSON(string_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])"));
+ ArrayFromJSON(timestamp(TimeUnit::MILLI), "[-30610224000000, -5364662400000]"),
+ ArrayFromJSON(string_type,
+ R"(["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"])"));
- CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI, "UTC"),
- "[-30610224000000, -5364662400000]"),
- ArrayFromJSON(string_type,
- R"(["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"])"));
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::MICRO),
+ "[-30610224000000000, -5364662400000000]"),
+ ArrayFromJSON(string_type,
+ R"(["1000-01-01 00:00:00.000000", "1800-01-01 00:00:00.000000"])"));
+
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::NANO),
+ "[-596933876543210988, 349837323456789012]"),
+ ArrayFromJSON(
+ string_type,
+ R"(["1951-02-01 01:02:03.456789012", "1981-02-01 01:02:03.456789012"])"));
}
}
#ifndef _WIN32
TEST(Cast, TimestampWithZoneToString) {
for (auto string_type : {utf8(), large_utf8()}) {
+ CheckCast(
+ ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[-30610224000, -5364662400]"),
+ ArrayFromJSON(string_type,
+ R"(["1000-01-01 00:00:00+0000", "1800-01-01 00:00:00+0000"])"));
+
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
"[-34226955, 1456767743]"),
ArrayFromJSON(string_type,
R"(["1968-11-30 13:30:45-0700", "2016-02-29 10:42:23-0700"])"));
+
CheckCast(ArrayFromJSON(timestamp(TimeUnit::MILLI, "America/Phoenix"),
"[-34226955877, 1456767743456]"),
ArrayFromJSON(
string_type,
R"(["1968-11-30 13:30:44.123-0700", "2016-02-29 10:42:23.456-0700"])"));
+
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::MICRO, "America/Phoenix"),
"[-34226955877000, 1456767743456789]"),
ArrayFromJSON(
string_type,
R"(["1968-11-30 13:30:44.123000-0700", "2016-02-29 10:42:23.456789-0700"])"));
+
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::NANO, "America/Phoenix"),
"[-34226955876543211, 1456767743456789246]"),
@@ -1596,6 +1613,10 @@ TEST(Cast, TimestampWithZoneToString) {
// TODO(ARROW-13168): we lack tzdb on Windows
TEST(Cast, TimestampWithZoneToString) {
for (auto string_type : {utf8(), large_utf8()}) {
+ ASSERT_RAISES(NotImplemented, Cast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"),
+ "[-34226955, 1456767743]"),
+ CastOptions::Safe(string_type)));
+
ASSERT_RAISES(NotImplemented,
Cast(ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
"[-34226955, 1456767743]"),
From 6ba13cf74d6778fde981d1f96eb0c6424e59bdc5 Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 8 Nov 2021 11:37:38 -0500
Subject: [PATCH 4/4] ARROW-14231: [C++] Format UTC with trailing Z
---
cpp/src/arrow/compute/kernels/scalar_cast_string.cc | 10 ++++++----
cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 2 +-
cpp/src/arrow/csv/writer_test.cc | 11 +++++++----
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
index 4eff4761347..4130c6a9487 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc
@@ -187,11 +187,13 @@ struct TemporalToStringCastFunctor {
template
static Status ConvertZoned(const ArrayData& input, const std::string& timezone,
BuilderType* builder) {
- static std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
- ARROW_ASSIGN_OR_RAISE(const time_zone* tz,
- LocateZone(timezone.empty() ? "UTC" : timezone));
+ static const std::string kFormatString = "%Y-%m-%d %H:%M:%S%z";
+ static const std::string kUtcFormatString = "%Y-%m-%d %H:%M:%SZ";
+ DCHECK(!timezone.empty());
+ ARROW_ASSIGN_OR_RAISE(const time_zone* tz, LocateZone(timezone));
ARROW_ASSIGN_OR_RAISE(std::locale locale, GetLocale("C"));
- TimestampFormatter formatter{kFormatString, tz, locale};
+ TimestampFormatter formatter{
+ timezone == "UTC" ? kUtcFormatString : kFormatString, tz, locale};
return VisitArrayDataInline(
input,
[&](value_type v) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 954f0166b21..92de7892f95 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1580,7 +1580,7 @@ TEST(Cast, TimestampWithZoneToString) {
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), "[-30610224000, -5364662400]"),
ArrayFromJSON(string_type,
- R"(["1000-01-01 00:00:00+0000", "1800-01-01 00:00:00+0000"])"));
+ R"(["1000-01-01 00:00:00Z", "1800-01-01 00:00:00Z"])"));
CheckCast(
ArrayFromJSON(timestamp(TimeUnit::SECOND, "America/Phoenix"),
diff --git a/cpp/src/arrow/csv/writer_test.cc b/cpp/src/arrow/csv/writer_test.cc
index 4fff8eac92f..5d575887fdf 100644
--- a/cpp/src/arrow/csv/writer_test.cc
+++ b/cpp/src/arrow/csv/writer_test.cc
@@ -163,10 +163,13 @@ INSTANTIATE_TEST_SUITE_P(SingleColumnWriteCSVTest, TestWriteCSV,
INSTANTIATE_TEST_SUITE_P(
TimestampWithTimezoneWriteCSVTest, TestWriteCSV,
::testing::Values(WriterTestParams{
- schema({field("tz", timestamp(TimeUnit::SECOND, "America/Phoenix"))}),
- R"([{ "tz": 1456767743 }])", WriteOptions(),
- R"("tz")"
- "\n2016-02-29 10:42:23-0700\n"}));
+ schema({
+ field("tz", timestamp(TimeUnit::SECOND, "America/Phoenix")),
+ field("utc", timestamp(TimeUnit::SECOND, "UTC")),
+ }),
+ R"([{ "tz": 1456767743, "utc": 1456767743 }])", WriteOptions(),
+ R"("tz","utc")"
+ "\n2016-02-29 10:42:23-0700,2016-02-29 17:42:23Z\n"}));
#endif
} // namespace csv