From 4dd2ce1b12d42034482577b66d19a70f93789bdf Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 18 Dec 2023 10:17:45 +0100 Subject: [PATCH 1/7] Initial commit --- cpp/src/arrow/pretty_print_test.cc | 6 +++--- cpp/src/arrow/util/formatting.h | 7 ++++++- cpp/src/arrow/util/formatting_util_test.cc | 16 ++++++++++++++++ python/pyarrow/tests/test_types.py | 10 ++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 0db6ae48672..5d2256e8c5d 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -350,10 +350,10 @@ TEST_F(TestPrettyPrint, DateTimeTypes) { std::vector values = { 0, 1, 2, 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))), 4}; static const char* expected = R"expected([ - 1970-01-01 00:00:00.000000, - 1970-01-01 00:00:00.000001, + 1970-01-01 00:00:00.000000Z, + 1970-01-01 00:00:00.000001Z, null, - 1970-01-02 03:04:05.000678, + 1970-01-02 03:04:05.000678Z, null ])expected"; CheckPrimitive(timestamp(TimeUnit::MICRO, "Transylvania"), diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 9dcc6463fb7..71bae74629e 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -470,7 +470,8 @@ class StringFormatter { using value_type = int64_t; explicit StringFormatter(const DataType* type) - : unit_(checked_cast(*type).unit()) {} + : unit_(checked_cast(*type).unit()), + timezone_(checked_cast(*type).timezone()) {} template Return operator()(Duration, value_type value, Appender&& append) { @@ -503,6 +504,9 @@ class StringFormatter { std::array buffer; char* cursor = buffer.data() + buffer_size; + if (timezone_.size() > 0) { + detail::FormatOneChar('Z', &cursor); + } detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor); detail::FormatOneChar(' ', &cursor); detail::FormatYYYY_MM_DD(timepoint_days, &cursor); @@ -516,6 +520,7 @@ class StringFormatter { private: TimeUnit::type unit_; + std::string timezone_; }; template diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 9afbc91063a..06de5621536 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -522,6 +522,22 @@ TEST(Formatting, Timestamp) { AssertFormatting(formatter, -2203932304LL * 1000000000LL + 8, "1900-02-28 12:34:56.000000008"); } + + { + auto ty = timestamp(TimeUnit::SECOND, "US/Eastern"); + StringFormatter formatter(ty.get()); + + AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); + AssertFormatting(formatter, 1, "1970-01-01 00:00:01Z"); + AssertFormatting(formatter, 24 * 60 * 60, "1970-01-02 00:00:00Z"); + AssertFormatting(formatter, 616377600, "1989-07-14 00:00:00Z"); + AssertFormatting(formatter, 951782400, "2000-02-29 00:00:00Z"); + AssertFormatting(formatter, 63730281600LL, "3989-07-14 00:00:00Z"); + AssertFormatting(formatter, -2203977600LL, "1900-02-28 00:00:00Z"); + + AssertFormatting(formatter, 1542129070, "2018-11-13 17:11:10Z"); + AssertFormatting(formatter, -2203932304LL, "1900-02-28 12:34:56Z"); + } } TEST(Formatting, Interval) { diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 7600f1dd332..7f118503092 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -487,6 +487,16 @@ def test_timestamp(): pa.timestamp(invalid_unit) +def test_timestamp_print(): + for unit in ('s', 'ms', 'us', 'ns'): + for tz in ('UTC', 'Europe/Paris'): + ty = pa.timestamp(unit, tz=tz) + arr = pa.array([0], ty) + assert "Z" in str(arr) + arr = pa.array([0]) + assert "Z" not in str(arr) + + def test_time32_units(): for valid_unit in ('s', 'ms'): ty = pa.time32(valid_unit) From 98ec7442fdf379492f362a0f16561f864c33bd10 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 18 Dec 2023 13:23:46 +0100 Subject: [PATCH 2/7] Apply suggestions from code review - rok Co-authored-by: Rok Mihevc --- cpp/src/arrow/util/formatting_util_test.cc | 34 +++++++++++++--------- python/pyarrow/tests/test_types.py | 2 +- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 06de5621536..58e4b399552 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -523,20 +523,28 @@ TEST(Formatting, Timestamp) { "1900-02-28 12:34:56.000000008"); } - { - auto ty = timestamp(TimeUnit::SECOND, "US/Eastern"); - StringFormatter formatter(ty.get()); - AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); - AssertFormatting(formatter, 1, "1970-01-01 00:00:01Z"); - AssertFormatting(formatter, 24 * 60 * 60, "1970-01-02 00:00:00Z"); - AssertFormatting(formatter, 616377600, "1989-07-14 00:00:00Z"); - AssertFormatting(formatter, 951782400, "2000-02-29 00:00:00Z"); - AssertFormatting(formatter, 63730281600LL, "3989-07-14 00:00:00Z"); - AssertFormatting(formatter, -2203977600LL, "1900-02-28 00:00:00Z"); - - AssertFormatting(formatter, 1542129070, "2018-11-13 17:11:10Z"); - AssertFormatting(formatter, -2203932304LL, "1900-02-28 12:34:56Z"); + { + auto timestamp_types = {timestamp(TimeUnit::SECOND, "US/Eastern"), + timestamp(TimeUnit::SECOND, "+01:00"), + timestamp(TimeUnit::SECOND, "-42:00"), + timestamp(TimeUnit::SECOND, "Pacific/Maruesas"), + timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; + for (auto ty : timestamp_types) { + auto ty = timestamp(TimeUnit::SECOND, "US/Eastern"); + StringFormatter formatter(ty.get()); + + AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); + AssertFormatting(formatter, 1, "1970-01-01 00:00:01Z"); + AssertFormatting(formatter, 24 * 60 * 60, "1970-01-02 00:00:00Z"); + AssertFormatting(formatter, 616377600, "1989-07-14 00:00:00Z"); + AssertFormatting(formatter, 951782400, "2000-02-29 00:00:00Z"); + AssertFormatting(formatter, 63730281600LL, "3989-07-14 00:00:00Z"); + AssertFormatting(formatter, -2203977600LL, "1900-02-28 00:00:00Z"); + + AssertFormatting(formatter, 1542129070, "2018-11-13 17:11:10Z"); + AssertFormatting(formatter, -2203932304LL, "1900-02-28 12:34:56Z"); + } } } diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 7f118503092..42020665a22 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -489,7 +489,7 @@ def test_timestamp(): def test_timestamp_print(): for unit in ('s', 'ms', 'us', 'ns'): - for tz in ('UTC', 'Europe/Paris'): + for tz in ('UTC', 'Europe/Paris', 'Pacific/Marquesas', 'Mars/Mariner_Valley', '-00:42', '+42:00'): ty = pa.timestamp(unit, tz=tz) arr = pa.array([0], ty) assert "Z" in str(arr) From 96167716a62ecd3e2d04a022bad459023df94c1d Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 18 Dec 2023 13:26:52 +0100 Subject: [PATCH 3/7] Run linter --- cpp/src/arrow/util/formatting_util_test.cc | 13 ++++++------- python/pyarrow/tests/test_types.py | 3 ++- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 58e4b399552..36e11c856cb 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -523,17 +523,16 @@ TEST(Formatting, Timestamp) { "1900-02-28 12:34:56.000000008"); } - { auto timestamp_types = {timestamp(TimeUnit::SECOND, "US/Eastern"), - timestamp(TimeUnit::SECOND, "+01:00"), - timestamp(TimeUnit::SECOND, "-42:00"), - timestamp(TimeUnit::SECOND, "Pacific/Maruesas"), - timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; + timestamp(TimeUnit::SECOND, "+01:00"), + timestamp(TimeUnit::SECOND, "-42:00"), + timestamp(TimeUnit::SECOND, "Pacific/Maruesas"), + timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; for (auto ty : timestamp_types) { auto ty = timestamp(TimeUnit::SECOND, "US/Eastern"); StringFormatter formatter(ty.get()); - + AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); AssertFormatting(formatter, 1, "1970-01-01 00:00:01Z"); AssertFormatting(formatter, 24 * 60 * 60, "1970-01-02 00:00:00Z"); @@ -541,7 +540,7 @@ TEST(Formatting, Timestamp) { AssertFormatting(formatter, 951782400, "2000-02-29 00:00:00Z"); AssertFormatting(formatter, 63730281600LL, "3989-07-14 00:00:00Z"); AssertFormatting(formatter, -2203977600LL, "1900-02-28 00:00:00Z"); - + AssertFormatting(formatter, 1542129070, "2018-11-13 17:11:10Z"); AssertFormatting(formatter, -2203932304LL, "1900-02-28 12:34:56Z"); } diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 42020665a22..55a44eeb92c 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -489,7 +489,8 @@ def test_timestamp(): def test_timestamp_print(): for unit in ('s', 'ms', 'us', 'ns'): - for tz in ('UTC', 'Europe/Paris', 'Pacific/Marquesas', 'Mars/Mariner_Valley', '-00:42', '+42:00'): + for tz in ('UTC', 'Europe/Paris', 'Pacific/Marquesas', + 'Mars/Mariner_Valley', '-00:42', '+42:00'): ty = pa.timestamp(unit, tz=tz) arr = pa.array([0], ty) assert "Z" in str(arr) From 944b1c20ed28a6de951ba8d2655463d8dc985f1d Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Mon, 18 Dec 2023 13:35:08 +0100 Subject: [PATCH 4/7] Remove redundant line in formatting_util_test.cc --- cpp/src/arrow/util/formatting_util_test.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 36e11c856cb..e8dc7c92b5a 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -530,7 +530,6 @@ TEST(Formatting, Timestamp) { timestamp(TimeUnit::SECOND, "Pacific/Maruesas"), timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; for (auto ty : timestamp_types) { - auto ty = timestamp(TimeUnit::SECOND, "US/Eastern"); StringFormatter formatter(ty.get()); AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); From ca92eb778ad8e608ff626690b6a0ea86108f69a3 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Mon, 18 Dec 2023 14:59:57 +0100 Subject: [PATCH 5/7] Update python/pyarrow/tests/test_types.py Co-authored-by: Rok Mihevc --- python/pyarrow/tests/test_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 55a44eeb92c..c8a52c6b626 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -494,7 +494,7 @@ def test_timestamp_print(): ty = pa.timestamp(unit, tz=tz) arr = pa.array([0], ty) assert "Z" in str(arr) - arr = pa.array([0]) + arr = pa.array([0], pa.timestamp(unit)) assert "Z" not in str(arr) From 32f13e89332b0c37b18a96367309be2b82e4bdc3 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 20 Dec 2023 09:35:34 +0100 Subject: [PATCH 6/7] Update test with different resolutions --- cpp/src/arrow/util/formatting_util_test.cc | 44 ++++++++++++++++------ 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index e8dc7c92b5a..281a373c0bd 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -526,22 +526,44 @@ TEST(Formatting, Timestamp) { { auto timestamp_types = {timestamp(TimeUnit::SECOND, "US/Eastern"), timestamp(TimeUnit::SECOND, "+01:00"), - timestamp(TimeUnit::SECOND, "-42:00"), - timestamp(TimeUnit::SECOND, "Pacific/Maruesas"), timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; for (auto ty : timestamp_types) { StringFormatter formatter(ty.get()); AssertFormatting(formatter, 0, "1970-01-01 00:00:00Z"); - AssertFormatting(formatter, 1, "1970-01-01 00:00:01Z"); - AssertFormatting(formatter, 24 * 60 * 60, "1970-01-02 00:00:00Z"); - AssertFormatting(formatter, 616377600, "1989-07-14 00:00:00Z"); - AssertFormatting(formatter, 951782400, "2000-02-29 00:00:00Z"); - AssertFormatting(formatter, 63730281600LL, "3989-07-14 00:00:00Z"); - AssertFormatting(formatter, -2203977600LL, "1900-02-28 00:00:00Z"); - - AssertFormatting(formatter, 1542129070, "2018-11-13 17:11:10Z"); - AssertFormatting(formatter, -2203932304LL, "1900-02-28 12:34:56Z"); + } + } + + { + auto timestamp_types = {timestamp(TimeUnit::MILLI, "US/Eastern"), + timestamp(TimeUnit::MILLI, "Pacific/Maruesas"), + timestamp(TimeUnit::MILLI, "Mars/Mariner_Valley")}; + for (auto ty : timestamp_types) { + StringFormatter formatter(ty.get()); + + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000Z"); + } + } + + { + auto timestamp_types = {timestamp(TimeUnit::MICRO, "US/Eastern"), + timestamp(TimeUnit::MICRO, "-42:00"), + timestamp(TimeUnit::MICRO, "Pacific/Maruesas")}; + for (auto ty : timestamp_types) { + StringFormatter formatter(ty.get()); + + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000Z"); + } + } + + { + auto timestamp_types = {timestamp(TimeUnit::NANO, "US/Eastern"), + timestamp(TimeUnit::NANO, "+01:00"), + timestamp(TimeUnit::NANO, "-42:00")}; + for (auto ty : timestamp_types) { + StringFormatter formatter(ty.get()); + + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000000Z"); } } } From f53df2cc4696cace9bbb79b0e744b0985705c030 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 20 Dec 2023 10:12:47 +0100 Subject: [PATCH 7/7] Optimise the tests --- cpp/src/arrow/util/formatting_util_test.cc | 36 ++++++---------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 281a373c0bd..13f57a495d6 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -525,8 +525,7 @@ TEST(Formatting, Timestamp) { { auto timestamp_types = {timestamp(TimeUnit::SECOND, "US/Eastern"), - timestamp(TimeUnit::SECOND, "+01:00"), - timestamp(TimeUnit::SECOND, "Mars/Mariner_Valley")}; + timestamp(TimeUnit::SECOND, "+01:00")}; for (auto ty : timestamp_types) { StringFormatter formatter(ty.get()); @@ -535,36 +534,21 @@ TEST(Formatting, Timestamp) { } { - auto timestamp_types = {timestamp(TimeUnit::MILLI, "US/Eastern"), - timestamp(TimeUnit::MILLI, "Pacific/Maruesas"), - timestamp(TimeUnit::MILLI, "Mars/Mariner_Valley")}; - for (auto ty : timestamp_types) { - StringFormatter formatter(ty.get()); - - AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000Z"); - } + auto ty = timestamp(TimeUnit::MILLI, "Pacific/Maruesas"); + StringFormatter formatter(ty.get()); + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000Z"); } { - auto timestamp_types = {timestamp(TimeUnit::MICRO, "US/Eastern"), - timestamp(TimeUnit::MICRO, "-42:00"), - timestamp(TimeUnit::MICRO, "Pacific/Maruesas")}; - for (auto ty : timestamp_types) { - StringFormatter formatter(ty.get()); - - AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000Z"); - } + auto ty = timestamp(TimeUnit::MICRO, "-42:00"); + StringFormatter formatter(ty.get()); + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000Z"); } { - auto timestamp_types = {timestamp(TimeUnit::NANO, "US/Eastern"), - timestamp(TimeUnit::NANO, "+01:00"), - timestamp(TimeUnit::NANO, "-42:00")}; - for (auto ty : timestamp_types) { - StringFormatter formatter(ty.get()); - - AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000000Z"); - } + auto ty = timestamp(TimeUnit::NANO, "Mars/Mariner_Valley"); + StringFormatter formatter(ty.get()); + AssertFormatting(formatter, 0, "1970-01-01 00:00:00.000000000Z"); } }