From e9b04fe10a3c1e72ed0a181da146ba185bd05bbf Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 5 Apr 2024 15:20:13 -0300 Subject: [PATCH 1/3] formatting.h: Clarify meaning of cursor in FormatOneChar --- cpp/src/arrow/util/formatting.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 6125f792ff9..54b4bd2dd20 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -126,8 +126,10 @@ namespace detail { ARROW_EXPORT extern const char digit_pairs[]; // Based on fmtlib's format_int class: -// Write digits from right to left into a stack allocated buffer -inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; } +// Write digits from right to left into a stack allocated buffer. +// \pre *cursor points to the byte after the one that will be written. +// \post *cursor points to the byte that was written. +inline void FormatOneChar(char c, char** cursor) { *(--(*cursor)) = c; } template void FormatOneDigit(Int value, char** cursor) { From 87c2d4961c3f1a58cdeb7eb1b385b44fb931ed3e Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 5 Apr 2024 16:55:48 -0300 Subject: [PATCH 2/3] formatting_util_test.cc: Add a test case that uses all the bytes in formatting buffer With ASAN, this reproduces the issue. [ RUN ] Formatting.Timestamp ================================================================= ==4191383==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7fff1804c48f at pc 0x5608edffe39d bp 0x7fff1804c110 sp 0x7fff1804c108 WRITE of size 1 at 0x7fff1804c48f thread T0 #0 0x5608edffe39c in arrow::internal::detail::FormatOneChar(char, char**) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting.h:132:67 #1 0x5608ee035c00 in arrow::internal::detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day, char**) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting.h:351:5 #2 0x5608ee05e8a0 in decltype(std::declval()(std::basic_string_view >{})) arrow::internal::StringFormatter::operator() >, arrow::StringAppender&>(std::chrono::duration >, long, arrow::StringAppender&) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting.h :521:5 #3 0x5608ee05d60f in decltype(std::declval&>()(std::chrono::duration >{}, std::declval(), std::declval())) arrow::util::VisitDuration&, long&, arrow::StringAppender&>(arrow::TimeUnit::type, arrow::internal::StringFormatter&, long&, arrow::StringAppender&) /home/felipeo/code/arrow/cpp/src/arrow/util/time.h:60:14 #4 0x5608ee05d122 in decltype(std::declval()(std::basic_string_view >{})) arrow::internal::StringFormatter::operator()(long, arrow::StringAppender&) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting.h:527:12 #5 0x5608edfeffb3 in void arrow::AssertFormatting, long>(arrow::internal::StringFormatter&, long, std::__cxx11::basic_string, std::allocator > const&) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting_util_test.cc:52:3 #6 0x5608edfece95 in arrow::Formatting_Timestamp_Test::TestBody() /home/felipeo/code/arrow/cpp/src/arrow/util/formatting_util_test.cc:540:5 #7 0x7fd95d7901de in void testing::internal::HandleExceptionsInMethodIfSupported(testing::Test*, void (testing::Test::*)(), char const*) (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd901de) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #8 0x7fd95d784905 in testing::Test::Run() (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd84905) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #9 0x7fd95d784a84 in testing::TestInfo::Run() (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd84a84) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #10 0x7fd95d785038 in testing::TestSuite::Run() (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd85038) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #11 0x7fd95d78573e in testing::internal::UnitTestImpl::RunAllTests() (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd8573e) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #12 0x7fd95d7907a6 in bool testing::internal::HandleExceptionsInMethodIfSupported(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd907a6) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #13 0x7fd95d784b4b in testing::UnitTest::Run() (/home/felipeo/code/arrow/cpp/ninja/debug/libarrow_testing.so.1600+0xd84b4b) (BuildId: dd9af0bafdb1786050262e8f6002568a9f08ecf6) #14 0x5608ee54506d in RUN_ALL_TESTS() /usr/include/gtest/gtest.h:2490:46 #15 0x5608ee544fb9 in main /home/felipeo/code/arrow/cpp/src/arrow/util/logging_test.cc:129:10 #16 0x7fd93de29d8f in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 #17 0x7fd93de29e3f in __libc_start_main csu/../csu/libc-start.c:392:3 #18 0x5608ed7b4f64 in _start (/home/felipeo/code/arrow/cpp/ninja/debug/arrow-utility-test+0x1224f64) (BuildId: 81cfdc36b7a960a7249ecd5884beaa869140ab89) Address 0x7fff1804c48f is located in stack of thread T0 at offset 399 in frame #0 0x5608ee05dc9f in decltype(std::declval()(std::basic_string_view >{})) arrow::internal::StringFormatter::operator() >, arrow::StringAppender&>(std::chrono::duration >, long, arrow::StringAppender&) /home/felipeo/code/arrow/cpp/src/arrow/util/formatting.h :486 --- cpp/src/arrow/util/formatting_util_test.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc index 13f57a495d6..fcbeec347d3 100644 --- a/cpp/src/arrow/util/formatting_util_test.cc +++ b/cpp/src/arrow/util/formatting_util_test.cc @@ -533,6 +533,14 @@ TEST(Formatting, Timestamp) { } } + { + constexpr int64_t kMillisInDay = 24 * 60 * 60 * 1000; + auto ty = timestamp(TimeUnit::MILLI, "+01:00"); + StringFormatter formatter(ty.get()); + AssertFormatting(formatter, -15000 * 365 * kMillisInDay + 1, + "-13021-12-17 00:00:00.001Z"); + } + { auto ty = timestamp(TimeUnit::MILLI, "Pacific/Maruesas"); StringFormatter formatter(ty.get()); From d3afed7c2bde2ba872f6bc1405e1b284ed7188b1 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 5 Apr 2024 15:48:25 -0300 Subject: [PATCH 3/3] formatting.h: Make sure space is allocated for the 'Z' in formatted timestamp --- cpp/src/arrow/util/formatting.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h index 54b4bd2dd20..dd9af907ecc 100644 --- a/cpp/src/arrow/util/formatting.h +++ b/cpp/src/arrow/util/formatting.h @@ -328,6 +328,7 @@ class StringFormatter : public FloatToStringFormatterMixin constexpr size_t BufferSizeHH_MM_SS() { + // "23:59:59" ("." "9"+)? return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 + detail::Digits10(Duration::period::den) - 1; } @@ -507,8 +509,9 @@ class StringFormatter { timepoint_days -= days(1); } + // YYYY_MM_DD " " HH_MM_SS "Z"? constexpr size_t buffer_size = - detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS(); + detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS() + 1; std::array buffer; char* cursor = buffer.data() + buffer_size;