From 1b2ea78dbb23658a189845a727484427bc82274f Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 31 Aug 2021 16:14:22 -0400 Subject: [PATCH 1/2] ARROW-13812: [C++] Fix Valgrind error in Grouper.BooleanKey test --- cpp/src/arrow/compute/exec/key_encode.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc index 1a563867e90..1c01bc0971e 100644 --- a/cpp/src/arrow/compute/exec/key_encode.cc +++ b/cpp/src/arrow/compute/exec/key_encode.cc @@ -427,11 +427,19 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows, row_base += offset_within_row; uint8_t* col_base = col_prep.mutable_data(1); switch (col_prep.metadata().fixed_length) { - case 1: + case 1: { for (uint32_t i = 0; i < num_rows; ++i) { col_base[i] = row_base[i * row_size]; } + // For booleans, we pack 8 bytes at a time, and the buffer we're + // writing to here may not be fully initialized - so make sure a + // multiple of 8 bytes are initialized to avoid Valgrind errors. The + // temp buffer is sized to num_rows uint32_t values, so there's more + // than enough space here. + const uint32_t remainder = (8 - (num_rows % 8)) % 8; + std::memset(col_base + num_rows, 0x00, remainder); break; + } case 2: for (uint32_t i = 0; i < num_rows; ++i) { reinterpret_cast(col_base)[i] = From c005c9b8103743a57725e7ac4a65fbff89dc5248 Mon Sep 17 00:00:00 2001 From: David Li Date: Wed, 1 Sep 2021 13:22:53 -0400 Subject: [PATCH 2/2] ARROW-13812: [C++] Zero-initialize entire scratch buffer --- cpp/src/arrow/compute/exec/key_encode.cc | 10 +--------- cpp/src/arrow/compute/exec/util.h | 2 ++ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc index 1c01bc0971e..1a563867e90 100644 --- a/cpp/src/arrow/compute/exec/key_encode.cc +++ b/cpp/src/arrow/compute/exec/key_encode.cc @@ -427,19 +427,11 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows, row_base += offset_within_row; uint8_t* col_base = col_prep.mutable_data(1); switch (col_prep.metadata().fixed_length) { - case 1: { + case 1: for (uint32_t i = 0; i < num_rows; ++i) { col_base[i] = row_base[i * row_size]; } - // For booleans, we pack 8 bytes at a time, and the buffer we're - // writing to here may not be fully initialized - so make sure a - // multiple of 8 bytes are initialized to avoid Valgrind errors. The - // temp buffer is sized to num_rows uint32_t values, so there's more - // than enough space here. - const uint32_t remainder = (8 - (num_rows % 8)) % 8; - std::memset(col_base + num_rows, 0x00, remainder); break; - } case 2: for (uint32_t i = 0; i < num_rows; ++i) { reinterpret_cast(col_base)[i] = diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h index f5c55afe0f5..63f3315f7e0 100644 --- a/cpp/src/arrow/compute/exec/util.h +++ b/cpp/src/arrow/compute/exec/util.h @@ -70,6 +70,8 @@ class TempVectorStack { top_ = 0; buffer_size_ = size; ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); + // Ensure later operations don't accidentally read uninitialized memory. + std::memset(buffer->mutable_data(), 0xFF, size); buffer_ = std::move(buffer); return Status::OK(); }