From 1b2ea78dbb23658a189845a727484427bc82274f Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 31 Aug 2021 16:14:22 -0400
Subject: [PATCH 1/2] ARROW-13812: [C++] Fix Valgrind error in
Grouper.BooleanKey test
---
cpp/src/arrow/compute/exec/key_encode.cc | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc
index 1a563867e90..1c01bc0971e 100644
--- a/cpp/src/arrow/compute/exec/key_encode.cc
+++ b/cpp/src/arrow/compute/exec/key_encode.cc
@@ -427,11 +427,19 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
row_base += offset_within_row;
uint8_t* col_base = col_prep.mutable_data(1);
switch (col_prep.metadata().fixed_length) {
- case 1:
+ case 1: {
for (uint32_t i = 0; i < num_rows; ++i) {
col_base[i] = row_base[i * row_size];
}
+ // For booleans, we pack 8 bytes at a time, and the buffer we're
+ // writing to here may not be fully initialized - so make sure a
+ // multiple of 8 bytes are initialized to avoid Valgrind errors. The
+ // temp buffer is sized to num_rows uint32_t values, so there's more
+ // than enough space here.
+ const uint32_t remainder = (8 - (num_rows % 8)) % 8;
+ std::memset(col_base + num_rows, 0x00, remainder);
break;
+ }
case 2:
for (uint32_t i = 0; i < num_rows; ++i) {
reinterpret_cast(col_base)[i] =
From c005c9b8103743a57725e7ac4a65fbff89dc5248 Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 1 Sep 2021 13:22:53 -0400
Subject: [PATCH 2/2] ARROW-13812: [C++] Zero-initialize entire scratch buffer
---
cpp/src/arrow/compute/exec/key_encode.cc | 10 +---------
cpp/src/arrow/compute/exec/util.h | 2 ++
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/cpp/src/arrow/compute/exec/key_encode.cc b/cpp/src/arrow/compute/exec/key_encode.cc
index 1c01bc0971e..1a563867e90 100644
--- a/cpp/src/arrow/compute/exec/key_encode.cc
+++ b/cpp/src/arrow/compute/exec/key_encode.cc
@@ -427,19 +427,11 @@ void KeyEncoder::EncoderInteger::Decode(uint32_t start_row, uint32_t num_rows,
row_base += offset_within_row;
uint8_t* col_base = col_prep.mutable_data(1);
switch (col_prep.metadata().fixed_length) {
- case 1: {
+ case 1:
for (uint32_t i = 0; i < num_rows; ++i) {
col_base[i] = row_base[i * row_size];
}
- // For booleans, we pack 8 bytes at a time, and the buffer we're
- // writing to here may not be fully initialized - so make sure a
- // multiple of 8 bytes are initialized to avoid Valgrind errors. The
- // temp buffer is sized to num_rows uint32_t values, so there's more
- // than enough space here.
- const uint32_t remainder = (8 - (num_rows % 8)) % 8;
- std::memset(col_base + num_rows, 0x00, remainder);
break;
- }
case 2:
for (uint32_t i = 0; i < num_rows; ++i) {
reinterpret_cast(col_base)[i] =
diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h
index f5c55afe0f5..63f3315f7e0 100644
--- a/cpp/src/arrow/compute/exec/util.h
+++ b/cpp/src/arrow/compute/exec/util.h
@@ -70,6 +70,8 @@ class TempVectorStack {
top_ = 0;
buffer_size_ = size;
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
+ // Ensure later operations don't accidentally read uninitialized memory.
+ std::memset(buffer->mutable_data(), 0xFF, size);
buffer_ = std::move(buffer);
return Status::OK();
}