From 200c1caf0a6dabb6e4bfc11659ea243d235f4dfb Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 14 Apr 2022 18:29:48 +0200 Subject: [PATCH 1/2] EXP: SecureZero helper to securely wipe memory --- cpp/src/arrow/util/io_util.cc | 32 ++++++++++++++++++++++++- cpp/src/arrow/util/io_util.h | 5 ++++ cpp/src/arrow/util/io_util_test.cc | 38 ++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 398e65a3c93..44adcd716b7 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -31,15 +31,18 @@ #define __EXTENSIONS__ #endif +// For memset_s +#define __STDC_WANT_LIB_EXT1__ 1 + #include "arrow/util/windows_compatibility.h" // IWYU pragma: keep #include #include #include #include +#include #include #include -#include #include #include #include @@ -52,6 +55,7 @@ #include #include #include +#include #include #include // IWYU pragma: keep @@ -1867,5 +1871,31 @@ uint64_t GetOptionalThreadId() { return (tid == 0) ? tid - 1 : tid; } +void SecureZero(uint8_t* data, int64_t size) { + // Heavily borrowed from libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h + const auto n = static_cast(size); +#if defined(_WIN32) || defined(WIN32) + SecureZeroMemory(data, n); +#elif defined(__STDC_LIB_EXT1__) + // prioritize first the general C11 call + memset_s(data, n, 0, n); +#elif defined(__linux__) + explicit_bzero(data, n); +#else + // Try to ensure that a true library call to memset() will be generated + // by the compiler. + static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + memset_v(data, 0, n); + __asm__ __volatile__("" ::"r"(data) : "memory"); +#endif +} + +void SecureZero(std::string* data) { + SecureZero(reinterpret_cast(data->data()), + static_cast(data->length())); + data->clear(); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/io_util.h b/cpp/src/arrow/util/io_util.h index c094727a647..dde0cf94a5e 100644 --- a/cpp/src/arrow/util/io_util.h +++ b/cpp/src/arrow/util/io_util.h @@ -346,5 +346,10 @@ int64_t GetRandomSeed(); ARROW_EXPORT uint64_t GetThreadId(); +ARROW_EXPORT +void SecureZero(uint8_t* data, int64_t size); +ARROW_EXPORT +void SecureZero(std::string* data); + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 0508b4c1902..1d235eb0a5a 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -37,6 +37,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" +#include "arrow/util/string_view.h" #include "arrow/util/windows_compatibility.h" #include "arrow/util/windows_fixup.h" @@ -719,5 +720,42 @@ TEST(SendSignal, ToThread) { #endif } +class TestSecureZero : public ::testing::Test { + public: + void CheckSecureZero() { + const std::string copy = data; + const auto old_ptr = data.c_str(); + const auto old_size = data.length(); + SecureZero(&data); + // Allocate new area without initializing it, to minimize the risk of + // dereferencing an invalid address. + std::string new_string; + new_string.reserve(old_size); + // The old data should not be there anymore + for (auto c : util::string_view(old_ptr, old_size)) { + ASSERT_EQ(c, 0); + } + } + + protected: + std::string data; +}; + +TEST_F(TestSecureZero, SmallString) { + // A small string may have its storage inside the string object itself + data = "123"; + CheckSecureZero(); +} + +TEST_F(TestSecureZero, LargeString) { + data.assign(200, 'x'); + CheckSecureZero(); +} + +TEST_F(TestSecureZero, EmptyString) { + // Shouldn't crash or misbehave + SecureZero(&data); +} + } // namespace internal } // namespace arrow From 99f2375e2222da81f660d92008e21eae88c56100 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 14 Apr 2022 18:54:53 +0200 Subject: [PATCH 2/2] Hopefully fix compilation on older glibc --- cpp/src/arrow/util/io_util.cc | 17 ++++++++++------- cpp/src/arrow/util/io_util_test.cc | 16 ++++++++-------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 44adcd716b7..7f3ff2b77d2 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1875,26 +1875,29 @@ void SecureZero(uint8_t* data, int64_t size) { // Heavily borrowed from libb2's `secure_zero_memory` at // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h const auto n = static_cast(size); -#if defined(_WIN32) || defined(WIN32) +#if defined(_WIN32) SecureZeroMemory(data, n); #elif defined(__STDC_LIB_EXT1__) - // prioritize first the general C11 call + // Prioritize first the general C11 call memset_s(data, n, 0, n); -#elif defined(__linux__) +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // glibc 2.25+ has explicit_bzero explicit_bzero(data, n); #else // Try to ensure that a true library call to memset() will be generated // by the compiler. - static void *(*const volatile memset_v)(void *, int, size_t) = &memset; + static const volatile auto memset_v = &memset; memset_v(data, 0, n); __asm__ __volatile__("" ::"r"(data) : "memory"); #endif } void SecureZero(std::string* data) { - SecureZero(reinterpret_cast(data->data()), - static_cast(data->length())); - data->clear(); + if (data->length() > 0) { + SecureZero(reinterpret_cast(&(*data)[0]), + static_cast(data->length())); + data->clear(); + } } } // namespace internal diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 1d235eb0a5a..bdb8f7ec602 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -723,10 +723,10 @@ TEST(SendSignal, ToThread) { class TestSecureZero : public ::testing::Test { public: void CheckSecureZero() { - const std::string copy = data; - const auto old_ptr = data.c_str(); - const auto old_size = data.length(); - SecureZero(&data); + const std::string copy = data_; + const auto old_ptr = data_.c_str(); + const auto old_size = data_.length(); + SecureZero(&data_); // Allocate new area without initializing it, to minimize the risk of // dereferencing an invalid address. std::string new_string; @@ -738,23 +738,23 @@ class TestSecureZero : public ::testing::Test { } protected: - std::string data; + std::string data_; }; TEST_F(TestSecureZero, SmallString) { // A small string may have its storage inside the string object itself - data = "123"; + data_ = "123"; CheckSecureZero(); } TEST_F(TestSecureZero, LargeString) { - data.assign(200, 'x'); + data_.assign(200, 'x'); CheckSecureZero(); } TEST_F(TestSecureZero, EmptyString) { // Shouldn't crash or misbehave - SecureZero(&data); + SecureZero(&data_); } } // namespace internal