From fa3c780274cbe333f949eb882a99ddc1f0ea8288 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 28 May 2025 14:39:41 +0200 Subject: [PATCH 01/22] Add SecureString implementation to arrow/util/ --- cpp/src/arrow/CMakeLists.txt | 6 + cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/secure_string.cc | 147 ++++++++++ cpp/src/arrow/util/secure_string.h | 71 +++++ cpp/src/arrow/util/secure_string_test.cc | 339 +++++++++++++++++++++++ 5 files changed, 564 insertions(+) create mode 100644 cpp/src/arrow/util/secure_string.cc create mode 100644 cpp/src/arrow/util/secure_string.h create mode 100644 cpp/src/arrow/util/secure_string_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 77558726986..917f1d02a55 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -515,6 +515,7 @@ set(ARROW_UTIL_SRCS util/memory.cc util/mutex.cc util/ree_util.cc + util/secure_string.cc util/string.cc util/string_builder.cc util/task_group.cc @@ -574,6 +575,11 @@ if(ARROW_USE_GLOG) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog) endforeach() endif() +if(ARROW_USE_OPENSSL) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENSSL_LIBS}) + endforeach() +endif() if(ARROW_USE_XSIMD) foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD}) diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 17eea5532cc..df47389240e 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -72,6 +72,7 @@ add_arrow_test(utility-test ree_util_test.cc reflection_test.cc rows_to_batches_test.cc + secure_string_test.cc small_vector_test.cc span_test.cc stl_util_test.cc diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc new file mode 100644 index 00000000000..8b699628fd9 --- /dev/null +++ b/cpp/src/arrow/util/secure_string.cc @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#if defined(ARROW_USE_OPENSSL) +# include +# include +#endif +#include +#if defined(_WIN32) +# include +#endif + +#include "arrow/util/secure_string.h" +#include "arrow/util/span.h" + +namespace arrow::util { +SecureString::SecureString(SecureString&& secret) noexcept + : secret_(std::move(secret.secret_)) { + secret.Dispose(); +} +SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { + SecureClear(&secret); +} +SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} + +SecureString& SecureString::operator=(SecureString&& secret) noexcept { + if (this == &secret) { + // self-assignment + return *this; + } + Dispose(); + secret_ = std::move(secret.secret_); + secret.Dispose(); + return *this; +} +SecureString& SecureString::operator=(const SecureString& secret) { + if (this == &secret) { + // self-assignment + return *this; + } + Dispose(); + secret_ = secret.secret_; + return *this; +} +SecureString& SecureString::operator=(std::string&& secret) noexcept { + Dispose(); + // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 + // - requires secure cleaning the local buffer + // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer + // - does not require cleaning anything + secret_ = std::move(secret); + // cleans only the local buffer of secret as this always is a local string by now + SecureClear(&secret); + return *this; +} + +bool SecureString::operator==(const SecureString& other) const { + return secret_ == other.secret_; +} + +bool SecureString::operator!=(const SecureString& other) const { + return secret_ != other.secret_; +} + +bool SecureString::empty() const { return secret_.empty(); } +std::size_t SecureString::size() const { return secret_.size(); } +std::size_t SecureString::length() const { return secret_.length(); } + +::arrow::util::span SecureString::as_span() { + return {reinterpret_cast(secret_.data()), secret_.size()}; +} +::arrow::util::span SecureString::as_span() const { + return {reinterpret_cast(secret_.data()), secret_.size()}; +} +std::string_view SecureString::as_view() const { + return {secret_.data(), secret_.size()}; +} + +void SecureString::Dispose() { SecureClear(&secret_); } +void SecureString::SecureClear(std::string* secret) { + secret->clear(); + SecureClear(reinterpret_cast(secret->data()), secret->capacity()); +} +inline void SecureString::SecureClear(uint8_t* data, size_t size) { + // There is various prior art for this: + // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ + // - libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 + // - libsodium's `sodium_memzero` at + // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 + // Note: + // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html +#if defined(_WIN32) + // SecureZeroMemory is meant to not be optimized away + SecureZeroMemory(data, size); +#elif defined(__STDC_LIB_EXT1__) + // memset_s is meant to not be optimized away + memset_s(data, size, 0, size); +#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + // rely on some implementation in OpenSSL cryptographic library + OPENSSL_cleanse(data, size); +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // explicit_bzero is meant to not be optimized away + explicit_bzero(data, size); +#else + // Volatile pointer to memset function is an attempt to avoid + // that the compiler optimizes away the memset function call. + // pretty much what OPENSSL_cleanse above does + // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 + static const volatile auto memset_v = &memset; + memset_v(data, 0, size); + +# if defined(__GNUC__) || defined(__clang__) + // __asm__ only supported by GCC and Clang + // not supported by MSVC on the ARM and x64 processors + // https://en.cppreference.com/w/c/language/asm.html + // https://en.cppreference.com/w/cpp/language/asm.html + + // Additional attempt on top of volatile memset_v above + // to avoid that the compiler optimizes away the memset function call. + // Assembler code that tells the compiler 'data' has side effects. + // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: + // - "volatile": the asm produces side effects + // - "memory": effectively forms a read/write memory barrier for the compiler + __asm__ __volatile__("" /* no actual code */ + : /* no output */ + : "r"(data) /* input */ + : "memory" /* memory side effects beyond input and output */); +# endif +#endif +} + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h new file mode 100644 index 00000000000..537958f4aa8 --- /dev/null +++ b/cpp/src/arrow/util/secure_string.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/span.h" +#include "parquet/platform.h" + +namespace arrow::util { +/** + * A secure string that ensures the wrapped string is cleared from memory on + * deconstruction. This class can only be created from std::string that are securely + * erased after creation. + * + * Note: This class does not provide a constructor / assignment operator that copies a + * std::string because that would allow code to create a SecureString while accidentally + * not noticing the need to securely erasing the argument after invoking the constructor / + * calling the assignment operator. + */ +class PARQUET_EXPORT SecureString { + public: + SecureString() noexcept = default; + SecureString(SecureString&&) noexcept; + SecureString(const SecureString&) = default; + explicit SecureString(std::string&&) noexcept; + explicit SecureString(size_t, char) noexcept; + + SecureString& operator=(SecureString&&) noexcept; + SecureString& operator=(const SecureString&); + SecureString& operator=(std::string&& secret) noexcept; + + bool operator==(const SecureString&) const; + bool operator!=(const SecureString&) const; + + ~SecureString() { Dispose(); } + + [[nodiscard]] bool empty() const; + [[nodiscard]] std::size_t size() const; + [[nodiscard]] std::size_t length() const; + + [[nodiscard]] ::arrow::util::span as_span(); + [[nodiscard]] ::arrow::util::span as_span() const; + [[nodiscard]] std::string_view as_view() const; + + void Dispose(); + + static void SecureClear(std::string*); + static void SecureClear(uint8_t* data, size_t size); + + private: + std::string secret_; +}; + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc new file mode 100644 index 00000000000..9e04b698cdb --- /dev/null +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -0,0 +1,339 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/util/secure_string.h" + +namespace arrow::util::test { + +std::string_view StringArea(const std::string& string) { + return {string.data(), string.capacity()}; +} + +void AssertSecurelyCleared(const std::string_view area) { + // the entire area is filled with zeros + std::string zeros(area.size(), '\0'); + ASSERT_EQ(area, std::string_view(zeros)); +} + +void AssertSecurelyCleared(const std::string& string) { + AssertSecurelyCleared(StringArea(string)); +} + +/** + * Checks the area has been securely cleared after some position. + */ +void AssertSecurelyCleared(const std::string_view area, const size_t pos) { + // the area after pos is filled with zeros + if (pos < area.size()) { + std::string zeros(area.size() - pos, '\0'); + ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + } +} + +/** + * Checks the area has been securely cleared from the secret value. + * Assumes the area has been released, so it might have been reclaimed and changed after + * cleaning. We cannot check for all-zeros, best we can check here is no secret character + * has leaked. If by any chance the modification produced a former key character at the + * right position, this will be false negative / flaky. Therefore, we check for three + * consecutive secret characters before we fail. + */ +void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { + auto leaks = 0; + for (size_t i = 0; i < secret_value.size(); i++) { + if (area[i] == secret_value[i]) { + leaks++; + } else { + if (leaks >= 3) { + break; + } + leaks = 0; + } + } + if (leaks >= 3) { + FAIL() << leaks << " characters of secret leaked into " << area; + } +} + +TEST(TestSecureString, SecureClearString) { + // short string + { + std::string tiny("abc"); + auto old_area = StringArea(tiny); + SecureString::SecureClear(&tiny); + AssertSecurelyCleared(tiny); + AssertSecurelyCleared(old_area); + } + + // long string + { + std::string large(1024, 'x'); + large.resize(512, 'y'); + auto old_area = StringArea(large); + SecureString::SecureClear(&large); + AssertSecurelyCleared(large); + AssertSecurelyCleared(old_area); + } + + // empty string + { + // this creates an empty string with some non-zero characters in the string buffer + // we test that all those characters are securely cleared + std::string empty("abcdef"); + empty.resize(0); + auto old_area = StringArea(empty); + SecureString::SecureClear(&empty); + AssertSecurelyCleared(empty); + AssertSecurelyCleared(old_area); + } +} + +TEST(TestSecureString, Construct) { + // move-constructing from a string securely clears that string + std::string string("hello world"); + auto old_string = StringArea(string); + SecureString secret_from_string(std::move(string)); + AssertSecurelyCleared(string); + AssertSecurelyCleared(old_string); + ASSERT_FALSE(secret_from_string.empty()); + + // move-constructing from a secure string securely clears that secure string + auto old_secret_from_string_view = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + SecureString secret_from_move_secret(std::move(secret_from_string)); + ASSERT_TRUE(secret_from_string.empty()); + AssertSecurelyCleared(old_secret_from_string_view); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + + // copy-constructing from a secure string does not modify that secure string + SecureString secret_from_secret(secret_from_move_secret); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); +} + +TEST(TestSecureString, Assign) { + // we initialize with the first string and iteratively assign the subsequent values + // the first two values are local (15 chars and less), the remainder are non-local + // strings (larger than 15 chars) memory management of short and long strings behaves + // differently + std::vector test_strings = { + "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; + + // assert test string configuration + ASSERT_GE(test_strings.size(), 4); + for (size_t i = 1; i < test_strings.size(); i++) { + // we expect first two strings to be local strings + if (i <= 1) { + ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); + } else { + ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); + } + // the strings are increasing in size + if (i > 0) { + ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); + } + } + + std::vector reverse_strings = std::vector(test_strings); + reverse(reverse_strings.begin(), reverse_strings.end()); + + for (auto vec : {test_strings, reverse_strings}) { + auto init_string = vec[0]; + auto strings = std::vector(vec.begin() + 1, vec.end()); + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_string(std::move(init_string_copy)); + + // move-assigning from a string securely clears that string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + auto old_string_copy_area = StringArea(string_copy); + ASSERT_FALSE(string.empty()); + ASSERT_FALSE(string_copy.empty()); + auto old_secret_from_string_area = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + + secret_from_string = std::move(string_copy); + + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + AssertSecurelyCleared(string_copy); + auto secret_from_string_view = secret_from_string.as_view(); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (secret_from_string_view.data() != old_string_copy_area.data()) { + AssertSecurelyCleared(old_string_copy_area, string); + } + ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.size(), string.size()); + ASSERT_EQ(secret_from_string.length(), string.length()); + ASSERT_EQ(secret_from_string_view, std::string_view(string)); + if (secret_from_string_view.data() == old_secret_from_string_area.data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_move_secret(std::move(init_string_copy)); + + // move-assigning from a secure string securely clears that secure string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_string_area = secret_string.as_view(); + auto old_secret_string_value = std::string(secret_string.as_view()); + auto old_secret_from_move_secret_area = secret_from_move_secret.as_view(); + auto old_secret_from_move_secret_value = + std::string(secret_from_move_secret.as_view()); + + secret_from_move_secret = std::move(secret_string); + + ASSERT_TRUE(secret_string.empty()); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { + AssertSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value); + } + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.size(), string.size()); + ASSERT_EQ(secret_from_move_secret.length(), string.length()); + ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); + if (old_secret_from_move_secret_area.data() == + secret_from_move_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_copy_secret(std::move(init_string_copy)); + + for (auto string : strings) { + // copy-assigning from a secure string does not modify that secure string + // the earlier value of the secure string is securely cleared + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_from_copy_secret_area = secret_from_copy_secret.as_view(); + auto old_secret_from_copy_secret_value = + std::string(secret_from_copy_secret.as_view()); + + secret_from_copy_secret = secret_string; + + ASSERT_FALSE(secret_string.empty()); + ASSERT_FALSE(secret_from_copy_secret.empty()); + ASSERT_EQ(secret_from_copy_secret.size(), string.size()); + ASSERT_EQ(secret_from_copy_secret.length(), string.length()); + ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); + if (old_secret_from_copy_secret_area.data() == + secret_from_copy_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value); + } + } + } + } +} + +TEST(TestSecureString, Compare) { + ASSERT_TRUE(SecureString("") == SecureString("")); + ASSERT_FALSE(SecureString("") != SecureString("")); + + ASSERT_TRUE(SecureString("hello world") == SecureString("hello world")); + ASSERT_FALSE(SecureString("hello world") != SecureString("hello world")); + + ASSERT_FALSE(SecureString("hello world") == SecureString("hello worlds")); + ASSERT_TRUE(SecureString("hello world") != SecureString("hello worlds")); +} + +TEST(TestSecureString, Cardinality) { + ASSERT_TRUE(SecureString("").empty()); + ASSERT_EQ(SecureString("").size(), 0); + ASSERT_EQ(SecureString("").length(), 0); + + ASSERT_FALSE(SecureString("hello world").empty()); + ASSERT_EQ(SecureString("hello world").size(), 11); + ASSERT_EQ(SecureString("hello world").length(), 11); +} + +TEST(TestSecureString, AsSpan) { + SecureString secret("hello world"); + const SecureString& const_secret(secret); + auto const_span = const_secret.as_span(); + auto mutual_span = secret.as_span(); + + std::string expected = "hello world"; + ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), + expected.size()}; + ASSERT_EQ(const_span, expected_span); + ASSERT_EQ(mutual_span, expected_span); + + // modify secret through mutual span + // the const span shares the same secret, so it is changed as well + mutual_span[0] = 'H'; + expected_span[0] = 'H'; + ASSERT_EQ(const_span, expected_span); + ASSERT_EQ(mutual_span, expected_span); +} + +TEST(TestSecureString, AsView) { + const SecureString secret = SecureString("hello world"); + const std::string_view view = secret.as_view(); + ASSERT_EQ(view, "hello world"); +} + +} // namespace arrow::util::test From 96fe8e35d9a15df72ad3193944ea8e94d2f2e678 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 11:58:03 +0200 Subject: [PATCH 02/22] Fix import for memset_s, improve for loops in tests Co-authored-by: Antoine Pitrou --- cpp/src/arrow/util/secure_string.cc | 2 ++ cpp/src/arrow/util/secure_string_test.cc | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 8b699628fd9..b45d116878a 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -19,6 +19,8 @@ # include # include #endif +#define __STDC_WANT_LIB_EXT1__ 1 +#include #include #if defined(_WIN32) # include diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 9e04b698cdb..ae9cdcf6301 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -171,7 +171,7 @@ TEST(TestSecureString, Assign) { // move-assigning from a string securely clears that string // the earlier value of the secure string is securely cleared - for (auto string : strings) { + for (const auto& string : strings) { auto string_copy = std::string(string); auto old_string_copy_area = StringArea(string_copy); ASSERT_FALSE(string.empty()); @@ -212,7 +212,7 @@ TEST(TestSecureString, Assign) { // move-assigning from a secure string securely clears that secure string // the earlier value of the secure string is securely cleared - for (auto string : strings) { + for (const auto& string : strings) { auto string_copy = std::string(string); SecureString secret_string(std::move(string_copy)); ASSERT_FALSE(string.empty()); @@ -255,7 +255,7 @@ TEST(TestSecureString, Assign) { std::string init_string_copy(init_string); SecureString secret_from_copy_secret(std::move(init_string_copy)); - for (auto string : strings) { + for (const auto& string : strings) { // copy-assigning from a secure string does not modify that secure string // the earlier value of the secure string is securely cleared auto string_copy = std::string(string); From fd5d43dbd509090f7e6d05e617f1c5a52d55d36a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 12:05:38 +0200 Subject: [PATCH 03/22] Address code review comments - Improve imports and definition export - Add lines between definitions - Improve comments - Remove redundant ::arrow::util:: when using span - Remove test setup assertions - Reuse view in test --- cpp/src/arrow/util/secure_string.cc | 34 +++++++++++++++---- cpp/src/arrow/util/secure_string.h | 9 ++--- cpp/src/arrow/util/secure_string_test.cc | 42 ++++++++---------------- 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index b45d116878a..d1f93e7db97 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. +#define __STDC_WANT_LIB_EXT1__ 1 +#include +#include + #if defined(ARROW_USE_OPENSSL) # include # include #endif -#define __STDC_WANT_LIB_EXT1__ 1 -#include -#include + +#include "arrow/util/windows_compatibility.h" #if defined(_WIN32) # include #endif @@ -30,13 +33,16 @@ #include "arrow/util/span.h" namespace arrow::util { + SecureString::SecureString(SecureString&& secret) noexcept : secret_(std::move(secret.secret_)) { secret.Dispose(); } + SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { SecureClear(&secret); } + SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} SecureString& SecureString::operator=(SecureString&& secret) noexcept { @@ -49,6 +55,7 @@ SecureString& SecureString::operator=(SecureString&& secret) noexcept { secret.Dispose(); return *this; } + SecureString& SecureString::operator=(const SecureString& secret) { if (this == &secret) { // self-assignment @@ -58,11 +65,16 @@ SecureString& SecureString::operator=(const SecureString& secret) { secret_ = secret.secret_; return *this; } + SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 + // std::string implementation may distinguish between local string (a very short string) + // and non-local (longer) strings. The former stores the string in a local buffer, the + // latter stores a pointer to allocated memory that stores the string. + // + // If secret is a local string, copies local buffer, resets size to 0 // - requires secure cleaning the local buffer - // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer + // If secret is longer, moves the pointer to secret_, resets to 0, uses local buffer // - does not require cleaning anything secret_ = std::move(secret); // cleans only the local buffer of secret as this always is a local string by now @@ -79,24 +91,32 @@ bool SecureString::operator!=(const SecureString& other) const { } bool SecureString::empty() const { return secret_.empty(); } + std::size_t SecureString::size() const { return secret_.size(); } + std::size_t SecureString::length() const { return secret_.length(); } -::arrow::util::span SecureString::as_span() { +std::size_t SecureString::capacity() const { return secret_.capacity(); } + +span SecureString::as_span() { return {reinterpret_cast(secret_.data()), secret_.size()}; } -::arrow::util::span SecureString::as_span() const { + +span SecureString::as_span() const { return {reinterpret_cast(secret_.data()), secret_.size()}; } + std::string_view SecureString::as_view() const { return {secret_.data(), secret_.size()}; } void SecureString::Dispose() { SecureClear(&secret_); } + void SecureString::SecureClear(std::string* secret) { secret->clear(); SecureClear(reinterpret_cast(secret->data()), secret->capacity()); } + inline void SecureString::SecureClear(uint8_t* data, size_t size) { // There is various prior art for this: // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h index 537958f4aa8..11143ef6cc9 100644 --- a/cpp/src/arrow/util/secure_string.h +++ b/cpp/src/arrow/util/secure_string.h @@ -21,7 +21,7 @@ #include #include "arrow/util/span.h" -#include "parquet/platform.h" +#include "arrow/util/visibility.h" namespace arrow::util { /** @@ -34,7 +34,7 @@ namespace arrow::util { * not noticing the need to securely erasing the argument after invoking the constructor / * calling the assignment operator. */ -class PARQUET_EXPORT SecureString { +class ARROW_EXPORT SecureString { public: SecureString() noexcept = default; SecureString(SecureString&&) noexcept; @@ -54,9 +54,10 @@ class PARQUET_EXPORT SecureString { [[nodiscard]] bool empty() const; [[nodiscard]] std::size_t size() const; [[nodiscard]] std::size_t length() const; + [[nodiscard]] std::size_t capacity() const; - [[nodiscard]] ::arrow::util::span as_span(); - [[nodiscard]] ::arrow::util::span as_span() const; + [[nodiscard]] span as_span(); + [[nodiscard]] span as_span() const; [[nodiscard]] std::string_view as_view() const; void Dispose(); diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index ae9cdcf6301..18002a54ac9 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -135,30 +135,14 @@ TEST(TestSecureString, Construct) { } TEST(TestSecureString, Assign) { - // we initialize with the first string and iteratively assign the subsequent values - // the first two values are local (15 chars and less), the remainder are non-local - // strings (larger than 15 chars) memory management of short and long strings behaves - // differently + // We initialize with the first string and iteratively assign the subsequent values. + // The first two values are local (very short strings), the remainder are non-local + // strings. Memory management of short and long strings behaves differently. std::vector test_strings = { "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; - // assert test string configuration - ASSERT_GE(test_strings.size(), 4); - for (size_t i = 1; i < test_strings.size(); i++) { - // we expect first two strings to be local strings - if (i <= 1) { - ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); - } else { - ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); - } - // the strings are increasing in size - if (i > 0) { - ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); - } - } - std::vector reverse_strings = std::vector(test_strings); - reverse(reverse_strings.begin(), reverse_strings.end()); + std::reverse(reverse_strings.begin(), reverse_strings.end()); for (auto vec : {test_strings, reverse_strings}) { auto init_string = vec[0]; @@ -227,18 +211,19 @@ TEST(TestSecureString, Assign) { secret_from_move_secret = std::move(secret_string); ASSERT_TRUE(secret_string.empty()); + auto secret_from_move_secret_view = secret_from_move_secret.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared - if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { + if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { AssertSecurelyCleared(old_secret_string_area, old_secret_from_move_secret_value); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); ASSERT_EQ(secret_from_move_secret.length(), string.length()); - ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); + ASSERT_EQ(secret_from_move_secret_view, std::string_view(string)); if (old_secret_from_move_secret_area.data() == - secret_from_move_secret.as_view().data()) { + secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared AssertSecurelyCleared(old_secret_from_move_secret_area, secret_from_move_secret.size()); @@ -314,20 +299,19 @@ TEST(TestSecureString, AsSpan) { SecureString secret("hello world"); const SecureString& const_secret(secret); auto const_span = const_secret.as_span(); - auto mutual_span = secret.as_span(); + auto mutable_span = secret.as_span(); std::string expected = "hello world"; - ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), - expected.size()}; + span expected_span = {reinterpret_cast(expected.data()), expected.size()}; ASSERT_EQ(const_span, expected_span); - ASSERT_EQ(mutual_span, expected_span); + ASSERT_EQ(mutable_span, expected_span); // modify secret through mutual span // the const span shares the same secret, so it is changed as well - mutual_span[0] = 'H'; + mutable_span[0] = 'H'; expected_span[0] = 'H'; ASSERT_EQ(const_span, expected_span); - ASSERT_EQ(mutual_span, expected_span); + ASSERT_EQ(mutable_span, expected_span); } TEST(TestSecureString, AsView) { From 7ece2b07072335ac907aaf90f282e2b648988e69 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 12:43:00 +0200 Subject: [PATCH 04/22] Test secure SecureString deconstruction --- cpp/src/arrow/util/secure_string_test.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 18002a54ac9..f2cc96f7b3c 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -274,6 +274,30 @@ TEST(TestSecureString, Assign) { } } +TEST(TestSecureString, Deconstruct) { +#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) + // We use a very short and a very long string as memory management of short and long + // strings behaves differently. + std::vector strings = {"short secret", std::string(1024, 'x')}; + + for (auto& string : strings) { + auto old_string_value = string; + std::string_view view; + { + // construct secret + auto secret = SecureString(std::move(string)); + // memorize view + view = secret.as_view(); + // deconstruct secret on leaving this context + } + // assert secret memory is cleared on deconstruction + AssertSecurelyCleared(view, old_string_value); + // so is the string (tested more thoroughly elsewhere) + AssertSecurelyCleared(string); + } +#endif +} + TEST(TestSecureString, Compare) { ASSERT_TRUE(SecureString("") == SecureString("")); ASSERT_FALSE(SecureString("") != SecureString("")); From 15aa6deda96498aef4eeef7aa783723d3286016a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 17:28:39 +0200 Subject: [PATCH 05/22] Test correctness of AssertSecurelyCleared --- cpp/src/arrow/util/secure_string_test.cc | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f2cc96f7b3c..58a2782adba 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -21,6 +21,8 @@ #include "arrow/util/secure_string.h" +#include + namespace arrow::util::test { std::string_view StringArea(const std::string& string) { @@ -73,6 +75,84 @@ void AssertSecurelyCleared(const std::string_view area, const std::string& secre } } +// GTest test result reporter that captures the result but does not hand it to the unit +// test instance. This effectively hides the result from the GTest test framework. +class Reporter : public testing::TestPartResultReporterInterface { + public: + explicit Reporter(testing::TestInfo* test_info) + : result_(testing::TestPartResult::kSuccess, test_info->file(), test_info->line(), + "") {} + void ReportTestPartResult(const testing::TestPartResult& result) override { + result_ = result; + } + const testing::TestPartResult& result() const { return result_; } + + private: + testing::TestPartResult result_; +}; + +#define GET_TEST_RESULT_REPORTER() \ + testing::internal::GetUnitTestImpl()->GetTestPartResultReporterForCurrentThread() + +#define SET_TEST_RESULT_REPORTER(reporter) \ + testing::internal::GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( \ + reporter); + +#define CAPTURE_TEST_RESULT(capture, body) \ + { \ + auto report = GET_TEST_RESULT_REPORTER(); \ + SET_TEST_RESULT_REPORTER(&capture); \ + body; \ + SET_TEST_RESULT_REPORTER(report); \ + } + +TEST(TestSecureString, AssertSecurelyCleared) { + // This tests AssertSecurelyCleared helper methods is actually able to identify secret + // leakage. It captures test results emitted by ASSERT_EQ and then asserts result type + // and message. + auto capture = Reporter(test_info_); + + auto short_zeros = std::string(10, '\0'); + AssertSecurelyCleared(std::string_view(short_zeros)); + + auto large_zeros = std::string(1000, '\0'); + AssertSecurelyCleared(large_zeros); + + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\"\n"); + + auto some_zeros = no_zeros; + some_zeros = std::string(10, '\0'); + AssertSecurelyCleared(some_zeros, 10); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\"\n"); + + AssertSecurelyCleared(some_zeros, "12345678901234567890123456"); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Failed\n" + "15 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\n"); +} + TEST(TestSecureString, SecureClearString) { // short string { From 89e3f0097851341b2586470b71b56a3ecb804aba Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 06:46:47 +0200 Subject: [PATCH 06/22] Rename SecureString argument to other --- cpp/src/arrow/util/secure_string.cc | 20 ++++++++++---------- cpp/src/arrow/util/secure_string.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index d1f93e7db97..5e36aab23cf 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -34,9 +34,9 @@ namespace arrow::util { -SecureString::SecureString(SecureString&& secret) noexcept - : secret_(std::move(secret.secret_)) { - secret.Dispose(); +SecureString::SecureString(SecureString&& other) noexcept + : secret_(std::move(other.secret_)) { + other.Dispose(); } SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { @@ -45,24 +45,24 @@ SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(se SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} -SecureString& SecureString::operator=(SecureString&& secret) noexcept { - if (this == &secret) { +SecureString& SecureString::operator=(SecureString&& other) noexcept { + if (this == &other) { // self-assignment return *this; } Dispose(); - secret_ = std::move(secret.secret_); - secret.Dispose(); + secret_ = std::move(other.secret_); + other.Dispose(); return *this; } -SecureString& SecureString::operator=(const SecureString& secret) { - if (this == &secret) { +SecureString& SecureString::operator=(const SecureString& other) { + if (this == &other) { // self-assignment return *this; } Dispose(); - secret_ = secret.secret_; + secret_ = other.secret_; return *this; } diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h index 11143ef6cc9..843f6cd0a95 100644 --- a/cpp/src/arrow/util/secure_string.h +++ b/cpp/src/arrow/util/secure_string.h @@ -44,7 +44,7 @@ class ARROW_EXPORT SecureString { SecureString& operator=(SecureString&&) noexcept; SecureString& operator=(const SecureString&); - SecureString& operator=(std::string&& secret) noexcept; + SecureString& operator=(std::string&&) noexcept; bool operator==(const SecureString&) const; bool operator!=(const SecureString&) const; From cb5c9de2f25b54e787975ea049c48b7060125ad2 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 08:15:00 +0200 Subject: [PATCH 07/22] Move std::move into secure_move, assert string ptr --- cpp/src/arrow/util/secure_string.cc | 52 +++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 5e36aab23cf..19abf68bc25 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -30,16 +30,49 @@ #endif #include "arrow/util/secure_string.h" +#include "arrow/util/logging.h" #include "arrow/util/span.h" namespace arrow::util { -SecureString::SecureString(SecureString&& other) noexcept - : secret_(std::move(other.secret_)) { +/// Note: +/// A string std::string is securely moved into a SecureString in two steps: +/// 1. the std::string is moved via std::move(string) +/// 2. the std::string is securely cleared +/// +/// The std::move has two different effects, depending on the size of the string. +/// A very short string (called local string) stores the string in a local buffer, +/// a long string stores a pointer to allocated memory that stores the string. +/// +/// If the string is a small string, std::move copies the local buffer. +/// If the string is a long string, std::move moves the pointer and then resets the +/// string size to 0 (which turns the string into a local string). +/// +/// In both cases, after a std::move(string), the string uses the local buffer. +/// +/// Thus, after a std::move(string), calling SecureClear(std::string*) only +/// securely clears the **local buffer** of the string. Therefore, std::move(string) +/// must move the pointer of long string into SecureString (which later clears the string). +/// Otherwise, the content of the string cannot be securely cleared. +/// +/// This condition is checked by secure_move. + +void secure_move(std::string& string, std::string& dst) { + auto ptr = string.data(); + dst = std::move(string); + + // We require the buffer address string.data() to remain (not be freed), + // or reused by dst. Otherwise, we cannot securely clear string after this move + ARROW_CHECK(string.data() == ptr || dst.data() == ptr); +} + +SecureString::SecureString(SecureString&& other) noexcept { + secure_move(other.secret_, secret_); other.Dispose(); } -SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { +SecureString::SecureString(std::string&& secret) noexcept { + secure_move(secret, secret_); SecureClear(&secret); } @@ -51,7 +84,7 @@ SecureString& SecureString::operator=(SecureString&& other) noexcept { return *this; } Dispose(); - secret_ = std::move(other.secret_); + secure_move(other.secret_, secret_); other.Dispose(); return *this; } @@ -68,16 +101,7 @@ SecureString& SecureString::operator=(const SecureString& other) { SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - // std::string implementation may distinguish between local string (a very short string) - // and non-local (longer) strings. The former stores the string in a local buffer, the - // latter stores a pointer to allocated memory that stores the string. - // - // If secret is a local string, copies local buffer, resets size to 0 - // - requires secure cleaning the local buffer - // If secret is longer, moves the pointer to secret_, resets to 0, uses local buffer - // - does not require cleaning anything - secret_ = std::move(secret); - // cleans only the local buffer of secret as this always is a local string by now + secure_move(secret, secret_); SecureClear(&secret); return *this; } From 37223952f6d76365f93859e28b85e2d563219ba3 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 09:52:54 +0200 Subject: [PATCH 08/22] Add comments, fix linting --- cpp/src/arrow/util/secure_string.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 19abf68bc25..ade822c84f7 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// __STDC_WANT_LIB_EXT1__ and string.h are required by memset_s: +// https://en.cppreference.com/w/c/string/byte/memset #define __STDC_WANT_LIB_EXT1__ 1 #include #include @@ -29,8 +31,8 @@ # include #endif -#include "arrow/util/secure_string.h" #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "arrow/util/span.h" namespace arrow::util { @@ -52,8 +54,8 @@ namespace arrow::util { /// /// Thus, after a std::move(string), calling SecureClear(std::string*) only /// securely clears the **local buffer** of the string. Therefore, std::move(string) -/// must move the pointer of long string into SecureString (which later clears the string). -/// Otherwise, the content of the string cannot be securely cleared. +/// must move the pointer of long string into SecureString (which later clears the +/// string). Otherwise, the content of the string cannot be securely cleared. /// /// This condition is checked by secure_move. From 753bfe7c619b2b2fbf2572e1fe8ce16fa481e511 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 10:11:12 +0200 Subject: [PATCH 09/22] Improve assertions --- cpp/src/arrow/util/secure_string_test.cc | 66 +++++++++++++----------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 58a2782adba..0a7eef4511d 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -113,10 +113,10 @@ TEST(TestSecureString, AssertSecurelyCleared) { auto capture = Reporter(test_info_); auto short_zeros = std::string(10, '\0'); - AssertSecurelyCleared(std::string_view(short_zeros)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(std::string_view(short_zeros))); auto large_zeros = std::string(1000, '\0'); - AssertSecurelyCleared(large_zeros); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large_zeros)); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); @@ -132,7 +132,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { auto some_zeros = no_zeros; some_zeros = std::string(10, '\0'); - AssertSecurelyCleared(some_zeros, 10); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(some_zeros, 10)); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); ASSERT_EQ(std::string(capture.result().message()), @@ -144,7 +144,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\"\n"); - AssertSecurelyCleared(some_zeros, "12345678901234567890123456"); + ASSERT_NO_FATAL_FAILURE( + AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); ASSERT_EQ(std::string(capture.result().message()), @@ -159,8 +160,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - AssertSecurelyCleared(tiny); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(tiny)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } // long string @@ -169,8 +170,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - AssertSecurelyCleared(large); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } // empty string @@ -181,8 +182,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - AssertSecurelyCleared(empty); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(empty)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } } @@ -191,8 +192,8 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - AssertSecurelyCleared(string); - AssertSecurelyCleared(old_string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); // move-constructing from a secure string securely clears that secure string @@ -200,7 +201,7 @@ TEST(TestSecureString, Construct) { auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - AssertSecurelyCleared(old_secret_from_string_view); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_view)); ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(old_secret_from_string_value)); @@ -247,12 +248,12 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - AssertSecurelyCleared(string_copy); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - AssertSecurelyCleared(old_string_copy_area, string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); @@ -260,11 +261,12 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_string_view, std::string_view(string)); if (secret_from_string_view.data() == old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, + secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -295,8 +297,8 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - AssertSecurelyCleared(old_secret_string_area, - old_secret_from_move_secret_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_string_area, old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); @@ -305,12 +307,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - old_secret_from_move_secret_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_from_move_secret_area, old_secret_from_move_secret_value)); } } } @@ -342,12 +344,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - old_secret_from_copy_secret_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_from_copy_secret_area, old_secret_from_copy_secret_value)); } } } @@ -355,7 +357,9 @@ TEST(TestSecureString, Assign) { } TEST(TestSecureString, Deconstruct) { -#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + GTEST_SKIP() << "Test accesses deallocated memory"; +#else // We use a very short and a very long string as memory management of short and long // strings behaves differently. std::vector strings = {"short secret", std::string(1024, 'x')}; @@ -371,9 +375,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - AssertSecurelyCleared(view, old_string_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - AssertSecurelyCleared(string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); } #endif } From fb01ecba4176ca9ac675392939ede4c41ee68b73 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 10:36:35 +0200 Subject: [PATCH 10/22] Use testing::AssertionResult rather than capturing assertions through TestPartResultReporterInterface --- cpp/src/arrow/util/secure_string_test.cc | 153 +++++++++-------------- 1 file changed, 62 insertions(+), 91 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 0a7eef4511d..11e6dd37b66 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -21,33 +21,37 @@ #include "arrow/util/secure_string.h" -#include - namespace arrow::util::test { std::string_view StringArea(const std::string& string) { return {string.data(), string.capacity()}; } -void AssertSecurelyCleared(const std::string_view area) { +// same as GTest ASSERT_PRED_FORMAT2 macro, but without the outer GTEST_ASSERT_ +#define COMPARE(val1, val2) \ + ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) + +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); - ASSERT_EQ(area, std::string_view(zeros)); + return COMPARE(area, std::string_view(zeros)); } -void AssertSecurelyCleared(const std::string& string) { - AssertSecurelyCleared(StringArea(string)); +::testing::AssertionResult AssertSecurelyCleared(const std::string& string) { + return AssertSecurelyCleared(StringArea(string)); } /** * Checks the area has been securely cleared after some position. */ -void AssertSecurelyCleared(const std::string_view area, const size_t pos) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, + const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { std::string zeros(area.size() - pos, '\0'); - ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + return COMPARE(area.substr(pos), std::string_view(zeros)); } + return ::testing::AssertionSuccess(); } /** @@ -58,7 +62,8 @@ void AssertSecurelyCleared(const std::string_view area, const size_t pos) { * right position, this will be false negative / flaky. Therefore, we check for three * consecutive secret characters before we fail. */ -void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, + const std::string& secret_value) { auto leaks = 0; for (size_t i = 0; i < secret_value.size(); i++) { if (area[i] == secret_value[i]) { @@ -71,87 +76,53 @@ void AssertSecurelyCleared(const std::string_view area, const std::string& secre } } if (leaks >= 3) { - FAIL() << leaks << " characters of secret leaked into " << area; + return ::testing::AssertionFailure() + << leaks << " characters of secret leaked into " << area; } + return ::testing::AssertionSuccess(); } -// GTest test result reporter that captures the result but does not hand it to the unit -// test instance. This effectively hides the result from the GTest test framework. -class Reporter : public testing::TestPartResultReporterInterface { - public: - explicit Reporter(testing::TestInfo* test_info) - : result_(testing::TestPartResult::kSuccess, test_info->file(), test_info->line(), - "") {} - void ReportTestPartResult(const testing::TestPartResult& result) override { - result_ = result; - } - const testing::TestPartResult& result() const { return result_; } - - private: - testing::TestPartResult result_; -}; - -#define GET_TEST_RESULT_REPORTER() \ - testing::internal::GetUnitTestImpl()->GetTestPartResultReporterForCurrentThread() - -#define SET_TEST_RESULT_REPORTER(reporter) \ - testing::internal::GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( \ - reporter); - -#define CAPTURE_TEST_RESULT(capture, body) \ - { \ - auto report = GET_TEST_RESULT_REPORTER(); \ - SET_TEST_RESULT_REPORTER(&capture); \ - body; \ - SET_TEST_RESULT_REPORTER(report); \ - } - TEST(TestSecureString, AssertSecurelyCleared) { // This tests AssertSecurelyCleared helper methods is actually able to identify secret - // leakage. It captures test results emitted by ASSERT_EQ and then asserts result type - // and message. - auto capture = Reporter(test_info_); - + // leakage. It retrieves assertion results and asserts result type and message. auto short_zeros = std::string(10, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(std::string_view(short_zeros))); + ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); auto large_zeros = std::string(1000, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large_zeros)); + ASSERT_TRUE(AssertSecurelyCleared(large_zeros)); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - ASSERT_EQ(std::string(capture.result().message()), + auto result = AssertSecurelyCleared(no_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\"\n"); + "0\\0\""); auto some_zeros = no_zeros; some_zeros = std::string(10, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(some_zeros, 10)); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - ASSERT_EQ(std::string(capture.result().message()), + ASSERT_TRUE(AssertSecurelyCleared(some_zeros, 10)); + result = AssertSecurelyCleared(some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\"\n"); - - ASSERT_NO_FATAL_FAILURE( - AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - ASSERT_EQ(std::string(capture.result().message()), - "Failed\n" + "0\\0\""); + + ASSERT_TRUE(AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); + result = AssertSecurelyCleared(StringArea(some_zeros), no_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\n"); + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); } TEST(TestSecureString, SecureClearString) { @@ -160,8 +131,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(tiny)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(tiny)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } // long string @@ -170,8 +141,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(large)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } // empty string @@ -182,8 +153,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(empty)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(empty)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } } @@ -192,8 +163,8 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string)); + ASSERT_TRUE(AssertSecurelyCleared(string)); + ASSERT_TRUE(AssertSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); // move-constructing from a secure string securely clears that secure string @@ -201,7 +172,7 @@ TEST(TestSecureString, Construct) { auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_view)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_view)); ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(old_secret_from_string_value)); @@ -248,12 +219,12 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string_copy)); + ASSERT_TRUE(AssertSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string_copy_area, string)); + ASSERT_TRUE(AssertSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); @@ -261,12 +232,12 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_string_view, std::string_view(string)); if (secret_from_string_view.data() == old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, - secret_from_string.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, + secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -297,8 +268,8 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_string_area, old_secret_from_move_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); @@ -307,12 +278,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_from_move_secret_area, old_secret_from_move_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value)); } } } @@ -344,12 +315,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_from_copy_secret_area, old_secret_from_copy_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value)); } } } @@ -375,9 +346,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(view, old_string_value)); + ASSERT_TRUE(AssertSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); + ASSERT_TRUE(AssertSecurelyCleared(string)); } #endif } From 0918e19bb845826ed33f4d41f7d807e2ca14e73f Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 11:38:12 +0200 Subject: [PATCH 11/22] Expect string buffers larger than requested size --- cpp/src/arrow/util/secure_string_test.cc | 41 +++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 11e6dd37b66..f4e16abc78c 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -85,14 +85,47 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, TEST(TestSecureString, AssertSecurelyCleared) { // This tests AssertSecurelyCleared helper methods is actually able to identify secret // leakage. It retrieves assertion results and asserts result type and message. - auto short_zeros = std::string(10, '\0'); + testing::AssertionResult result = testing::AssertionSuccess(); + + // check short string with all zeros + auto short_zeros = std::string(8, '\0'); + short_zeros.resize(short_zeros.capacity(), '\0'); // for string buffers longer than 8 + short_zeros.resize(8); // now the entire string buffer has zeros + // checks the entire string buffer (capacity) + ASSERT_TRUE(AssertSecurelyCleared(short_zeros)); + // checks only 10 bytes (length) ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); - auto large_zeros = std::string(1000, '\0'); - ASSERT_TRUE(AssertSecurelyCleared(large_zeros)); + // check long string with all zeros + auto long_zeros = std::string(1000, '\0'); + long_zeros.resize(long_zeros.capacity(), '\0'); // for longer string buffers + long_zeros.resize(1000); // now the entire string buffer has zeros + // checks the entire string buffer (capacity) + ASSERT_TRUE(AssertSecurelyCleared(long_zeros)); + // checks only 1000 bytes (length) + ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); + + // check short string with zeros and non-zeros after string length + auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); + short_some_zeros = short_zeros; + result = AssertSecurelyCleared(short_some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\\0\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + + // check long string with zeros and non-zeros after string length + auto long_some_zeros = std::string(long_zeros.length() + 10, '*'); + long_some_zeros = long_zeros; + result = AssertSecurelyCleared(long_some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), ""); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - auto result = AssertSecurelyCleared(no_zeros); + result = AssertSecurelyCleared(no_zeros); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" From 489a532491028b344ae85adeff397b2d60cc90d8 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 12:17:38 +0200 Subject: [PATCH 12/22] Handle string buffers larger than init size --- cpp/src/arrow/util/secure_string_test.cc | 74 +++++++++++++++++++----- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f4e16abc78c..c43e5575601 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -65,7 +65,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { auto leaks = 0; - for (size_t i = 0; i < secret_value.size(); i++) { + for (size_t i = 0; i < std::min(area.length(), secret_value.length()); i++) { if (area[i] == secret_value[i]) { leaks++; } else { @@ -108,24 +108,45 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check short string with zeros and non-zeros after string length auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); short_some_zeros = short_zeros; - result = AssertSecurelyCleared(short_some_zeros); + // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 + // assert only the area that we can control + auto short_some_zeros_view = + std::string_view(short_some_zeros.data(), short_zeros.length() + 3); + result = AssertSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\\0\\0\\0\\0\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\"\n" " std::string_view(zeros)\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); // check long string with zeros and non-zeros after string length - auto long_some_zeros = std::string(long_zeros.length() + 10, '*'); - long_some_zeros = long_zeros; - result = AssertSecurelyCleared(long_some_zeros); + auto zeros = std::string(32, '\0'); + auto long_some_zeros = std::string(zeros.length() + 10, '*'); + long_some_zeros = zeros; + // string buffer in long_some_zeros can be larger than zeros.length() + 10 + // assert only the area that we can control + auto long_some_zeros_view = + std::string_view(long_some_zeros.data(), zeros.length() + 10); + result = AssertSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); - ASSERT_EQ(std::string(result.message()), ""); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - result = AssertSecurelyCleared(no_zeros); + // string buffer in no_zeros can be larger than no_zeros.length() + // assert only the area that we can control + auto no_zeros_view = std::string_view(no_zeros); + result = AssertSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -136,10 +157,14 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); - auto some_zeros = no_zeros; - some_zeros = std::string(10, '\0'); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros, 10)); - result = AssertSecurelyCleared(some_zeros); + // check string with zeros and non-zeros after string length + auto some_zeros_front = no_zeros; + some_zeros_front = std::string(10, '\0'); + // string buffer in some_zeros_front can be larger than no_zeros.length() + // assert only the area that we can control + auto some_zeros_fronts_view = + std::string_view(some_zeros_front.data(), no_zeros.length()); + result = AssertSecurelyCleared(some_zeros_fronts_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -150,12 +175,31 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); - result = AssertSecurelyCleared(StringArea(some_zeros), no_zeros); + ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); + result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); + + // check string with non-zeros and zeros after string length + auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); + some_zeros_back = no_zeros; + // string buffer in some_zeros_back can be larger than no_zeros.length() + 3 + // assert only the area that we can control + auto some_zeros_back_view = + std::string_view(some_zeros_back.data(), no_zeros.length() + 3); + ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); + result = AssertSecurelyCleared(some_zeros_back_view); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { From bf460151b59768b85e6e12df98c6d4e3b1a0d57e Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 15:24:47 +0200 Subject: [PATCH 13/22] Don't access deallocated memory in ASAN / Valgrind mode --- cpp/src/arrow/util/secure_string_test.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index c43e5575601..a4e1b0e1886 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -56,14 +56,18 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, /** * Checks the area has been securely cleared from the secret value. - * Assumes the area has been released, so it might have been reclaimed and changed after - * cleaning. We cannot check for all-zeros, best we can check here is no secret character - * has leaked. If by any chance the modification produced a former key character at the - * right position, this will be false negative / flaky. Therefore, we check for three - * consecutive secret characters before we fail. + * Assumes the area has been deallocated, so it might have been reclaimed and changed + * after cleaning. We cannot check for all-zeros, best we can check here is no secret + * character has leaked. If by any chance the modification produced a former key character + * at the right position, this will be false negative / flaky. Therefore, we check for + * three consecutive secret characters before we fail. */ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + return testing::AssertionSuccess() << "Not checking deallocated memory"; +#else + // accessing deallocated memory will fail when running with Address Sanitizer enabled auto leaks = 0; for (size_t i = 0; i < std::min(area.length(), secret_value.length()); i++) { if (area[i] == secret_value[i]) { @@ -80,6 +84,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, << leaks << " characters of secret leaked into " << area; } return ::testing::AssertionSuccess(); +#endif } TEST(TestSecureString, AssertSecurelyCleared) { @@ -177,10 +182,14 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + ASSERT_TRUE(result); +#else ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); +#endif // check string with non-zeros and zeros after string length auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); From aeb66377b5a219c8ec797c29f5c5f3f94e16a242 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 15:52:15 +0200 Subject: [PATCH 14/22] Fix SecureClear for non-local strings, stabalize mem assertions --- cpp/src/arrow/util/secure_string.cc | 5 ++++- cpp/src/arrow/util/secure_string_test.cc | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index ade822c84f7..a82e6d018c4 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -139,8 +139,11 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } void SecureString::SecureClear(std::string* secret) { - secret->clear(); + // in case of non-local strings (long strings), this order is vital + // first clear the string buffer SecureClear(reinterpret_cast(secret->data()), secret->capacity()); + // then reset the string size (moves from the non-local to the local string buffer) + secret->clear(); } inline void SecureString::SecureClear(uint8_t* data, size_t size) { diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index a4e1b0e1886..8528aa78bd8 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -112,6 +112,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check short string with zeros and non-zeros after string length auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); + short_zeros.resize(short_zeros.capacity(), '*'); // for string buffers longer than 8 + short_zeros.resize(8); // now the string buffer is filled with '*' after the string short_some_zeros = short_zeros; // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 // assert only the area that we can control @@ -129,6 +131,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check long string with zeros and non-zeros after string length auto zeros = std::string(32, '\0'); auto long_some_zeros = std::string(zeros.length() + 10, '*'); + zeros.resize(zeros.capacity(), '*'); // for string buffers longer than 32 + zeros.resize(32); // now the string buffer is filled with '*' after the string long_some_zeros = zeros; // string buffer in long_some_zeros can be larger than zeros.length() + 10 // assert only the area that we can control From 66e0def14a595ca3a00a5de786de761928363be9 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 07:09:35 +0200 Subject: [PATCH 15/22] Avoid assigning short string to long string in test --- cpp/src/arrow/util/secure_string_test.cc | 39 ++++++++++++------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 8528aa78bd8..6e4472031d6 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -151,7 +151,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz123"); // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control auto no_zeros_view = std::string_view(no_zeros); @@ -160,16 +160,16 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz123\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\""); + "0\\0\\0\\0\\0\""); // check string with zeros and non-zeros after string length auto some_zeros_front = no_zeros; - some_zeros_front = std::string(10, '\0'); - // string buffer in some_zeros_front can be larger than no_zeros.length() + some_zeros_front = std::string(no_zeros.length() - 3, '\0'); + // string buffer in some_zeros_front can be larger than no_zeros.length() - 3 // assert only the area that we can control auto some_zeros_fronts_view = std::string_view(some_zeros_front.data(), no_zeros.length()); @@ -178,21 +178,21 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" + "\\0\\0\\0\\0\\0\\0123\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); +#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) - ASSERT_TRUE(result); -#else ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), - "15 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); + "3 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" + "\\0123"); #endif // check string with non-zeros and zeros after string length @@ -205,14 +205,15 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); result = AssertSecurelyCleared(some_zeros_back_view); ASSERT_FALSE(result); - ASSERT_EQ(std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz\\0\\0\\0\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\""); + ASSERT_EQ( + std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz123\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { From 7f529d10427b8c6a1982b64bda812bb6405465e4 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 07:16:01 +0200 Subject: [PATCH 16/22] Fix memory issues in tests --- cpp/src/arrow/util/secure_string_test.cc | 135 ++++++++++------------- 1 file changed, 60 insertions(+), 75 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 6e4472031d6..f56ef97ffd0 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -31,7 +31,7 @@ std::string_view StringArea(const std::string& string) { #define COMPARE(val1, val2) \ ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); return COMPARE(area, std::string_view(zeros)); @@ -44,7 +44,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string& string) { /** * Checks the area has been securely cleared after some position. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { @@ -62,9 +62,9 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, * at the right position, this will be false negative / flaky. Therefore, we check for * three consecutive secret characters before we fail. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, const std::string& secret_value) { -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) return testing::AssertionSuccess() << "Not checking deallocated memory"; #else // accessing deallocated memory will fail when running with Address Sanitizer enabled @@ -110,110 +110,87 @@ TEST(TestSecureString, AssertSecurelyCleared) { // checks only 1000 bytes (length) ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); - // check short string with zeros and non-zeros after string length - auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); - short_zeros.resize(short_zeros.capacity(), '*'); // for string buffers longer than 8 - short_zeros.resize(8); // now the string buffer is filled with '*' after the string - short_some_zeros = short_zeros; - // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control - auto short_some_zeros_view = - std::string_view(short_some_zeros.data(), short_zeros.length() + 3); - result = AssertSecurelyCleared(short_some_zeros_view); + auto no_zeros_view = std::string_view(no_zeros); + result = AssertSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\"\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\""); - // check long string with zeros and non-zeros after string length - auto zeros = std::string(32, '\0'); - auto long_some_zeros = std::string(zeros.length() + 10, '*'); - zeros.resize(zeros.capacity(), '*'); // for string buffers longer than 32 - zeros.resize(32); // now the string buffer is filled with '*' after the string - long_some_zeros = zeros; - // string buffer in long_some_zeros can be larger than zeros.length() + 10 + // check short string with zeros and non-zeros after string length + auto stars = std::string(12, '*'); + auto short_some_zeros = stars; + memset(short_some_zeros.data(), '\0', 8); + short_some_zeros.resize(8); + // string buffer in short_some_zeros can be larger than 12 // assert only the area that we can control - auto long_some_zeros_view = - std::string_view(long_some_zeros.data(), zeros.length() + 10); - result = AssertSecurelyCleared(long_some_zeros_view); + auto short_some_zeros_view = std::string_view(short_some_zeros.data(), 12); + result = AssertSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0***\"\n" " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz123"); - // string buffer in no_zeros can be larger than no_zeros.length() - // assert only the area that we can control - auto no_zeros_view = std::string_view(no_zeros); - result = AssertSecurelyCleared(no_zeros_view); + ASSERT_TRUE(AssertSecurelyCleared(short_some_zeros, stars)); +#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) + result = AssertSecurelyCleared(short_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz123\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\""); + "3 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0***"); +#endif - // check string with zeros and non-zeros after string length - auto some_zeros_front = no_zeros; - some_zeros_front = std::string(no_zeros.length() - 3, '\0'); - // string buffer in some_zeros_front can be larger than no_zeros.length() - 3 + // check long string with zeros and non-zeros after string length + stars = std::string(42, '*'); + auto long_some_zeros = stars; + memset(long_some_zeros.data(), '\0', 32); + long_some_zeros.resize(32); + // string buffer in long_some_zeros can be larger than 42 // assert only the area that we can control - auto some_zeros_fronts_view = - std::string_view(some_zeros_front.data(), no_zeros.length()); - result = AssertSecurelyCleared(some_zeros_fronts_view); + auto long_some_zeros_view = std::string_view(long_some_zeros.data(), 42); + result = AssertSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" - "\\0\\0\\0\\0\\0\\0123\"\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\""); + "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); -#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) - result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); + ASSERT_TRUE(AssertSecurelyCleared(long_some_zeros, stars)); +#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) + result = AssertSecurelyCleared(long_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), - "3 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" - "\\0123"); + "9 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********"); #endif // check string with non-zeros and zeros after string length auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); some_zeros_back = no_zeros; + memset(some_zeros_back.data() + no_zeros.length() * sizeof(char), '\0', 3 + 1); // string buffer in some_zeros_back can be larger than no_zeros.length() + 3 // assert only the area that we can control auto some_zeros_back_view = std::string_view(some_zeros_back.data(), no_zeros.length() + 3); ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); - result = AssertSecurelyCleared(some_zeros_back_view); - ASSERT_FALSE(result); - ASSERT_EQ( - std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz123\\0\\0\\0\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { @@ -281,8 +258,16 @@ TEST(TestSecureString, Assign) { // We initialize with the first string and iteratively assign the subsequent values. // The first two values are local (very short strings), the remainder are non-local // strings. Memory management of short and long strings behaves differently. - std::vector test_strings = { - "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; + std::vector test_strings = {"secret", "another secret", + std::string(128, 'x'), std::string(1024, 'y')}; + for (auto& string : test_strings) { + // string buffer might be longer than string.length with arbitrary bytes + // secure string does not have to protect that garbage bytes + // zeroing here so we get expected results + auto length = string.length(); + string.resize(string.capacity(), '\0'); + string.resize(length); + } std::vector reverse_strings = std::vector(test_strings); std::reverse(reverse_strings.begin(), reverse_strings.end()); @@ -384,9 +369,9 @@ TEST(TestSecureString, Assign) { std::string init_string_copy(init_string); SecureString secret_from_copy_secret(std::move(init_string_copy)); + // copy-assigning from a secure string does not modify that secure string + // the earlier value of the secure string is securely cleared for (const auto& string : strings) { - // copy-assigning from a secure string does not modify that secure string - // the earlier value of the secure string is securely cleared auto string_copy = std::string(string); SecureString secret_string(std::move(string_copy)); ASSERT_FALSE(string.empty()); @@ -419,7 +404,7 @@ TEST(TestSecureString, Assign) { } TEST(TestSecureString, Deconstruct) { -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) GTEST_SKIP() << "Test accesses deallocated memory"; #else // We use a very short and a very long string as memory management of short and long From 49ccae1ee7b11f2d1c564ffe15ada26705399e5a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 15:26:33 +0200 Subject: [PATCH 17/22] Improve comments --- cpp/src/arrow/util/secure_string.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index a82e6d018c4..3364c096057 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -38,7 +38,7 @@ namespace arrow::util { /// Note: -/// A string std::string is securely moved into a SecureString in two steps: +/// A std::string is securely moved into a SecureString in two steps: /// 1. the std::string is moved via std::move(string) /// 2. the std::string is securely cleared /// @@ -139,10 +139,8 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } void SecureString::SecureClear(std::string* secret) { - // in case of non-local strings (long strings), this order is vital - // first clear the string buffer + // call SecureClear first just in case secret->clear() frees some memory SecureClear(reinterpret_cast(secret->data()), secret->capacity()); - // then reset the string size (moves from the non-local to the local string buffer) secret->clear(); } From 804617cb00daa293dbf2d974e3b0d6b9dbf199ac Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 08:06:38 +0200 Subject: [PATCH 18/22] Apply code review comments - Rename secure_move to SecureMove - Move SecureMove into anonymous namespace - Move SecureClear up in source file - Rename AssertSecurelyCleared to IsSecurelyCleared - Remove std::string_view(std::string) from tests where not needed --- cpp/src/arrow/util/secure_string.cc | 116 ++++++++++++----------- cpp/src/arrow/util/secure_string_test.cc | 105 ++++++++++---------- 2 files changed, 111 insertions(+), 110 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 3364c096057..d2aaf6bccaa 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -57,24 +57,75 @@ namespace arrow::util { /// must move the pointer of long string into SecureString (which later clears the /// string). Otherwise, the content of the string cannot be securely cleared. /// -/// This condition is checked by secure_move. +/// This condition is checked by SecureMove. -void secure_move(std::string& string, std::string& dst) { +namespace { +void SecureMove(std::string& string, std::string& dst) { auto ptr = string.data(); dst = std::move(string); - // We require the buffer address string.data() to remain (not be freed), - // or reused by dst. Otherwise, we cannot securely clear string after this move + // We require the buffer address string.data() to remain (not be freed) as is, + // or to be reused by dst. Otherwise, we cannot securely clear string after std::move ARROW_CHECK(string.data() == ptr || dst.data() == ptr); } +} // namespace + +inline void SecureString::SecureClear(uint8_t* data, size_t size) { + // There is various prior art for this: + // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ + // - libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 + // - libsodium's `sodium_memzero` at + // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 + // Note: + // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html +#if defined(_WIN32) + // SecureZeroMemory is meant to not be optimized away + SecureZeroMemory(data, size); +#elif defined(__STDC_LIB_EXT1__) + // memset_s is meant to not be optimized away + memset_s(data, size, 0, size); +#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + // rely on some implementation in OpenSSL cryptographic library + OPENSSL_cleanse(data, size); +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // explicit_bzero is meant to not be optimized away + explicit_bzero(data, size); +#else + // Volatile pointer to memset function is an attempt to avoid + // that the compiler optimizes away the memset function call. + // pretty much what OPENSSL_cleanse above does + // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 + static const volatile auto memset_v = &memset; + memset_v(data, 0, size); + +# if defined(__GNUC__) || defined(__clang__) + // __asm__ only supported by GCC and Clang + // not supported by MSVC on the ARM and x64 processors + // https://en.cppreference.com/w/c/language/asm.html + // https://en.cppreference.com/w/cpp/language/asm.html + + // Additional attempt on top of volatile memset_v above + // to avoid that the compiler optimizes away the memset function call. + // Assembler code that tells the compiler 'data' has side effects. + // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: + // - "volatile": the asm produces side effects + // - "memory": effectively forms a read/write memory barrier for the compiler + __asm__ __volatile__("" /* no actual code */ + : /* no output */ + : "r"(data) /* input */ + : "memory" /* memory side effects beyond input and output */); +# endif +#endif +} SecureString::SecureString(SecureString&& other) noexcept { - secure_move(other.secret_, secret_); + SecureMove(other.secret_, secret_); other.Dispose(); } SecureString::SecureString(std::string&& secret) noexcept { - secure_move(secret, secret_); + SecureMove(secret, secret_); SecureClear(&secret); } @@ -86,7 +137,7 @@ SecureString& SecureString::operator=(SecureString&& other) noexcept { return *this; } Dispose(); - secure_move(other.secret_, secret_); + SecureMove(other.secret_, secret_); other.Dispose(); return *this; } @@ -103,7 +154,7 @@ SecureString& SecureString::operator=(const SecureString& other) { SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - secure_move(secret, secret_); + SecureMove(secret, secret_); SecureClear(&secret); return *this; } @@ -144,53 +195,4 @@ void SecureString::SecureClear(std::string* secret) { secret->clear(); } -inline void SecureString::SecureClear(uint8_t* data, size_t size) { - // There is various prior art for this: - // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ - // - libb2's `secure_zero_memory` at - // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 - // - libsodium's `sodium_memzero` at - // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 - // Note: - // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html -#if defined(_WIN32) - // SecureZeroMemory is meant to not be optimized away - SecureZeroMemory(data, size); -#elif defined(__STDC_LIB_EXT1__) - // memset_s is meant to not be optimized away - memset_s(data, size, 0, size); -#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 - // rely on some implementation in OpenSSL cryptographic library - OPENSSL_cleanse(data, size); -#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) - // explicit_bzero is meant to not be optimized away - explicit_bzero(data, size); -#else - // Volatile pointer to memset function is an attempt to avoid - // that the compiler optimizes away the memset function call. - // pretty much what OPENSSL_cleanse above does - // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 - static const volatile auto memset_v = &memset; - memset_v(data, 0, size); - -# if defined(__GNUC__) || defined(__clang__) - // __asm__ only supported by GCC and Clang - // not supported by MSVC on the ARM and x64 processors - // https://en.cppreference.com/w/c/language/asm.html - // https://en.cppreference.com/w/cpp/language/asm.html - - // Additional attempt on top of volatile memset_v above - // to avoid that the compiler optimizes away the memset function call. - // Assembler code that tells the compiler 'data' has side effects. - // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: - // - "volatile": the asm produces side effects - // - "memory": effectively forms a read/write memory barrier for the compiler - __asm__ __volatile__("" /* no actual code */ - : /* no output */ - : "r"(data) /* input */ - : "memory" /* memory side effects beyond input and output */); -# endif -#endif -} - } // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f56ef97ffd0..0863751caa5 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -31,21 +31,21 @@ std::string_view StringArea(const std::string& string) { #define COMPARE(val1, val2) \ ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); return COMPARE(area, std::string_view(zeros)); } -::testing::AssertionResult AssertSecurelyCleared(const std::string& string) { - return AssertSecurelyCleared(StringArea(string)); +::testing::AssertionResult IsSecurelyCleared(const std::string& string) { + return IsSecurelyCleared(StringArea(string)); } /** * Checks the area has been securely cleared after some position. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, - const size_t pos) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, + const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { std::string zeros(area.size() - pos, '\0'); @@ -62,8 +62,8 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, * at the right position, this will be false negative / flaky. Therefore, we check for * three consecutive secret characters before we fail. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, - const std::string& secret_value) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, + const std::string& secret_value) { #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) return testing::AssertionSuccess() << "Not checking deallocated memory"; #else @@ -97,24 +97,24 @@ TEST(TestSecureString, AssertSecurelyCleared) { short_zeros.resize(short_zeros.capacity(), '\0'); // for string buffers longer than 8 short_zeros.resize(8); // now the entire string buffer has zeros // checks the entire string buffer (capacity) - ASSERT_TRUE(AssertSecurelyCleared(short_zeros)); + ASSERT_TRUE(IsSecurelyCleared(short_zeros)); // checks only 10 bytes (length) - ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); + ASSERT_TRUE(IsSecurelyCleared(std::string_view(short_zeros))); // check long string with all zeros auto long_zeros = std::string(1000, '\0'); long_zeros.resize(long_zeros.capacity(), '\0'); // for longer string buffers long_zeros.resize(1000); // now the entire string buffer has zeros // checks the entire string buffer (capacity) - ASSERT_TRUE(AssertSecurelyCleared(long_zeros)); + ASSERT_TRUE(IsSecurelyCleared(long_zeros)); // checks only 1000 bytes (length) - ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); + ASSERT_TRUE(IsSecurelyCleared(std::string_view(long_zeros))); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control auto no_zeros_view = std::string_view(no_zeros); - result = AssertSecurelyCleared(no_zeros_view); + result = IsSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -133,7 +133,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // string buffer in short_some_zeros can be larger than 12 // assert only the area that we can control auto short_some_zeros_view = std::string_view(short_some_zeros.data(), 12); - result = AssertSecurelyCleared(short_some_zeros_view); + result = IsSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -142,9 +142,9 @@ TEST(TestSecureString, AssertSecurelyCleared) { " std::string_view(zeros)\n" " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(short_some_zeros, stars)); + ASSERT_TRUE(IsSecurelyCleared(short_some_zeros, stars)); #if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) - result = AssertSecurelyCleared(short_some_zeros_view, stars); + result = IsSecurelyCleared(short_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "3 characters of secret leaked into " @@ -159,7 +159,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // string buffer in long_some_zeros can be larger than 42 // assert only the area that we can control auto long_some_zeros_view = std::string_view(long_some_zeros.data(), 42); - result = AssertSecurelyCleared(long_some_zeros_view); + result = IsSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -172,9 +172,9 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(long_some_zeros, stars)); + ASSERT_TRUE(IsSecurelyCleared(long_some_zeros, stars)); #if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) - result = AssertSecurelyCleared(long_some_zeros_view, stars); + result = IsSecurelyCleared(long_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "9 characters of secret leaked into " @@ -190,7 +190,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // assert only the area that we can control auto some_zeros_back_view = std::string_view(some_zeros_back.data(), no_zeros.length() + 3); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); + ASSERT_TRUE(IsSecurelyCleared(some_zeros_back_view, no_zeros.length())); } TEST(TestSecureString, SecureClearString) { @@ -199,8 +199,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - ASSERT_TRUE(AssertSecurelyCleared(tiny)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(tiny)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } // long string @@ -209,8 +209,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - ASSERT_TRUE(AssertSecurelyCleared(large)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(large)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } // empty string @@ -221,8 +221,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - ASSERT_TRUE(AssertSecurelyCleared(empty)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(empty)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } } @@ -231,25 +231,24 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - ASSERT_TRUE(AssertSecurelyCleared(string)); - ASSERT_TRUE(AssertSecurelyCleared(old_string)); + ASSERT_TRUE(IsSecurelyCleared(string)); + ASSERT_TRUE(IsSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.as_view(), "hello world"); // move-constructing from a secure string securely clears that secure string auto old_secret_from_string_view = secret_from_string.as_view(); auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_view)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_view)); ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); // copy-constructing from a secure string does not modify that secure string SecureString secret_from_secret(secret_from_move_secret); ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); ASSERT_FALSE(secret_from_secret.empty()); ASSERT_EQ(secret_from_secret, secret_from_move_secret); } @@ -295,25 +294,25 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - ASSERT_TRUE(AssertSecurelyCleared(string_copy)); + ASSERT_TRUE(IsSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - ASSERT_TRUE(AssertSecurelyCleared(old_string_copy_area, string)); + ASSERT_TRUE(IsSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); ASSERT_EQ(secret_from_string.length(), string.length()); - ASSERT_EQ(secret_from_string_view, std::string_view(string)); + ASSERT_EQ(secret_from_string_view, string); if (secret_from_string_view.data() == old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, - secret_from_string.size())); + ASSERT_TRUE( + IsSecurelyCleared(old_secret_from_string_area, secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -344,22 +343,22 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - ASSERT_TRUE(AssertSecurelyCleared(old_secret_string_area, - old_secret_from_move_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); ASSERT_EQ(secret_from_move_secret.length(), string.length()); - ASSERT_EQ(secret_from_move_secret_view, std::string_view(string)); + ASSERT_EQ(secret_from_move_secret_view, string); if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size())); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, - old_secret_from_move_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value)); } } } @@ -387,16 +386,16 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(secret_from_copy_secret.empty()); ASSERT_EQ(secret_from_copy_secret.size(), string.size()); ASSERT_EQ(secret_from_copy_secret.length(), string.length()); - ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); + ASSERT_EQ(secret_from_copy_secret.as_view(), string); if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size())); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - old_secret_from_copy_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value)); } } } @@ -422,9 +421,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - ASSERT_TRUE(AssertSecurelyCleared(view, old_string_value)); + ASSERT_TRUE(IsSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - ASSERT_TRUE(AssertSecurelyCleared(string)); + ASSERT_TRUE(IsSecurelyCleared(string)); } #endif } From 210a592e80931f309227a3f72b86069e0cf28329 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 08:16:17 +0200 Subject: [PATCH 19/22] Move SecureClear(std::string*) up in source file as well --- cpp/src/arrow/util/secure_string.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index d2aaf6bccaa..bd52c55f312 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -70,6 +70,12 @@ void SecureMove(std::string& string, std::string& dst) { } } // namespace +void SecureString::SecureClear(std::string* secret) { + // call SecureClear first just in case secret->clear() frees some memory + SecureClear(reinterpret_cast(secret->data()), secret->capacity()); + secret->clear(); +} + inline void SecureString::SecureClear(uint8_t* data, size_t size) { // There is various prior art for this: // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ @@ -189,10 +195,4 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } -void SecureString::SecureClear(std::string* secret) { - // call SecureClear first just in case secret->clear() frees some memory - SecureClear(reinterpret_cast(secret->data()), secret->capacity()); - secret->clear(); -} - } // namespace arrow::util From 99cd8c3de38d59a62ead82f25e584af302f3276c Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 9 Jun 2025 10:17:26 +0200 Subject: [PATCH 20/22] Undefine macro after use --- cpp/src/arrow/util/secure_string_test.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 0863751caa5..5842e07a9de 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -87,6 +87,8 @@ ::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, #endif } +#undef COMPARE + TEST(TestSecureString, AssertSecurelyCleared) { // This tests AssertSecurelyCleared helper methods is actually able to identify secret // leakage. It retrieves assertion results and asserts result type and message. From e32b68d729c41bc40fb444e3bcfee951e4f8489e Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 9 Jun 2025 10:44:49 +0200 Subject: [PATCH 21/22] Fix and enhance Construct test --- cpp/src/arrow/util/secure_string_test.cc | 59 ++++++++++++++---------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 5842e07a9de..48c86b98129 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -229,30 +229,41 @@ TEST(TestSecureString, SecureClearString) { } TEST(TestSecureString, Construct) { - // move-constructing from a string securely clears that string - std::string string("hello world"); - auto old_string = StringArea(string); - SecureString secret_from_string(std::move(string)); - ASSERT_TRUE(IsSecurelyCleared(string)); - ASSERT_TRUE(IsSecurelyCleared(old_string)); - ASSERT_FALSE(secret_from_string.empty()); - ASSERT_EQ(secret_from_string.as_view(), "hello world"); - - // move-constructing from a secure string securely clears that secure string - auto old_secret_from_string_view = secret_from_string.as_view(); - auto old_secret_from_string_value = std::string(secret_from_string.as_view()); - SecureString secret_from_move_secret(std::move(secret_from_string)); - ASSERT_TRUE(secret_from_string.empty()); - ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_view)); - ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); - - // copy-constructing from a secure string does not modify that secure string - SecureString secret_from_secret(secret_from_move_secret); - ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); - ASSERT_FALSE(secret_from_secret.empty()); - ASSERT_EQ(secret_from_secret, secret_from_move_secret); + // We use a very short and a very long string as memory management of short and long + // strings behaves differently. + std::vector strings = {"short secret", std::string(1024, 'x')}; + + for (const auto& original_string : strings) { + // move-constructing from a string either reuses its buffer or securely clears + // that string + std::string string = original_string; + auto old_string = StringArea(string); + SecureString secret_from_string(std::move(string)); + ASSERT_TRUE(IsSecurelyCleared(string)); + if (secret_from_string.as_view().data() != old_string.data()) { + ASSERT_TRUE(IsSecurelyCleared(old_string)); + } + ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.as_view(), original_string); + + // move-constructing from a secure string securely clears that secure string + auto old_secret_from_string_view = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + SecureString secret_from_move_secret(std::move(secret_from_string)); + ASSERT_TRUE(secret_from_string.empty()); + if (secret_from_move_secret.as_view().data() != old_secret_from_string_view.data()) { + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_view)); + } + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); + + // copy-constructing from a secure string does not modify that secure string + SecureString secret_from_secret(secret_from_move_secret); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); + } } TEST(TestSecureString, Assign) { From 408572cdfb2aca96cb43efcee21781674e46be84 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 9 Jun 2025 10:48:50 +0200 Subject: [PATCH 22/22] Also skip deallocated area tests on Thread Sanitizer --- cpp/src/arrow/util/secure_string_test.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 48c86b98129..213a4b11f20 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -23,6 +23,12 @@ namespace arrow::util::test { +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) +# define CAN_TEST_DEALLOCATED_AREAS 0 +#else +# define CAN_TEST_DEALLOCATED_AREAS 1 +#endif + std::string_view StringArea(const std::string& string) { return {string.data(), string.capacity()}; } @@ -64,7 +70,7 @@ ::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, */ ::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, const std::string& secret_value) { -#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) +#if !CAN_TEST_DEALLOCATED_AREAS return testing::AssertionSuccess() << "Not checking deallocated memory"; #else // accessing deallocated memory will fail when running with Address Sanitizer enabled @@ -145,7 +151,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); ASSERT_TRUE(IsSecurelyCleared(short_some_zeros, stars)); -#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) +#if CAN_TEST_DEALLOCATED_AREAS result = IsSecurelyCleared(short_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), @@ -175,7 +181,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); ASSERT_TRUE(IsSecurelyCleared(long_some_zeros, stars)); -#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) +#if CAN_TEST_DEALLOCATED_AREAS result = IsSecurelyCleared(long_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), @@ -416,7 +422,7 @@ TEST(TestSecureString, Assign) { } TEST(TestSecureString, Deconstruct) { -#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) +#if !CAN_TEST_DEALLOCATED_AREAS GTEST_SKIP() << "Test accesses deallocated memory"; #else // We use a very short and a very long string as memory management of short and long @@ -487,4 +493,6 @@ TEST(TestSecureString, AsView) { ASSERT_EQ(view, "hello world"); } +#undef CAN_TEST_DEALLOCATED_AREAS + } // namespace arrow::util::test