Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
fa3c780
Add SecureString implementation to arrow/util/
EnricoMi May 28, 2025
96fe8e3
Fix import for memset_s, improve for loops in tests
EnricoMi Jun 3, 2025
fd5d43d
Address code review comments
EnricoMi Jun 3, 2025
7ece2b0
Test secure SecureString deconstruction
EnricoMi Jun 3, 2025
15aa6de
Test correctness of AssertSecurelyCleared
EnricoMi Jun 3, 2025
89e3f00
Rename SecureString argument to other
EnricoMi Jun 4, 2025
cb5c9de
Move std::move into secure_move, assert string ptr
EnricoMi Jun 4, 2025
3722395
Add comments, fix linting
EnricoMi Jun 4, 2025
753bfe7
Improve assertions
EnricoMi Jun 4, 2025
fb01ecb
Use testing::AssertionResult rather than capturing assertions through…
EnricoMi Jun 4, 2025
0918e19
Expect string buffers larger than requested size
EnricoMi Jun 4, 2025
489a532
Handle string buffers larger than init size
EnricoMi Jun 4, 2025
bf46015
Don't access deallocated memory in ASAN / Valgrind mode
EnricoMi Jun 4, 2025
aeb6637
Fix SecureClear for non-local strings, stabalize mem assertions
EnricoMi Jun 4, 2025
66e0def
Avoid assigning short string to long string in test
EnricoMi Jun 5, 2025
7f529d1
Fix memory issues in tests
EnricoMi Jun 5, 2025
49ccae1
Improve comments
EnricoMi Jun 5, 2025
804617c
Apply code review comments
EnricoMi Jun 6, 2025
210a592
Move SecureClear(std::string*) up in source file as well
EnricoMi Jun 6, 2025
99cd8c3
Undefine macro after use
pitrou Jun 9, 2025
e32b68d
Fix and enhance Construct test
pitrou Jun 9, 2025
408572c
Also skip deallocated area tests on Thread Sanitizer
pitrou Jun 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ set(ARROW_UTIL_SRCS
util/memory.cc
util/mutex.cc
util/ree_util.cc
util/secure_string.cc
util/string.cc
util/string_builder.cc
util/task_group.cc
Expand Down Expand Up @@ -574,6 +575,11 @@ if(ARROW_USE_GLOG)
target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog)
endforeach()
endif()
# When OpenSSL support is enabled, link the OpenSSL libraries privately into
# every Arrow util target.
# NOTE(review): presumably required because util/secure_string.cc may call
# OPENSSL_cleanse when built with OpenSSL -- confirm.
if(ARROW_USE_OPENSSL)
foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENSSL_LIBS})
endforeach()
endif()
if(ARROW_USE_XSIMD)
foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD})
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ add_arrow_test(utility-test
ree_util_test.cc
reflection_test.cc
rows_to_batches_test.cc
secure_string_test.cc
small_vector_test.cc
span_test.cc
stl_util_test.cc
Expand Down
198 changes: 198 additions & 0 deletions cpp/src/arrow/util/secure_string.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// __STDC_WANT_LIB_EXT1__ and string.h are required by memset_s:
// https://en.cppreference.com/w/c/string/byte/memset
#define __STDC_WANT_LIB_EXT1__ 1
#include <string.h>
#include <utility>

#if defined(ARROW_USE_OPENSSL)
# include <openssl/crypto.h>
# include <openssl/opensslv.h>
#endif

#include "arrow/util/windows_compatibility.h"
#if defined(_WIN32)
# include <windows.h>
#endif

#include "arrow/util/logging.h"
#include "arrow/util/secure_string.h"
#include "arrow/util/span.h"

namespace arrow::util {

/// Note:
/// A std::string is securely moved into a SecureString in two steps:
/// 1. the std::string is moved via std::move(string)
/// 2. the std::string is securely cleared
///
/// The std::move has two different effects, depending on the size of the string.
/// A very short string (called local string) stores the string in a local buffer,
/// a long string stores a pointer to allocated memory that stores the string.
///
/// If the string is a local (short) string, std::move copies the local buffer.
/// If the string is a long string, std::move moves the pointer and then resets the
/// size of the moved-from string to 0 (which turns it into a local string).
///
/// In both cases, after a std::move(string), the string uses the local buffer.
///
/// Thus, after a std::move(string), calling SecureClear(std::string*) only
/// securely clears the **local buffer** of the string. Therefore, std::move(string)
/// must move the pointer of a long string into the SecureString (which later clears
/// the string). Otherwise, the content of the string cannot be securely cleared.
///
/// This condition is checked by SecureMove.

namespace {
/// Move-assign `string` into `dst` and verify that the character buffer used by
/// `string` before the move is still reachable afterwards, either through
/// `string` itself or through `dst`.
///
/// Callers wipe the moved-from string right after this call; that is only
/// effective if the secret bytes did not end up in a buffer that neither of the
/// two strings refers to anymore.
void SecureMove(std::string& string, std::string& dst) {
  // Remember where the characters lived before the move.
  const char* const ptr = string.data();

  dst = std::move(string);

  // The original buffer must either have stayed with `string` (as is, not freed)
  // or have been taken over by `dst`. In any other case the secret could not be
  // securely cleared after the std::move.
  ARROW_CHECK(string.data() == ptr || dst.data() == ptr);
}
}  // namespace

void SecureString::SecureClear(std::string* secret) {
// call SecureClear first just in case secret->clear() frees some memory
SecureClear(reinterpret_cast<uint8_t*>(secret->data()), secret->capacity());
secret->clear();
}

/// Overwrite `size` bytes starting at `data` with zeros, using a primitive that
/// is meant not to be optimized away by the compiler.
///
/// The primitive is selected at compile time, in this order of preference:
///  1. SecureZeroMemory on Windows,
///  2. memset_s when the C library advertises __STDC_LIB_EXT1__,
///  3. OPENSSL_cleanse when the OpenSSL headers are included and report a
///     version number >= 0x30000000,
///  4. explicit_bzero on glibc >= 2.25,
///  5. otherwise memset called through a volatile function pointer, followed by
///     a compiler memory barrier on GCC / Clang.
///
/// Note: this definition is intentionally NOT marked `inline`. The function is
/// declared without `inline` as a public static member of SecureString, so other
/// translation units may call it and need a regular out-of-line definition to
/// link against. An `inline` definition that is only visible in this source file
/// does not guarantee that.
///
/// \param data start of the memory region to wipe
/// \param size number of bytes to wipe
void SecureString::SecureClear(uint8_t* data, size_t size) {
  // There is various prior art for this:
  // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/
  // - libb2's `secure_zero_memory` at
  //   https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160
  // - libsodium's `sodium_memzero` at
  //   https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101
  // Note:
  //   https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html
#if defined(_WIN32)
  // SecureZeroMemory is meant to not be optimized away
  SecureZeroMemory(data, size);
#elif defined(__STDC_LIB_EXT1__)
  // memset_s is meant to not be optimized away
  memset_s(data, size, 0, size);
#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000
  // rely on some implementation in OpenSSL cryptographic library
  OPENSSL_cleanse(data, size);
#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))
  // explicit_bzero is meant to not be optimized away
  explicit_bzero(data, size);
#else
  // Volatile pointer to memset function is an attempt to avoid
  // that the compiler optimizes away the memset function call.
  // pretty much what OPENSSL_cleanse above does
  // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22
  static const volatile auto memset_v = &memset;
  memset_v(data, 0, size);

#  if defined(__GNUC__) || defined(__clang__)
  // __asm__ only supported by GCC and Clang
  // not supported by MSVC on the ARM and x64 processors
  // https://en.cppreference.com/w/c/language/asm.html
  // https://en.cppreference.com/w/cpp/language/asm.html

  // Additional attempt on top of volatile memset_v above
  // to avoid that the compiler optimizes away the memset function call.
  // Assembler code that tells the compiler 'data' has side effects.
  // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html:
  // - "volatile": the asm produces side effects
  // - "memory": effectively forms a read/write memory barrier for the compiler
  __asm__ __volatile__("" /* no actual code */
                       : /* no output */
                       : "r"(data) /* input */
                       : "memory" /* memory side effects beyond input and output */);
#  endif
#endif
}

/// Move constructor: take over the secret held by `other` and securely wipe
/// whatever remains in `other` afterwards.
SecureString::SecureString(SecureString&& other) noexcept {
  // secret_ starts out default-constructed; SecureMove move-assigns into it and
  // checks that the original buffer is still reachable.
  SecureMove(other.secret_, secret_);
  // Same effect as other.Dispose(): wipe the moved-from string.
  SecureClear(&other.secret_);
}

/// Construct from a plain std::string by taking over its content.
///
/// The argument is securely wiped once its content has been moved, so the
/// caller's string does not keep a copy of the secret.
SecureString::SecureString(std::string&& secret) noexcept {
  // Order matters: move first (SecureMove verifies the buffer is still
  // reachable), then wipe what is left in the moved-from argument.
  SecureMove(secret, secret_);
  SecureString::SecureClear(&secret);
}

/// Construct a secret consisting of `n` copies of the character `c`.
///
/// NOTE(review): the constructor is noexcept, but building a std::string of `n`
/// characters may need to allocate; an allocation failure would therefore
/// terminate the program -- confirm this is intended.
SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {}

/// Move assignment: wipe the current secret, take over the secret of `other`,
/// then wipe what is left in `other`. Self-assignment is a no-op.
SecureString& SecureString::operator=(SecureString&& other) noexcept {
  if (this != &other) {
    // Wipe our own secret before its buffer is replaced or reused.
    Dispose();
    SecureMove(other.secret_, secret_);
    // Nothing of the secret may linger in the moved-from object.
    other.Dispose();
  }
  return *this;
}

/// Copy assignment: wipe the current secret and replace it with a copy of the
/// secret of `other`. Self-assignment is a no-op.
SecureString& SecureString::operator=(const SecureString& other) {
  if (this != &other) {
    // Wipe the old content first, so it is already cleared should the copy
    // below replace the underlying buffer.
    Dispose();
    secret_ = other.secret_;
  }
  return *this;
}

/// Assign from a plain std::string by taking over its content.
///
/// The current secret is wiped first; after the move, the argument is securely
/// wiped as well.
SecureString& SecureString::operator=(std::string&& secret) noexcept {
  // Wipe our own secret before its buffer is replaced or reused.
  Dispose();

  // Order matters: move first, then wipe the moved-from argument.
  SecureMove(secret, secret_);
  SecureString::SecureClear(&secret);

  return *this;
}

/// Two SecureStrings are equal when their secrets hold the same characters.
///
/// NOTE(review): this is a plain std::string comparison, which is not
/// guaranteed to run in constant time -- confirm that is acceptable wherever
/// secrets are compared.
bool SecureString::operator==(const SecureString& other) const {
  return secret_.compare(other.secret_) == 0;
}

/// Negation of equality: true when the two secrets differ.
bool SecureString::operator!=(const SecureString& other) const {
  return !(secret_ == other.secret_);
}

/// Whether the secret holds no characters.
bool SecureString::empty() const {
  return secret_.size() == 0;
}

/// Number of characters in the secret.
std::size_t SecureString::size() const {
  return secret_.length();
}

/// Number of characters in the secret (same value as size()).
std::size_t SecureString::length() const {
  return secret_.size();
}

/// Capacity of the underlying std::string, i.e. the number of bytes that get
/// wiped when the secret is disposed.
std::size_t SecureString::capacity() const {
  const std::size_t allocated = secret_.capacity();
  return allocated;
}

/// Mutable, non-owning byte view over the size() characters of the secret.
span<uint8_t> SecureString::as_span() {
  auto* const bytes = reinterpret_cast<uint8_t*>(secret_.data());
  const auto num_bytes = secret_.size();
  return {bytes, num_bytes};
}

/// Read-only, non-owning byte view over the size() characters of the secret.
span<const uint8_t> SecureString::as_span() const {
  const auto* const bytes = reinterpret_cast<const uint8_t*>(secret_.data());
  const auto num_bytes = secret_.size();
  return {bytes, num_bytes};
}

/// Read-only, non-owning character view over the secret.
std::string_view SecureString::as_view() const {
  return std::string_view(secret_.data(), secret_.size());
}

/// Securely wipe the wrapped secret and leave this SecureString empty.
void SecureString::Dispose() {
  SecureString::SecureClear(&secret_);
}

} // namespace arrow::util
72 changes: 72 additions & 0 deletions cpp/src/arrow/util/secure_string.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>

#include "arrow/util/span.h"
#include "arrow/util/visibility.h"

namespace arrow::util {
/**
 * A secure string that ensures the wrapped string is cleared from memory on
 * destruction. This class can only be created from std::string objects that are
 * securely erased after creation.
 *
 * Note: This class does not provide a constructor / assignment operator that copies a
 * std::string because that would allow code to create a SecureString while accidentally
 * not noticing the need to securely erase the argument after invoking the constructor /
 * calling the assignment operator.
 */
class ARROW_EXPORT SecureString {
 public:
  /// Create an empty SecureString.
  SecureString() noexcept = default;
  /// Take over the secret of `other`; `other` is securely cleared afterwards.
  SecureString(SecureString&& other) noexcept;
  /// Create a copy of the secret of `other`.
  SecureString(const SecureString& other) = default;
  /// Take over the content of `secret`; `secret` is securely cleared afterwards.
  explicit SecureString(std::string&& secret) noexcept;
  /// Create a secret consisting of `n` copies of the character `c`.
  explicit SecureString(std::size_t n, char c) noexcept;

  /// Securely clear the current secret, take over the secret of `other` and
  /// securely clear `other`.
  SecureString& operator=(SecureString&& other) noexcept;
  /// Securely clear the current secret and replace it with a copy of the secret
  /// of `other`.
  SecureString& operator=(const SecureString& other);
  /// Securely clear the current secret, take over the content of `secret` and
  /// securely clear `secret`.
  SecureString& operator=(std::string&& secret) noexcept;

  /// Compare the wrapped secrets for equality.
  bool operator==(const SecureString& other) const;
  /// Compare the wrapped secrets for inequality.
  bool operator!=(const SecureString& other) const;

  /// Securely clear the wrapped secret before the object goes away.
  ~SecureString() { Dispose(); }

  /// Whether the secret holds no characters.
  [[nodiscard]] bool empty() const;
  /// Number of characters in the secret.
  [[nodiscard]] std::size_t size() const;
  /// Number of characters in the secret (same as size()).
  [[nodiscard]] std::size_t length() const;
  /// Capacity of the underlying string buffer.
  [[nodiscard]] std::size_t capacity() const;

  /// Mutable, non-owning byte view over the secret.
  [[nodiscard]] span<uint8_t> as_span();
  /// Read-only, non-owning byte view over the secret.
  [[nodiscard]] span<const uint8_t> as_span() const;
  /// Read-only, non-owning character view over the secret.
  [[nodiscard]] std::string_view as_view() const;

  /// Securely clear the wrapped secret; the SecureString is empty afterwards.
  void Dispose();

  /// Securely overwrite the whole buffer (capacity) of `secret`, then clear it.
  static void SecureClear(std::string* secret);
  /// Securely overwrite `size` bytes starting at `data` with zeros.
  static void SecureClear(uint8_t* data, std::size_t size);

 private:
  /// The wrapped secret.
  std::string secret_;
};

} // namespace arrow::util
Loading
Loading