Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
5f73a1f
Wrap encryption keys in SecureString
EnricoMi Mar 31, 2025
0456be4
Rename non-trivial SecureString methods
EnricoMi Apr 4, 2025
689bc8d
Add mutable as_span method, add as_view
EnricoMi Apr 4, 2025
42d7319
More tests
EnricoMi Apr 4, 2025
01ab190
Change SecureClear(std::string&) to SecureClear(std::string*)
EnricoMi Apr 4, 2025
f3f4087
Decrypt key directly intoSecureString
EnricoMi Apr 7, 2025
f90f196
Remove const from KeyWithMasterId members
EnricoMi Apr 7, 2025
0e74be5
Replace call-by-ref with call-by-value when copied
EnricoMi Apr 10, 2025
29bc5e9
Use const for constants and construct SecureString from consts in tests
EnricoMi Apr 10, 2025
75c1f0a
Inline IntegerKeyIdRetriever::GetKey(std::string) implementation
EnricoMi Apr 11, 2025
d3670d4
Add comment to `no_key`
EnricoMi Apr 11, 2025
adbbfc8
Revert `InternalFileDecryptor::RetrieveColumnKeyIfEmpty`
EnricoMi Apr 11, 2025
fe79da0
Remove `noexcept`
EnricoMi Apr 11, 2025
6f5ef6c
Merge remote-tracking branch 'origin/main' into secure-string
EnricoMi May 26, 2025
db70c19
More secure cleared assertions on construction and assignment
EnricoMi May 26, 2025
d6f9ea9
Improve SecureString assignment tests
EnricoMi May 27, 2025
510349c
More context on SecureClear code
EnricoMi May 28, 2025
92a7980
Add SecureString implementation to arrow/util/
EnricoMi May 28, 2025
ec3c7c6
Merge branch 'main' into secure-string
EnricoMi May 28, 2025
d69f354
Merge branch 'secure-string-util' into secure-string
EnricoMi May 28, 2025
219d207
Move to arrow::util::SecureString
EnricoMi May 28, 2025
20d67b3
Fix import for memset_s, improve for loops in tests
EnricoMi Jun 3, 2025
15f94c6
Address code review comments
EnricoMi Jun 3, 2025
2a2ae81
Merge branch 'secure-string-util' into secure-string
EnricoMi Jun 3, 2025
973b233
Test secure SecureString deconstruction
EnricoMi Jun 3, 2025
9c88744
Test correctness of AssertSecurelyCleared
EnricoMi Jun 3, 2025
f3562f8
Rename SecureString argument to other
EnricoMi Jun 4, 2025
9ee3e2c
Move std::move into secure_move, assert string ptr
EnricoMi Jun 4, 2025
77e4e20
Add comments, fix linting
EnricoMi Jun 4, 2025
1f42383
Improve assertions
EnricoMi Jun 4, 2025
8d9c4f9
Use testing::AssertionResult rather than capturing assertions through…
EnricoMi Jun 4, 2025
4297f0d
Expect string buffers larger than requested size
EnricoMi Jun 4, 2025
064dfe7
Handle string buffers larger than init size
EnricoMi Jun 4, 2025
d4faa4f
Don't access deallocated memory in ASAN / Valgrind mode
EnricoMi Jun 4, 2025
267626c
Fix SecureClear for non-local strings, stabalize mem assertions
EnricoMi Jun 4, 2025
6995d36
Avoid assigning short string to long string in test
EnricoMi Jun 5, 2025
03b1fef
Fix memory issues in tests
EnricoMi Jun 5, 2025
e7470cd
Improve comments
EnricoMi Jun 5, 2025
8639235
Apply code review comments
EnricoMi Jun 6, 2025
7f827ed
Merge remote-tracking branch 'origin/main' into secure-string-util
EnricoMi Jun 6, 2025
0ec848c
Move SecureClear(std::string*) up in source file as well
EnricoMi Jun 6, 2025
4336801
Merge branch 'secure-string-util' into secure-string
EnricoMi Jun 6, 2025
e11d223
Add back std::string methods as deprecated
EnricoMi Jun 6, 2025
ee284be
Merge remote-tracking branch 'origin/main' into secure-string
EnricoMi Jun 9, 2025
efd7cb9
Merge branch 'main' into secure-string
EnricoMi Jul 2, 2025
fb41244
Fix merge
EnricoMi Jul 2, 2025
fa9abed
Use const in favour of inline strings
EnricoMi Jul 7, 2025
290449a
Remove deprecations to move users to more secure methods
EnricoMi Jul 7, 2025
184afcf
Move no_key_ into encryption.cc
EnricoMi Jul 9, 2025
edd97b8
Rename no_key_
EnricoMi Jul 9, 2025
0a31f50
Revert "Remove deprecations to move users to more secure methods"
EnricoMi Jul 10, 2025
1987cda
Merge branch 'main' into secure-string
pitrou Jul 15, 2025
04b15e2
Update deprecation notices
pitrou Jul 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cpp/examples/arrow/parquet_column_encryption.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "arrow/dataset/file_parquet.h"
#include "arrow/dataset/parquet_encryption_config.h"
#include "arrow/filesystem/localfs.h"
#include "arrow/util/secure_string.h"
#include "parquet/encryption/crypto_factory.h"
#include "parquet/encryption/test_in_memory_kms.h"

Expand Down Expand Up @@ -106,9 +107,9 @@ arrow::Result<std::shared_ptr<arrow::Table>> GetTable() {

std::shared_ptr<parquet::encryption::CryptoFactory> GetCryptoFactory() {
// Configure KMS.
std::unordered_map<std::string, std::string> key_map;
key_map.emplace("footerKeyId", "0123456789012345");
key_map.emplace("columnKeyId", "1234567890123456");
std::unordered_map<std::string, arrow::util::SecureString> key_map;
key_map.emplace("footerKeyId", arrow::util::SecureString("0123456789012345"));
key_map.emplace("columnKeyId", arrow::util::SecureString("1234567890123456"));

auto crypto_factory = std::make_shared<parquet::encryption::CryptoFactory>();
auto kms_client_factory =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include <arrow/util/secure_string.h>
#include <reader_writer.h>

#include <cassert>
Expand All @@ -39,9 +40,9 @@

constexpr int NUM_ROWS_PER_ROW_GROUP = 500;
const char* PARQUET_FILENAME = "parquet_cpp_example.parquet.encrypted";
const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16
const char* kColumnEncryptionKey1 = "1234567890123450";
const char* kColumnEncryptionKey2 = "1234567890123451";
const arrow::util::SecureString kFooterEncryptionKey("0123456789012345");
const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450");
const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451");

int main(int argc, char** argv) {
/**********************************************************************************
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <arrow/io/file.h>
#include <arrow/util/logging.h>
#include <arrow/util/secure_string.h>
#include <dirent.h>
#include <parquet/api/reader.h>
#include <parquet/api/writer.h>
Expand Down Expand Up @@ -92,9 +93,9 @@

constexpr int NUM_ROWS_PER_ROW_GROUP = 500;

const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16
const char* kColumnEncryptionKey1 = "1234567890123450";
const char* kColumnEncryptionKey2 = "1234567890123451";
const arrow::util::SecureString kFooterEncryptionKey("0123456789012345");
const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450");
const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451");
const char* fileName = "tester";

using FileClass = ::arrow::io::FileOutputStream;
Expand Down
17 changes: 11 additions & 6 deletions cpp/src/arrow/dataset/file_parquet_encryption_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,24 @@
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/future.h"
#include "arrow/util/secure_string.h"
#include "arrow/util/thread_pool.h"
#include "parquet/arrow/reader.h"
#include "parquet/encryption/crypto_factory.h"
#include "parquet/encryption/encryption_internal.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/test_in_memory_kms.h"

constexpr std::string_view kFooterKeyMasterKey = "0123456789012345";
using arrow::util::SecureString;

const SecureString kFooterKeyMasterKey("0123456789012345");
constexpr std::string_view kFooterKeyMasterKeyId = "footer_key";
constexpr std::string_view kFooterKeyName = "footer_key";
constexpr std::string_view kColumnMasterKey = "1234567890123450";

const SecureString kColumnMasterKey("1234567890123450");
constexpr std::string_view kColumnMasterKeyId = "col_key";
constexpr std::string_view kColumnKeyMapping = "col_key: a";

constexpr std::string_view kBaseDir = "";

using arrow::internal::checked_pointer_cast;
Expand Down Expand Up @@ -105,7 +110,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam<EncryptionTestPa
ASSERT_OK_AND_ASSIGN(expected_table_, SortTable(expected_table_));

// Prepare encryption properties.
std::unordered_map<std::string, std::string> key_map;
std::unordered_map<std::string, SecureString> key_map;
key_map.emplace(kColumnMasterKeyId, kColumnMasterKey);
key_map.emplace(kFooterKeyMasterKeyId, kFooterKeyMasterKey);

Expand Down Expand Up @@ -145,7 +150,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam<EncryptionTestPa
ASSERT_TRUE(GetParam().uniform_encryption);
auto file_encryption_properties =
std::make_unique<parquet::FileEncryptionProperties::Builder>(
std::string(kFooterKeyMasterKey))
kFooterKeyMasterKey)
->build();
auto writer_properties = std::make_unique<parquet::WriterProperties::Builder>()
->encryption(file_encryption_properties)
Expand Down Expand Up @@ -230,7 +235,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam<EncryptionTestPa
// Configure decryption keys via reader properties / file decryption properties.
auto file_decryption_properties =
std::make_unique<parquet::FileDecryptionProperties::Builder>()
->footer_key(std::string(kFooterKeyMasterKey))
->footer_key(kFooterKeyMasterKey)
->build();
parquet_scan_options->reader_properties->file_decryption_properties(
file_decryption_properties);
Expand Down Expand Up @@ -370,7 +375,7 @@ TEST_P(DatasetEncryptionTest, ReadSingleFile) {
// Configure decryption keys via file decryption properties with static footer key.
file_decryption_properties =
std::make_unique<parquet::FileDecryptionProperties::Builder>()
->footer_key(std::string(kFooterKeyMasterKey))
->footer_key(kFooterKeyMasterKey)
->build();
}
auto reader_properties = parquet::default_reader_properties();
Expand Down
12 changes: 8 additions & 4 deletions cpp/src/parquet/encryption/crypto_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <string_view>

#include "arrow/util/logging.h"
#include "arrow/util/secure_string.h"
#include "arrow/util/string.h"

#include "parquet/encryption/crypto_factory.h"
Expand All @@ -26,6 +27,8 @@
#include "parquet/encryption/file_system_key_material_store.h"
#include "parquet/encryption/key_toolkit_internal.h"

using arrow::util::SecureString;

namespace parquet::encryption {

void CryptoFactory::RegisterKmsClientFactory(
Expand Down Expand Up @@ -71,8 +74,8 @@ std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProper

int dek_length = dek_length_bits / 8;

std::string footer_key(dek_length, '\0');
RandBytes(reinterpret_cast<uint8_t*>(footer_key.data()), footer_key.size());
SecureString footer_key(dek_length, '\0');
RandBytes(footer_key.as_span().data(), footer_key.size());

std::string footer_key_metadata =
key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true);
Expand Down Expand Up @@ -146,8 +149,9 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties
column_name);
}

std::string column_key(dek_length, '\0');
RandBytes(reinterpret_cast<uint8_t*>(column_key.data()), column_key.size());
SecureString column_key(dek_length, '\0');
RandBytes(column_key.as_span().data(), column_key.size());

std::string column_key_key_metadata =
key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false);

Expand Down
116 changes: 60 additions & 56 deletions cpp/src/parquet/encryption/encryption.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,35 @@
#include "arrow/util/utf8.h"
#include "parquet/encryption/encryption_internal.h"

namespace parquet {
using ::arrow::util::SecureString;

// integer key retriever
void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const std::string& key) {
key_map_.insert({key_id, key});
}
namespace parquet {

std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) {
uint32_t key_id;
memcpy(reinterpret_cast<uint8_t*>(&key_id), key_metadata.c_str(), 4);
// any empty SecureString key is interpreted as if no key is given
// this instance is used when a SecureString reference is returned
static SecureString kNoKey = SecureString();

return key_map_.at(key_id);
// integer key retriever
void IntegerKeyIdRetriever::PutKey(uint32_t key_id, SecureString key) {
key_map_.insert({key_id, std::move(key)});
}

// string key retriever
void StringKeyIdRetriever::PutKey(const std::string& key_id, const std::string& key) {
key_map_.insert({key_id, key});
void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) {
key_map_.insert({std::move(key_id), std::move(key)});
}

std::string StringKeyIdRetriever::GetKey(const std::string& key_id) {
SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) {
return key_map_.at(key_id);
}

ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key(
std::string column_key) {
return key(SecureString(std::move(column_key)));
}

ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key(
SecureString column_key) {
if (column_key.empty()) return this;

DCHECK(key_.empty());
Expand Down Expand Up @@ -92,6 +96,11 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key

FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key(
std::string footer_key) {
return this->footer_key(SecureString(std::move(footer_key)));
}

FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key(
SecureString footer_key) {
if (footer_key.empty()) {
return this;
}
Expand Down Expand Up @@ -129,10 +138,10 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix
}

ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key(
std::string key) {
SecureString key) {
if (key.empty()) return this;

DCHECK(!key.empty());
DCHECK(key_.empty());
Copy link
Collaborator Author

@EnricoMi EnricoMi Apr 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looked suspicious, all other setter methods check if the member is unset.
Here, the check is always true as it checks the input.

This is a breaking change if user code calls this setter twice with non-empty keys.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you've corrected a bug; the intention with these setter methods seems to be one-time setting

key_ = std::move(key);
return this;
}
Expand Down Expand Up @@ -182,74 +191,69 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() {

ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted,
std::string column_path,
std::string key,
std::string key_metadata) {
DCHECK(!column_path.empty());
column_path_ = std::move(column_path);
SecureString key,
std::string key_metadata)
: column_path_(std::move(column_path)),
encrypted_(encrypted),
encrypted_with_footer_key_(encrypted && key.empty()),
key_(std::move(key)),
key_metadata_(std::move(key_metadata)) {
DCHECK(!column_path_.empty());
if (!encrypted) {
DCHECK(key.empty() && key_metadata.empty());
DCHECK(key_.empty() && key_metadata_.empty());
}

if (!key.empty()) {
DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
if (!key_.empty()) {
DCHECK(key_.length() == 16 || key_.length() == 24 || key_.length() == 32);
}

encrypted_with_footer_key_ = (encrypted && key.empty());
if (encrypted_with_footer_key_) {
DCHECK(key_metadata.empty());
DCHECK(key_metadata_.empty());
}

encrypted_ = encrypted;
key_metadata_ = std::move(key_metadata);
key_ = std::move(key);
}

ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path,
std::string key) {
DCHECK(!column_path.empty());
column_path_ = std::move(column_path);
SecureString key)
: column_path_(std::move(column_path)), key_(std::move(key)) {
DCHECK(!column_path_.empty());

if (!key.empty()) {
DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32);
if (!key_.empty()) {
DCHECK(key_.length() == 16 || key_.length() == 24 || key_.length() == 32);
}

key_ = std::move(key);
}

std::string FileDecryptionProperties::column_key(const std::string& column_path) const {
const SecureString& FileDecryptionProperties::column_key(
const std::string& column_path) const {
if (column_decryption_properties_.find(column_path) !=
column_decryption_properties_.end()) {
auto column_prop = column_decryption_properties_.at(column_path);
if (column_prop != nullptr) {
return column_prop->key();
}
}
return {};
return kNoKey;
}

FileDecryptionProperties::FileDecryptionProperties(
std::string footer_key, std::shared_ptr<DecryptionKeyRetriever> key_retriever,
SecureString footer_key, std::shared_ptr<DecryptionKeyRetriever> key_retriever,
bool check_plaintext_footer_integrity, std::string aad_prefix,
std::shared_ptr<AADPrefixVerifier> aad_prefix_verifier,
ColumnPathToDecryptionPropertiesMap column_decryption_properties,
bool plaintext_files_allowed) {
DCHECK(!footer_key.empty() || nullptr != key_retriever ||
0 != column_decryption_properties.size());

if (!footer_key.empty()) {
DCHECK(footer_key.length() == 16 || footer_key.length() == 24 ||
footer_key.length() == 32);
bool plaintext_files_allowed)
: footer_key_(std::move(footer_key)),
aad_prefix_(std::move(aad_prefix)),
aad_prefix_verifier_(std::move(aad_prefix_verifier)),
column_decryption_properties_(std::move(column_decryption_properties)),
key_retriever_(std::move(key_retriever)),
check_plaintext_footer_integrity_(check_plaintext_footer_integrity),
plaintext_files_allowed_(plaintext_files_allowed) {
DCHECK(!footer_key_.empty() || nullptr != key_retriever_ ||
0 != column_decryption_properties_.size());
if (!footer_key_.empty()) {
DCHECK(footer_key_.length() == 16 || footer_key_.length() == 24 ||
footer_key_.length() == 32);
}
if (footer_key.empty() && check_plaintext_footer_integrity) {
DCHECK(nullptr != key_retriever);
if (footer_key_.empty() && check_plaintext_footer_integrity) {
DCHECK(nullptr != key_retriever_);
}
aad_prefix_verifier_ = std::move(aad_prefix_verifier);
footer_key_ = std::move(footer_key);
check_plaintext_footer_integrity_ = check_plaintext_footer_integrity;
key_retriever_ = std::move(key_retriever);
aad_prefix_ = std::move(aad_prefix);
column_decryption_properties_ = std::move(column_decryption_properties);
plaintext_files_allowed_ = plaintext_files_allowed;
}

FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_id(
Expand Down Expand Up @@ -282,7 +286,7 @@ FileEncryptionProperties::column_encryption_properties(const std::string& column
}

FileEncryptionProperties::FileEncryptionProperties(
ParquetCipher::type cipher, std::string footer_key, std::string footer_key_metadata,
ParquetCipher::type cipher, SecureString footer_key, std::string footer_key_metadata,
bool encrypted_footer, std::string aad_prefix, bool store_aad_prefix_in_file,
ColumnPathToEncryptionPropertiesMap encrypted_columns)
: footer_key_(std::move(footer_key)),
Expand Down
Loading
Loading