From 5f73a1f8ee6a50b47ca9c11b2fdc086332bc0045 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 31 Mar 2025 15:25:11 +0200 Subject: [PATCH 01/44] Wrap encryption keys in SecureString --- .../arrow/parquet_column_encryption.cc | 6 +- .../low_level_api/encryption_reader_writer.cc | 6 +- ...yption_reader_writer_all_crypto_options.cc | 6 +- .../dataset/file_parquet_encryption_test.cc | 14 ++- cpp/src/parquet/CMakeLists.txt | 4 +- cpp/src/parquet/encryption/crypto_factory.cc | 11 +- cpp/src/parquet/encryption/encryption.cc | 36 +++--- cpp/src/parquet/encryption/encryption.h | 68 +++++------ .../parquet/encryption/file_key_unwrapper.cc | 6 +- .../parquet/encryption/file_key_unwrapper.h | 5 +- .../parquet/encryption/file_key_wrapper.cc | 9 +- cpp/src/parquet/encryption/file_key_wrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 20 ++-- .../encryption/internal_file_decryptor.h | 17 +-- .../encryption/internal_file_encryptor.cc | 14 +-- .../encryption/internal_file_encryptor.h | 4 +- .../parquet/encryption/key_encryption_key.h | 7 +- .../parquet/encryption/key_management_test.cc | 4 +- cpp/src/parquet/encryption/key_toolkit.h | 10 +- .../encryption/key_toolkit_internal.cc | 14 +-- .../parquet/encryption/key_toolkit_internal.h | 7 +- .../parquet/encryption/key_wrapping_test.cc | 6 +- cpp/src/parquet/encryption/kms_client.h | 7 +- .../encryption/local_wrap_kms_client.cc | 16 +-- .../encryption/local_wrap_kms_client.h | 12 +- .../encryption/read_configurations_test.cc | 6 +- cpp/src/parquet/encryption/secure_string.cc | 103 +++++++++++++++++ cpp/src/parquet/encryption/secure_string.h | 67 +++++++++++ .../parquet/encryption/secure_string_test.cc | 106 ++++++++++++++++++ .../encryption/test_encryption_util.cc | 18 +-- .../parquet/encryption/test_encryption_util.h | 14 +-- .../parquet/encryption/test_in_memory_kms.cc | 26 ++--- .../parquet/encryption/test_in_memory_kms.h | 24 ++-- .../encryption/write_configurations_test.cc | 6 +- cpp/src/parquet/file_reader.cc | 
7 -- cpp/src/parquet/metadata.cc | 4 +- 36 files changed, 487 insertions(+), 205 deletions(-) create mode 100644 cpp/src/parquet/encryption/secure_string.cc create mode 100644 cpp/src/parquet/encryption/secure_string.h create mode 100644 cpp/src/parquet/encryption/secure_string_test.cc diff --git a/cpp/examples/arrow/parquet_column_encryption.cc b/cpp/examples/arrow/parquet_column_encryption.cc index 2ea4f44f172..573b5e55b38 100644 --- a/cpp/examples/arrow/parquet_column_encryption.cc +++ b/cpp/examples/arrow/parquet_column_encryption.cc @@ -106,9 +106,9 @@ arrow::Result> GetTable() { std::shared_ptr GetCryptoFactory() { // Configure KMS. - std::unordered_map key_map; - key_map.emplace("footerKeyId", "0123456789012345"); - key_map.emplace("columnKeyId", "1234567890123456"); + std::unordered_map key_map; + key_map.emplace("footerKeyId", parquet::encryption::SecureString("0123456789012345")); + key_map.emplace("columnKeyId", parquet::encryption::SecureString("1234567890123456")); auto crypto_factory = std::make_shared(); auto kms_client_factory = diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc index aa0f07ff5c6..32991446419 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc @@ -39,9 +39,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; const char* PARQUET_FILENAME = "parquet_cpp_example.parquet.encrypted"; -const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16 -const char* kColumnEncryptionKey1 = "1234567890123450"; -const char* kColumnEncryptionKey2 = "1234567890123451"; +const parquet::encryption::SecureString kFooterEncryptionKey("0123456789012345"); +const parquet::encryption::SecureString kColumnEncryptionKey1("1234567890123450"); +const parquet::encryption::SecureString kColumnEncryptionKey2("1234567890123451"); int main(int argc, char** argv) { 
/********************************************************************************** diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc index b564cddcb50..d10bdcf39d5 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc @@ -92,9 +92,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; -const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16 -const char* kColumnEncryptionKey1 = "1234567890123450"; -const char* kColumnEncryptionKey2 = "1234567890123451"; +const parquet::encryption::SecureString kFooterEncryptionKey("0123456789012345"); +const parquet::encryption::SecureString kColumnEncryptionKey1("1234567890123450"); +const parquet::encryption::SecureString kColumnEncryptionKey2("1234567890123451"); const char* fileName = "tester"; using FileClass = ::arrow::io::FileOutputStream; diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index d2e1763c62f..5e0663ff54f 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -41,12 +41,14 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/test_in_memory_kms.h" -constexpr std::string_view kFooterKeyMasterKey = "0123456789012345"; +const parquet::encryption::SecureString kFooterKeyMasterKey("0123456789012345"); constexpr std::string_view kFooterKeyMasterKeyId = "footer_key"; constexpr std::string_view kFooterKeyName = "footer_key"; -constexpr std::string_view kColumnMasterKey = "1234567890123450"; + +const parquet::encryption::SecureString kColumnMasterKey("1234567890123450"); constexpr std::string_view kColumnMasterKeyId = "col_key"; constexpr std::string_view kColumnKeyMapping = "col_key: a"; + 
constexpr std::string_view kBaseDir = ""; using arrow::internal::checked_pointer_cast; @@ -105,7 +107,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam key_map; + std::unordered_map key_map; key_map.emplace(kColumnMasterKeyId, kColumnMasterKey); key_map.emplace(kFooterKeyMasterKeyId, kFooterKeyMasterKey); @@ -145,7 +147,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam( - std::string(kFooterKeyMasterKey)) + kFooterKeyMasterKey) ->build(); auto writer_properties = std::make_unique() ->encryption(file_encryption_properties) @@ -230,7 +232,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam() - ->footer_key(std::string(kFooterKeyMasterKey)) + ->footer_key(kFooterKeyMasterKey) ->build(); parquet_scan_options->reader_properties->file_decryption_properties( file_decryption_properties); @@ -370,7 +372,7 @@ TEST_P(DatasetEncryptionTest, ReadSingleFile) { // Configure decryption keys via file decryption properties with static footer key. file_decryption_properties = std::make_unique() - ->footer_key(std::string(kFooterKeyMasterKey)) + ->footer_key(kFooterKeyMasterKey) ->build(); } auto reader_properties = parquet::default_reader_properties(); diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 465704331fe..fd882771bb5 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -252,7 +252,8 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/key_metadata.cc encryption/key_toolkit.cc encryption/key_toolkit_internal.cc - encryption/local_wrap_kms_client.cc) + encryption/local_wrap_kms_client.cc + encryption/secure_string.cc) else() set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal_nossl.cc) endif() @@ -413,6 +414,7 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/write_configurations_test.cc encryption/read_configurations_test.cc encryption/properties_test.cc + encryption/secure_string_test.cc encryption/test_encryption_util.cc) 
add_parquet_test(encryption-key-management-test SOURCES diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index 56069d55977..4b8a6a51f36 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -73,12 +73,13 @@ std::shared_ptr CryptoFactory::GetFileEncryptionProper std::string footer_key(dek_length, '\0'); RandBytes(reinterpret_cast(footer_key.data()), footer_key.size()); + SecureString secure_footer_key(std::move(footer_key)); std::string footer_key_metadata = - key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true); + key_wrapper.GetEncryptionKeyMetadata(secure_footer_key, footer_key_id, true); FileEncryptionProperties::Builder properties_builder = - FileEncryptionProperties::Builder(footer_key); + FileEncryptionProperties::Builder(secure_footer_key); properties_builder.footer_key_metadata(footer_key_metadata); properties_builder.algorithm(encryption_config.encryption_algorithm); @@ -148,12 +149,14 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties std::string column_key(dek_length, '\0'); RandBytes(reinterpret_cast(column_key.data()), column_key.size()); + SecureString secure_column_key(std::move(column_key)); + std::string column_key_key_metadata = - key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false); + key_wrapper->GetEncryptionKeyMetadata(secure_column_key, column_key_id, false); std::shared_ptr cmd = ColumnEncryptionProperties::Builder(column_name) - .key(column_key) + .key(secure_column_key) ->key_metadata(column_key_key_metadata) ->build(); encrypted_columns.insert({column_name, cmd}); diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 95a717c5e3c..206535d7ec0 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -29,11 +29,11 @@ namespace parquet { // integer key retriever -void 
IntegerKeyIdRetriever::PutKey(uint32_t key_id, const std::string& key) { +void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const encryption::SecureString& key) { key_map_.insert({key_id, key}); } -std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { +encryption::SecureString IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { uint32_t key_id; memcpy(reinterpret_cast(&key_id), key_metadata.c_str(), 4); @@ -41,16 +41,17 @@ std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { } // string key retriever -void StringKeyIdRetriever::PutKey(const std::string& key_id, const std::string& key) { +void StringKeyIdRetriever::PutKey(const std::string& key_id, + const encryption::SecureString& key) { key_map_.insert({key_id, key}); } -std::string StringKeyIdRetriever::GetKey(const std::string& key_id) { +encryption::SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { return key_map_.at(key_id); } ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( - std::string column_key) { + encryption::SecureString column_key) { if (column_key.empty()) return this; DCHECK(key_.empty()); @@ -92,7 +93,7 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key } FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( - const std::string footer_key) { + const encryption::SecureString footer_key) { if (footer_key.empty()) { return this; } @@ -130,7 +131,7 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix } ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( - const std::string& key) { + const encryption::SecureString& key) { if (key.empty()) return this; DCHECK(!key.empty()); @@ -181,10 +182,9 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() { return this; } -ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, - const std::string& 
column_path, - const std::string& key, - const std::string& key_metadata) +ColumnEncryptionProperties::ColumnEncryptionProperties( + bool encrypted, const std::string& column_path, const encryption::SecureString& key, + const std::string& key_metadata) : column_path_(column_path) { DCHECK(!column_path.empty()); if (!encrypted) { @@ -205,8 +205,8 @@ ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, key_ = key; } -ColumnDecryptionProperties::ColumnDecryptionProperties(const std::string& column_path, - const std::string& key) +ColumnDecryptionProperties::ColumnDecryptionProperties( + const std::string& column_path, const encryption::SecureString& key) : column_path_(column_path) { DCHECK(!column_path.empty()); @@ -217,7 +217,8 @@ ColumnDecryptionProperties::ColumnDecryptionProperties(const std::string& column key_ = key; } -std::string FileDecryptionProperties::column_key(const std::string& column_path) const { +encryption::SecureString FileDecryptionProperties::column_key( + const std::string& column_path) const { if (column_decryption_properties_.find(column_path) != column_decryption_properties_.end()) { auto column_prop = column_decryption_properties_.at(column_path); @@ -225,11 +226,12 @@ std::string FileDecryptionProperties::column_key(const std::string& column_path) return column_prop->key(); } } - return empty_string_; + return no_key_; } FileDecryptionProperties::FileDecryptionProperties( - const std::string& footer_key, std::shared_ptr key_retriever, + const encryption::SecureString& footer_key, + std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, const std::string& aad_prefix, std::shared_ptr aad_prefix_verifier, const ColumnPathToDecryptionPropertiesMap& column_decryption_properties, @@ -283,7 +285,7 @@ FileEncryptionProperties::column_encryption_properties(const std::string& column } FileEncryptionProperties::FileEncryptionProperties( - ParquetCipher::type cipher, const std::string& footer_key, + ParquetCipher::type 
cipher, const encryption::SecureString& footer_key, const std::string& footer_key_metadata, bool encrypted_footer, const std::string& aad_prefix, bool store_aad_prefix_in_file, const ColumnPathToEncryptionPropertiesMap& encrypted_columns) diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 6604e329911..060499770b0 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -22,6 +22,7 @@ #include #include +#include "parquet/encryption/secure_string.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" @@ -46,28 +47,28 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: - virtual std::string GetKey(const std::string& key_metadata) = 0; + virtual encryption::SecureString GetKey(const std::string& key_metadata) = 0; virtual ~DecryptionKeyRetriever() {} }; /// Simple integer key retriever class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(uint32_t key_id, const std::string& key); - std::string GetKey(const std::string& key_metadata) override; + void PutKey(uint32_t key_id, const encryption::SecureString& key); + encryption::SecureString GetKey(const std::string& key_metadata) override; private: - std::map key_map_; + std::map key_map_; }; // Simple string key retriever class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(const std::string& key_id, const std::string& key); - std::string GetKey(const std::string& key_metadata) override; + void PutKey(const std::string& key_id, const encryption::SecureString& key); + encryption::SecureString GetKey(const std::string& key_metadata) override; private: - std::map key_map_; + std::map key_map_; }; class PARQUET_EXPORT HiddenColumnException : public ParquetException { @@ -113,7 +114,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the 
footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. - Builder* key(std::string column_key); + Builder* key(encryption::SecureString column_key); /// Set a key retrieval metadata. /// use either key_metadata() or key_id(), not both @@ -133,7 +134,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { private: const std::string column_path_; bool encrypted_; - std::string key_; + encryption::SecureString key_; std::string key_metadata_; Builder(const std::string path, bool encrypted) @@ -143,23 +144,21 @@ class PARQUET_EXPORT ColumnEncryptionProperties { std::string column_path() const { return column_path_; } bool is_encrypted() const { return encrypted_; } bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; } - std::string key() const { return key_; } + encryption::SecureString key() const { return key_; } std::string key_metadata() const { return key_metadata_; } ColumnEncryptionProperties() = default; ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default; ColumnEncryptionProperties(ColumnEncryptionProperties&& other) = default; - ~ColumnEncryptionProperties() { key_.clear(); } - private: const std::string column_path_; bool encrypted_; bool encrypted_with_footer_key_; - std::string key_; + encryption::SecureString key_; std::string key_metadata_; explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path, - const std::string& key, + const encryption::SecureString& key, const std::string& key_metadata); }; @@ -176,33 +175,31 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// key metadata for this column the metadata will be ignored, /// the column will be decrypted with this key. /// key length must be either 16, 24 or 32 bytes. 
- Builder* key(const std::string& key); + Builder* key(const encryption::SecureString& key); std::shared_ptr build(); private: const std::string column_path_; - std::string key_; + encryption::SecureString key_; }; ColumnDecryptionProperties() = default; ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default; ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default; - ~ColumnDecryptionProperties() { key_.clear(); } - std::string column_path() const { return column_path_; } - std::string key() const { return key_; } + encryption::SecureString key() const { return key_; } private: const std::string column_path_; - std::string key_; + encryption::SecureString key_; /// This class is only required for setting explicit column decryption keys - /// to override key retriever (or to provide keys when key metadata and/or /// key retriever are not available) explicit ColumnDecryptionProperties(const std::string& column_path, - const std::string& key); + const encryption::SecureString& key); }; class PARQUET_EXPORT AADPrefixVerifier { @@ -237,7 +234,7 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - Builder* footer_key(const std::string footer_key); + Builder* footer_key(const encryption::SecureString footer_key); /// Set explicit column keys (decryption properties). /// Its also possible to set a key retriever on this property object. 
@@ -294,7 +291,7 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - std::string footer_key_; + encryption::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; ColumnPathToDecryptionPropertiesMap column_decryption_properties_; @@ -304,11 +301,9 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; }; - ~FileDecryptionProperties() { footer_key_.clear(); } + encryption::SecureString column_key(const std::string& column_path) const; - std::string column_key(const std::string& column_path) const; - - std::string footer_key() const { return footer_key_; } + encryption::SecureString footer_key() const { return footer_key_; } std::string aad_prefix() const { return aad_prefix_; } @@ -327,11 +322,11 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - std::string footer_key_; + encryption::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; - const std::string empty_string_ = ""; + const encryption::SecureString no_key_ = encryption::SecureString(); ColumnPathToDecryptionPropertiesMap column_decryption_properties_; std::shared_ptr key_retriever_; @@ -339,7 +334,7 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; FileDecryptionProperties( - const std::string& footer_key, + const encryption::SecureString& footer_key, std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, const std::string& aad_prefix, std::shared_ptr aad_prefix_verifier, @@ -351,7 +346,7 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - explicit Builder(const std::string& footer_key) + explicit Builder(const encryption::SecureString& footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter) { footer_key_ = footer_key; @@ -402,7 +397,7 @@ class PARQUET_EXPORT FileEncryptionProperties { private: ParquetCipher::type parquet_cipher_; bool 
encrypted_footer_; - std::string footer_key_; + encryption::SecureString footer_key_; std::string footer_key_metadata_; std::string aad_prefix_; @@ -410,13 +405,11 @@ class PARQUET_EXPORT FileEncryptionProperties { ColumnPathToEncryptionPropertiesMap encrypted_columns_; }; - ~FileEncryptionProperties() { footer_key_.clear(); } - bool encrypted_footer() const { return encrypted_footer_; } EncryptionAlgorithm algorithm() const { return algorithm_; } - std::string footer_key() const { return footer_key_; } + encryption::SecureString footer_key() const { return footer_key_; } std::string footer_key_metadata() const { return footer_key_metadata_; } @@ -431,7 +424,7 @@ class PARQUET_EXPORT FileEncryptionProperties { private: EncryptionAlgorithm algorithm_; - std::string footer_key_; + encryption::SecureString footer_key_; std::string footer_key_metadata_; bool encrypted_footer_; std::string file_aad_; @@ -439,7 +432,8 @@ class PARQUET_EXPORT FileEncryptionProperties { bool store_aad_prefix_in_file_; ColumnPathToEncryptionPropertiesMap encrypted_columns_; - FileEncryptionProperties(ParquetCipher::type cipher, const std::string& footer_key, + FileEncryptionProperties(ParquetCipher::type cipher, + const encryption::SecureString& footer_key, const std::string& footer_key_metadata, bool encrypted_footer, const std::string& aad_prefix, bool store_aad_prefix_in_file, const ColumnPathToEncryptionPropertiesMap& encrypted_columns); diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index d88aa6c52ac..99568c4814a 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -67,7 +67,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -std::string FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { +encryption::SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) 
{ // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -106,7 +106,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const std::string& master_key_id = key_material.master_key_id(); const std::string& encoded_wrapped_dek = key_material.wrapped_dek(); - std::string data_key; + SecureString data_key; if (!double_wrapping) { data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); } else { @@ -114,7 +114,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const std::string& encoded_kek_id = key_material.kek_id(); const std::string& encoded_wrapped_kek = key_material.wrapped_kek(); - std::string kek_bytes = kek_per_kek_id_->GetOrInsert( + SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); }); diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index 6147abbecd3..5ce877ae6f7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -64,7 +64,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - std::string GetKey(const std::string& key_metadata) override; + SecureString GetKey(const std::string& key_metadata) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); @@ -81,7 +81,8 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { const KeyMaterial& key_material); /// A map of Key Encryption Key (KEK) ID -> KEK bytes, for the current token - std::shared_ptr<::arrow::util::ConcurrentMap> kek_per_kek_id_; + std::shared_ptr<::arrow::util::ConcurrentMap> + 
kek_per_kek_id_; std::shared_ptr key_toolkit_owner_; KeyToolkit* key_toolkit_; KmsConnectionConfig kms_connection_config_; diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index 8ce563e60d7..97af661e442 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -49,7 +49,7 @@ FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, } } -std::string FileKeyWrapper::GetEncryptionKeyMetadata(const std::string& data_key, +std::string FileKeyWrapper::GetEncryptionKeyMetadata(const SecureString& data_key, const std::string& master_key_id, bool is_footer_key, std::string key_id_in_file) { @@ -70,7 +70,7 @@ std::string FileKeyWrapper::GetEncryptionKeyMetadata(const std::string& data_key }); // Encrypt DEK with KEK const std::string& aad = key_encryption_key.kek_id(); - const std::string& kek_bytes = key_encryption_key.kek_bytes(); + const SecureString& kek_bytes = key_encryption_key.kek_bytes(); encoded_wrapped_dek = internal::EncryptKeyLocally(data_key, kek_bytes, aad); encoded_kek_id = key_encryption_key.encoded_kek_id(); encoded_wrapped_kek = key_encryption_key.encoded_wrapped_kek(); @@ -113,14 +113,15 @@ KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey( const std::string& master_key_id) { std::string kek_bytes(kKeyEncryptionKeyLength, '\0'); RandBytes(reinterpret_cast(kek_bytes.data()), kKeyEncryptionKeyLength); + SecureString secure_kek_bytes(std::move(kek_bytes)); std::string kek_id(kKeyEncryptionKeyIdLength, '\0'); RandBytes(reinterpret_cast(kek_id.data()), kKeyEncryptionKeyIdLength); // Encrypt KEK with Master key - std::string encoded_wrapped_kek = kms_client_->WrapKey(kek_bytes, master_key_id); + std::string encoded_wrapped_kek = kms_client_->WrapKey(secure_kek_bytes, master_key_id); - return KeyEncryptionKey(std::move(kek_bytes), std::move(kek_id), + return KeyEncryptionKey(std::move(secure_kek_bytes), std::move(kek_id), 
std::move(encoded_wrapped_kek)); } diff --git a/cpp/src/parquet/encryption/file_key_wrapper.h b/cpp/src/parquet/encryption/file_key_wrapper.h index 26b9719de64..97fa2cf6011 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.h +++ b/cpp/src/parquet/encryption/file_key_wrapper.h @@ -61,7 +61,7 @@ class PARQUET_EXPORT FileKeyWrapper { /// When external key material is used, an identifier is usually generated automatically /// but may be specified explicitly to support key rotation, /// which requires keeping the same identifiers. - std::string GetEncryptionKeyMetadata(const std::string& data_key, + std::string GetEncryptionKeyMetadata(const SecureString& data_key, const std::string& master_key_id, bool is_footer_key, std::string key_id_in_file = ""); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index 715807b4267..5decaec3554 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -26,7 +26,7 @@ namespace parquet { // Decryptor Decryptor::Decryptor(std::unique_ptr aes_decryptor, - const std::string& key, const std::string& file_aad, + const encryption::SecureString& key, const std::string& file_aad, const std::string& aad, ::arrow::MemoryPool* pool) : aes_decryptor_(std::move(aes_decryptor)), key_(key), @@ -46,7 +46,7 @@ int32_t Decryptor::CiphertextLength(int32_t plaintext_len) const { int32_t Decryptor::Decrypt(::arrow::util::span ciphertext, ::arrow::util::span plaintext) { - return aes_decryptor_->Decrypt(ciphertext, str2span(key_), str2span(aad_), plaintext); + return aes_decryptor_->Decrypt(ciphertext, key_.as_span(), str2span(aad_), plaintext); } // InternalFileDecryptor @@ -60,13 +60,13 @@ InternalFileDecryptor::InternalFileDecryptor( footer_key_metadata_(footer_key_metadata), pool_(pool) {} -std::string InternalFileDecryptor::GetFooterKey() { +encryption::SecureString InternalFileDecryptor::GetFooterKey() 
{ std::unique_lock lock(mutex_); if (!footer_key_.empty()) { return footer_key_; } - std::string footer_key = properties_->footer_key(); + encryption::SecureString footer_key = properties_->footer_key(); // ignore footer key metadata if footer key is explicitly set via API if (footer_key.empty()) { if (footer_key_metadata_.empty()) @@ -99,7 +99,7 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( const std::string& aad, bool metadata) { - std::string footer_key = GetFooterKey(); + encryption::SecureString footer_key = GetFooterKey(); auto key_len = static_cast(footer_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); @@ -107,9 +107,9 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( pool_); } -std::string InternalFileDecryptor::GetColumnKey(const std::string& column_path, - const std::string& column_key_metadata) { - std::string column_key = properties_->column_key(column_path); +encryption::SecureString InternalFileDecryptor::GetColumnKey( + const std::string& column_path, const std::string& column_key_metadata) { + encryption::SecureString column_key = properties_->column_key(column_path); // No explicit column key given via API. Retrieve via key metadata. 
if (column_key.empty() && !column_key_metadata.empty() && @@ -131,7 +131,7 @@ std::string InternalFileDecryptor::GetColumnKey(const std::string& column_path, std::unique_ptr InternalFileDecryptor::GetColumnDecryptor( const std::string& column_path, const std::string& column_key_metadata, const std::string& aad, bool metadata) { - std::string column_key = GetColumnKey(column_path, column_key_metadata); + encryption::SecureString column_key = GetColumnKey(column_path, column_key_metadata); auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, aad, @@ -148,7 +148,7 @@ InternalFileDecryptor::GetColumnDecryptorFactory( // The column is encrypted with its own key const std::string& column_key_metadata = crypto_metadata->key_metadata(); const std::string column_path = crypto_metadata->path_in_schema()->ToDotString(); - std::string column_key = GetColumnKey(column_path, column_key_metadata); + encryption::SecureString column_key = GetColumnKey(column_path, column_key_metadata); return [this, aad, metadata, column_key = std::move(column_key)]() { auto key_len = static_cast(column_key.size()); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index cc0e315e029..aebce309769 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -22,6 +22,7 @@ #include #include +#include "parquet/encryption/secure_string.h" #include "parquet/schema.h" namespace parquet { @@ -39,9 +40,9 @@ class FileDecryptionProperties; // CAUTION: Decryptor objects are not thread-safe. 
class PARQUET_EXPORT Decryptor { public: - Decryptor(std::unique_ptr decryptor, const std::string& key, - const std::string& file_aad, const std::string& aad, - ::arrow::MemoryPool* pool); + Decryptor(std::unique_ptr decryptor, + const encryption::SecureString& key, const std::string& file_aad, + const std::string& aad, ::arrow::MemoryPool* pool); ~Decryptor(); const std::string& file_aad() const { return file_aad_; } @@ -55,7 +56,7 @@ class PARQUET_EXPORT Decryptor { private: std::unique_ptr aes_decryptor_; - std::string key_; + encryption::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; @@ -71,7 +72,7 @@ class InternalFileDecryptor { const std::string& file_aad() const { return file_aad_; } - std::string GetFooterKey(); + encryption::SecureString GetFooterKey(); ParquetCipher::type algorithm() const { return algorithm_; } @@ -127,10 +128,10 @@ class InternalFileDecryptor { // Protects footer_key_ updates std::mutex mutex_; - std::string footer_key_; + encryption::SecureString footer_key_; - std::string GetColumnKey(const std::string& column_path, - const std::string& column_key_metadata); + encryption::SecureString GetColumnKey(const std::string& column_path, + const std::string& column_key_metadata); std::unique_ptr GetFooterDecryptor(const std::string& aad, bool metadata); diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 9210ffba9cc..867e337d6e5 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -22,9 +22,9 @@ namespace parquet { // Encryptor -Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key, - const std::string& file_aad, const std::string& aad, - ::arrow::MemoryPool* pool) +Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, + const encryption::SecureString& key, const std::string& file_aad, + const std::string& aad, 
::arrow::MemoryPool* pool) : aes_encryptor_(aes_encryptor), key_(key), file_aad_(file_aad), @@ -37,7 +37,7 @@ int32_t Encryptor::CiphertextLength(int64_t plaintext_len) const { int32_t Encryptor::Encrypt(::arrow::util::span plaintext, ::arrow::util::span ciphertext) { - return aes_encryptor_->Encrypt(plaintext, str2span(key_), str2span(aad_), ciphertext); + return aes_encryptor_->Encrypt(plaintext, key_.as_span(), str2span(aad_), ciphertext); } // InternalFileEncryptor @@ -52,7 +52,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - std::string footer_key = properties_->footer_key(); + encryption::SecureString footer_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size()); footer_encryptor_ = std::make_shared( aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_); @@ -66,7 +66,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterSigningEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - std::string footer_signing_key = properties_->footer_key(); + encryption::SecureString footer_signing_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size()); footer_signing_encryptor_ = std::make_shared( aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_); @@ -101,7 +101,7 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return nullptr; } - std::string key; + encryption::SecureString key; if (column_prop->is_encrypted_with_footer_key()) { key = properties_->footer_key(); } else { diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index a7108ab66f6..9d582ed0816 100644 --- 
a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -36,7 +36,7 @@ class ColumnEncryptionProperties; class PARQUET_EXPORT Encryptor { public: - Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key, + Encryptor(encryption::AesEncryptor* aes_encryptor, const encryption::SecureString& key, const std::string& file_aad, const std::string& aad, ::arrow::MemoryPool* pool); const std::string& file_aad() { return file_aad_; } @@ -62,7 +62,7 @@ class PARQUET_EXPORT Encryptor { private: encryption::AesEncryptor* aes_encryptor_; - std::string key_; + encryption::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; diff --git a/cpp/src/parquet/encryption/key_encryption_key.h b/cpp/src/parquet/encryption/key_encryption_key.h index 62263ee3cd5..18cb9484c7e 100644 --- a/cpp/src/parquet/encryption/key_encryption_key.h +++ b/cpp/src/parquet/encryption/key_encryption_key.h @@ -21,6 +21,7 @@ #include #include "arrow/util/base64.h" +#include "parquet/encryption/secure_string.h" namespace parquet::encryption { @@ -32,14 +33,14 @@ namespace parquet::encryption { // locally, and does not involve an interaction with a KMS server. 
class KeyEncryptionKey { public: - KeyEncryptionKey(std::string kek_bytes, std::string kek_id, + KeyEncryptionKey(SecureString kek_bytes, std::string kek_id, std::string encoded_wrapped_kek) : kek_bytes_(std::move(kek_bytes)), kek_id_(std::move(kek_id)), encoded_kek_id_(::arrow::util::base64_encode(kek_id_)), encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {} - const std::string& kek_bytes() const { return kek_bytes_; } + const SecureString& kek_bytes() const { return kek_bytes_; } const std::string& kek_id() const { return kek_id_; } @@ -48,7 +49,7 @@ class KeyEncryptionKey { const std::string& encoded_wrapped_kek() const { return encoded_wrapped_kek_; } private: - std::string kek_bytes_; + SecureString kek_bytes_; std::string kek_id_; std::string encoded_kek_id_; std::string encoded_wrapped_kek_; diff --git a/cpp/src/parquet/encryption/key_management_test.cc b/cpp/src/parquet/encryption/key_management_test.cc index 2e43edee530..ed6d15dbb6a 100644 --- a/cpp/src/parquet/encryption/key_management_test.cc +++ b/cpp/src/parquet/encryption/key_management_test.cc @@ -46,8 +46,8 @@ class TestEncryptionKeyManagement : public ::testing::Test { FileEncryptor encryptor_; FileDecryptor decryptor_; - std::unordered_map key_list_; - std::unordered_map new_key_list_; + std::unordered_map key_list_; + std::unordered_map new_key_list_; std::string column_key_mapping_; KmsConnectionConfig kms_connection_config_; CryptoFactory crypto_factory_; diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index 339692a99a3..08c46f499c6 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -49,7 +49,7 @@ class PARQUET_EXPORT KeyToolkit { /// Key encryption key two level cache for unwrapping: token -> KeyEncryptionKeyId -> /// KeyEncryptionKeyBytes - TwoLevelCacheWithExpiration& kek_read_cache_per_token() { + TwoLevelCacheWithExpiration& kek_read_cache_per_token() { return 
key_encryption_key_read_cache_; } @@ -82,7 +82,7 @@ class PARQUET_EXPORT KeyToolkit { private: TwoLevelCacheWithExpiration> kms_client_cache_; TwoLevelCacheWithExpiration key_encryption_key_write_cache_; - TwoLevelCacheWithExpiration key_encryption_key_read_cache_; + TwoLevelCacheWithExpiration key_encryption_key_read_cache_; std::shared_ptr kms_client_factory_; mutable ::arrow::util::Mutex last_cache_clean_for_key_rotation_time_mutex_; internal::TimePoint last_cache_clean_for_key_rotation_time_; @@ -92,14 +92,14 @@ class PARQUET_EXPORT KeyToolkit { // parsing from "key material" class PARQUET_EXPORT KeyWithMasterId { public: - KeyWithMasterId(std::string key_bytes, std::string master_id) + KeyWithMasterId(SecureString key_bytes, std::string master_id) : key_bytes_(std::move(key_bytes)), master_id_(std::move(master_id)) {} - const std::string& data_key() const { return key_bytes_; } + const SecureString& data_key() const { return key_bytes_; } const std::string& master_id() const { return master_id_; } private: - const std::string key_bytes_; + const SecureString key_bytes_; const std::string master_id_; }; diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 89a52a2bcd6..53d450b6561 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -26,8 +26,8 @@ namespace parquet::encryption::internal { // configured by users and the master key lengths fetched from KMS server. 
static constexpr const int32_t kAcceptableDataKeyLengths[] = {128, 192, 256}; -std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& master_key, - const std::string& aad) { +std::string EncryptKeyLocally(const SecureString& key_bytes, + const SecureString& master_key, const std::string& aad) { AesEncryptor key_encryptor(ParquetCipher::AES_GCM_V1, static_cast(master_key.size()), false, false /*write_length*/); @@ -38,15 +38,15 @@ std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& m ::arrow::util::span encrypted_key_span( reinterpret_cast(&encrypted_key[0]), encrypted_key_len); - encrypted_key_len = key_encryptor.Encrypt(str2span(key_bytes), str2span(master_key), + encrypted_key_len = key_encryptor.Encrypt(key_bytes.as_span(), master_key.as_span(), str2span(aad), encrypted_key_span); return ::arrow::util::base64_encode( ::std::string_view(encrypted_key.data(), encrypted_key_len)); } -std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, - const std::string& master_key, const std::string& aad) { +SecureString DecryptKeyLocally(const std::string& encoded_encrypted_key, + const SecureString& master_key, const std::string& aad) { std::string encrypted_key = ::arrow::util::base64_decode(encoded_encrypted_key); AesDecryptor key_decryptor(ParquetCipher::AES_GCM_V1, @@ -59,10 +59,10 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, ::arrow::util::span decrypted_key_span( reinterpret_cast(&decrypted_key[0]), decrypted_key_len); - decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), str2span(master_key), + decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), master_key.as_span(), str2span(aad), decrypted_key_span); - return decrypted_key; + return SecureString(std::move(decrypted_key)); } bool ValidateKeyLength(int32_t key_length_bits) { diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.h b/cpp/src/parquet/encryption/key_toolkit_internal.h 
index 8474a91fc1a..1bdfcb73a21 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.h +++ b/cpp/src/parquet/encryption/key_toolkit_internal.h @@ -19,19 +19,20 @@ #include +#include "parquet/encryption/secure_string.h" #include "parquet/platform.h" namespace parquet::encryption::internal { /// Encrypts "key" with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -std::string EncryptKeyLocally(const std::string& key, const std::string& master_key, +std::string EncryptKeyLocally(const SecureString& key, const SecureString& master_key, const std::string& aad); /// Decrypts encrypted key with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, - const std::string& master_key, const std::string& aad); +SecureString DecryptKeyLocally(const std::string& encoded_encrypted_key, + const SecureString& master_key, const std::string& aad); PARQUET_EXPORT bool ValidateKeyLength(int32_t key_length_bits); diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 198ceb9bf4b..04494d8cc21 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,14 +86,14 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - std::string footer_key = unwrapper.GetKey(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKey(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - std::string column_key = unwrapper.GetKey(key_metadata_json_column); + SecureString column_key = unwrapper.GetKey(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } - std::unordered_map key_list_; + std::unordered_map key_list_; KmsConnectionConfig kms_connection_config_; }; diff --git a/cpp/src/parquet/encryption/kms_client.h 
b/cpp/src/parquet/encryption/kms_client.h index ef363d9c2cd..4f224dda710 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -23,6 +23,7 @@ #include "arrow/util/mutex.h" +#include "parquet/encryption/secure_string.h" #include "parquet/exception.h" #include "parquet/platform.h" @@ -81,12 +82,12 @@ class PARQUET_EXPORT KmsClient { /// Wraps a key - encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. - virtual std::string WrapKey(const std::string& key_bytes, + virtual std::string WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) = 0; /// Decrypts (unwraps) a key with the master key. - virtual std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) = 0; + virtual SecureString UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) = 0; virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 23e28bb8e61..1cbf5982304 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -69,10 +69,10 @@ LocalWrapKmsClient::LocalWrapKmsClient(const KmsConnectionConfig& kms_connection master_key_cache_.Clear(); } -std::string LocalWrapKmsClient::WrapKey(const std::string& key_bytes, +std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) { const auto master_key = master_key_cache_.GetOrInsert( - master_key_identifier, [this, master_key_identifier]() -> std::string { + master_key_identifier, [this, master_key_identifier]() -> SecureString { return this->GetKeyFromServer(master_key_identifier); }); const auto& aad = master_key_identifier; @@ -82,8 +82,8 @@ std::string LocalWrapKmsClient::WrapKey(const std::string& key_bytes, return 
LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { +SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); if (kLocalWrapNoKeyVersion != master_key_version) { @@ -91,8 +91,8 @@ std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, master_key_version); } const std::string& encrypted_encoded_key = key_wrap.encrypted_encoded_key(); - const std::string master_key = master_key_cache_.GetOrInsert( - master_key_identifier, [this, master_key_identifier]() -> std::string { + const SecureString master_key = master_key_cache_.GetOrInsert( + master_key_identifier, [this, master_key_identifier]() -> SecureString { return this->GetKeyFromServer(master_key_identifier); }); const std::string& aad = master_key_identifier; @@ -100,8 +100,8 @@ std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, return internal::DecryptKeyLocally(encrypted_encoded_key, master_key, aad); } -std::string LocalWrapKmsClient::GetKeyFromServer(const std::string& key_identifier) { - std::string master_key = GetMasterKeyFromServer(key_identifier); +SecureString LocalWrapKmsClient::GetKeyFromServer(const std::string& key_identifier) { + SecureString master_key = GetMasterKeyFromServer(key_identifier); int32_t key_length_bits = static_cast(master_key.size() * 8); if (!internal::ValidateKeyLength(key_length_bits)) { std::ostringstream ss; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 3c90d829605..df4cef22f87 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -35,16 +35,16 @@ class PARQUET_EXPORT 
LocalWrapKmsClient : public KmsClient { explicit LocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config); - std::string WrapKey(const std::string& key_bytes, + std::string WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) override; - std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + SecureString UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) override; protected: /// Get master key from the remote KMS server. /// Note: this function might be called by multiple threads - virtual std::string GetMasterKeyFromServer( + virtual SecureString GetMasterKeyFromServer( const std::string& master_key_identifier) = 0; private: @@ -84,11 +84,11 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string master_key_version_; }; - std::string GetKeyFromServer(const std::string& key_identifier); + SecureString GetKeyFromServer(const std::string& key_identifier); protected: KmsConnectionConfig kms_connection_config_; - ::arrow::util::ConcurrentMap master_key_cache_; + ::arrow::util::ConcurrentMap master_key_cache_; }; } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 2612229028f..c548b11157e 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -103,9 +103,9 @@ class TestDecryptionConfiguration // This vector will hold various decryption configurations. 
std::vector> vector_of_decryption_configurations_; - std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey); - std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1); - std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2); + SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; + SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; + SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; std::string kFileName_ = std::string(kFileName); void CreateDecryptionConfigurations() { diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc new file mode 100644 index 00000000000..88baf5aa948 --- /dev/null +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/encryption/secure_string.h" + +#include +#include +#include +#if defined(_WIN32) +# include +#endif + +#include "arrow/util/span.h" +#include "parquet/encryption/encryption.h" + +namespace parquet::encryption { +SecureString::SecureString(SecureString&& secret) noexcept + : secret_(std::move(secret.secret_)) {} +SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { + secure_clear(secret); +} + +SecureString& SecureString::operator=(SecureString&& secret) noexcept { + if (this == &secret) { + // self-assignment + return *this; + } + dispose(); + secret_ = std::move(secret.secret_); + return *this; +} +SecureString& SecureString::operator=(const SecureString& secret) noexcept { + if (this == &secret) { + // self-assignment + return *this; + } + dispose(); + secret_ = secret.secret_; + return *this; +} +SecureString& SecureString::operator=(std::string&& secret) noexcept { + dispose(); + secret_ = std::move(secret); + secure_clear(secret); + return *this; +} + +bool SecureString::operator==(const SecureString& other) const { + return secret_ == other.secret_; +} + +bool SecureString::operator!=(const SecureString& other) const { + return secret_ != other.secret_; +} + +bool SecureString::empty() const { return secret_.empty(); } +std::size_t SecureString::size() const { return secret_.size(); } +std::size_t SecureString::length() const { return secret_.length(); } +::arrow::util::span SecureString::as_span() const { + return str2span(secret_); +} +void SecureString::dispose() { secure_clear(secret_); } +void SecureString::secure_clear(std::string& secret) { + secret.clear(); + secure_clear(reinterpret_cast(secret.data()), secret.capacity()); +} +inline void SecureString::secure_clear(uint8_t* data, size_t size) { + // Heavily borrowed from libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h +#if defined(_WIN32) + SecureZeroMemory(data, size); +#elif 
defined(__STDC_LIB_EXT1__) + // memset_s is meant to not be optimized away + memset_s(data, size, 0, size); +#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + OPENSSL_cleanse(data, size); +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // glibc 2.25+ has explicit_bzero + explicit_bzero(data, size); +#else + // Try to ensure that a true library call to memset() will be generated + // by the compiler. + static const volatile auto memset_v = &memset; + memset_v(data, 0, size); + __asm__ __volatile__("" ::"r"(data) : "memory"); +#endif +} + +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h new file mode 100644 index 00000000000..5ffa0160df0 --- /dev/null +++ b/cpp/src/parquet/encryption/secure_string.h @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/span.h" +#include "parquet/platform.h" + +namespace parquet::encryption { +/** + * A secure string that ensures the wrapped string is cleared from memory on + * destruction. 
This class can only be created from std::strings that are securely + * erased after creation. + * + * Note: This class does not provide a constructor / assignment operator that copies a + * std::string because that would allow code to create a SecureString while accidentally + * not noticing the need to securely erase the argument after invoking the constructor / + * calling the assignment operator. + */ +class PARQUET_EXPORT SecureString { + public: + SecureString() noexcept = default; + SecureString(SecureString&&) noexcept; + SecureString(const SecureString&) noexcept = default; + explicit SecureString(std::string&&) noexcept; + + SecureString& operator=(SecureString&&) noexcept; + SecureString& operator=(const SecureString&) noexcept; + SecureString& operator=(std::string&& secret) noexcept; + + bool operator==(const SecureString&) const; + bool operator!=(const SecureString&) const; + + ~SecureString() { dispose(); } + + [[nodiscard]] bool empty() const; + [[nodiscard]] std::size_t size() const; + [[nodiscard]] std::size_t length() const; + [[nodiscard]] ::arrow::util::span as_span() const; + + void dispose(); + + static void secure_clear(std::string&); + static void secure_clear(uint8_t* data, size_t size); + + private: + std::string secret_; +}; + +} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc new file mode 100644 index 00000000000..07039de7636 --- /dev/null +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "parquet/encryption/secure_string.h" + +namespace parquet::encryption::test { + +void assert_securely_cleared(const std::string& string) { + // the entire buffer of the string is filled with zeros + std::vector zeros(string.capacity()); + ::arrow::util::span actual(string.data(), string.capacity()); + ::arrow::util::span expected(zeros.data(), zeros.size()); + ASSERT_EQ(actual, expected); + + // the string is empty + ASSERT_TRUE(string.empty()); +} + +TEST(TestSecureString, SecureClearString) { + // short string + { + std::string tiny("abc"); + SecureString::secure_clear(tiny); + assert_securely_cleared(tiny); + } + + // long string + { + std::string large(1024, 'x'); + large.resize(1024, 'y'); + SecureString::secure_clear(large); + assert_securely_cleared(large); + } + + // empty string + { + // this creates an empty string with some non-zero characters in the string buffer + // we test that all those characters are securely cleared + std::string empty("abcdef"); + empty.resize(0); + SecureString::secure_clear(empty); + assert_securely_cleared(empty); + } +} + +TEST(TestSecureString, Construct) { + // move constructing from a string securely clears that string + std::string string("hello world"); + SecureString secret_from_string(std::move(string)); + assert_securely_cleared(string); + ASSERT_FALSE(secret_from_string.empty()); + + // move constructing from a secure string securely clears that secure string + // Note: there is no way to test the secure clearing of the moved secure string + SecureString 
secret_from_move_secret(std::move(secret_from_string)); + ASSERT_TRUE(secret_from_string.empty()); + ASSERT_FALSE(secret_from_move_secret.empty()); + + // copy constructing from a secure string does not modify that secure string + SecureString secret_from_secret(secret_from_move_secret); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); +} + +TEST(TestSecureString, Assign) { + // move assigning from a string securely clears that string + std::string string("hello world"); + SecureString secret_from_string; + secret_from_string = std::move(string); + assert_securely_cleared(string); + ASSERT_FALSE(secret_from_string.empty()); + + // move assigning from a secure string securely clears that secure string + // Note: there is no way to test the secure clearing of the moved secure string + SecureString secret_from_move_secret; + secret_from_move_secret = std::move(secret_from_string); + ASSERT_TRUE(secret_from_string.empty()); + ASSERT_FALSE(secret_from_move_secret.empty()); + + // assigning from a secure string does not modify that secure string + SecureString secret_from_secret; + secret_from_secret = secret_from_move_secret; + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); +} + +} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc index 1864e86f34a..86951f8264e 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.cc +++ b/cpp/src/parquet/encryption/test_encryption_util.cc @@ -49,17 +49,21 @@ std::string data_file(const char* file) { return ss.str(); } -std::unordered_map BuildKeyMap(const char* const* column_ids, - const char* const* column_keys, - const char* footer_id, - const char* footer_key) { - std::unordered_map key_map; +std::unordered_map 
BuildKeyMap(const char* const* column_ids, + const char* const* column_keys, + const char* footer_id, + const char* footer_key) { + std::unordered_map key_map; // add column keys for (int i = 0; i < 6; i++) { - key_map.insert({column_ids[i], column_keys[i]}); + // this is not safe to do as column_keys[i] is not protected by SecureString + // do not do outside test code + key_map.insert({column_ids[i], SecureString(column_keys[i])}); } // add footer key - key_map.insert({footer_id, footer_key}); + // this is not safe to do as footer_key is not protected by SecureString + // do not do outside test code + key_map.insert({footer_id, SecureString(footer_key)}); return key_map; } diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index 9bfc774278d..f455cff0506 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -43,9 +43,9 @@ using ::arrow::internal::TemporaryDir; constexpr int kFixedLength = 10; -const char kFooterEncryptionKey[] = "0123456789012345"; // 128bit/16 -const char kColumnEncryptionKey1[] = "1234567890123450"; -const char kColumnEncryptionKey2[] = "1234567890123451"; +inline SecureString kFooterEncryptionKey(std::string("0123456789012345")); +inline SecureString kColumnEncryptionKey1(std::string("1234567890123450")); +inline SecureString kColumnEncryptionKey2(std::string("1234567890123451")); const char kFileName[] = "tester"; // Get the path of file inside parquet test data directory @@ -82,10 +82,10 @@ const char* const kNewColumnMasterKeys[] = {"9234567890123450", "923456789012345 // The result of this function will be used to set into TestOnlyInMemoryKmsClientFactory // as the key mapping to look at. 
-std::unordered_map BuildKeyMap(const char* const* column_ids, - const char* const* column_keys, - const char* footer_id, - const char* footer_key); +std::unordered_map BuildKeyMap(const char* const* column_ids, + const char* const* column_keys, + const char* footer_id, + const char* footer_key); // The result of this function will be used to set into EncryptionConfiguration // as column keys. diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index e1339ab48b5..0bdc8d71b56 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -23,15 +23,15 @@ namespace parquet::encryption { -std::unordered_map +std::unordered_map TestOnlyLocalWrapInMemoryKms::master_key_map_; -std::unordered_map +std::unordered_map TestOnlyInServerWrapKms::unwrapping_master_key_map_; -std::unordered_map +std::unordered_map TestOnlyInServerWrapKms::wrapping_master_key_map_; void TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys( - const std::unordered_map& master_keys_map) { + const std::unordered_map& master_keys_map) { master_key_map_ = master_keys_map; } @@ -39,20 +39,20 @@ TestOnlyLocalWrapInMemoryKms::TestOnlyLocalWrapInMemoryKms( const KmsConnectionConfig& kms_connection_config) : LocalWrapKmsClient(kms_connection_config) {} -std::string TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( +SecureString TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( const std::string& master_key_identifier) { // Always return the latest key version return master_key_map_.at(master_key_identifier); } void TestOnlyInServerWrapKms::InitializeMasterKeys( - const std::unordered_map& master_keys_map) { + const std::unordered_map& master_keys_map) { unwrapping_master_key_map_ = master_keys_map; wrapping_master_key_map_ = unwrapping_master_key_map_; } void TestOnlyInServerWrapKms::StartKeyRotation( - const std::unordered_map& new_master_key_map) { + const std::unordered_map& 
new_master_key_map) { if (new_master_key_map.empty()) { throw ParquetException("No encryption key list"); } @@ -63,32 +63,32 @@ void TestOnlyInServerWrapKms::FinishKeyRotation() { unwrapping_master_key_map_ = wrapping_master_key_map_; } -std::string TestOnlyInServerWrapKms::WrapKey(const std::string& key_bytes, +std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) { // Always use the latest key version for writing if (wrapping_master_key_map_.find(master_key_identifier) == wrapping_master_key_map_.end()) { throw ParquetException("Key not found: " + master_key_identifier); } - const std::string& master_key = wrapping_master_key_map_.at(master_key_identifier); + const SecureString& master_key = wrapping_master_key_map_.at(master_key_identifier); std::string aad = master_key_identifier; return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -std::string TestOnlyInServerWrapKms::UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { +SecureString TestOnlyInServerWrapKms::UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { throw ParquetException("Key not found: " + master_key_identifier); } - const std::string& master_key = unwrapping_master_key_map_.at(master_key_identifier); + const SecureString& master_key = unwrapping_master_key_map_.at(master_key_identifier); std::string aad = master_key_identifier; return internal::DecryptKeyLocally(wrapped_key, master_key, aad); } -std::string TestOnlyInServerWrapKms::GetMasterKeyFromServer( +SecureString TestOnlyInServerWrapKms::GetMasterKeyFromServer( const std::string& master_key_identifier) { // Always return the latest key version return wrapping_master_key_map_.at(master_key_identifier); diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h 
b/cpp/src/parquet/encryption/test_in_memory_kms.h index c5fdc797b8c..689b6f75144 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -34,13 +34,13 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { explicit TestOnlyLocalWrapInMemoryKms(const KmsConnectionConfig& kms_connection_config); static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& master_keys_map); protected: - std::string GetMasterKeyFromServer(const std::string& master_key_identifier) override; + SecureString GetMasterKeyFromServer(const std::string& master_key_identifier) override; private: - static std::unordered_map master_key_map_; + static std::unordered_map master_key_map_; }; // This is a mock class, built for testing only. Don't use it as an example of KmsClient @@ -48,25 +48,25 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { class TestOnlyInServerWrapKms : public KmsClient { public: static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& master_keys_map); - std::string WrapKey(const std::string& key_bytes, + std::string WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) override; - std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + SecureString UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) override; static void StartKeyRotation( - const std::unordered_map& new_master_keys_map); + const std::unordered_map& new_master_keys_map); static void FinishKeyRotation(); private: - std::string GetMasterKeyFromServer(const std::string& master_key_identifier); + SecureString GetMasterKeyFromServer(const std::string& master_key_identifier); // Different wrapping and unwrapping key maps to imitate versioning // and support key rotation. 
- static std::unordered_map unwrapping_master_key_map_; - static std::unordered_map wrapping_master_key_map_; + static std::unordered_map unwrapping_master_key_map_; + static std::unordered_map wrapping_master_key_map_; }; // This is a mock class, built for testing only. Don't use it as an example of @@ -75,7 +75,7 @@ class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory { public: TestOnlyInMemoryKmsClientFactory( bool wrap_locally, - const std::unordered_map& master_keys_map) + const std::unordered_map& master_keys_map) : KmsClientFactory(wrap_locally) { TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys(master_keys_map); TestOnlyInServerWrapKms::InitializeMasterKeys(master_keys_map); diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc index f27da826948..3c77fcd58eb 100644 --- a/cpp/src/parquet/encryption/write_configurations_test.cc +++ b/cpp/src/parquet/encryption/write_configurations_test.cc @@ -76,9 +76,9 @@ class TestEncryptionConfiguration : public ::testing::Test { std::string path_to_double_field_ = kDoubleFieldName; std::string path_to_float_field_ = kFloatFieldName; std::string file_name_; - std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey); - std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1); - std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2); + SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; + SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; + SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; std::string kFileName_ = std::string(kFileName); void EncryptFile( diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 54df6922a1e..a7f50162daf 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -307,13 +307,6 @@ class SerializedFile : public ParquetFileReader::Contents { PARQUET_ASSIGN_OR_THROW(source_size_, 
source_->GetSize()); } - ~SerializedFile() override { - try { - Close(); - } catch (...) { - } - } - void Close() override {} std::shared_ptr GetRowGroup(int i) override { diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 9fbf8f17997..81b98f4bf7c 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -737,7 +737,7 @@ class FileMetaData::FileMetaDataImpl { encryption::kNonceLength); auto tag = reinterpret_cast(signature) + encryption::kNonceLength; - std::string key = file_decryptor_->GetFooterKey(); + encryption::SecureString key = file_decryptor_->GetFooterKey(); std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); auto aes_encryptor = encryption::AesEncryptor::Make(file_decryptor_->algorithm(), @@ -747,7 +747,7 @@ class FileMetaData::FileMetaDataImpl { std::shared_ptr encrypted_buffer = AllocateBuffer( file_decryptor_->pool(), aes_encryptor->CiphertextLength(serialized_len)); int32_t encrypted_len = aes_encryptor->SignedFooterEncrypt( - serialized_data_span, str2span(key), str2span(aad), nonce, + serialized_data_span, key.as_span(), str2span(aad), nonce, encrypted_buffer->mutable_span_as()); return 0 == memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength, From 0456be418e4ec02bd4a5547bfcdf02a0444edb44 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 4 Apr 2025 15:19:40 +0200 Subject: [PATCH 02/44] Rename non-trivial SecureString methods --- cpp/src/parquet/encryption/secure_string.cc | 18 +++++++++--------- cpp/src/parquet/encryption/secure_string.h | 8 ++++---- .../parquet/encryption/secure_string_test.cc | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index 88baf5aa948..18085df3699 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -31,7 +31,7 @@ namespace parquet::encryption { 
SecureString::SecureString(SecureString&& secret) noexcept : secret_(std::move(secret.secret_)) {} SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { - secure_clear(secret); + SecureClear(secret); } SecureString& SecureString::operator=(SecureString&& secret) noexcept { @@ -39,7 +39,7 @@ SecureString& SecureString::operator=(SecureString&& secret) noexcept { // self-assignment return *this; } - dispose(); + Dispose(); secret_ = std::move(secret.secret_); return *this; } @@ -48,14 +48,14 @@ SecureString& SecureString::operator=(const SecureString& secret) noexcept { // self-assignment return *this; } - dispose(); + Dispose(); secret_ = secret.secret_; return *this; } SecureString& SecureString::operator=(std::string&& secret) noexcept { - dispose(); + Dispose(); secret_ = std::move(secret); - secure_clear(secret); + SecureClear(secret); return *this; } @@ -73,12 +73,12 @@ std::size_t SecureString::length() const { return secret_.length(); } ::arrow::util::span SecureString::as_span() const { return str2span(secret_); } -void SecureString::dispose() { secure_clear(secret_); } -void SecureString::secure_clear(std::string& secret) { +void SecureString::Dispose() { SecureClear(secret_); } +void SecureString::SecureClear(std::string& secret) { secret.clear(); - secure_clear(reinterpret_cast(secret.data()), secret.capacity()); + SecureClear(reinterpret_cast(secret.data()), secret.capacity()); } -inline void SecureString::secure_clear(uint8_t* data, size_t size) { +inline void SecureString::SecureClear(uint8_t* data, size_t size) { // Heavily borrowed from libb2's `secure_zero_memory` at // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h #if defined(_WIN32) diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h index 5ffa0160df0..83f22c9c648 100644 --- a/cpp/src/parquet/encryption/secure_string.h +++ b/cpp/src/parquet/encryption/secure_string.h @@ -48,17 +48,17 @@ class 
PARQUET_EXPORT SecureString { bool operator==(const SecureString&) const; bool operator!=(const SecureString&) const; - ~SecureString() { dispose(); } + ~SecureString() { Dispose(); } [[nodiscard]] bool empty() const; [[nodiscard]] std::size_t size() const; [[nodiscard]] std::size_t length() const; [[nodiscard]] ::arrow::util::span as_span() const; - void dispose(); + void Dispose(); - static void secure_clear(std::string&); - static void secure_clear(uint8_t* data, size_t size); + static void SecureClear(std::string&); + static void SecureClear(uint8_t* data, size_t size); private: std::string secret_; diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 07039de7636..c8adf820371 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -37,7 +37,7 @@ TEST(TestSecureString, SecureClearString) { // short string { std::string tiny("abc"); - SecureString::secure_clear(tiny); + SecureString::SecureClear(tiny); assert_securely_cleared(tiny); } @@ -45,7 +45,7 @@ TEST(TestSecureString, SecureClearString) { { std::string large(1024, 'x'); large.resize(1024, 'y'); - SecureString::secure_clear(large); + SecureString::SecureClear(large); assert_securely_cleared(large); } @@ -55,7 +55,7 @@ TEST(TestSecureString, SecureClearString) { // we test that all those characters are securely cleared std::string empty("abcdef"); empty.resize(0); - SecureString::secure_clear(empty); + SecureString::SecureClear(empty); assert_securely_cleared(empty); } } From 689bc8d58e6920ce0c7f731edfe6ad94039844e8 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 4 Apr 2025 16:52:41 +0200 Subject: [PATCH 03/44] Add mutable as_span method, add as_view --- cpp/src/parquet/encryption/secure_string.cc | 10 +++++++- cpp/src/parquet/encryption/secure_string.h | 3 +++ .../parquet/encryption/secure_string_test.cc | 25 +++++++++++++++++++ 3 files changed, 37 insertions(+), 1 
deletion(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index 18085df3699..dd7c0196df7 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -70,9 +70,17 @@ bool SecureString::operator!=(const SecureString& other) const { bool SecureString::empty() const { return secret_.empty(); } std::size_t SecureString::size() const { return secret_.size(); } std::size_t SecureString::length() const { return secret_.length(); } + +::arrow::util::span SecureString::as_span() { + return {reinterpret_cast(secret_.data()), secret_.size()}; +} ::arrow::util::span SecureString::as_span() const { - return str2span(secret_); + return {reinterpret_cast(secret_.data()), secret_.size()}; } +std::string_view SecureString::as_view() const { + return {secret_.data(), secret_.size()}; +} + void SecureString::Dispose() { SecureClear(secret_); } void SecureString::SecureClear(std::string& secret) { secret.clear(); diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h index 83f22c9c648..7500909a089 100644 --- a/cpp/src/parquet/encryption/secure_string.h +++ b/cpp/src/parquet/encryption/secure_string.h @@ -53,7 +53,10 @@ class PARQUET_EXPORT SecureString { [[nodiscard]] bool empty() const; [[nodiscard]] std::size_t size() const; [[nodiscard]] std::size_t length() const; + + [[nodiscard]] ::arrow::util::span as_span(); [[nodiscard]] ::arrow::util::span as_span() const; + [[nodiscard]] std::string_view as_view() const; void Dispose(); diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index c8adf820371..22b2b8b389e 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -103,4 +103,29 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_secret, secret_from_move_secret); } +TEST(TestSecureString, AsSpan) { + 
SecureString secret("hello world"); + const SecureString& const_secret(secret); + auto const_span = const_secret.as_span(); + auto mutual_span = secret.as_span(); + + std::string expected = "hello world"; + ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), expected.size()}; + ASSERT_EQ(const_span, expected_span); + ASSERT_EQ(mutual_span, expected_span); + + // modify secret through mutual span + // the const span shares the same secret, so it is changed as well + mutual_span[0] = 'H'; + expected_span[0] = 'H'; + ASSERT_EQ(const_span, expected_span); + ASSERT_EQ(mutual_span, expected_span); +} + +TEST(TestSecureString, AsView) { + const SecureString secret = SecureString("hello world"); + const std::string_view view = secret.as_view(); + ASSERT_EQ(view, "hello world"); +} + } // namespace parquet::encryption::test From 42d7319707892a41c406b741fe67b108c34ed11a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 4 Apr 2025 17:49:55 +0200 Subject: [PATCH 04/44] More tests --- .../parquet/encryption/secure_string_test.cc | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 22b2b8b389e..8578f522101 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -22,31 +22,38 @@ namespace parquet::encryption::test { -void assert_securely_cleared(const std::string& string) { - // the entire buffer of the string is filled with zeros - std::vector zeros(string.capacity()); - ::arrow::util::span actual(string.data(), string.capacity()); - ::arrow::util::span expected(zeros.data(), zeros.size()); - ASSERT_EQ(actual, expected); - - // the string is empty - ASSERT_TRUE(string.empty()); +std::string_view StringArea(const std::string& string) { + return {string.data(), string.capacity()}; +} + +void AssertSecurelyCleared(std::string_view area) { + // the entire 
area is filled with zeros + std::string zeros(area.size(), '\0'); + ASSERT_EQ(area, std::string_view(zeros)); +} + +void AssertSecurelyCleared(const std::string& string) { + AssertSecurelyCleared(StringArea(string)); } TEST(TestSecureString, SecureClearString) { // short string { std::string tiny("abc"); + auto old_area = StringArea(tiny); SecureString::SecureClear(tiny); - assert_securely_cleared(tiny); + AssertSecurelyCleared(tiny); + AssertSecurelyCleared(old_area); } // long string { std::string large(1024, 'x'); - large.resize(1024, 'y'); + large.resize(512, 'y'); + auto old_area = StringArea(large); SecureString::SecureClear(large); - assert_securely_cleared(large); + AssertSecurelyCleared(large); + AssertSecurelyCleared(old_area); } // empty string @@ -55,8 +62,10 @@ TEST(TestSecureString, SecureClearString) { // we test that all those characters are securely cleared std::string empty("abcdef"); empty.resize(0); + auto old_area = StringArea(empty); SecureString::SecureClear(empty); - assert_securely_cleared(empty); + AssertSecurelyCleared(empty); + AssertSecurelyCleared(old_area); } } @@ -64,7 +73,7 @@ TEST(TestSecureString, Construct) { // move constructing from a string securely clears that string std::string string("hello world"); SecureString secret_from_string(std::move(string)); - assert_securely_cleared(string); + AssertSecurelyCleared(string); ASSERT_FALSE(secret_from_string.empty()); // move constructing from a secure string securely clears that secure string @@ -85,7 +94,7 @@ TEST(TestSecureString, Assign) { std::string string("hello world"); SecureString secret_from_string; secret_from_string = std::move(string); - assert_securely_cleared(string); + AssertSecurelyCleared(string); ASSERT_FALSE(secret_from_string.empty()); // move assigning from a secure string securely clears that secure string @@ -103,6 +112,27 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_secret, secret_from_move_secret); } +TEST(TestSecureString, Compare) { + 
ASSERT_TRUE(SecureString("") == SecureString("")); + ASSERT_FALSE(SecureString("") != SecureString("")); + + ASSERT_TRUE(SecureString("hello world") == SecureString("hello world")); + ASSERT_FALSE(SecureString("hello world") != SecureString("hello world")); + + ASSERT_FALSE(SecureString("hello world") == SecureString("hello worlds")); + ASSERT_TRUE(SecureString("hello world") != SecureString("hello worlds")); +} + +TEST(TestSecureString, Cardinality) { + ASSERT_TRUE(SecureString("").empty()); + ASSERT_EQ(SecureString("").size(), 0); + ASSERT_EQ(SecureString("").length(), 0); + + ASSERT_FALSE(SecureString("hello world").empty()); + ASSERT_EQ(SecureString("hello world").size(), 11); + ASSERT_EQ(SecureString("hello world").length(), 11); +} + TEST(TestSecureString, AsSpan) { SecureString secret("hello world"); const SecureString& const_secret(secret); From 01ab19085fe224bcb9a966a26a5243040be4ed5f Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 4 Apr 2025 17:52:15 +0200 Subject: [PATCH 05/44] Change SecureClear(std::string&) to SecureClear(std::string*) --- cpp/src/parquet/encryption/secure_string.cc | 12 ++++++------ cpp/src/parquet/encryption/secure_string.h | 2 +- cpp/src/parquet/encryption/secure_string_test.cc | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index dd7c0196df7..e93ea503cc9 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -31,7 +31,7 @@ namespace parquet::encryption { SecureString::SecureString(SecureString&& secret) noexcept : secret_(std::move(secret.secret_)) {} SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { - SecureClear(secret); + SecureClear(&secret); } SecureString& SecureString::operator=(SecureString&& secret) noexcept { @@ -55,7 +55,7 @@ SecureString& SecureString::operator=(const SecureString& secret) noexcept { 
SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); secret_ = std::move(secret); - SecureClear(secret); + SecureClear(&secret); return *this; } @@ -81,10 +81,10 @@ std::string_view SecureString::as_view() const { return {secret_.data(), secret_.size()}; } -void SecureString::Dispose() { SecureClear(secret_); } -void SecureString::SecureClear(std::string& secret) { - secret.clear(); - SecureClear(reinterpret_cast(secret.data()), secret.capacity()); +void SecureString::Dispose() { SecureClear(&secret_); } +void SecureString::SecureClear(std::string* secret) { + secret->clear(); + SecureClear(reinterpret_cast(secret->data()), secret->capacity()); } inline void SecureString::SecureClear(uint8_t* data, size_t size) { // Heavily borrowed from libb2's `secure_zero_memory` at diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h index 7500909a089..1c5043fae53 100644 --- a/cpp/src/parquet/encryption/secure_string.h +++ b/cpp/src/parquet/encryption/secure_string.h @@ -60,7 +60,7 @@ class PARQUET_EXPORT SecureString { void Dispose(); - static void SecureClear(std::string&); + static void SecureClear(std::string*); static void SecureClear(uint8_t* data, size_t size); private: diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 8578f522101..46752b772c9 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -41,7 +41,7 @@ TEST(TestSecureString, SecureClearString) { { std::string tiny("abc"); auto old_area = StringArea(tiny); - SecureString::SecureClear(tiny); + SecureString::SecureClear(&tiny); AssertSecurelyCleared(tiny); AssertSecurelyCleared(old_area); } @@ -51,7 +51,7 @@ TEST(TestSecureString, SecureClearString) { std::string large(1024, 'x'); large.resize(512, 'y'); auto old_area = StringArea(large); - SecureString::SecureClear(large); + 
SecureString::SecureClear(&large); AssertSecurelyCleared(large); AssertSecurelyCleared(old_area); } @@ -63,7 +63,7 @@ TEST(TestSecureString, SecureClearString) { std::string empty("abcdef"); empty.resize(0); auto old_area = StringArea(empty); - SecureString::SecureClear(empty); + SecureString::SecureClear(&empty); AssertSecurelyCleared(empty); AssertSecurelyCleared(old_area); } From f3f4087f0254c4682168f7ec73b48e202c166fe0 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 7 Apr 2025 14:38:50 +0200 Subject: [PATCH 06/44] Decrypt key directly intoSecureString --- cpp/src/parquet/encryption/crypto_factory.cc | 18 ++++++++---------- .../parquet/encryption/key_toolkit_internal.cc | 8 +++----- cpp/src/parquet/encryption/secure_string.cc | 1 + cpp/src/parquet/encryption/secure_string.h | 1 + 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index 4b8a6a51f36..d318d8c899f 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -71,15 +71,14 @@ std::shared_ptr CryptoFactory::GetFileEncryptionProper int dek_length = dek_length_bits / 8; - std::string footer_key(dek_length, '\0'); - RandBytes(reinterpret_cast(footer_key.data()), footer_key.size()); - SecureString secure_footer_key(std::move(footer_key)); + SecureString footer_key(dek_length, '\0'); + RandBytes(footer_key.as_span().data(), footer_key.size()); std::string footer_key_metadata = - key_wrapper.GetEncryptionKeyMetadata(secure_footer_key, footer_key_id, true); + key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true); FileEncryptionProperties::Builder properties_builder = - FileEncryptionProperties::Builder(secure_footer_key); + FileEncryptionProperties::Builder(footer_key); properties_builder.footer_key_metadata(footer_key_metadata); properties_builder.algorithm(encryption_config.encryption_algorithm); @@ -147,16 +146,15 @@ 
ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties column_name); } - std::string column_key(dek_length, '\0'); - RandBytes(reinterpret_cast(column_key.data()), column_key.size()); - SecureString secure_column_key(std::move(column_key)); + SecureString column_key(dek_length, '\0'); + RandBytes(column_key.as_span().data(), column_key.size()); std::string column_key_key_metadata = - key_wrapper->GetEncryptionKeyMetadata(secure_column_key, column_key_id, false); + key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false); std::shared_ptr cmd = ColumnEncryptionProperties::Builder(column_name) - .key(secure_column_key) + .key(column_key) ->key_metadata(column_key_key_metadata) ->build(); encrypted_columns.insert({column_name, cmd}); diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 53d450b6561..a1f4004c5ba 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -55,14 +55,12 @@ SecureString DecryptKeyLocally(const std::string& encoded_encrypted_key, int32_t decrypted_key_len = key_decryptor.PlaintextLength(static_cast(encrypted_key.size())); - std::string decrypted_key(decrypted_key_len, '\0'); - ::arrow::util::span decrypted_key_span( - reinterpret_cast(&decrypted_key[0]), decrypted_key_len); + SecureString decrypted_key(decrypted_key_len, '\0'); decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), master_key.as_span(), - str2span(aad), decrypted_key_span); + str2span(aad), decrypted_key.as_span()); - return SecureString(std::move(decrypted_key)); + return decrypted_key; } bool ValidateKeyLength(int32_t key_length_bits) { diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index e93ea503cc9..44eb1923d0a 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -33,6 +33,7 
@@ SecureString::SecureString(SecureString&& secret) noexcept SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { SecureClear(&secret); } +SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} SecureString& SecureString::operator=(SecureString&& secret) noexcept { if (this == &secret) { diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h index 1c5043fae53..e136977e9b2 100644 --- a/cpp/src/parquet/encryption/secure_string.h +++ b/cpp/src/parquet/encryption/secure_string.h @@ -40,6 +40,7 @@ class PARQUET_EXPORT SecureString { SecureString(SecureString&&) noexcept; SecureString(const SecureString&) noexcept = default; explicit SecureString(std::string&&) noexcept; + explicit SecureString(size_t, char) noexcept; SecureString& operator=(SecureString&&) noexcept; SecureString& operator=(const SecureString&) noexcept; From f90f196ac308526540301ed6f90fd5904adda1a2 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 7 Apr 2025 14:45:45 +0200 Subject: [PATCH 07/44] Remove const from KeyWithMasterId members --- cpp/src/parquet/encryption/key_toolkit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index 08c46f499c6..be6991b7650 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -99,8 +99,8 @@ class PARQUET_EXPORT KeyWithMasterId { const std::string& master_id() const { return master_id_; } private: - const SecureString key_bytes_; - const std::string master_id_; + SecureString key_bytes_; + std::string master_id_; }; } // namespace parquet::encryption From 0e74be5890331426a924e1682419f90053b48f58 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 10 Apr 2025 20:02:47 +0200 Subject: [PATCH 08/44] Replace call-by-ref with call-by-value when copied --- cpp/src/parquet/encryption/encryption.cc | 147 
+++++++++--------- cpp/src/parquet/encryption/encryption.h | 70 ++++----- .../parquet/encryption/file_key_unwrapper.cc | 4 +- .../parquet/encryption/file_key_wrapper.cc | 9 +- .../encryption/internal_file_decryptor.cc | 67 ++++---- .../encryption/internal_file_decryptor.h | 11 +- .../encryption/internal_file_encryptor.cc | 23 ++- .../encryption/internal_file_encryptor.h | 5 +- .../encryption/local_wrap_kms_client.cc | 9 +- .../encryption/local_wrap_kms_client.h | 4 +- .../parquet/encryption/secure_string_test.cc | 3 +- .../parquet/encryption/test_in_memory_kms.cc | 2 +- .../parquet/encryption/test_in_memory_kms.h | 3 +- cpp/src/parquet/metadata.cc | 4 +- 14 files changed, 182 insertions(+), 179 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 206535d7ec0..11c5a391509 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -29,8 +29,8 @@ namespace parquet { // integer key retriever -void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const encryption::SecureString& key) { - key_map_.insert({key_id, key}); +void IntegerKeyIdRetriever::PutKey(uint32_t key_id, encryption::SecureString key) { + key_map_.insert({key_id, std::move(key)}); } encryption::SecureString IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { @@ -41,9 +41,8 @@ encryption::SecureString IntegerKeyIdRetriever::GetKey(const std::string& key_me } // string key retriever -void StringKeyIdRetriever::PutKey(const std::string& key_id, - const encryption::SecureString& key) { - key_map_.insert({key_id, key}); +void StringKeyIdRetriever::PutKey(std::string key_id, encryption::SecureString key) { + key_map_.insert({std::move(key_id), std::move(key)}); } encryption::SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { @@ -55,7 +54,7 @@ ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( if (column_key.empty()) return this; 
DCHECK(key_.empty()); - key_ = column_key; + key_ = std::move(column_key); return this; } @@ -93,12 +92,12 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key } FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( - const encryption::SecureString footer_key) { + encryption::SecureString footer_key) { if (footer_key.empty()) { return this; } DCHECK(footer_key_.empty()); - footer_key_ = footer_key; + footer_key_ = std::move(footer_key); return this; } @@ -112,12 +111,12 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::key_retrie } FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix( - const std::string& aad_prefix) { + std::string aad_prefix) { if (aad_prefix.empty()) { return this; } DCHECK(aad_prefix_.empty()); - aad_prefix_ = aad_prefix; + aad_prefix_ = std::move(aad_prefix); return this; } @@ -131,11 +130,11 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix } ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( - const encryption::SecureString& key) { + encryption::SecureString key) { if (key.empty()) return this; - DCHECK(!key.empty()); - key_ = key; + DCHECK(key_.empty()); + key_ = std::move(key); return this; } @@ -145,11 +144,11 @@ std::shared_ptr ColumnDecryptionProperties::Builder: } FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_metadata( - const std::string& footer_key_metadata) { + std::string footer_key_metadata) { if (footer_key_metadata.empty()) return this; DCHECK(footer_key_metadata_.empty()); - footer_key_metadata_ = footer_key_metadata; + footer_key_metadata_ = std::move(footer_key_metadata); return this; } @@ -165,11 +164,11 @@ FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::encrypted_ } FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::aad_prefix( - const std::string& aad_prefix) { + std::string 
aad_prefix) { if (aad_prefix.empty()) return this; DCHECK(aad_prefix_.empty()); - aad_prefix_ = aad_prefix; + aad_prefix_ = std::move(aad_prefix); store_aad_prefix_in_file_ = true; return this; } @@ -182,42 +181,38 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() { return this; } -ColumnEncryptionProperties::ColumnEncryptionProperties( - bool encrypted, const std::string& column_path, const encryption::SecureString& key, - const std::string& key_metadata) - : column_path_(column_path) { - DCHECK(!column_path.empty()); +ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, + std::string column_path, + encryption::SecureString key, + std::string key_metadata) + : column_path_(std::move(column_path)), + encrypted_(encrypted), + encrypted_with_footer_key_(encrypted && key.empty()), + key_(std::move(key)), + key_metadata_(std::move(key_metadata)) { + DCHECK(!column_path_.empty()); if (!encrypted) { DCHECK(key.empty() && key_metadata.empty()); } - - if (!key.empty()) { - DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); + if (!key_.empty()) { + DCHECK(key_.length() == 16 || key_.length() == 24 || key_.length() == 32); } - - encrypted_with_footer_key_ = (encrypted && key.empty()); if (encrypted_with_footer_key_) { - DCHECK(key_metadata.empty()); + DCHECK(key_metadata_.empty()); } - - encrypted_ = encrypted; - key_metadata_ = key_metadata; - key_ = key; } -ColumnDecryptionProperties::ColumnDecryptionProperties( - const std::string& column_path, const encryption::SecureString& key) - : column_path_(column_path) { - DCHECK(!column_path.empty()); +ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path, + encryption::SecureString key) + : column_path_(std::move(column_path)), key_(std::move(key)) { + DCHECK(!column_path_.empty()); - if (!key.empty()) { - DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); + if (!key_.empty()) { + DCHECK(key_.length() == 16 || key_.length() == 
24 || key_.length() == 32); } - - key_ = key; } -encryption::SecureString FileDecryptionProperties::column_key( +const encryption::SecureString& FileDecryptionProperties::column_key( const std::string& column_path) const { if (column_decryption_properties_.find(column_path) != column_decryption_properties_.end()) { @@ -230,33 +225,32 @@ encryption::SecureString FileDecryptionProperties::column_key( } FileDecryptionProperties::FileDecryptionProperties( - const encryption::SecureString& footer_key, + encryption::SecureString footer_key, std::shared_ptr key_retriever, - bool check_plaintext_footer_integrity, const std::string& aad_prefix, + bool check_plaintext_footer_integrity, std::string aad_prefix, std::shared_ptr aad_prefix_verifier, - const ColumnPathToDecryptionPropertiesMap& column_decryption_properties, - bool plaintext_files_allowed) { - DCHECK(!footer_key.empty() || nullptr != key_retriever || - 0 != column_decryption_properties.size()); - - if (!footer_key.empty()) { - DCHECK(footer_key.length() == 16 || footer_key.length() == 24 || - footer_key.length() == 32); + ColumnPathToDecryptionPropertiesMap column_decryption_properties, + bool plaintext_files_allowed) + : footer_key_(std::move(footer_key)), + aad_prefix_(std::move(aad_prefix)), + aad_prefix_verifier_(std::move(aad_prefix_verifier)), + column_decryption_properties_(std::move(column_decryption_properties)), + key_retriever_(std::move(key_retriever)), + check_plaintext_footer_integrity_(check_plaintext_footer_integrity), + plaintext_files_allowed_(plaintext_files_allowed) { + DCHECK(!footer_key_.empty() || nullptr != key_retriever_ || + 0 != column_decryption_properties_.size()); + if (!footer_key_.empty()) { + DCHECK(footer_key_.length() == 16 || footer_key_.length() == 24 || + footer_key_.length() == 32); } - if (footer_key.empty() && check_plaintext_footer_integrity) { - DCHECK(nullptr != key_retriever); + if (footer_key_.empty() && check_plaintext_footer_integrity) { + DCHECK(nullptr != 
key_retriever_); } - aad_prefix_verifier_ = std::move(aad_prefix_verifier); - footer_key_ = footer_key; - check_plaintext_footer_integrity_ = check_plaintext_footer_integrity; - key_retriever_ = std::move(key_retriever); - aad_prefix_ = aad_prefix; - column_decryption_properties_ = column_decryption_properties; - plaintext_files_allowed_ = plaintext_files_allowed; } FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_id( - const std::string& key_id) { + std::string key_id) { // key_id is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); const uint8_t* data = reinterpret_cast(key_id.c_str()); @@ -268,7 +262,7 @@ FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key return this; } - return footer_key_metadata(key_id); + return footer_key_metadata(std::move(key_id)); } std::shared_ptr @@ -285,20 +279,19 @@ FileEncryptionProperties::column_encryption_properties(const std::string& column } FileEncryptionProperties::FileEncryptionProperties( - ParquetCipher::type cipher, const encryption::SecureString& footer_key, - const std::string& footer_key_metadata, bool encrypted_footer, - const std::string& aad_prefix, bool store_aad_prefix_in_file, - const ColumnPathToEncryptionPropertiesMap& encrypted_columns) - : footer_key_(footer_key), - footer_key_metadata_(footer_key_metadata), + ParquetCipher::type cipher, encryption::SecureString footer_key, + std::string footer_key_metadata, bool encrypted_footer, std::string aad_prefix, + bool store_aad_prefix_in_file, ColumnPathToEncryptionPropertiesMap encrypted_columns) + : footer_key_(std::move(footer_key)), + footer_key_metadata_(std::move(footer_key_metadata)), encrypted_footer_(encrypted_footer), - aad_prefix_(aad_prefix), + aad_prefix_(std::move(aad_prefix)), store_aad_prefix_in_file_(store_aad_prefix_in_file), - encrypted_columns_(encrypted_columns) { - DCHECK(!footer_key.empty()); + encrypted_columns_(std::move(encrypted_columns)) { + 
DCHECK(!footer_key_.empty()); // footer_key must be either 16, 24 or 32 bytes. - DCHECK(footer_key.length() == 16 || footer_key.length() == 24 || - footer_key.length() == 32); + DCHECK(footer_key_.length() == 16 || footer_key_.length() == 24 || + footer_key_.length() == 32); uint8_t aad_file_unique[kAadFileUniqueLength]; encryption::RandBytes(aad_file_unique, kAadFileUniqueLength); @@ -306,17 +299,17 @@ FileEncryptionProperties::FileEncryptionProperties( kAadFileUniqueLength); bool supply_aad_prefix = false; - if (aad_prefix.empty()) { + if (aad_prefix_.empty()) { file_aad_ = aad_file_unique_str; } else { - file_aad_ = aad_prefix + aad_file_unique_str; + file_aad_ = aad_prefix_ + aad_file_unique_str; if (!store_aad_prefix_in_file_) supply_aad_prefix = true; } algorithm_.algorithm = cipher; algorithm_.aad.aad_file_unique = aad_file_unique_str; algorithm_.aad.supply_aad_prefix = supply_aad_prefix; - if (!aad_prefix.empty() && store_aad_prefix_in_file_) { - algorithm_.aad.aad_prefix = aad_prefix; + if (!aad_prefix_.empty() && store_aad_prefix_in_file_) { + algorithm_.aad.aad_prefix = aad_prefix_; } } diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 060499770b0..22ce1e7798a 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -54,7 +54,7 @@ class PARQUET_EXPORT DecryptionKeyRetriever { /// Simple integer key retriever class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(uint32_t key_id, const encryption::SecureString& key); + void PutKey(uint32_t key_id, encryption::SecureString key); encryption::SecureString GetKey(const std::string& key_metadata) override; private: @@ -64,7 +64,7 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { // Simple string key retriever class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(const std::string& key_id, const 
encryption::SecureString& key); + void PutKey(std::string key_id, encryption::SecureString key); encryption::SecureString GetKey(const std::string& key_metadata) override; private: @@ -141,11 +141,11 @@ class PARQUET_EXPORT ColumnEncryptionProperties { : column_path_(path), encrypted_(encrypted) {} }; - std::string column_path() const { return column_path_; } + const std::string& column_path() const { return column_path_; } bool is_encrypted() const { return encrypted_; } bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; } - encryption::SecureString key() const { return key_; } - std::string key_metadata() const { return key_metadata_; } + const encryption::SecureString& key() const { return key_; } + const std::string& key_metadata() const { return key_metadata_; } ColumnEncryptionProperties() = default; ColumnEncryptionProperties(const ColumnEncryptionProperties& other) = default; @@ -157,9 +157,9 @@ class PARQUET_EXPORT ColumnEncryptionProperties { bool encrypted_with_footer_key_; encryption::SecureString key_; std::string key_metadata_; - explicit ColumnEncryptionProperties(bool encrypted, const std::string& column_path, - const encryption::SecureString& key, - const std::string& key_metadata); + explicit ColumnEncryptionProperties(bool encrypted, std::string column_path, + encryption::SecureString key, + std::string key_metadata); }; class PARQUET_EXPORT ColumnDecryptionProperties { @@ -175,7 +175,7 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// key metadata for this column the metadata will be ignored, /// the column will be decrypted with this key. /// key length must be either 16, 24 or 32 bytes. 
- Builder* key(const encryption::SecureString& key); + Builder* key(encryption::SecureString key); std::shared_ptr build(); @@ -188,8 +188,8 @@ class PARQUET_EXPORT ColumnDecryptionProperties { ColumnDecryptionProperties(const ColumnDecryptionProperties& other) = default; ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default; - std::string column_path() const { return column_path_; } - encryption::SecureString key() const { return key_; } + const std::string& column_path() const { return column_path_; } + const encryption::SecureString& key() const { return key_; } private: const std::string column_path_; @@ -198,8 +198,8 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// This class is only required for setting explicit column decryption keys - /// to override key retriever (or to provide keys when key metadata and/or /// key retriever are not available) - explicit ColumnDecryptionProperties(const std::string& column_path, - const encryption::SecureString& key); + explicit ColumnDecryptionProperties(std::string column_path, + encryption::SecureString key); }; class PARQUET_EXPORT AADPrefixVerifier { @@ -234,7 +234,7 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - Builder* footer_key(const encryption::SecureString footer_key); + Builder* footer_key(encryption::SecureString footer_key); /// Set explicit column keys (decryption properties). /// Its also possible to set a key retriever on this property object. @@ -268,7 +268,7 @@ class PARQUET_EXPORT FileDecryptionProperties { /// A must when a prefix is used for file encryption, but not stored in file. /// If AAD prefix is stored in file, it will be compared to the explicitly /// supplied value and an exception will be thrown if they differ. 
- Builder* aad_prefix(const std::string& aad_prefix); + Builder* aad_prefix(std::string aad_prefix); /// Set callback for verification of AAD Prefixes stored in file. Builder* aad_prefix_verifier(std::shared_ptr aad_prefix_verifier); @@ -301,11 +301,11 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; }; - encryption::SecureString column_key(const std::string& column_path) const; + const encryption::SecureString& column_key(const std::string& column_path) const; - encryption::SecureString footer_key() const { return footer_key_; } + const encryption::SecureString& footer_key() const { return footer_key_; } - std::string aad_prefix() const { return aad_prefix_; } + const std::string& aad_prefix() const { return aad_prefix_; } const std::shared_ptr& key_retriever() const { return key_retriever_; @@ -334,11 +334,11 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; FileDecryptionProperties( - const encryption::SecureString& footer_key, + encryption::SecureString footer_key, std::shared_ptr key_retriever, - bool check_plaintext_footer_integrity, const std::string& aad_prefix, + bool check_plaintext_footer_integrity, std::string aad_prefix, std::shared_ptr aad_prefix_verifier, - const ColumnPathToDecryptionPropertiesMap& column_decryption_properties, + ColumnPathToDecryptionPropertiesMap column_decryption_properties, bool plaintext_files_allowed); }; @@ -346,10 +346,10 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - explicit Builder(const encryption::SecureString& footer_key) + explicit Builder(encryption::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), - encrypted_footer_(kDefaultEncryptedFooter) { - footer_key_ = footer_key; + encrypted_footer_(kDefaultEncryptedFooter), + footer_key_(std::move(footer_key)) { store_aad_prefix_in_file_ = false; } @@ -369,14 +369,14 @@ class PARQUET_EXPORT FileEncryptionProperties { /// Set a key 
retrieval metadata (converted from String). /// use either footer_key_metadata or footer_key_id, not both. - Builder* footer_key_id(const std::string& key_id); + Builder* footer_key_id(std::string key_id); /// Set a key retrieval metadata. /// use either footer_key_metadata or footer_key_id, not both. - Builder* footer_key_metadata(const std::string& footer_key_metadata); + Builder* footer_key_metadata(std::string footer_key_metadata); /// Set the file AAD Prefix. - Builder* aad_prefix(const std::string& aad_prefix); + Builder* aad_prefix(std::string aad_prefix); /// Skip storing AAD Prefix in file. /// If not called, and if AAD Prefix is set, it will be stored. @@ -409,16 +409,16 @@ class PARQUET_EXPORT FileEncryptionProperties { EncryptionAlgorithm algorithm() const { return algorithm_; } - encryption::SecureString footer_key() const { return footer_key_; } + const encryption::SecureString& footer_key() const { return footer_key_; } - std::string footer_key_metadata() const { return footer_key_metadata_; } + const std::string& footer_key_metadata() const { return footer_key_metadata_; } - std::string file_aad() const { return file_aad_; } + const std::string& file_aad() const { return file_aad_; } std::shared_ptr column_encryption_properties( const std::string& column_path); - ColumnPathToEncryptionPropertiesMap encrypted_columns() const { + const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const { return encrypted_columns_; } @@ -433,10 +433,10 @@ class PARQUET_EXPORT FileEncryptionProperties { ColumnPathToEncryptionPropertiesMap encrypted_columns_; FileEncryptionProperties(ParquetCipher::type cipher, - const encryption::SecureString& footer_key, - const std::string& footer_key_metadata, bool encrypted_footer, - const std::string& aad_prefix, bool store_aad_prefix_in_file, - const ColumnPathToEncryptionPropertiesMap& encrypted_columns); + encryption::SecureString footer_key, + std::string footer_key_metadata, bool encrypted_footer, + std::string 
aad_prefix, bool store_aad_prefix_in_file, + ColumnPathToEncryptionPropertiesMap encrypted_columns); }; } // namespace parquet diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 99568c4814a..25862f150e5 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -67,7 +67,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -encryption::SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -114,7 +114,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const std::string& encoded_kek_id = key_material.kek_id(); const std::string& encoded_wrapped_kek = key_material.wrapped_kek(); - SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( + const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); }); diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index 97af661e442..affa68e9edd 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -111,17 +111,16 @@ std::string FileKeyWrapper::GetEncryptionKeyMetadata(const SecureString& data_ke KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey( const std::string& master_key_id) { - std::string kek_bytes(kKeyEncryptionKeyLength, '\0'); - RandBytes(reinterpret_cast(kek_bytes.data()), kKeyEncryptionKeyLength); - SecureString secure_kek_bytes(std::move(kek_bytes)); + SecureString kek_bytes(kKeyEncryptionKeyLength, '\0'); + 
RandBytes(kek_bytes.as_span().data(), kKeyEncryptionKeyLength); std::string kek_id(kKeyEncryptionKeyIdLength, '\0'); RandBytes(reinterpret_cast(kek_id.data()), kKeyEncryptionKeyIdLength); // Encrypt KEK with Master key - std::string encoded_wrapped_kek = kms_client_->WrapKey(secure_kek_bytes, master_key_id); + std::string encoded_wrapped_kek = kms_client_->WrapKey(kek_bytes, master_key_id); - return KeyEncryptionKey(std::move(secure_kek_bytes), std::move(kek_id), + return KeyEncryptionKey(std::move(kek_bytes), std::move(kek_id), std::move(encoded_wrapped_kek)); } diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index 5decaec3554..568c2728cdf 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -26,12 +26,12 @@ namespace parquet { // Decryptor Decryptor::Decryptor(std::unique_ptr aes_decryptor, - const encryption::SecureString& key, const std::string& file_aad, - const std::string& aad, ::arrow::MemoryPool* pool) + encryption::SecureString key, std::string file_aad, std::string aad, + ::arrow::MemoryPool* pool) : aes_decryptor_(std::move(aes_decryptor)), - key_(key), - file_aad_(file_aad), - aad_(aad), + key_(std::move(key)), + file_aad_(std::move(file_aad)), + aad_(std::move(aad)), pool_(pool) {} Decryptor::~Decryptor() = default; @@ -60,36 +60,35 @@ InternalFileDecryptor::InternalFileDecryptor( footer_key_metadata_(footer_key_metadata), pool_(pool) {} -encryption::SecureString InternalFileDecryptor::GetFooterKey() { +const encryption::SecureString& InternalFileDecryptor::GetFooterKey() { std::unique_lock lock(mutex_); if (!footer_key_.empty()) { return footer_key_; } - encryption::SecureString footer_key = properties_->footer_key(); + // cache footer key to avoid repeated retrieval of key from the key_retriever + footer_key_ = properties_->footer_key(); // ignore footer key metadata if footer key is explicitly set 
via API - if (footer_key.empty()) { + if (footer_key_.empty()) { if (footer_key_metadata_.empty()) throw ParquetException("No footer key or key metadata"); if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKey(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; throw ParquetException(ss.str()); } } - if (footer_key.empty()) { + if (footer_key_.empty()) { throw ParquetException( "Footer key unavailable. Could not verify " "plaintext footer metadata"); } - // cache footer key to avoid repeated retrieval of key from the key_retriever - footer_key_ = footer_key; - return footer_key; + return footer_key_; } std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { @@ -99,7 +98,7 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( const std::string& aad, bool metadata) { - encryption::SecureString footer_key = GetFooterKey(); + const encryption::SecureString& footer_key = GetFooterKey(); auto key_len = static_cast(footer_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); @@ -109,21 +108,27 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( encryption::SecureString InternalFileDecryptor::GetColumnKey( const std::string& column_path, const std::string& column_key_metadata) { - encryption::SecureString column_key = properties_->column_key(column_path); - - // No explicit column key given via API. Retrieve via key metadata. 
- if (column_key.empty() && !column_key_metadata.empty() && - properties_->key_retriever() != nullptr) { - try { - column_key = properties_->key_retriever()->GetKey(column_key_metadata); - } catch (KeyAccessDeniedException& e) { - std::stringstream ss; - ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; - throw HiddenColumnException(ss.str()); + try { + encryption::SecureString column_key = + RetrieveColumnKeyIfEmpty(properties_->column_key(column_path), + column_key_metadata, properties_->key_retriever()); + if (column_key.empty()) { + throw HiddenColumnException("HiddenColumnException, path=" + column_path); } + return column_key; + } catch (KeyAccessDeniedException& e) { + std::stringstream ss; + ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; + throw HiddenColumnException(ss.str()); } - if (column_key.empty()) { - throw HiddenColumnException("HiddenColumnException, path=" + column_path); +} + +encryption::SecureString InternalFileDecryptor::RetrieveColumnKeyIfEmpty( + encryption::SecureString column_key, const std::string& column_key_metadata, + const std::shared_ptr& key_retriever) { + if (column_key.empty() && !column_key_metadata.empty() && key_retriever != nullptr) { + // No explicit column key given via API. Retrieve via key metadata. 
+ return key_retriever->GetKey(column_key_metadata); } return column_key; } @@ -131,7 +136,8 @@ encryption::SecureString InternalFileDecryptor::GetColumnKey( std::unique_ptr InternalFileDecryptor::GetColumnDecryptor( const std::string& column_path, const std::string& column_key_metadata, const std::string& aad, bool metadata) { - encryption::SecureString column_key = GetColumnKey(column_path, column_key_metadata); + const encryption::SecureString& column_key = + GetColumnKey(column_path, column_key_metadata); auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, aad, @@ -148,9 +154,10 @@ InternalFileDecryptor::GetColumnDecryptorFactory( // The column is encrypted with its own key const std::string& column_key_metadata = crypto_metadata->key_metadata(); const std::string column_path = crypto_metadata->path_in_schema()->ToDotString(); - encryption::SecureString column_key = GetColumnKey(column_path, column_key_metadata); + const encryption::SecureString& column_key = + GetColumnKey(column_path, column_key_metadata); - return [this, aad, metadata, column_key = std::move(column_key)]() { + return [this, aad, metadata, column_key = column_key]() { auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index aebce309769..14491e1dc2f 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -33,6 +33,7 @@ class AesEncryptor; } // namespace encryption class ColumnCryptoMetaData; +class DecryptionKeyRetriever; class FileDecryptionProperties; // An object handling decryption using well-known 
encryption parameters @@ -41,8 +42,8 @@ class FileDecryptionProperties; class PARQUET_EXPORT Decryptor { public: Decryptor(std::unique_ptr decryptor, - const encryption::SecureString& key, const std::string& file_aad, - const std::string& aad, ::arrow::MemoryPool* pool); + encryption::SecureString key, std::string file_aad, std::string aad, + ::arrow::MemoryPool* pool); ~Decryptor(); const std::string& file_aad() const { return file_aad_; } @@ -72,7 +73,7 @@ class InternalFileDecryptor { const std::string& file_aad() const { return file_aad_; } - encryption::SecureString GetFooterKey(); + const encryption::SecureString& GetFooterKey(); ParquetCipher::type algorithm() const { return algorithm_; } @@ -133,6 +134,10 @@ class InternalFileDecryptor { encryption::SecureString GetColumnKey(const std::string& column_path, const std::string& column_key_metadata); + static encryption::SecureString RetrieveColumnKeyIfEmpty( + encryption::SecureString column_key, const std::string& column_key_metadata, + const std::shared_ptr& key_retriever); + std::unique_ptr GetFooterDecryptor(const std::string& aad, bool metadata); std::unique_ptr GetColumnDecryptor(const std::string& column_path, diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 867e337d6e5..43d3ace53e8 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -23,12 +23,12 @@ namespace parquet { // Encryptor Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, - const encryption::SecureString& key, const std::string& file_aad, - const std::string& aad, ::arrow::MemoryPool* pool) + encryption::SecureString key, std::string file_aad, std::string aad, + ::arrow::MemoryPool* pool) : aes_encryptor_(aes_encryptor), - key_(key), - file_aad_(file_aad), - aad_(aad), + key_(std::move(key)), + file_aad_(std::move(file_aad)), + aad_(std::move(aad)), pool_(pool) {} int32_t 
Encryptor::CiphertextLength(int64_t plaintext_len) const { @@ -52,7 +52,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - encryption::SecureString footer_key = properties_->footer_key(); + const encryption::SecureString& footer_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size()); footer_encryptor_ = std::make_shared( aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_); @@ -66,7 +66,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterSigningEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - encryption::SecureString footer_signing_key = properties_->footer_key(); + const encryption::SecureString& footer_signing_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size()); footer_signing_encryptor_ = std::make_shared( aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_); @@ -101,12 +101,9 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return nullptr; } - encryption::SecureString key; - if (column_prop->is_encrypted_with_footer_key()) { - key = properties_->footer_key(); - } else { - key = column_prop->key(); - } + const encryption::SecureString& key = column_prop->is_encrypted_with_footer_key() + ? properties_->footer_key() + : column_prop->key(); ParquetCipher::type algorithm = properties_->algorithm().algorithm; auto aes_encryptor = metadata ? 
GetMetaAesEncryptor(algorithm, key.size()) diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index 9d582ed0816..c37581a2a08 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -36,9 +36,8 @@ class ColumnEncryptionProperties; class PARQUET_EXPORT Encryptor { public: - Encryptor(encryption::AesEncryptor* aes_encryptor, const encryption::SecureString& key, - const std::string& file_aad, const std::string& aad, - ::arrow::MemoryPool* pool); + Encryptor(encryption::AesEncryptor* aes_encryptor, encryption::SecureString key, + std::string file_aad, std::string aad, ::arrow::MemoryPool* pool); const std::string& file_aad() { return file_aad_; } void UpdateAad(const std::string& aad) { aad_ = aad; } ::arrow::MemoryPool* pool() { return pool_; } diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 1cbf5982304..b0069c2b280 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -91,8 +91,8 @@ SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, master_key_version); } const std::string& encrypted_encoded_key = key_wrap.encrypted_encoded_key(); - const SecureString master_key = master_key_cache_.GetOrInsert( - master_key_identifier, [this, master_key_identifier]() -> SecureString { + const SecureString& master_key = master_key_cache_.GetOrInsert( + master_key_identifier, [this, master_key_identifier]() -> const SecureString& { return this->GetKeyFromServer(master_key_identifier); }); const std::string& aad = master_key_identifier; @@ -100,8 +100,9 @@ SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, return internal::DecryptKeyLocally(encrypted_encoded_key, master_key, aad); } -SecureString LocalWrapKmsClient::GetKeyFromServer(const std::string& 
key_identifier) { - SecureString master_key = GetMasterKeyFromServer(key_identifier); +const SecureString& LocalWrapKmsClient::GetKeyFromServer( + const std::string& key_identifier) { + const SecureString& master_key = GetMasterKeyFromServer(key_identifier); int32_t key_length_bits = static_cast(master_key.size() * 8); if (!internal::ValidateKeyLength(key_length_bits)) { std::ostringstream ss; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index df4cef22f87..44fc6f03e05 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -44,7 +44,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { protected: /// Get master key from the remote KMS server. /// Note: this function might be called by multiple threads - virtual SecureString GetMasterKeyFromServer( + virtual const SecureString& GetMasterKeyFromServer( const std::string& master_key_identifier) = 0; private: @@ -84,7 +84,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string master_key_version_; }; - SecureString GetKeyFromServer(const std::string& key_identifier); + const SecureString& GetKeyFromServer(const std::string& key_identifier); protected: KmsConnectionConfig kms_connection_config_; diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 46752b772c9..8feac489518 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -140,7 +140,8 @@ TEST(TestSecureString, AsSpan) { auto mutual_span = secret.as_span(); std::string expected = "hello world"; - ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), expected.size()}; + ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), + expected.size()}; ASSERT_EQ(const_span, expected_span); ASSERT_EQ(mutual_span, expected_span); diff --git 
a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 0bdc8d71b56..16e4f30ed8f 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -39,7 +39,7 @@ TestOnlyLocalWrapInMemoryKms::TestOnlyLocalWrapInMemoryKms( const KmsConnectionConfig& kms_connection_config) : LocalWrapKmsClient(kms_connection_config) {} -SecureString TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( +const SecureString& TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( const std::string& master_key_identifier) { // Always return the latest key version return master_key_map_.at(master_key_identifier); diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index 689b6f75144..5a17b3dfff6 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -37,7 +37,8 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { const std::unordered_map& master_keys_map); protected: - SecureString GetMasterKeyFromServer(const std::string& master_key_identifier) override; + const SecureString& GetMasterKeyFromServer( + const std::string& master_key_identifier) override; private: static std::unordered_map master_key_map_; diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 81b98f4bf7c..73b8b45ea41 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -737,8 +737,8 @@ class FileMetaData::FileMetaDataImpl { encryption::kNonceLength); auto tag = reinterpret_cast(signature) + encryption::kNonceLength; - encryption::SecureString key = file_decryptor_->GetFooterKey(); - std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); + const encryption::SecureString& key = file_decryptor_->GetFooterKey(); + const std::string& aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); auto aes_encryptor = 
encryption::AesEncryptor::Make(file_decryptor_->algorithm(), static_cast(key.size()), From 29bc5e99c12a5b4bdb47ec3d33e92a6bbc27cf34 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 10 Apr 2025 20:11:34 +0200 Subject: [PATCH 09/44] Use const for constants and construct SecureString from consts in tests --- .../parquet/encryption/read_configurations_test.cc | 12 ++++++------ cpp/src/parquet/encryption/test_encryption_util.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index c548b11157e..d7cc96b0256 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -98,15 +98,15 @@ class TestDecryptionConfiguration protected: FileDecryptor decryptor_; - std::string path_to_double_field_ = kDoubleFieldName; - std::string path_to_float_field_ = kFloatFieldName; + const std::string path_to_double_field_ = kDoubleFieldName; + const std::string path_to_float_field_ = kFloatFieldName; // This vector will hold various decryption configurations. 
std::vector> vector_of_decryption_configurations_; - SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; - SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; - SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; - std::string kFileName_ = std::string(kFileName); + const SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; + const SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; + const SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; + const std::string kFileName_ = std::string(kFileName); void CreateDecryptionConfigurations() { /********************************************************************************** diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index f455cff0506..3c24f65c922 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -43,9 +43,9 @@ using ::arrow::internal::TemporaryDir; constexpr int kFixedLength = 10; -inline SecureString kFooterEncryptionKey(std::string("0123456789012345")); -inline SecureString kColumnEncryptionKey1(std::string("1234567890123450")); -inline SecureString kColumnEncryptionKey2(std::string("1234567890123451")); +inline SecureString kFooterEncryptionKey("0123456789012345"); +inline SecureString kColumnEncryptionKey1("1234567890123450"); +inline SecureString kColumnEncryptionKey2("1234567890123451"); const char kFileName[] = "tester"; // Get the path of file inside parquet test data directory From 75c1f0a4561c39072efaf81152fbf81c53999118 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 11 Apr 2025 12:52:30 +0200 Subject: [PATCH 10/44] Inline IntegerKeyIdRetriever::GetKey(std::string) implementation --- cpp/src/parquet/encryption/encryption.cc | 7 ------- cpp/src/parquet/encryption/encryption.h | 13 ++++++++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc 
b/cpp/src/parquet/encryption/encryption.cc index 11c5a391509..1fce188c7fc 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -33,13 +33,6 @@ void IntegerKeyIdRetriever::PutKey(uint32_t key_id, encryption::SecureString key key_map_.insert({key_id, std::move(key)}); } -encryption::SecureString IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { - uint32_t key_id; - memcpy(reinterpret_cast(&key_id), key_metadata.c_str(), 4); - - return key_map_.at(key_id); -} - // string key retriever void StringKeyIdRetriever::PutKey(std::string key_id, encryption::SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 22ce1e7798a..6064061dfa5 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -55,7 +56,17 @@ class PARQUET_EXPORT DecryptionKeyRetriever { class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, encryption::SecureString key); - encryption::SecureString GetKey(const std::string& key_metadata) override; + encryption::SecureString GetKey(const std::string& key_metadata) override { + // key_metadata is string but for IntegerKeyIdRetriever it encodes + // a native-endian 32 bit unsigned integer key_id + uint32_t key_id; + assert(key_metadata.size() == sizeof(key_id)); + memcpy(&key_id, key_metadata.data(), sizeof(key_id)); + + return GetKey(key_id); + } + + encryption::SecureString GetKey(uint32_t key_id) { return key_map_.at(key_id); } private: std::map key_map_; From d3670d4cbf6b75951193c1bafa871e3b92cd07be Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 11 Apr 2025 13:11:33 +0200 Subject: [PATCH 11/44] Add comment to `no_key` --- cpp/src/parquet/encryption/encryption.h | 2 ++ 1 file changed, 2 insertions(+) 
diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 6064061dfa5..9656b0aa556 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -337,6 +337,8 @@ class PARQUET_EXPORT FileDecryptionProperties { std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; + // any empty SecureString key is interpreted as if no key is given + // this instance is used if a SecureString reference is returned const encryption::SecureString no_key_ = encryption::SecureString(); ColumnPathToDecryptionPropertiesMap column_decryption_properties_; From adbbfc8ec660c2d4c74c8ea152d3c2d6075ce23a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 11 Apr 2025 13:19:10 +0200 Subject: [PATCH 12/44] Revert `InternalFileDecryptor::RetrieveColumnKeyIfEmpty` --- .../encryption/internal_file_decryptor.cc | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index 568c2728cdf..ec66f424076 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -108,27 +108,21 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( encryption::SecureString InternalFileDecryptor::GetColumnKey( const std::string& column_path, const std::string& column_key_metadata) { - try { - encryption::SecureString column_key = - RetrieveColumnKeyIfEmpty(properties_->column_key(column_path), - column_key_metadata, properties_->key_retriever()); + encryption::SecureString column_key = properties_->column_key(column_path); + + // No explicit column key given via API. Retrieve via key metadata. 
+ if (column_key.empty() && !column_key_metadata.empty() && + properties_->key_retriever() != nullptr) { + try { + column_key = properties_->key_retriever()->GetKey(column_key_metadata); + } catch (KeyAccessDeniedException& e) { + std::stringstream ss; + ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; + throw HiddenColumnException(ss.str()); + } if (column_key.empty()) { throw HiddenColumnException("HiddenColumnException, path=" + column_path); } - return column_key; - } catch (KeyAccessDeniedException& e) { - std::stringstream ss; - ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; - throw HiddenColumnException(ss.str()); - } -} - -encryption::SecureString InternalFileDecryptor::RetrieveColumnKeyIfEmpty( - encryption::SecureString column_key, const std::string& column_key_metadata, - const std::shared_ptr& key_retriever) { - if (column_key.empty() && !column_key_metadata.empty() && key_retriever != nullptr) { - // No explicit column key given via API. Retrieve via key metadata. 
- return key_retriever->GetKey(column_key_metadata); } return column_key; } From fe79da03da2ae623b80c4ecba1b1830e1d2c08f5 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 11 Apr 2025 15:09:17 +0200 Subject: [PATCH 13/44] Remove `noexcept` --- cpp/src/parquet/encryption/secure_string.cc | 2 +- cpp/src/parquet/encryption/secure_string.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index 44eb1923d0a..e7634722741 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -44,7 +44,7 @@ SecureString& SecureString::operator=(SecureString&& secret) noexcept { secret_ = std::move(secret.secret_); return *this; } -SecureString& SecureString::operator=(const SecureString& secret) noexcept { +SecureString& SecureString::operator=(const SecureString& secret) { if (this == &secret) { // self-assignment return *this; diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h index e136977e9b2..019ce1eb311 100644 --- a/cpp/src/parquet/encryption/secure_string.h +++ b/cpp/src/parquet/encryption/secure_string.h @@ -38,12 +38,12 @@ class PARQUET_EXPORT SecureString { public: SecureString() noexcept = default; SecureString(SecureString&&) noexcept; - SecureString(const SecureString&) noexcept = default; + SecureString(const SecureString&) = default; explicit SecureString(std::string&&) noexcept; explicit SecureString(size_t, char) noexcept; SecureString& operator=(SecureString&&) noexcept; - SecureString& operator=(const SecureString&) noexcept; + SecureString& operator=(const SecureString&); SecureString& operator=(std::string&& secret) noexcept; bool operator==(const SecureString&) const; From db70c192e4e5f84b9e3207ad6071913fdbeec0e7 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 26 May 2025 22:15:57 +0200 Subject: [PATCH 14/44] More secure cleared assertions 
on construction and assignment --- cpp/src/parquet/encryption/secure_string.cc | 5 +++- .../parquet/encryption/secure_string_test.cc | 26 ++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index e7634722741..8602b4b198f 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -29,7 +29,9 @@ namespace parquet::encryption { SecureString::SecureString(SecureString&& secret) noexcept - : secret_(std::move(secret.secret_)) {} + : secret_(std::move(secret.secret_)) { + secret.Dispose(); +} SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { SecureClear(&secret); } @@ -42,6 +44,7 @@ SecureString& SecureString::operator=(SecureString&& secret) noexcept { } Dispose(); secret_ = std::move(secret.secret_); + secret.Dispose(); return *this; } SecureString& SecureString::operator=(const SecureString& secret) { diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 8feac489518..48a1ee98d54 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -72,14 +72,18 @@ TEST(TestSecureString, SecureClearString) { TEST(TestSecureString, Construct) { // move constructing from a string securely clears that string std::string string("hello world"); + auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); AssertSecurelyCleared(string); + AssertSecurelyCleared(old_string); ASSERT_FALSE(secret_from_string.empty()); // move constructing from a secure string securely clears that secure string - // Note: there is no way to test the secure clearing of the moved secure string + auto old_secret_from_string = + std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); SecureString 
secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); + AssertSecurelyCleared(old_secret_from_string); ASSERT_FALSE(secret_from_move_secret.empty()); // copy constructing from a secure string does not modify that secure string @@ -92,16 +96,30 @@ TEST(TestSecureString, Construct) { TEST(TestSecureString, Assign) { // move assigning from a string securely clears that string std::string string("hello world"); - SecureString secret_from_string; + auto old_string = StringArea(string); + SecureString secret_from_string("a secret"); + auto old_secret_from_string = + std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); secret_from_string = std::move(string); AssertSecurelyCleared(string); + AssertSecurelyCleared(old_string); + if (old_secret_from_string.data() != secret_from_string.as_view().data()) { + AssertSecurelyCleared(old_secret_from_string); + } ASSERT_FALSE(secret_from_string.empty()); // move assigning from a secure string securely clears that secure string - // Note: there is no way to test the secure clearing of the moved secure string - SecureString secret_from_move_secret; + auto new_secret_from_string = + std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); + SecureString secret_from_move_secret("another secret"); + auto old_secret_from_move_secret = std::string_view( + secret_from_move_secret.as_view().data(), secret_from_move_secret.length()); secret_from_move_secret = std::move(secret_from_string); ASSERT_TRUE(secret_from_string.empty()); + AssertSecurelyCleared(new_secret_from_string); + if (old_secret_from_move_secret.data() != secret_from_move_secret.as_view().data()) { + AssertSecurelyCleared(old_secret_from_move_secret); + } ASSERT_FALSE(secret_from_move_secret.empty()); // assigning from a secure string does not modify that secure string From d6f9ea92d657ef1c01b9164167b861ecbdff4ca3 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 27 May 
2025 14:49:03 +0200 Subject: [PATCH 15/44] Improve SecureString assignment tests --- cpp/src/parquet/encryption/secure_string.cc | 5 + .../parquet/encryption/secure_string_test.cc | 235 +++++++++++++++--- 2 files changed, 202 insertions(+), 38 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index 8602b4b198f..5b69ec7898c 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -58,7 +58,12 @@ SecureString& SecureString::operator=(const SecureString& secret) { } SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); + // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 + // - requires secure cleaning the local buffer + // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer + // - does not require cleaning anything secret_ = std::move(secret); + // cleans only the local buffer of secret as this always is a local string by now SecureClear(&secret); return *this; } diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc index 48a1ee98d54..23bdf6446ac 100644 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ b/cpp/src/parquet/encryption/secure_string_test.cc @@ -16,6 +16,7 @@ // under the License. 
#include +#include #include #include "parquet/encryption/secure_string.h" @@ -26,7 +27,7 @@ std::string_view StringArea(const std::string& string) { return {string.data(), string.capacity()}; } -void AssertSecurelyCleared(std::string_view area) { +void AssertSecurelyCleared(const std::string_view area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); ASSERT_EQ(area, std::string_view(zeros)); @@ -36,6 +37,42 @@ void AssertSecurelyCleared(const std::string& string) { AssertSecurelyCleared(StringArea(string)); } +/** + * Checks the area has been securely cleared after some position. + */ +void AssertSecurelyCleared(const std::string_view area, const size_t pos) { + // the area after pos is filled with zeros + if (pos < area.size()) { + std::string zeros(area.size() - pos, '\0'); + ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + } +} + +/** + * Checks the area has been securely cleared from the secret value. + * Assumes the area has been released, so it might have been reclaimed and changed after + * cleaning. We cannot check for all-zeros, best we can check here is no secret character + * has leaked. If by any chance the modification produced a former key character at the right + * position, this will be false negative / flaky. Therefore, we check for three consecutive + * secret characters before we fail. 
+ */ +void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { + auto leaks = 0; + for (size_t i = 0; i < secret_value.size(); i++) { + if (area[i] == secret_value[i]) { + leaks++; + } else { + if (leaks >= 3) { + break; + } + leaks = 0; + } + } + if (leaks >= 3) { + FAIL() << leaks << " characters of secret leaked into " << area; + } +} + TEST(TestSecureString, SecureClearString) { // short string { @@ -70,7 +107,7 @@ TEST(TestSecureString, SecureClearString) { } TEST(TestSecureString, Construct) { - // move constructing from a string securely clears that string + // move-constructing from a string securely clears that string std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); @@ -78,56 +115,178 @@ TEST(TestSecureString, Construct) { AssertSecurelyCleared(old_string); ASSERT_FALSE(secret_from_string.empty()); - // move constructing from a secure string securely clears that secure string - auto old_secret_from_string = - std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); + // move-constructing from a secure string securely clears that secure string + auto old_secret_from_string_view = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - AssertSecurelyCleared(old_secret_from_string); + AssertSecurelyCleared(old_secret_from_string_view); ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); - // copy constructing from a secure string does not modify that secure string + // copy-constructing from a secure string does not modify that secure string SecureString secret_from_secret(secret_from_move_secret); ASSERT_FALSE(secret_from_move_secret.empty()); + 
ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); ASSERT_FALSE(secret_from_secret.empty()); ASSERT_EQ(secret_from_secret, secret_from_move_secret); } TEST(TestSecureString, Assign) { - // move assigning from a string securely clears that string - std::string string("hello world"); - auto old_string = StringArea(string); - SecureString secret_from_string("a secret"); - auto old_secret_from_string = - std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); - secret_from_string = std::move(string); - AssertSecurelyCleared(string); - AssertSecurelyCleared(old_string); - if (old_secret_from_string.data() != secret_from_string.as_view().data()) { - AssertSecurelyCleared(old_secret_from_string); - } - ASSERT_FALSE(secret_from_string.empty()); + // we initialize with the first string and iteratively assign the subsequent values + // the first two values are local (15 chars and less), the remainder are non-local + // strings (larger than 15 chars) memory management of short and long strings behaves + // differently + std::vector test_strings = {"secret", "another secret", + "a much longer secret", std::string(1024, 'x')}; - // move assigning from a secure string securely clears that secure string - auto new_secret_from_string = - std::string_view(secret_from_string.as_view().data(), secret_from_string.length()); - SecureString secret_from_move_secret("another secret"); - auto old_secret_from_move_secret = std::string_view( - secret_from_move_secret.as_view().data(), secret_from_move_secret.length()); - secret_from_move_secret = std::move(secret_from_string); - ASSERT_TRUE(secret_from_string.empty()); - AssertSecurelyCleared(new_secret_from_string); - if (old_secret_from_move_secret.data() != secret_from_move_secret.as_view().data()) { - AssertSecurelyCleared(old_secret_from_move_secret); + // assert test string configuration + ASSERT_GE(test_strings.size(), 4); + for (size_t i = 1; i < 
test_strings.size(); i++) { + // we expect first two strings to be local strings + if (i <= 1) { + ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); + } else { + ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); + } + // the strings are increasing in size + if (i > 0) { + ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); + } } - ASSERT_FALSE(secret_from_move_secret.empty()); - // assigning from a secure string does not modify that secure string - SecureString secret_from_secret; - secret_from_secret = secret_from_move_secret; - ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_FALSE(secret_from_secret.empty()); - ASSERT_EQ(secret_from_secret, secret_from_move_secret); + std::vector reverse_strings = std::vector(test_strings); + reverse(reverse_strings.begin(), reverse_strings.end()); + + for (auto vec : {test_strings, reverse_strings}) { + auto init_string = vec[0]; + auto strings = std::vector(vec.begin() + 1, vec.end()); + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_string(std::move(init_string_copy)); + + // move-assigning from a string securely clears that string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + auto old_string_copy_area = StringArea(string_copy); + ASSERT_FALSE(string.empty()); + ASSERT_FALSE(string_copy.empty()); + auto old_secret_from_string_area = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + + secret_from_string = std::move(string_copy); + + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + AssertSecurelyCleared(string_copy); + auto secret_from_string_view = secret_from_string.as_view(); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (secret_from_string_view.data() != 
old_string_copy_area.data()) { + AssertSecurelyCleared(old_string_copy_area, string); + } + ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.size(), string.size()); + ASSERT_EQ(secret_from_string.length(), string.length()); + ASSERT_EQ(secret_from_string_view, std::string_view(string)); + if (secret_from_string_view.data() == old_secret_from_string_area.data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_move_secret(std::move(init_string_copy)); + + // move-assigning from a secure string securely clears that secure string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_string_area = secret_string.as_view(); + auto old_secret_string_value = std::string(secret_string.as_view()); + auto old_secret_from_move_secret_area = secret_from_move_secret.as_view(); + auto old_secret_from_move_secret_value = + std::string(secret_from_move_secret.as_view()); + + secret_from_move_secret = std::move(secret_string); + + ASSERT_TRUE(secret_string.empty()); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { + AssertSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value); + } + 
ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.size(), string.size()); + ASSERT_EQ(secret_from_move_secret.length(), string.length()); + ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); + if (old_secret_from_move_secret_area.data() == + secret_from_move_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_copy_secret(std::move(init_string_copy)); + + for (auto string : strings) { + // copy-assigning from a secure string does not modify that secure string + // the earlier value of the secure string is securely cleared + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_from_copy_secret_area = secret_from_copy_secret.as_view(); + auto old_secret_from_copy_secret_value = + std::string(secret_from_copy_secret.as_view()); + + secret_from_copy_secret = secret_string; + + ASSERT_FALSE(secret_string.empty()); + ASSERT_FALSE(secret_from_copy_secret.empty()); + ASSERT_EQ(secret_from_copy_secret.size(), string.size()); + ASSERT_EQ(secret_from_copy_secret.length(), string.length()); + ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); + if (old_secret_from_copy_secret_area.data() == + secret_from_copy_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size()); + } 
else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value); + } + } + } + } } TEST(TestSecureString, Compare) { From 510349c170834e348305a70d51face81a5709196 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 28 May 2025 11:08:11 +0200 Subject: [PATCH 16/44] More context on SecureClear code --- cpp/src/parquet/encryption/secure_string.cc | 37 +++++++++++++++++---- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc index 5b69ec7898c..613e316c29c 100644 --- a/cpp/src/parquet/encryption/secure_string.cc +++ b/cpp/src/parquet/encryption/secure_string.cc @@ -96,24 +96,49 @@ void SecureString::SecureClear(std::string* secret) { SecureClear(reinterpret_cast(secret->data()), secret->capacity()); } inline void SecureString::SecureClear(uint8_t* data, size_t size) { - // Heavily borrowed from libb2's `secure_zero_memory` at - // https://github.com/BLAKE2/libb2/blob/master/src/blake2-impl.h + // There is various prior art for this: + // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ + // - libb2's `secure_zero_memory` at https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 + // - libsodium's `sodium_memzero` at https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 + // Note: https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html #if defined(_WIN32) + // SecureZeroMemory is meant to not be optimized away SecureZeroMemory(data, size); #elif defined(__STDC_LIB_EXT1__) // memset_s is meant to not be optimized away memset_s(data, size, 0, size); #elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + // rely on some implementation in OpenSSL 
cryptographic library OPENSSL_cleanse(data, size); #elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) - // glibc 2.25+ has explicit_bzero + // explicit_bzero is meant to not be optimized away explicit_bzero(data, size); #else - // Try to ensure that a true library call to memset() will be generated - // by the compiler. + // Volatile pointer to memset function is an attempt to avoid + // that the compiler optimizes away the memset function call. + // pretty much what OPENSSL_cleanse above does + // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 static const volatile auto memset_v = &memset; memset_v(data, 0, size); - __asm__ __volatile__("" ::"r"(data) : "memory"); + +#if defined(__GNUC__) || defined(__clang__) + // __asm__ only supported by GCC and Clang + // not supported by MSVC on the ARM and x64 processors + // https://en.cppreference.com/w/c/language/asm.html + // https://en.cppreference.com/w/cpp/language/asm.html + + // Additional attempt on top of volatile memset_v above + // to avoid that the compiler optimizes away the memset function call. + // Assembler code that tells the compiler 'data' has side effects. 
+ // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: + // - "volatile": the asm produces side effects + // - "memory": effectively forms a read/write memory barrier for the compiler + __asm__ __volatile__( + "" /* no actual code */ + : /* no output */ + : "r"(data) /* input */ + : "memory" /* memory side effects beyond input and output */); +#endif #endif } From 92a7980e4ca1b0311bfef44a3f5a9e5bd84e5dd1 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 28 May 2025 14:39:41 +0200 Subject: [PATCH 17/44] Add SecureString implementation to arrow/util/ --- cpp/src/arrow/CMakeLists.txt | 6 + cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/secure_string.cc | 147 ++++++++++ cpp/src/arrow/util/secure_string.h | 71 +++++ cpp/src/arrow/util/secure_string_test.cc | 339 +++++++++++++++++++++++ 5 files changed, 564 insertions(+) create mode 100644 cpp/src/arrow/util/secure_string.cc create mode 100644 cpp/src/arrow/util/secure_string.h create mode 100644 cpp/src/arrow/util/secure_string_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 77558726986..917f1d02a55 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -515,6 +515,7 @@ set(ARROW_UTIL_SRCS util/memory.cc util/mutex.cc util/ree_util.cc + util/secure_string.cc util/string.cc util/string_builder.cc util/task_group.cc @@ -574,6 +575,11 @@ if(ARROW_USE_GLOG) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE glog::glog) endforeach() endif() +if(ARROW_USE_OPENSSL) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENSSL_LIBS}) + endforeach() +endif() if(ARROW_USE_XSIMD) foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD}) diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 17eea5532cc..df47389240e 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ 
b/cpp/src/arrow/util/CMakeLists.txt @@ -72,6 +72,7 @@ add_arrow_test(utility-test ree_util_test.cc reflection_test.cc rows_to_batches_test.cc + secure_string_test.cc small_vector_test.cc span_test.cc stl_util_test.cc diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc new file mode 100644 index 00000000000..8b699628fd9 --- /dev/null +++ b/cpp/src/arrow/util/secure_string.cc @@ -0,0 +1,147 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#if defined(ARROW_USE_OPENSSL) +# include +# include +#endif +#include +#if defined(_WIN32) +# include +#endif + +#include "arrow/util/secure_string.h" +#include "arrow/util/span.h" + +namespace arrow::util { +SecureString::SecureString(SecureString&& secret) noexcept + : secret_(std::move(secret.secret_)) { + secret.Dispose(); +} +SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { + SecureClear(&secret); +} +SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} + +SecureString& SecureString::operator=(SecureString&& secret) noexcept { + if (this == &secret) { + // self-assignment + return *this; + } + Dispose(); + secret_ = std::move(secret.secret_); + secret.Dispose(); + return *this; +} +SecureString& SecureString::operator=(const SecureString& secret) { + if (this == &secret) { + // self-assignment + return *this; + } + Dispose(); + secret_ = secret.secret_; + return *this; +} +SecureString& SecureString::operator=(std::string&& secret) noexcept { + Dispose(); + // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 + // - requires secure cleaning the local buffer + // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer + // - does not require cleaning anything + secret_ = std::move(secret); + // cleans only the local buffer of secret as this always is a local string by now + SecureClear(&secret); + return *this; +} + +bool SecureString::operator==(const SecureString& other) const { + return secret_ == other.secret_; +} + +bool SecureString::operator!=(const SecureString& other) const { + return secret_ != other.secret_; +} + +bool SecureString::empty() const { return secret_.empty(); } +std::size_t SecureString::size() const { return secret_.size(); } +std::size_t SecureString::length() const { return secret_.length(); } + +::arrow::util::span SecureString::as_span() { + return {reinterpret_cast(secret_.data()), secret_.size()}; +} 
+::arrow::util::span SecureString::as_span() const { + return {reinterpret_cast(secret_.data()), secret_.size()}; +} +std::string_view SecureString::as_view() const { + return {secret_.data(), secret_.size()}; +} + +void SecureString::Dispose() { SecureClear(&secret_); } +void SecureString::SecureClear(std::string* secret) { + secret->clear(); + SecureClear(reinterpret_cast(secret->data()), secret->capacity()); +} +inline void SecureString::SecureClear(uint8_t* data, size_t size) { + // There is various prior art for this: + // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ + // - libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 + // - libsodium's `sodium_memzero` at + // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 + // Note: + // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html +#if defined(_WIN32) + // SecureZeroMemory is meant to not be optimized away + SecureZeroMemory(data, size); +#elif defined(__STDC_LIB_EXT1__) + // memset_s is meant to not be optimized away + memset_s(data, size, 0, size); +#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + // rely on some implementation in OpenSSL cryptographic library + OPENSSL_cleanse(data, size); +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // explicit_bzero is meant to not be optimized away + explicit_bzero(data, size); +#else + // Volatile pointer to memset function is an attempt to avoid + // that the compiler optimizes away the memset function call. 
+ // pretty much what OPENSSL_cleanse above does + // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 + static const volatile auto memset_v = &memset; + memset_v(data, 0, size); + +# if defined(__GNUC__) || defined(__clang__) + // __asm__ only supported by GCC and Clang + // not supported by MSVC on the ARM and x64 processors + // https://en.cppreference.com/w/c/language/asm.html + // https://en.cppreference.com/w/cpp/language/asm.html + + // Additional attempt on top of volatile memset_v above + // to avoid that the compiler optimizes away the memset function call. + // Assembler code that tells the compiler 'data' has side effects. + // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: + // - "volatile": the asm produces side effects + // - "memory": effectively forms a read/write memory barrier for the compiler + __asm__ __volatile__("" /* no actual code */ + : /* no output */ + : "r"(data) /* input */ + : "memory" /* memory side effects beyond input and output */); +# endif +#endif +} + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h new file mode 100644 index 00000000000..537958f4aa8 --- /dev/null +++ b/cpp/src/arrow/util/secure_string.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/util/span.h" +#include "parquet/platform.h" + +namespace arrow::util { +/** + * A secure string that ensures the wrapped string is cleared from memory on + * deconstruction. This class can only be created from std::string that are securely + * erased after creation. + * + * Note: This class does not provide a constructor / assignment operator that copies a + * std::string because that would allow code to create a SecureString while accidentally + * not noticing the need to securely erasing the argument after invoking the constructor / + * calling the assignment operator. 
+ */ +class PARQUET_EXPORT SecureString { + public: + SecureString() noexcept = default; + SecureString(SecureString&&) noexcept; + SecureString(const SecureString&) = default; + explicit SecureString(std::string&&) noexcept; + explicit SecureString(size_t, char) noexcept; + + SecureString& operator=(SecureString&&) noexcept; + SecureString& operator=(const SecureString&); + SecureString& operator=(std::string&& secret) noexcept; + + bool operator==(const SecureString&) const; + bool operator!=(const SecureString&) const; + + ~SecureString() { Dispose(); } + + [[nodiscard]] bool empty() const; + [[nodiscard]] std::size_t size() const; + [[nodiscard]] std::size_t length() const; + + [[nodiscard]] ::arrow::util::span as_span(); + [[nodiscard]] ::arrow::util::span as_span() const; + [[nodiscard]] std::string_view as_view() const; + + void Dispose(); + + static void SecureClear(std::string*); + static void SecureClear(uint8_t* data, size_t size); + + private: + std::string secret_; +}; + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc new file mode 100644 index 00000000000..9e04b698cdb --- /dev/null +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -0,0 +1,339 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/util/secure_string.h" + +namespace arrow::util::test { + +std::string_view StringArea(const std::string& string) { + return {string.data(), string.capacity()}; +} + +void AssertSecurelyCleared(const std::string_view area) { + // the entire area is filled with zeros + std::string zeros(area.size(), '\0'); + ASSERT_EQ(area, std::string_view(zeros)); +} + +void AssertSecurelyCleared(const std::string& string) { + AssertSecurelyCleared(StringArea(string)); +} + +/** + * Checks the area has been securely cleared after some position. + */ +void AssertSecurelyCleared(const std::string_view area, const size_t pos) { + // the area after pos is filled with zeros + if (pos < area.size()) { + std::string zeros(area.size() - pos, '\0'); + ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + } +} + +/** + * Checks the area has been securely cleared from the secret value. + * Assumes the area has been released, so it might have been reclaimed and changed after + * cleaning. We cannot check for all-zeros, best we can check here is no secret character + * has leaked. If by any chance the modification produced a former key character at the + * right position, this will be false negative / flaky. Therefore, we check for three + * consecutive secret characters before we fail. 
+ */ +void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { + auto leaks = 0; + for (size_t i = 0; i < secret_value.size(); i++) { + if (area[i] == secret_value[i]) { + leaks++; + } else { + if (leaks >= 3) { + break; + } + leaks = 0; + } + } + if (leaks >= 3) { + FAIL() << leaks << " characters of secret leaked into " << area; + } +} + +TEST(TestSecureString, SecureClearString) { + // short string + { + std::string tiny("abc"); + auto old_area = StringArea(tiny); + SecureString::SecureClear(&tiny); + AssertSecurelyCleared(tiny); + AssertSecurelyCleared(old_area); + } + + // long string + { + std::string large(1024, 'x'); + large.resize(512, 'y'); + auto old_area = StringArea(large); + SecureString::SecureClear(&large); + AssertSecurelyCleared(large); + AssertSecurelyCleared(old_area); + } + + // empty string + { + // this creates an empty string with some non-zero characters in the string buffer + // we test that all those characters are securely cleared + std::string empty("abcdef"); + empty.resize(0); + auto old_area = StringArea(empty); + SecureString::SecureClear(&empty); + AssertSecurelyCleared(empty); + AssertSecurelyCleared(old_area); + } +} + +TEST(TestSecureString, Construct) { + // move-constructing from a string securely clears that string + std::string string("hello world"); + auto old_string = StringArea(string); + SecureString secret_from_string(std::move(string)); + AssertSecurelyCleared(string); + AssertSecurelyCleared(old_string); + ASSERT_FALSE(secret_from_string.empty()); + + // move-constructing from a secure string securely clears that secure string + auto old_secret_from_string_view = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + SecureString secret_from_move_secret(std::move(secret_from_string)); + ASSERT_TRUE(secret_from_string.empty()); + AssertSecurelyCleared(old_secret_from_string_view); + ASSERT_FALSE(secret_from_move_secret.empty()); 
+ ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + + // copy-constructing from a secure string does not modify that secure string + SecureString secret_from_secret(secret_from_move_secret); + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.as_view(), + std::string_view(old_secret_from_string_value)); + ASSERT_FALSE(secret_from_secret.empty()); + ASSERT_EQ(secret_from_secret, secret_from_move_secret); +} + +TEST(TestSecureString, Assign) { + // we initialize with the first string and iteratively assign the subsequent values + // the first two values are local (15 chars and less), the remainder are non-local + // strings (larger than 15 chars) memory management of short and long strings behaves + // differently + std::vector test_strings = { + "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; + + // assert test string configuration + ASSERT_GE(test_strings.size(), 4); + for (size_t i = 1; i < test_strings.size(); i++) { + // we expect first two strings to be local strings + if (i <= 1) { + ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); + } else { + ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); + } + // the strings are increasing in size + if (i > 0) { + ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); + } + } + + std::vector reverse_strings = std::vector(test_strings); + reverse(reverse_strings.begin(), reverse_strings.end()); + + for (auto vec : {test_strings, reverse_strings}) { + auto init_string = vec[0]; + auto strings = std::vector(vec.begin() + 1, vec.end()); + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_string(std::move(init_string_copy)); + + // move-assigning from a string securely clears that string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + auto 
old_string_copy_area = StringArea(string_copy); + ASSERT_FALSE(string.empty()); + ASSERT_FALSE(string_copy.empty()); + auto old_secret_from_string_area = secret_from_string.as_view(); + auto old_secret_from_string_value = std::string(secret_from_string.as_view()); + + secret_from_string = std::move(string_copy); + + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + AssertSecurelyCleared(string_copy); + auto secret_from_string_view = secret_from_string.as_view(); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (secret_from_string_view.data() != old_string_copy_area.data()) { + AssertSecurelyCleared(old_string_copy_area, string); + } + ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.size(), string.size()); + ASSERT_EQ(secret_from_string.length(), string.length()); + ASSERT_EQ(secret_from_string_view, std::string_view(string)); + if (secret_from_string_view.data() == old_secret_from_string_area.data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_move_secret(std::move(init_string_copy)); + + // move-assigning from a secure string securely clears that secure string + // the earlier value of the secure string is securely cleared + for (auto string : strings) { + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_string_area = secret_string.as_view(); + auto 
old_secret_string_value = std::string(secret_string.as_view()); + auto old_secret_from_move_secret_area = secret_from_move_secret.as_view(); + auto old_secret_from_move_secret_value = + std::string(secret_from_move_secret.as_view()); + + secret_from_move_secret = std::move(secret_string); + + ASSERT_TRUE(secret_string.empty()); + // the secure string can reuse the string_copy's string buffer after assignment + // then, string_copy's string buffer is obviously not cleared + if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { + AssertSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value); + } + ASSERT_FALSE(secret_from_move_secret.empty()); + ASSERT_EQ(secret_from_move_secret.size(), string.size()); + ASSERT_EQ(secret_from_move_secret.length(), string.length()); + ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); + if (old_secret_from_move_secret_area.data() == + secret_from_move_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value); + } + } + } + + { + // an initialized secure string + std::string init_string_copy(init_string); + SecureString secret_from_copy_secret(std::move(init_string_copy)); + + for (auto string : strings) { + // copy-assigning from a secure string does not modify that secure string + // the earlier value of the secure string is securely cleared + auto string_copy = std::string(string); + SecureString secret_string(std::move(string_copy)); + ASSERT_FALSE(string.empty()); + ASSERT_TRUE(string_copy.empty()); + ASSERT_FALSE(secret_string.empty()); + auto old_secret_from_copy_secret_area = secret_from_copy_secret.as_view(); + auto old_secret_from_copy_secret_value = 
+ std::string(secret_from_copy_secret.as_view()); + + secret_from_copy_secret = secret_string; + + ASSERT_FALSE(secret_string.empty()); + ASSERT_FALSE(secret_from_copy_secret.empty()); + ASSERT_EQ(secret_from_copy_secret.size(), string.size()); + ASSERT_EQ(secret_from_copy_secret.length(), string.length()); + ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); + if (old_secret_from_copy_secret_area.data() == + secret_from_copy_secret.as_view().data()) { + // when secure string reuses the buffer, the old value must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size()); + } else { + // when secure string has a new buffer, the old buffer must be cleared + AssertSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value); + } + } + } + } +} + +TEST(TestSecureString, Compare) { + ASSERT_TRUE(SecureString("") == SecureString("")); + ASSERT_FALSE(SecureString("") != SecureString("")); + + ASSERT_TRUE(SecureString("hello world") == SecureString("hello world")); + ASSERT_FALSE(SecureString("hello world") != SecureString("hello world")); + + ASSERT_FALSE(SecureString("hello world") == SecureString("hello worlds")); + ASSERT_TRUE(SecureString("hello world") != SecureString("hello worlds")); +} + +TEST(TestSecureString, Cardinality) { + ASSERT_TRUE(SecureString("").empty()); + ASSERT_EQ(SecureString("").size(), 0); + ASSERT_EQ(SecureString("").length(), 0); + + ASSERT_FALSE(SecureString("hello world").empty()); + ASSERT_EQ(SecureString("hello world").size(), 11); + ASSERT_EQ(SecureString("hello world").length(), 11); +} + +TEST(TestSecureString, AsSpan) { + SecureString secret("hello world"); + const SecureString& const_secret(secret); + auto const_span = const_secret.as_span(); + auto mutual_span = secret.as_span(); + + std::string expected = "hello world"; + ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), + expected.size()}; + ASSERT_EQ(const_span, 
expected_span); + ASSERT_EQ(mutual_span, expected_span); + + // modify secret through mutual span + // the const span shares the same secret, so it is changed as well + mutual_span[0] = 'H'; + expected_span[0] = 'H'; + ASSERT_EQ(const_span, expected_span); + ASSERT_EQ(mutual_span, expected_span); +} + +TEST(TestSecureString, AsView) { + const SecureString secret = SecureString("hello world"); + const std::string_view view = secret.as_view(); + ASSERT_EQ(view, "hello world"); +} + +} // namespace arrow::util::test From 219d207f97152a5017cc61c0f925dc4387adc6f7 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 28 May 2025 15:32:47 +0200 Subject: [PATCH 18/44] Move to arrow::util::SecureString --- .../arrow/parquet_column_encryption.cc | 7 +- .../low_level_api/encryption_reader_writer.cc | 7 +- ...yption_reader_writer_all_crypto_options.cc | 7 +- .../dataset/file_parquet_encryption_test.cc | 9 +- cpp/src/parquet/CMakeLists.txt | 4 +- cpp/src/parquet/encryption/crypto_factory.cc | 3 + cpp/src/parquet/encryption/encryption.cc | 29 +- cpp/src/parquet/encryption/encryption.h | 62 ++-- .../parquet/encryption/file_key_unwrapper.cc | 2 + .../parquet/encryption/file_key_unwrapper.h | 5 +- .../parquet/encryption/file_key_wrapper.cc | 2 + cpp/src/parquet/encryption/file_key_wrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 21 +- .../encryption/internal_file_decryptor.h | 18 +- .../encryption/internal_file_encryptor.cc | 18 +- .../encryption/internal_file_encryptor.h | 4 +- .../parquet/encryption/key_encryption_key.h | 8 +- cpp/src/parquet/encryption/key_toolkit.h | 10 +- .../encryption/key_toolkit_internal.cc | 3 + .../parquet/encryption/key_toolkit_internal.h | 10 +- cpp/src/parquet/encryption/kms_client.h | 8 +- .../encryption/local_wrap_kms_client.cc | 2 + .../encryption/local_wrap_kms_client.h | 13 +- cpp/src/parquet/encryption/secure_string.cc | 145 -------- cpp/src/parquet/encryption/secure_string.h | 71 ---- .../parquet/encryption/secure_string_test.cc | 
339 ------------------ .../parquet/encryption/test_encryption_util.h | 2 + .../parquet/encryption/test_in_memory_kms.cc | 3 + .../parquet/encryption/test_in_memory_kms.h | 30 +- cpp/src/parquet/metadata.cc | 4 +- 30 files changed, 165 insertions(+), 683 deletions(-) delete mode 100644 cpp/src/parquet/encryption/secure_string.cc delete mode 100644 cpp/src/parquet/encryption/secure_string.h delete mode 100644 cpp/src/parquet/encryption/secure_string_test.cc diff --git a/cpp/examples/arrow/parquet_column_encryption.cc b/cpp/examples/arrow/parquet_column_encryption.cc index 573b5e55b38..7b2d5d80bf4 100644 --- a/cpp/examples/arrow/parquet_column_encryption.cc +++ b/cpp/examples/arrow/parquet_column_encryption.cc @@ -19,6 +19,7 @@ #include "arrow/dataset/file_parquet.h" #include "arrow/dataset/parquet_encryption_config.h" #include "arrow/filesystem/localfs.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/test_in_memory_kms.h" @@ -106,9 +107,9 @@ arrow::Result> GetTable() { std::shared_ptr GetCryptoFactory() { // Configure KMS. - std::unordered_map key_map; - key_map.emplace("footerKeyId", parquet::encryption::SecureString("0123456789012345")); - key_map.emplace("columnKeyId", parquet::encryption::SecureString("1234567890123456")); + std::unordered_map key_map; + key_map.emplace("footerKeyId", arrow::util::SecureString("0123456789012345")); + key_map.emplace("columnKeyId", arrow::util::SecureString("1234567890123456")); auto crypto_factory = std::make_shared(); auto kms_client_factory = diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc index 32991446419..6e488396a80 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include #include #include @@ -39,9 +40,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; const char* PARQUET_FILENAME = "parquet_cpp_example.parquet.encrypted"; -const parquet::encryption::SecureString kFooterEncryptionKey("0123456789012345"); -const parquet::encryption::SecureString kColumnEncryptionKey1("1234567890123450"); -const parquet::encryption::SecureString kColumnEncryptionKey2("1234567890123451"); +const arrow::util::SecureString kFooterEncryptionKey("0123456789012345"); +const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450"); +const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451"); int main(int argc, char** argv) { /********************************************************************************** diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc index d10bdcf39d5..6d714316c99 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -92,9 +93,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; -const parquet::encryption::SecureString kFooterEncryptionKey("0123456789012345"); -const parquet::encryption::SecureString kColumnEncryptionKey1("1234567890123450"); -const parquet::encryption::SecureString kColumnEncryptionKey2("1234567890123451"); +const arrow::util::SecureString kFooterEncryptionKey("0123456789012345"); +const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450"); +const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451"); const char* fileName = "tester"; using FileClass = ::arrow::io::FileOutputStream; diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index 5e0663ff54f..91d813530d4 
100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -34,6 +34,7 @@ #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/util/future.h" +#include "arrow/util/secure_string.h" #include "arrow/util/thread_pool.h" #include "parquet/arrow/reader.h" #include "parquet/encryption/crypto_factory.h" @@ -41,11 +42,13 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/test_in_memory_kms.h" -const parquet::encryption::SecureString kFooterKeyMasterKey("0123456789012345"); +using arrow::util::SecureString; + +const SecureString kFooterKeyMasterKey("0123456789012345"); constexpr std::string_view kFooterKeyMasterKeyId = "footer_key"; constexpr std::string_view kFooterKeyName = "footer_key"; -const parquet::encryption::SecureString kColumnMasterKey("1234567890123450"); +const SecureString kColumnMasterKey("1234567890123450"); constexpr std::string_view kColumnMasterKeyId = "col_key"; constexpr std::string_view kColumnKeyMapping = "col_key: a"; @@ -107,7 +110,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam key_map; + std::unordered_map key_map; key_map.emplace(kColumnMasterKeyId, kColumnMasterKey); key_map.emplace(kFooterKeyMasterKeyId, kFooterKeyMasterKey); diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index bcfc20c8a07..043f3cfbbd7 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -253,8 +253,7 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/key_metadata.cc encryption/key_toolkit.cc encryption/key_toolkit_internal.cc - encryption/local_wrap_kms_client.cc - encryption/secure_string.cc) + encryption/local_wrap_kms_client.cc) else() set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal_nossl.cc) endif() @@ -422,7 +421,6 @@ if(PARQUET_REQUIRE_ENCRYPTION) encryption/write_configurations_test.cc encryption/read_configurations_test.cc encryption/properties_test.cc - 
encryption/secure_string_test.cc encryption/test_encryption_util.cc) add_parquet_test(encryption-key-management-test SOURCES diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index d318d8c899f..03c16e6a3ba 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -18,6 +18,7 @@ #include #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "arrow/util/string.h" #include "parquet/encryption/crypto_factory.h" @@ -26,6 +27,8 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" +using arrow::util::SecureString; + namespace parquet::encryption { void CryptoFactory::RegisterKmsClientFactory( diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 1fce188c7fc..06734079cbf 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -26,24 +26,26 @@ #include "arrow/util/utf8.h" #include "parquet/encryption/encryption_internal.h" +using ::arrow::util::SecureString; + namespace parquet { // integer key retriever -void IntegerKeyIdRetriever::PutKey(uint32_t key_id, encryption::SecureString key) { +void IntegerKeyIdRetriever::PutKey(uint32_t key_id, SecureString key) { key_map_.insert({key_id, std::move(key)}); } // string key retriever -void StringKeyIdRetriever::PutKey(std::string key_id, encryption::SecureString key) { +void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); } -encryption::SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { return key_map_.at(key_id); } ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( - encryption::SecureString column_key) { + SecureString column_key) { if 
(column_key.empty()) return this; DCHECK(key_.empty()); @@ -85,7 +87,7 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key } FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( - encryption::SecureString footer_key) { + SecureString footer_key) { if (footer_key.empty()) { return this; } @@ -123,7 +125,7 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix } ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( - encryption::SecureString key) { + SecureString key) { if (key.empty()) return this; DCHECK(key_.empty()); @@ -176,7 +178,7 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() { ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, std::string column_path, - encryption::SecureString key, + SecureString key, std::string key_metadata) : column_path_(std::move(column_path)), encrypted_(encrypted), @@ -196,7 +198,7 @@ ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, } ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path, - encryption::SecureString key) + SecureString key) : column_path_(std::move(column_path)), key_(std::move(key)) { DCHECK(!column_path_.empty()); @@ -205,7 +207,7 @@ ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path, } } -const encryption::SecureString& FileDecryptionProperties::column_key( +const SecureString& FileDecryptionProperties::column_key( const std::string& column_path) const { if (column_decryption_properties_.find(column_path) != column_decryption_properties_.end()) { @@ -218,8 +220,7 @@ const encryption::SecureString& FileDecryptionProperties::column_key( } FileDecryptionProperties::FileDecryptionProperties( - encryption::SecureString footer_key, - std::shared_ptr key_retriever, + SecureString footer_key, std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, std::string aad_prefix, 
std::shared_ptr aad_prefix_verifier, ColumnPathToDecryptionPropertiesMap column_decryption_properties, @@ -272,9 +273,9 @@ FileEncryptionProperties::column_encryption_properties(const std::string& column } FileEncryptionProperties::FileEncryptionProperties( - ParquetCipher::type cipher, encryption::SecureString footer_key, - std::string footer_key_metadata, bool encrypted_footer, std::string aad_prefix, - bool store_aad_prefix_in_file, ColumnPathToEncryptionPropertiesMap encrypted_columns) + ParquetCipher::type cipher, SecureString footer_key, std::string footer_key_metadata, + bool encrypted_footer, std::string aad_prefix, bool store_aad_prefix_in_file, + ColumnPathToEncryptionPropertiesMap encrypted_columns) : footer_key_(std::move(footer_key)), footer_key_metadata_(std::move(footer_key_metadata)), encrypted_footer_(encrypted_footer), diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 9656b0aa556..7e681c28d1a 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -23,7 +23,7 @@ #include #include -#include "parquet/encryption/secure_string.h" +#include "arrow/util/secure_string.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" @@ -48,15 +48,15 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: - virtual encryption::SecureString GetKey(const std::string& key_metadata) = 0; + virtual ::arrow::util::SecureString GetKey(const std::string& key_metadata) = 0; virtual ~DecryptionKeyRetriever() {} }; /// Simple integer key retriever class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(uint32_t key_id, encryption::SecureString key); - encryption::SecureString GetKey(const std::string& key_metadata) override { + void PutKey(uint32_t key_id, ::arrow::util::SecureString key); + ::arrow::util::SecureString GetKey(const std::string& key_metadata) 
override { // key_metadata is string but for IntegerKeyIdRetriever it encodes // a native-endian 32 bit unsigned integer key_id uint32_t key_id; @@ -66,20 +66,20 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { return GetKey(key_id); } - encryption::SecureString GetKey(uint32_t key_id) { return key_map_.at(key_id); } + ::arrow::util::SecureString GetKey(uint32_t key_id) { return key_map_.at(key_id); } private: - std::map key_map_; + std::map key_map_; }; // Simple string key retriever class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(std::string key_id, encryption::SecureString key); - encryption::SecureString GetKey(const std::string& key_metadata) override; + void PutKey(std::string key_id, ::arrow::util::SecureString key); + ::arrow::util::SecureString GetKey(const std::string& key_metadata) override; private: - std::map key_map_; + std::map key_map_; }; class PARQUET_EXPORT HiddenColumnException : public ParquetException { @@ -125,7 +125,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. - Builder* key(encryption::SecureString column_key); + Builder* key(::arrow::util::SecureString column_key); /// Set a key retrieval metadata. 
/// use either key_metadata() or key_id(), not both @@ -145,7 +145,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { private: const std::string column_path_; bool encrypted_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; std::string key_metadata_; Builder(const std::string path, bool encrypted) @@ -155,7 +155,7 @@ class PARQUET_EXPORT ColumnEncryptionProperties { const std::string& column_path() const { return column_path_; } bool is_encrypted() const { return encrypted_; } bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; } - const encryption::SecureString& key() const { return key_; } + const ::arrow::util::SecureString& key() const { return key_; } const std::string& key_metadata() const { return key_metadata_; } ColumnEncryptionProperties() = default; @@ -166,10 +166,10 @@ class PARQUET_EXPORT ColumnEncryptionProperties { const std::string column_path_; bool encrypted_; bool encrypted_with_footer_key_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; std::string key_metadata_; explicit ColumnEncryptionProperties(bool encrypted, std::string column_path, - encryption::SecureString key, + ::arrow::util::SecureString key, std::string key_metadata); }; @@ -186,13 +186,13 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// key metadata for this column the metadata will be ignored, /// the column will be decrypted with this key. /// key length must be either 16, 24 or 32 bytes. 
- Builder* key(encryption::SecureString key); + Builder* key(::arrow::util::SecureString key); std::shared_ptr build(); private: const std::string column_path_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; }; ColumnDecryptionProperties() = default; @@ -200,17 +200,17 @@ class PARQUET_EXPORT ColumnDecryptionProperties { ColumnDecryptionProperties(ColumnDecryptionProperties&& other) = default; const std::string& column_path() const { return column_path_; } - const encryption::SecureString& key() const { return key_; } + const ::arrow::util::SecureString& key() const { return key_; } private: const std::string column_path_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; /// This class is only required for setting explicit column decryption keys - /// to override key retriever (or to provide keys when key metadata and/or /// key retriever are not available) explicit ColumnDecryptionProperties(std::string column_path, - encryption::SecureString key); + ::arrow::util::SecureString key); }; class PARQUET_EXPORT AADPrefixVerifier { @@ -245,7 +245,7 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - Builder* footer_key(encryption::SecureString footer_key); + Builder* footer_key(::arrow::util::SecureString footer_key); /// Set explicit column keys (decryption properties). /// Its also possible to set a key retriever on this property object. 
@@ -302,7 +302,7 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - encryption::SecureString footer_key_; + ::arrow::util::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; ColumnPathToDecryptionPropertiesMap column_decryption_properties_; @@ -312,9 +312,9 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; }; - const encryption::SecureString& column_key(const std::string& column_path) const; + const ::arrow::util::SecureString& column_key(const std::string& column_path) const; - const encryption::SecureString& footer_key() const { return footer_key_; } + const ::arrow::util::SecureString& footer_key() const { return footer_key_; } const std::string& aad_prefix() const { return aad_prefix_; } @@ -333,13 +333,13 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - encryption::SecureString footer_key_; + ::arrow::util::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; // any empty SecureString key is interpreted as if no key is given // this instance is used if a SecureString reference is returned - const encryption::SecureString no_key_ = encryption::SecureString(); + const ::arrow::util::SecureString no_key_ = ::arrow::util::SecureString(); ColumnPathToDecryptionPropertiesMap column_decryption_properties_; std::shared_ptr key_retriever_; @@ -347,7 +347,7 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; FileDecryptionProperties( - encryption::SecureString footer_key, + ::arrow::util::SecureString footer_key, std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, std::string aad_prefix, std::shared_ptr aad_prefix_verifier, @@ -359,7 +359,7 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - explicit Builder(encryption::SecureString footer_key) + explicit Builder(::arrow::util::SecureString footer_key) : 
parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter), footer_key_(std::move(footer_key)) { @@ -410,7 +410,7 @@ class PARQUET_EXPORT FileEncryptionProperties { private: ParquetCipher::type parquet_cipher_; bool encrypted_footer_; - encryption::SecureString footer_key_; + ::arrow::util::SecureString footer_key_; std::string footer_key_metadata_; std::string aad_prefix_; @@ -422,7 +422,7 @@ class PARQUET_EXPORT FileEncryptionProperties { EncryptionAlgorithm algorithm() const { return algorithm_; } - const encryption::SecureString& footer_key() const { return footer_key_; } + const ::arrow::util::SecureString& footer_key() const { return footer_key_; } const std::string& footer_key_metadata() const { return footer_key_metadata_; } @@ -437,7 +437,7 @@ class PARQUET_EXPORT FileEncryptionProperties { private: EncryptionAlgorithm algorithm_; - encryption::SecureString footer_key_; + ::arrow::util::SecureString footer_key_; std::string footer_key_metadata_; bool encrypted_footer_; std::string file_aad_; @@ -446,7 +446,7 @@ class PARQUET_EXPORT FileEncryptionProperties { ColumnPathToEncryptionPropertiesMap encrypted_columns_; FileEncryptionProperties(ParquetCipher::type cipher, - encryption::SecureString footer_key, + ::arrow::util::SecureString footer_key, std::string footer_key_metadata, bool encrypted_footer, std::string aad_prefix, bool store_aad_prefix_in_file, ColumnPathToEncryptionPropertiesMap encrypted_columns); diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 25862f150e5..4dc1492a0b7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -22,6 +22,8 @@ #include "parquet/encryption/file_key_unwrapper.h" #include "parquet/encryption/key_metadata.h" +using ::arrow::util::SecureString; + namespace parquet::encryption { FileKeyUnwrapper::FileKeyUnwrapper( diff --git 
a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index 5ce877ae6f7..d2c35d54b88 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -18,6 +18,7 @@ #pragma once #include "arrow/util/concurrent_map.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/file_system_key_material_store.h" @@ -64,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - SecureString GetKey(const std::string& key_metadata) override; + ::arrow::util::SecureString GetKey(const std::string& key_metadata) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); @@ -81,7 +82,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { const KeyMaterial& key_material); /// A map of Key Encryption Key (KEK) ID -> KEK bytes, for the current token - std::shared_ptr<::arrow::util::ConcurrentMap> + std::shared_ptr<::arrow::util::ConcurrentMap> kek_per_kek_id_; std::shared_ptr key_toolkit_owner_; KeyToolkit* key_toolkit_; diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index affa68e9edd..fd870ed1f3b 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -22,6 +22,8 @@ #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/exception.h" +using ::arrow::util::SecureString; + namespace parquet::encryption { FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, diff --git a/cpp/src/parquet/encryption/file_key_wrapper.h b/cpp/src/parquet/encryption/file_key_wrapper.h index 97fa2cf6011..aa6d878bafe 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.h +++ 
b/cpp/src/parquet/encryption/file_key_wrapper.h @@ -61,7 +61,7 @@ class PARQUET_EXPORT FileKeyWrapper { /// When external key material is used, an identifier is usually generated automatically /// but may be specified explicitly to support key rotation, /// which requires keeping the same identifiers. - std::string GetEncryptionKeyMetadata(const SecureString& data_key, + std::string GetEncryptionKeyMetadata(const ::arrow::util::SecureString& data_key, const std::string& master_key_id, bool is_footer_key, std::string key_id_in_file = ""); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index ec66f424076..b90d3158559 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -18,15 +18,18 @@ #include "parquet/encryption/internal_file_decryptor.h" #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/metadata.h" +using arrow::util::SecureString; + namespace parquet { // Decryptor Decryptor::Decryptor(std::unique_ptr aes_decryptor, - encryption::SecureString key, std::string file_aad, std::string aad, + SecureString key, std::string file_aad, std::string aad, ::arrow::MemoryPool* pool) : aes_decryptor_(std::move(aes_decryptor)), key_(std::move(key)), @@ -60,7 +63,7 @@ InternalFileDecryptor::InternalFileDecryptor( footer_key_metadata_(footer_key_metadata), pool_(pool) {} -const encryption::SecureString& InternalFileDecryptor::GetFooterKey() { +const SecureString& InternalFileDecryptor::GetFooterKey() { std::unique_lock lock(mutex_); if (!footer_key_.empty()) { return footer_key_; @@ -98,7 +101,7 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( const std::string& aad, bool metadata) { - const encryption::SecureString& 
footer_key = GetFooterKey(); + const SecureString& footer_key = GetFooterKey(); auto key_len = static_cast(footer_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); @@ -106,9 +109,9 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( pool_); } -encryption::SecureString InternalFileDecryptor::GetColumnKey( - const std::string& column_path, const std::string& column_key_metadata) { - encryption::SecureString column_key = properties_->column_key(column_path); +SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, + const std::string& column_key_metadata) { + SecureString column_key = properties_->column_key(column_path); // No explicit column key given via API. Retrieve via key metadata. if (column_key.empty() && !column_key_metadata.empty() && @@ -130,8 +133,7 @@ encryption::SecureString InternalFileDecryptor::GetColumnKey( std::unique_ptr InternalFileDecryptor::GetColumnDecryptor( const std::string& column_path, const std::string& column_key_metadata, const std::string& aad, bool metadata) { - const encryption::SecureString& column_key = - GetColumnKey(column_path, column_key_metadata); + const SecureString& column_key = GetColumnKey(column_path, column_key_metadata); auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, aad, @@ -148,8 +150,7 @@ InternalFileDecryptor::GetColumnDecryptorFactory( // The column is encrypted with its own key const std::string& column_key_metadata = crypto_metadata->key_metadata(); const std::string column_path = crypto_metadata->path_in_schema()->ToDotString(); - const encryption::SecureString& column_key = - GetColumnKey(column_path, column_key_metadata); + const SecureString& column_key = GetColumnKey(column_path, column_key_metadata); return [this, aad, metadata, column_key = column_key]() { auto key_len 
= static_cast(column_key.size()); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index 14491e1dc2f..a365b4df4bf 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -22,7 +22,7 @@ #include #include -#include "parquet/encryption/secure_string.h" +#include "arrow/util/secure_string.h" #include "parquet/schema.h" namespace parquet { @@ -42,7 +42,7 @@ class FileDecryptionProperties; class PARQUET_EXPORT Decryptor { public: Decryptor(std::unique_ptr decryptor, - encryption::SecureString key, std::string file_aad, std::string aad, + ::arrow::util::SecureString key, std::string file_aad, std::string aad, ::arrow::MemoryPool* pool); ~Decryptor(); @@ -57,7 +57,7 @@ class PARQUET_EXPORT Decryptor { private: std::unique_ptr aes_decryptor_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; @@ -73,7 +73,7 @@ class InternalFileDecryptor { const std::string& file_aad() const { return file_aad_; } - const encryption::SecureString& GetFooterKey(); + const ::arrow::util::SecureString& GetFooterKey(); ParquetCipher::type algorithm() const { return algorithm_; } @@ -129,13 +129,13 @@ class InternalFileDecryptor { // Protects footer_key_ updates std::mutex mutex_; - encryption::SecureString footer_key_; + ::arrow::util::SecureString footer_key_; - encryption::SecureString GetColumnKey(const std::string& column_path, - const std::string& column_key_metadata); + ::arrow::util::SecureString GetColumnKey(const std::string& column_path, + const std::string& column_key_metadata); - static encryption::SecureString RetrieveColumnKeyIfEmpty( - encryption::SecureString column_key, const std::string& column_key_metadata, + static ::arrow::util::SecureString RetrieveColumnKeyIfEmpty( + ::arrow::util::SecureString column_key, const std::string& column_key_metadata, 
const std::shared_ptr& key_retriever); std::unique_ptr GetFooterDecryptor(const std::string& aad, bool metadata); diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 43d3ace53e8..3623aa05c66 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -16,15 +16,17 @@ // under the License. #include "parquet/encryption/internal_file_encryptor.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/encryption_internal.h" +using arrow::util::SecureString; + namespace parquet { // Encryptor -Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, - encryption::SecureString key, std::string file_aad, std::string aad, - ::arrow::MemoryPool* pool) +Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, SecureString key, + std::string file_aad, std::string aad, ::arrow::MemoryPool* pool) : aes_encryptor_(aes_encryptor), key_(std::move(key)), file_aad_(std::move(file_aad)), @@ -52,7 +54,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - const encryption::SecureString& footer_key = properties_->footer_key(); + const SecureString& footer_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size()); footer_encryptor_ = std::make_shared( aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_); @@ -66,7 +68,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterSigningEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - const encryption::SecureString& footer_signing_key = properties_->footer_key(); + const SecureString& footer_signing_key = 
properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size()); footer_signing_encryptor_ = std::make_shared( aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_); @@ -101,9 +103,9 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return nullptr; } - const encryption::SecureString& key = column_prop->is_encrypted_with_footer_key() - ? properties_->footer_key() - : column_prop->key(); + const SecureString& key = column_prop->is_encrypted_with_footer_key() + ? properties_->footer_key() + : column_prop->key(); ParquetCipher::type algorithm = properties_->algorithm().algorithm; auto aes_encryptor = metadata ? GetMetaAesEncryptor(algorithm, key.size()) diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index c37581a2a08..ee15fe32de9 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -36,7 +36,7 @@ class ColumnEncryptionProperties; class PARQUET_EXPORT Encryptor { public: - Encryptor(encryption::AesEncryptor* aes_encryptor, encryption::SecureString key, + Encryptor(encryption::AesEncryptor* aes_encryptor, ::arrow::util::SecureString key, std::string file_aad, std::string aad, ::arrow::MemoryPool* pool); const std::string& file_aad() { return file_aad_; } void UpdateAad(const std::string& aad) { aad_ = aad; } @@ -61,7 +61,7 @@ class PARQUET_EXPORT Encryptor { private: encryption::AesEncryptor* aes_encryptor_; - encryption::SecureString key_; + ::arrow::util::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; diff --git a/cpp/src/parquet/encryption/key_encryption_key.h b/cpp/src/parquet/encryption/key_encryption_key.h index 18cb9484c7e..1157937632a 100644 --- a/cpp/src/parquet/encryption/key_encryption_key.h +++ b/cpp/src/parquet/encryption/key_encryption_key.h @@ -21,7 +21,7 @@ #include #include 
"arrow/util/base64.h" -#include "parquet/encryption/secure_string.h" +#include "arrow/util/secure_string.h" namespace parquet::encryption { @@ -33,14 +33,14 @@ namespace parquet::encryption { // locally, and does not involve an interaction with a KMS server. class KeyEncryptionKey { public: - KeyEncryptionKey(SecureString kek_bytes, std::string kek_id, + KeyEncryptionKey(::arrow::util::SecureString kek_bytes, std::string kek_id, std::string encoded_wrapped_kek) : kek_bytes_(std::move(kek_bytes)), kek_id_(std::move(kek_id)), encoded_kek_id_(::arrow::util::base64_encode(kek_id_)), encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {} - const SecureString& kek_bytes() const { return kek_bytes_; } + const ::arrow::util::SecureString& kek_bytes() const { return kek_bytes_; } const std::string& kek_id() const { return kek_id_; } @@ -49,7 +49,7 @@ class KeyEncryptionKey { const std::string& encoded_wrapped_kek() const { return encoded_wrapped_kek_; } private: - SecureString kek_bytes_; + ::arrow::util::SecureString kek_bytes_; std::string kek_id_; std::string encoded_kek_id_; std::string encoded_wrapped_kek_; diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index be6991b7650..a0b929877ee 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -49,7 +49,7 @@ class PARQUET_EXPORT KeyToolkit { /// Key encryption key two level cache for unwrapping: token -> KeyEncryptionKeyId -> /// KeyEncryptionKeyBytes - TwoLevelCacheWithExpiration& kek_read_cache_per_token() { + TwoLevelCacheWithExpiration<::arrow::util::SecureString>& kek_read_cache_per_token() { return key_encryption_key_read_cache_; } @@ -82,7 +82,7 @@ class PARQUET_EXPORT KeyToolkit { private: TwoLevelCacheWithExpiration> kms_client_cache_; TwoLevelCacheWithExpiration key_encryption_key_write_cache_; - TwoLevelCacheWithExpiration key_encryption_key_read_cache_; + TwoLevelCacheWithExpiration<::arrow::util::SecureString> 
key_encryption_key_read_cache_; std::shared_ptr kms_client_factory_; mutable ::arrow::util::Mutex last_cache_clean_for_key_rotation_time_mutex_; internal::TimePoint last_cache_clean_for_key_rotation_time_; @@ -92,14 +92,14 @@ class PARQUET_EXPORT KeyToolkit { // parsing from "key material" class PARQUET_EXPORT KeyWithMasterId { public: - KeyWithMasterId(SecureString key_bytes, std::string master_id) + KeyWithMasterId(::arrow::util::SecureString key_bytes, std::string master_id) : key_bytes_(std::move(key_bytes)), master_id_(std::move(master_id)) {} - const SecureString& data_key() const { return key_bytes_; } + const ::arrow::util::SecureString& data_key() const { return key_bytes_; } const std::string& master_id() const { return master_id_; } private: - SecureString key_bytes_; + ::arrow::util::SecureString key_bytes_; std::string master_id_; }; diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index a1f4004c5ba..60a8a52206c 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -16,10 +16,13 @@ // under the License. 
#include "arrow/util/base64.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/key_toolkit_internal.h" +using arrow::util::SecureString; + namespace parquet::encryption::internal { // Acceptable key lengths in number of bits, used to validate the data key lengths diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.h b/cpp/src/parquet/encryption/key_toolkit_internal.h index 1bdfcb73a21..d3a75cb8495 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.h +++ b/cpp/src/parquet/encryption/key_toolkit_internal.h @@ -19,20 +19,22 @@ #include -#include "parquet/encryption/secure_string.h" +#include "arrow/util/secure_string.h" #include "parquet/platform.h" namespace parquet::encryption::internal { /// Encrypts "key" with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -std::string EncryptKeyLocally(const SecureString& key, const SecureString& master_key, +std::string EncryptKeyLocally(const ::arrow::util::SecureString& key, + const ::arrow::util::SecureString& master_key, const std::string& aad); /// Decrypts encrypted key with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -SecureString DecryptKeyLocally(const std::string& encoded_encrypted_key, - const SecureString& master_key, const std::string& aad); +::arrow::util::SecureString DecryptKeyLocally( + const std::string& encoded_encrypted_key, + const ::arrow::util::SecureString& master_key, const std::string& aad); PARQUET_EXPORT bool ValidateKeyLength(int32_t key_length_bits); diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 4f224dda710..524476a64a8 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -22,8 +22,8 @@ #include #include "arrow/util/mutex.h" +#include "arrow/util/secure_string.h" -#include "parquet/encryption/secure_string.h" #include "parquet/exception.h" #include "parquet/platform.h" @@ -82,12 +82,12 
@@ class PARQUET_EXPORT KmsClient { /// Wraps a key - encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. - virtual std::string WrapKey(const SecureString& key_bytes, + virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) = 0; /// Decrypts (unwraps) a key with the master key. - virtual SecureString UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) = 0; + virtual ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) = 0; virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index b0069c2b280..80543c2932a 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -17,6 +17,7 @@ #include "arrow/json/object_parser.h" #include "arrow/json/object_writer.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/encryption/local_wrap_kms_client.h" @@ -24,6 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; +using ::arrow::util::SecureString; namespace parquet::encryption { diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 44fc6f03e05..607c75a4c2e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -35,16 +35,16 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { explicit LocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config); - std::string WrapKey(const SecureString& key_bytes, + std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - SecureString UnwrapKey(const std::string& 
wrapped_key, - const std::string& master_key_identifier) override; + ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: /// Get master key from the remote KMS server. /// Note: this function might be called by multiple threads - virtual const SecureString& GetMasterKeyFromServer( + virtual const ::arrow::util::SecureString& GetMasterKeyFromServer( const std::string& master_key_identifier) = 0; private: @@ -84,11 +84,12 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string master_key_version_; }; - const SecureString& GetKeyFromServer(const std::string& key_identifier); + const ::arrow::util::SecureString& GetKeyFromServer(const std::string& key_identifier); protected: KmsConnectionConfig kms_connection_config_; - ::arrow::util::ConcurrentMap master_key_cache_; + ::arrow::util::ConcurrentMap + master_key_cache_; }; } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/secure_string.cc b/cpp/src/parquet/encryption/secure_string.cc deleted file mode 100644 index 613e316c29c..00000000000 --- a/cpp/src/parquet/encryption/secure_string.cc +++ /dev/null @@ -1,145 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "parquet/encryption/secure_string.h" - -#include -#include -#include -#if defined(_WIN32) -# include -#endif - -#include "arrow/util/span.h" -#include "parquet/encryption/encryption.h" - -namespace parquet::encryption { -SecureString::SecureString(SecureString&& secret) noexcept - : secret_(std::move(secret.secret_)) { - secret.Dispose(); -} -SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { - SecureClear(&secret); -} -SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} - -SecureString& SecureString::operator=(SecureString&& secret) noexcept { - if (this == &secret) { - // self-assignment - return *this; - } - Dispose(); - secret_ = std::move(secret.secret_); - secret.Dispose(); - return *this; -} -SecureString& SecureString::operator=(const SecureString& secret) { - if (this == &secret) { - // self-assignment - return *this; - } - Dispose(); - secret_ = secret.secret_; - return *this; -} -SecureString& SecureString::operator=(std::string&& secret) noexcept { - Dispose(); - // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 - // - requires secure cleaning the local buffer - // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer - // - does not require cleaning anything - secret_ = std::move(secret); - // cleans only the local buffer of secret as this always is a local string by now - SecureClear(&secret); - return *this; -} - -bool SecureString::operator==(const SecureString& other) const { - return secret_ == other.secret_; -} - -bool SecureString::operator!=(const SecureString& other) const { - return secret_ != other.secret_; -} - -bool SecureString::empty() const { return secret_.empty(); } -std::size_t SecureString::size() const { return secret_.size(); } -std::size_t SecureString::length() const { return 
secret_.length(); } - -::arrow::util::span SecureString::as_span() { - return {reinterpret_cast(secret_.data()), secret_.size()}; -} -::arrow::util::span SecureString::as_span() const { - return {reinterpret_cast(secret_.data()), secret_.size()}; -} -std::string_view SecureString::as_view() const { - return {secret_.data(), secret_.size()}; -} - -void SecureString::Dispose() { SecureClear(&secret_); } -void SecureString::SecureClear(std::string* secret) { - secret->clear(); - SecureClear(reinterpret_cast(secret->data()), secret->capacity()); -} -inline void SecureString::SecureClear(uint8_t* data, size_t size) { - // There is various prior art for this: - // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ - // - libb2's `secure_zero_memory` at https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 - // - libsodium's `sodium_memzero` at https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 - // Note: https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html -#if defined(_WIN32) - // SecureZeroMemory is meant to not be optimized away - SecureZeroMemory(data, size); -#elif defined(__STDC_LIB_EXT1__) - // memset_s is meant to not be optimized away - memset_s(data, size, 0, size); -#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 - // rely on some implementation in OpenSSL cryptographic library - OPENSSL_cleanse(data, size); -#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) - // explicit_bzero is meant to not be optimized away - explicit_bzero(data, size); -#else - // Volatile pointer to memset function is an attempt to avoid - // that the compiler optimizes away the memset function call. 
- // pretty much what OPENSSL_cleanse above does - // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 - static const volatile auto memset_v = &memset; - memset_v(data, 0, size); - -#if defined(__GNUC__) || defined(__clang__) - // __asm__ only supported by GCC and Clang - // not supported by MSVC on the ARM and x64 processors - // https://en.cppreference.com/w/c/language/asm.html - // https://en.cppreference.com/w/cpp/language/asm.html - - // Additional attempt on top of volatile memset_v above - // to avoid that the compiler optimizes away the memset function call. - // Assembler code that tells the compiler 'data' has side effects. - // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: - // - "volatile": the asm produces side effects - // - "memory": effectively forms a read/write memory barrier for the compiler - __asm__ __volatile__( - "" /* no actual code */ - : /* no output */ - : "r"(data) /* input */ - : "memory" /* memory side effects beyond input and output */); -#endif -#endif -} - -} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/secure_string.h b/cpp/src/parquet/encryption/secure_string.h deleted file mode 100644 index 019ce1eb311..00000000000 --- a/cpp/src/parquet/encryption/secure_string.h +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "arrow/util/span.h" -#include "parquet/platform.h" - -namespace parquet::encryption { -/** - * A secure string that ensures the wrapped string is cleared from memory on - * deconstruction. This class can only be created from std::string that are securely - * erased after creation. - * - * Note: This class does not provide a constructor / assignment operator that copies a - * std::string because that would allow code to create a SecureString while accidentally - * not noticing the need to securely erasing the argument after invoking the constructor / - * calling the assignment operator. 
- */ -class PARQUET_EXPORT SecureString { - public: - SecureString() noexcept = default; - SecureString(SecureString&&) noexcept; - SecureString(const SecureString&) = default; - explicit SecureString(std::string&&) noexcept; - explicit SecureString(size_t, char) noexcept; - - SecureString& operator=(SecureString&&) noexcept; - SecureString& operator=(const SecureString&); - SecureString& operator=(std::string&& secret) noexcept; - - bool operator==(const SecureString&) const; - bool operator!=(const SecureString&) const; - - ~SecureString() { Dispose(); } - - [[nodiscard]] bool empty() const; - [[nodiscard]] std::size_t size() const; - [[nodiscard]] std::size_t length() const; - - [[nodiscard]] ::arrow::util::span as_span(); - [[nodiscard]] ::arrow::util::span as_span() const; - [[nodiscard]] std::string_view as_view() const; - - void Dispose(); - - static void SecureClear(std::string*); - static void SecureClear(uint8_t* data, size_t size); - - private: - std::string secret_; -}; - -} // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/secure_string_test.cc b/cpp/src/parquet/encryption/secure_string_test.cc deleted file mode 100644 index 23bdf6446ac..00000000000 --- a/cpp/src/parquet/encryption/secure_string_test.cc +++ /dev/null @@ -1,339 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include - -#include "parquet/encryption/secure_string.h" - -namespace parquet::encryption::test { - -std::string_view StringArea(const std::string& string) { - return {string.data(), string.capacity()}; -} - -void AssertSecurelyCleared(const std::string_view area) { - // the entire area is filled with zeros - std::string zeros(area.size(), '\0'); - ASSERT_EQ(area, std::string_view(zeros)); -} - -void AssertSecurelyCleared(const std::string& string) { - AssertSecurelyCleared(StringArea(string)); -} - -/** - * Checks the area has been securely cleared after some position. - */ -void AssertSecurelyCleared(const std::string_view area, const size_t pos) { - // the area after pos is filled with zeros - if (pos < area.size()) { - std::string zeros(area.size() - pos, '\0'); - ASSERT_EQ(area.substr(pos), std::string_view(zeros)); - } -} - -/** - * Checks the area has been securely cleared from the secret value. - * Assumes the area has been released, so it might have been reclaimed and changed after - * cleaning. We cannot check for all-zeros, best we can check here is no secret character - * has leaked. If by any chance the modification produced a former key character at the right - * position, this will be false negative / flaky. Therefore, we check for three consecutive - * secret characters before we fail. 
- */ -void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { - auto leaks = 0; - for (size_t i = 0; i < secret_value.size(); i++) { - if (area[i] == secret_value[i]) { - leaks++; - } else { - if (leaks >= 3) { - break; - } - leaks = 0; - } - } - if (leaks >= 3) { - FAIL() << leaks << " characters of secret leaked into " << area; - } -} - -TEST(TestSecureString, SecureClearString) { - // short string - { - std::string tiny("abc"); - auto old_area = StringArea(tiny); - SecureString::SecureClear(&tiny); - AssertSecurelyCleared(tiny); - AssertSecurelyCleared(old_area); - } - - // long string - { - std::string large(1024, 'x'); - large.resize(512, 'y'); - auto old_area = StringArea(large); - SecureString::SecureClear(&large); - AssertSecurelyCleared(large); - AssertSecurelyCleared(old_area); - } - - // empty string - { - // this creates an empty string with some non-zero characters in the string buffer - // we test that all those characters are securely cleared - std::string empty("abcdef"); - empty.resize(0); - auto old_area = StringArea(empty); - SecureString::SecureClear(&empty); - AssertSecurelyCleared(empty); - AssertSecurelyCleared(old_area); - } -} - -TEST(TestSecureString, Construct) { - // move-constructing from a string securely clears that string - std::string string("hello world"); - auto old_string = StringArea(string); - SecureString secret_from_string(std::move(string)); - AssertSecurelyCleared(string); - AssertSecurelyCleared(old_string); - ASSERT_FALSE(secret_from_string.empty()); - - // move-constructing from a secure string securely clears that secure string - auto old_secret_from_string_view = secret_from_string.as_view(); - auto old_secret_from_string_value = std::string(secret_from_string.as_view()); - SecureString secret_from_move_secret(std::move(secret_from_string)); - ASSERT_TRUE(secret_from_string.empty()); - AssertSecurelyCleared(old_secret_from_string_view); - ASSERT_FALSE(secret_from_move_secret.empty()); 
- ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); - - // copy-constructing from a secure string does not modify that secure string - SecureString secret_from_secret(secret_from_move_secret); - ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); - ASSERT_FALSE(secret_from_secret.empty()); - ASSERT_EQ(secret_from_secret, secret_from_move_secret); -} - -TEST(TestSecureString, Assign) { - // we initialize with the first string and iteratively assign the subsequent values - // the first two values are local (15 chars and less), the remainder are non-local - // strings (larger than 15 chars) memory management of short and long strings behaves - // differently - std::vector test_strings = {"secret", "another secret", - "a much longer secret", std::string(1024, 'x')}; - - // assert test string configuration - ASSERT_GE(test_strings.size(), 4); - for (size_t i = 1; i < test_strings.size(); i++) { - // we expect first two strings to be local strings - if (i <= 1) { - ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); - } else { - ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); - } - // the strings are increasing in size - if (i > 0) { - ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); - } - } - - std::vector reverse_strings = std::vector(test_strings); - reverse(reverse_strings.begin(), reverse_strings.end()); - - for (auto vec : {test_strings, reverse_strings}) { - auto init_string = vec[0]; - auto strings = std::vector(vec.begin() + 1, vec.end()); - - { - // an initialized secure string - std::string init_string_copy(init_string); - SecureString secret_from_string(std::move(init_string_copy)); - - // move-assigning from a string securely clears that string - // the earlier value of the secure string is securely cleared - for (auto string : strings) { - auto string_copy = std::string(string); - auto 
old_string_copy_area = StringArea(string_copy); - ASSERT_FALSE(string.empty()); - ASSERT_FALSE(string_copy.empty()); - auto old_secret_from_string_area = secret_from_string.as_view(); - auto old_secret_from_string_value = std::string(secret_from_string.as_view()); - - secret_from_string = std::move(string_copy); - - ASSERT_FALSE(string.empty()); - ASSERT_TRUE(string_copy.empty()); - AssertSecurelyCleared(string_copy); - auto secret_from_string_view = secret_from_string.as_view(); - // the secure string can reuse the string_copy's string buffer after assignment - // then, string_copy's string buffer is obviously not cleared - if (secret_from_string_view.data() != old_string_copy_area.data()) { - AssertSecurelyCleared(old_string_copy_area, string); - } - ASSERT_FALSE(secret_from_string.empty()); - ASSERT_EQ(secret_from_string.size(), string.size()); - ASSERT_EQ(secret_from_string.length(), string.length()); - ASSERT_EQ(secret_from_string_view, std::string_view(string)); - if (secret_from_string_view.data() == old_secret_from_string_area.data()) { - // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); - } else { - // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value); - } - } - } - - { - // an initialized secure string - std::string init_string_copy(init_string); - SecureString secret_from_move_secret(std::move(init_string_copy)); - - // move-assigning from a secure string securely clears that secure string - // the earlier value of the secure string is securely cleared - for (auto string : strings) { - auto string_copy = std::string(string); - SecureString secret_string(std::move(string_copy)); - ASSERT_FALSE(string.empty()); - ASSERT_TRUE(string_copy.empty()); - ASSERT_FALSE(secret_string.empty()); - auto old_secret_string_area = secret_string.as_view(); - auto 
old_secret_string_value = std::string(secret_string.as_view()); - auto old_secret_from_move_secret_area = secret_from_move_secret.as_view(); - auto old_secret_from_move_secret_value = - std::string(secret_from_move_secret.as_view()); - - secret_from_move_secret = std::move(secret_string); - - ASSERT_TRUE(secret_string.empty()); - // the secure string can reuse the string_copy's string buffer after assignment - // then, string_copy's string buffer is obviously not cleared - if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { - AssertSecurelyCleared(old_secret_string_area, - old_secret_from_move_secret_value); - } - ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.size(), string.size()); - ASSERT_EQ(secret_from_move_secret.length(), string.length()); - ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); - if (old_secret_from_move_secret_area.data() == - secret_from_move_secret.as_view().data()) { - // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size()); - } else { - // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - old_secret_from_move_secret_value); - } - } - } - - { - // an initialized secure string - std::string init_string_copy(init_string); - SecureString secret_from_copy_secret(std::move(init_string_copy)); - - for (auto string : strings) { - // copy-assigning from a secure string does not modify that secure string - // the earlier value of the secure string is securely cleared - auto string_copy = std::string(string); - SecureString secret_string(std::move(string_copy)); - ASSERT_FALSE(string.empty()); - ASSERT_TRUE(string_copy.empty()); - ASSERT_FALSE(secret_string.empty()); - auto old_secret_from_copy_secret_area = secret_from_copy_secret.as_view(); - auto old_secret_from_copy_secret_value = 
- std::string(secret_from_copy_secret.as_view()); - - secret_from_copy_secret = secret_string; - - ASSERT_FALSE(secret_string.empty()); - ASSERT_FALSE(secret_from_copy_secret.empty()); - ASSERT_EQ(secret_from_copy_secret.size(), string.size()); - ASSERT_EQ(secret_from_copy_secret.length(), string.length()); - ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); - if (old_secret_from_copy_secret_area.data() == - secret_from_copy_secret.as_view().data()) { - // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size()); - } else { - // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - old_secret_from_copy_secret_value); - } - } - } - } -} - -TEST(TestSecureString, Compare) { - ASSERT_TRUE(SecureString("") == SecureString("")); - ASSERT_FALSE(SecureString("") != SecureString("")); - - ASSERT_TRUE(SecureString("hello world") == SecureString("hello world")); - ASSERT_FALSE(SecureString("hello world") != SecureString("hello world")); - - ASSERT_FALSE(SecureString("hello world") == SecureString("hello worlds")); - ASSERT_TRUE(SecureString("hello world") != SecureString("hello worlds")); -} - -TEST(TestSecureString, Cardinality) { - ASSERT_TRUE(SecureString("").empty()); - ASSERT_EQ(SecureString("").size(), 0); - ASSERT_EQ(SecureString("").length(), 0); - - ASSERT_FALSE(SecureString("hello world").empty()); - ASSERT_EQ(SecureString("hello world").size(), 11); - ASSERT_EQ(SecureString("hello world").length(), 11); -} - -TEST(TestSecureString, AsSpan) { - SecureString secret("hello world"); - const SecureString& const_secret(secret); - auto const_span = const_secret.as_span(); - auto mutual_span = secret.as_span(); - - std::string expected = "hello world"; - ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), - expected.size()}; - ASSERT_EQ(const_span, 
expected_span); - ASSERT_EQ(mutual_span, expected_span); - - // modify secret through mutual span - // the const span shares the same secret, so it is changed as well - mutual_span[0] = 'H'; - expected_span[0] = 'H'; - ASSERT_EQ(const_span, expected_span); - ASSERT_EQ(mutual_span, expected_span); -} - -TEST(TestSecureString, AsView) { - const SecureString secret = SecureString("hello world"); - const std::string_view view = secret.as_view(); - ASSERT_EQ(view, "hello world"); -} - -} // namespace parquet::encryption::test diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index 3c24f65c922..7c34c501580 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -31,6 +31,7 @@ #include "arrow/filesystem/localfs.h" #include "arrow/status.h" #include "arrow/util/io_util.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/test_util.h" @@ -40,6 +41,7 @@ class ParquetFileReader; namespace encryption::test { using ::arrow::internal::TemporaryDir; +using ::arrow::util::SecureString; constexpr int kFixedLength = 10; diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 16e4f30ed8f..6af15d177fd 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -16,11 +16,14 @@ // under the License. 
#include "arrow/util/base64.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/encryption/test_in_memory_kms.h" #include "parquet/exception.h" +using arrow::util::SecureString; + namespace parquet::encryption { std::unordered_map diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index 5a17b3dfff6..b9d4169c634 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -34,14 +34,15 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { explicit TestOnlyLocalWrapInMemoryKms(const KmsConnectionConfig& kms_connection_config); static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& + master_keys_map); protected: - const SecureString& GetMasterKeyFromServer( + const ::arrow::util::SecureString& GetMasterKeyFromServer( const std::string& master_key_identifier) override; private: - static std::unordered_map master_key_map_; + static std::unordered_map master_key_map_; }; // This is a mock class, built for testing only. 
Don't use it as an example of KmsClient @@ -49,25 +50,30 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { class TestOnlyInServerWrapKms : public KmsClient { public: static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& + master_keys_map); - std::string WrapKey(const SecureString& key_bytes, + std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - SecureString UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( - const std::unordered_map& new_master_keys_map); + const std::unordered_map& + new_master_keys_map); static void FinishKeyRotation(); private: - SecureString GetMasterKeyFromServer(const std::string& master_key_identifier); + ::arrow::util::SecureString GetMasterKeyFromServer( + const std::string& master_key_identifier); // Different wrapping and unwrapping key maps to imitate versioning // and support key rotation. - static std::unordered_map unwrapping_master_key_map_; - static std::unordered_map wrapping_master_key_map_; + static std::unordered_map + unwrapping_master_key_map_; + static std::unordered_map + wrapping_master_key_map_; }; // This is a mock class, built for testing only. 
Don't use it as an example of @@ -76,7 +82,7 @@ class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory { public: TestOnlyInMemoryKmsClientFactory( bool wrap_locally, - const std::unordered_map& master_keys_map) + const std::unordered_map& master_keys_map) : KmsClientFactory(wrap_locally) { TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys(master_keys_map); TestOnlyInServerWrapKms::InitializeMasterKeys(master_keys_map); diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 9911fc9f5f7..389aae87ade 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -41,6 +41,8 @@ #include "parquet/statistics.h" #include "parquet/thrift_internal.h" +using ::arrow::util::SecureString; + namespace parquet { const ApplicationVersion& ApplicationVersion::PARQUET_251_FIXED_VERSION() { @@ -792,7 +794,7 @@ class FileMetaData::FileMetaDataImpl { encryption::kNonceLength); auto tag = reinterpret_cast(signature) + encryption::kNonceLength; - const encryption::SecureString& key = file_decryptor_->GetFooterKey(); + const SecureString& key = file_decryptor_->GetFooterKey(); const std::string& aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); auto aes_encryptor = encryption::AesEncryptor::Make(file_decryptor_->algorithm(), From 20d67b3f37dd0038729b10b7a935c37ff7352304 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 11:58:03 +0200 Subject: [PATCH 19/44] Fix import for memset_s, improve for loops in tests Co-authored-by: Antoine Pitrou --- cpp/src/arrow/util/secure_string.cc | 2 ++ cpp/src/arrow/util/secure_string_test.cc | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 8b699628fd9..b45d116878a 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -19,6 +19,8 @@ # include # include #endif +#define __STDC_WANT_LIB_EXT1__ 1 +#include #include #if defined(_WIN32) # 
include diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 9e04b698cdb..ae9cdcf6301 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -171,7 +171,7 @@ TEST(TestSecureString, Assign) { // move-assigning from a string securely clears that string // the earlier value of the secure string is securely cleared - for (auto string : strings) { + for (const auto& string : strings) { auto string_copy = std::string(string); auto old_string_copy_area = StringArea(string_copy); ASSERT_FALSE(string.empty()); @@ -212,7 +212,7 @@ TEST(TestSecureString, Assign) { // move-assigning from a secure string securely clears that secure string // the earlier value of the secure string is securely cleared - for (auto string : strings) { + for (const auto& string : strings) { auto string_copy = std::string(string); SecureString secret_string(std::move(string_copy)); ASSERT_FALSE(string.empty()); @@ -255,7 +255,7 @@ TEST(TestSecureString, Assign) { std::string init_string_copy(init_string); SecureString secret_from_copy_secret(std::move(init_string_copy)); - for (auto string : strings) { + for (const auto& string : strings) { // copy-assigning from a secure string does not modify that secure string // the earlier value of the secure string is securely cleared auto string_copy = std::string(string); From 15f94c635a412265ccbf65a09324d7732fb6f7d4 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 12:05:38 +0200 Subject: [PATCH 20/44] Address code review comments - Improve imports and definition export - Add lines between definitions - Improve comments - Remove redundant ::arrow::util:: when using span - Remove test setup assertions - Reuse view in test --- cpp/src/arrow/util/secure_string.cc | 34 +++++++++++++++---- cpp/src/arrow/util/secure_string.h | 9 ++--- cpp/src/arrow/util/secure_string_test.cc | 42 ++++++++---------------- 3 files changed, 45 insertions(+), 40 
deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index b45d116878a..d1f93e7db97 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -15,13 +15,16 @@ // specific language governing permissions and limitations // under the License. +#define __STDC_WANT_LIB_EXT1__ 1 +#include +#include + #if defined(ARROW_USE_OPENSSL) # include # include #endif -#define __STDC_WANT_LIB_EXT1__ 1 -#include -#include + +#include "arrow/util/windows_compatibility.h" #if defined(_WIN32) # include #endif @@ -30,13 +33,16 @@ #include "arrow/util/span.h" namespace arrow::util { + SecureString::SecureString(SecureString&& secret) noexcept : secret_(std::move(secret.secret_)) { secret.Dispose(); } + SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { SecureClear(&secret); } + SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} SecureString& SecureString::operator=(SecureString&& secret) noexcept { @@ -49,6 +55,7 @@ SecureString& SecureString::operator=(SecureString&& secret) noexcept { secret.Dispose(); return *this; } + SecureString& SecureString::operator=(const SecureString& secret) { if (this == &secret) { // self-assignment @@ -58,11 +65,16 @@ SecureString& SecureString::operator=(const SecureString& secret) { secret_ = secret.secret_; return *this; } + SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - // if secret is local string (length <= 15 characters), copies local buffer, resets to 0 + // std::string implementation may distinguish between local string (a very short string) + // and non-local (longer) strings. The former stores the string in a local buffer, the + // latter stores a pointer to allocated memory that stores the string. 
+ // + // If secret is a local string, copies local buffer, resets size to 0 // - requires secure cleaning the local buffer - // if secret is longer, moves the pointer to secret_, resets to 0 and uses local buffer + // If secret is longer, moves the pointer to secret_, resets to 0, uses local buffer // - does not require cleaning anything secret_ = std::move(secret); // cleans only the local buffer of secret as this always is a local string by now @@ -79,24 +91,32 @@ bool SecureString::operator!=(const SecureString& other) const { } bool SecureString::empty() const { return secret_.empty(); } + std::size_t SecureString::size() const { return secret_.size(); } + std::size_t SecureString::length() const { return secret_.length(); } -::arrow::util::span SecureString::as_span() { +std::size_t SecureString::capacity() const { return secret_.capacity(); } + +span SecureString::as_span() { return {reinterpret_cast(secret_.data()), secret_.size()}; } -::arrow::util::span SecureString::as_span() const { + +span SecureString::as_span() const { return {reinterpret_cast(secret_.data()), secret_.size()}; } + std::string_view SecureString::as_view() const { return {secret_.data(), secret_.size()}; } void SecureString::Dispose() { SecureClear(&secret_); } + void SecureString::SecureClear(std::string* secret) { secret->clear(); SecureClear(reinterpret_cast(secret->data()), secret->capacity()); } + inline void SecureString::SecureClear(uint8_t* data, size_t size) { // There is various prior art for this: // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h index 537958f4aa8..11143ef6cc9 100644 --- a/cpp/src/arrow/util/secure_string.h +++ b/cpp/src/arrow/util/secure_string.h @@ -21,7 +21,7 @@ #include #include "arrow/util/span.h" -#include "parquet/platform.h" +#include "arrow/util/visibility.h" namespace arrow::util { /** @@ -34,7 +34,7 @@ namespace 
arrow::util { * not noticing the need to securely erasing the argument after invoking the constructor / * calling the assignment operator. */ -class PARQUET_EXPORT SecureString { +class ARROW_EXPORT SecureString { public: SecureString() noexcept = default; SecureString(SecureString&&) noexcept; @@ -54,9 +54,10 @@ class PARQUET_EXPORT SecureString { [[nodiscard]] bool empty() const; [[nodiscard]] std::size_t size() const; [[nodiscard]] std::size_t length() const; + [[nodiscard]] std::size_t capacity() const; - [[nodiscard]] ::arrow::util::span as_span(); - [[nodiscard]] ::arrow::util::span as_span() const; + [[nodiscard]] span as_span(); + [[nodiscard]] span as_span() const; [[nodiscard]] std::string_view as_view() const; void Dispose(); diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index ae9cdcf6301..18002a54ac9 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -135,30 +135,14 @@ TEST(TestSecureString, Construct) { } TEST(TestSecureString, Assign) { - // we initialize with the first string and iteratively assign the subsequent values - // the first two values are local (15 chars and less), the remainder are non-local - // strings (larger than 15 chars) memory management of short and long strings behaves - // differently + // We initialize with the first string and iteratively assign the subsequent values. + // The first two values are local (very short strings), the remainder are non-local + // strings. Memory management of short and long strings behaves differently. 
std::vector test_strings = { "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; - // assert test string configuration - ASSERT_GE(test_strings.size(), 4); - for (size_t i = 1; i < test_strings.size(); i++) { - // we expect first two strings to be local strings - if (i <= 1) { - ASSERT_LT(test_strings[i].size(), 15 / sizeof(char)); - } else { - ASSERT_GE(test_strings[i].size(), 15 / sizeof(char)); - } - // the strings are increasing in size - if (i > 0) { - ASSERT_TRUE(test_strings[i].size() > test_strings[i - 1].size()); - } - } - std::vector reverse_strings = std::vector(test_strings); - reverse(reverse_strings.begin(), reverse_strings.end()); + std::reverse(reverse_strings.begin(), reverse_strings.end()); for (auto vec : {test_strings, reverse_strings}) { auto init_string = vec[0]; @@ -227,18 +211,19 @@ TEST(TestSecureString, Assign) { secret_from_move_secret = std::move(secret_string); ASSERT_TRUE(secret_string.empty()); + auto secret_from_move_secret_view = secret_from_move_secret.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared - if (old_secret_string_area.data() != secret_from_move_secret.as_view().data()) { + if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { AssertSecurelyCleared(old_secret_string_area, old_secret_from_move_secret_value); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); ASSERT_EQ(secret_from_move_secret.length(), string.length()); - ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(string)); + ASSERT_EQ(secret_from_move_secret_view, std::string_view(string)); if (old_secret_from_move_secret_area.data() == - secret_from_move_secret.as_view().data()) { + secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared AssertSecurelyCleared(old_secret_from_move_secret_area, 
secret_from_move_secret.size()); @@ -314,20 +299,19 @@ TEST(TestSecureString, AsSpan) { SecureString secret("hello world"); const SecureString& const_secret(secret); auto const_span = const_secret.as_span(); - auto mutual_span = secret.as_span(); + auto mutable_span = secret.as_span(); std::string expected = "hello world"; - ::arrow::util::span expected_span = {reinterpret_cast(expected.data()), - expected.size()}; + span expected_span = {reinterpret_cast(expected.data()), expected.size()}; ASSERT_EQ(const_span, expected_span); - ASSERT_EQ(mutual_span, expected_span); + ASSERT_EQ(mutable_span, expected_span); // modify secret through mutual span // the const span shares the same secret, so it is changed as well - mutual_span[0] = 'H'; + mutable_span[0] = 'H'; expected_span[0] = 'H'; ASSERT_EQ(const_span, expected_span); - ASSERT_EQ(mutual_span, expected_span); + ASSERT_EQ(mutable_span, expected_span); } TEST(TestSecureString, AsView) { From 973b2337f76bc6e614090ad076284b255680a373 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 12:43:00 +0200 Subject: [PATCH 21/44] Test secure SecureString deconstruction --- cpp/src/arrow/util/secure_string_test.cc | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 18002a54ac9..f2cc96f7b3c 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -274,6 +274,30 @@ TEST(TestSecureString, Assign) { } } +TEST(TestSecureString, Deconstruct) { +#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) + // We use a very short and a very long string as memory management of short and long + // strings behaves differently. 
+ std::vector strings = {"short secret", std::string(1024, 'x')}; + + for (auto& string : strings) { + auto old_string_value = string; + std::string_view view; + { + // construct secret + auto secret = SecureString(std::move(string)); + // memorize view + view = secret.as_view(); + // deconstruct secret on leaving this context + } + // assert secret memory is cleared on deconstruction + AssertSecurelyCleared(view, old_string_value); + // so is the string (tested more thoroughly elsewhere) + AssertSecurelyCleared(string); + } +#endif +} + TEST(TestSecureString, Compare) { ASSERT_TRUE(SecureString("") == SecureString("")); ASSERT_FALSE(SecureString("") != SecureString("")); From 9c8874402c45f0a43432b2207f81a97cecbe6334 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 3 Jun 2025 17:28:39 +0200 Subject: [PATCH 22/44] Test correctness of AssertSecurelyCleared --- cpp/src/arrow/util/secure_string_test.cc | 80 ++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f2cc96f7b3c..58a2782adba 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -21,6 +21,8 @@ #include "arrow/util/secure_string.h" +#include + namespace arrow::util::test { std::string_view StringArea(const std::string& string) { @@ -73,6 +75,84 @@ void AssertSecurelyCleared(const std::string_view area, const std::string& secre } } +// GTest test result reporter that captures the result but does not hand it to the unit +// test instance. This effectively hides the result from the GTest test framework. 
+class Reporter : public testing::TestPartResultReporterInterface { + public: + explicit Reporter(testing::TestInfo* test_info) + : result_(testing::TestPartResult::kSuccess, test_info->file(), test_info->line(), + "") {} + void ReportTestPartResult(const testing::TestPartResult& result) override { + result_ = result; + } + const testing::TestPartResult& result() const { return result_; } + + private: + testing::TestPartResult result_; +}; + +#define GET_TEST_RESULT_REPORTER() \ + testing::internal::GetUnitTestImpl()->GetTestPartResultReporterForCurrentThread() + +#define SET_TEST_RESULT_REPORTER(reporter) \ + testing::internal::GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( \ + reporter); + +#define CAPTURE_TEST_RESULT(capture, body) \ + { \ + auto report = GET_TEST_RESULT_REPORTER(); \ + SET_TEST_RESULT_REPORTER(&capture); \ + body; \ + SET_TEST_RESULT_REPORTER(report); \ + } + +TEST(TestSecureString, AssertSecurelyCleared) { + // This tests AssertSecurelyCleared helper methods is actually able to identify secret + // leakage. It captures test results emitted by ASSERT_EQ and then asserts result type + // and message. 
+ auto capture = Reporter(test_info_); + + auto short_zeros = std::string(10, '\0'); + AssertSecurelyCleared(std::string_view(short_zeros)); + + auto large_zeros = std::string(1000, '\0'); + AssertSecurelyCleared(large_zeros); + + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\"\n"); + + auto some_zeros = no_zeros; + some_zeros = std::string(10, '\0'); + AssertSecurelyCleared(some_zeros, 10); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\"\n"); + + AssertSecurelyCleared(some_zeros, "12345678901234567890123456"); + CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); + ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); + ASSERT_EQ(std::string(capture.result().message()), + "Failed\n" + "15 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\n"); +} + TEST(TestSecureString, SecureClearString) { // short string { From f3562f8bfd8d018a69b4f50edc628c8ce34a3412 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 06:46:47 +0200 Subject: [PATCH 23/44] Rename SecureString argument to other --- 
cpp/src/arrow/util/secure_string.cc | 20 ++++++++++---------- cpp/src/arrow/util/secure_string.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index d1f93e7db97..5e36aab23cf 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -34,9 +34,9 @@ namespace arrow::util { -SecureString::SecureString(SecureString&& secret) noexcept - : secret_(std::move(secret.secret_)) { - secret.Dispose(); +SecureString::SecureString(SecureString&& other) noexcept + : secret_(std::move(other.secret_)) { + other.Dispose(); } SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { @@ -45,24 +45,24 @@ SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(se SecureString::SecureString(size_t n, char c) noexcept : secret_(n, c) {} -SecureString& SecureString::operator=(SecureString&& secret) noexcept { - if (this == &secret) { +SecureString& SecureString::operator=(SecureString&& other) noexcept { + if (this == &other) { // self-assignment return *this; } Dispose(); - secret_ = std::move(secret.secret_); - secret.Dispose(); + secret_ = std::move(other.secret_); + other.Dispose(); return *this; } -SecureString& SecureString::operator=(const SecureString& secret) { - if (this == &secret) { +SecureString& SecureString::operator=(const SecureString& other) { + if (this == &other) { // self-assignment return *this; } Dispose(); - secret_ = secret.secret_; + secret_ = other.secret_; return *this; } diff --git a/cpp/src/arrow/util/secure_string.h b/cpp/src/arrow/util/secure_string.h index 11143ef6cc9..843f6cd0a95 100644 --- a/cpp/src/arrow/util/secure_string.h +++ b/cpp/src/arrow/util/secure_string.h @@ -44,7 +44,7 @@ class ARROW_EXPORT SecureString { SecureString& operator=(SecureString&&) noexcept; SecureString& operator=(const SecureString&); - SecureString& operator=(std::string&& secret) noexcept; + 
SecureString& operator=(std::string&&) noexcept; bool operator==(const SecureString&) const; bool operator!=(const SecureString&) const; From 9ee3e2c7b54e267f6f21d71c9e37632e96c3aaa9 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 08:15:00 +0200 Subject: [PATCH 24/44] Move std::move into secure_move, assert string ptr --- cpp/src/arrow/util/secure_string.cc | 52 +++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 5e36aab23cf..19abf68bc25 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -30,16 +30,49 @@ #endif #include "arrow/util/secure_string.h" +#include "arrow/util/logging.h" #include "arrow/util/span.h" namespace arrow::util { -SecureString::SecureString(SecureString&& other) noexcept - : secret_(std::move(other.secret_)) { +/// Note: +/// A string std::string is securely moved into a SecureString in two steps: +/// 1. the std::string is moved via std::move(string) +/// 2. the std::string is securely cleared +/// +/// The std::move has two different effects, depending on the size of the string. +/// A very short string (called local string) stores the string in a local buffer, +/// a long string stores a pointer to allocated memory that stores the string. +/// +/// If the string is a small string, std::move copies the local buffer. +/// If the string is a long string, std::move moves the pointer and then resets the +/// string size to 0 (which turns the string into a local string). +/// +/// In both cases, after a std::move(string), the string uses the local buffer. +/// +/// Thus, after a std::move(string), calling SecureClear(std::string*) only +/// securely clears the **local buffer** of the string. Therefore, std::move(string) +/// must move the pointer of long string into SecureString (which later clears the string). 
+/// Otherwise, the content of the string cannot be securely cleared. +/// +/// This condition is checked by secure_move. + +void secure_move(std::string& string, std::string& dst) { + auto ptr = string.data(); + dst = std::move(string); + + // We require the buffer address string.data() to remain (not be freed), + // or reused by dst. Otherwise, we cannot securely clear string after this move + ARROW_CHECK(string.data() == ptr || dst.data() == ptr); +} + +SecureString::SecureString(SecureString&& other) noexcept { + secure_move(other.secret_, secret_); other.Dispose(); } -SecureString::SecureString(std::string&& secret) noexcept : secret_(std::move(secret)) { +SecureString::SecureString(std::string&& secret) noexcept { + secure_move(secret, secret_); SecureClear(&secret); } @@ -51,7 +84,7 @@ SecureString& SecureString::operator=(SecureString&& other) noexcept { return *this; } Dispose(); - secret_ = std::move(other.secret_); + secure_move(other.secret_, secret_); other.Dispose(); return *this; } @@ -68,16 +101,7 @@ SecureString& SecureString::operator=(const SecureString& other) { SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - // std::string implementation may distinguish between local string (a very short string) - // and non-local (longer) strings. The former stores the string in a local buffer, the - // latter stores a pointer to allocated memory that stores the string. 
- // - // If secret is a local string, copies local buffer, resets size to 0 - // - requires secure cleaning the local buffer - // If secret is longer, moves the pointer to secret_, resets to 0, uses local buffer - // - does not require cleaning anything - secret_ = std::move(secret); - // cleans only the local buffer of secret as this always is a local string by now + secure_move(secret, secret_); SecureClear(&secret); return *this; } From 77e4e20997e0e3580a77ee4ee28efa603d00ad30 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 09:52:54 +0200 Subject: [PATCH 25/44] Add comments, fix linting --- cpp/src/arrow/util/secure_string.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 19abf68bc25..ade822c84f7 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +// __STDC_WANT_LIB_EXT1__ and string.h are required by memset_s: +// https://en.cppreference.com/w/c/string/byte/memset #define __STDC_WANT_LIB_EXT1__ 1 #include #include @@ -29,8 +31,8 @@ # include #endif -#include "arrow/util/secure_string.h" #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "arrow/util/span.h" namespace arrow::util { @@ -52,8 +54,8 @@ namespace arrow::util { /// /// Thus, after a std::move(string), calling SecureClear(std::string*) only /// securely clears the **local buffer** of the string. Therefore, std::move(string) -/// must move the pointer of long string into SecureString (which later clears the string). -/// Otherwise, the content of the string cannot be securely cleared. +/// must move the pointer of long string into SecureString (which later clears the +/// string). Otherwise, the content of the string cannot be securely cleared. /// /// This condition is checked by secure_move. 
From 1f42383f5d94efa63e944630c39406a10f4be266 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 10:11:12 +0200 Subject: [PATCH 26/44] Improve assertions --- cpp/src/arrow/util/secure_string_test.cc | 66 +++++++++++++----------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 58a2782adba..0a7eef4511d 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -113,10 +113,10 @@ TEST(TestSecureString, AssertSecurelyCleared) { auto capture = Reporter(test_info_); auto short_zeros = std::string(10, '\0'); - AssertSecurelyCleared(std::string_view(short_zeros)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(std::string_view(short_zeros))); auto large_zeros = std::string(1000, '\0'); - AssertSecurelyCleared(large_zeros); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large_zeros)); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); @@ -132,7 +132,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { auto some_zeros = no_zeros; some_zeros = std::string(10, '\0'); - AssertSecurelyCleared(some_zeros, 10); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(some_zeros, 10)); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); ASSERT_EQ(std::string(capture.result().message()), @@ -144,7 +144,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\"\n"); - AssertSecurelyCleared(some_zeros, "12345678901234567890123456"); + ASSERT_NO_FATAL_FAILURE( + AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); 
ASSERT_EQ(std::string(capture.result().message()), @@ -159,8 +160,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - AssertSecurelyCleared(tiny); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(tiny)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } // long string @@ -169,8 +170,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - AssertSecurelyCleared(large); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } // empty string @@ -181,8 +182,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - AssertSecurelyCleared(empty); - AssertSecurelyCleared(old_area); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(empty)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); } } @@ -191,8 +192,8 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - AssertSecurelyCleared(string); - AssertSecurelyCleared(old_string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); // move-constructing from a secure string securely clears that secure string @@ -200,7 +201,7 @@ TEST(TestSecureString, Construct) { auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - AssertSecurelyCleared(old_secret_from_string_view); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_view)); 
ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(old_secret_from_string_value)); @@ -247,12 +248,12 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - AssertSecurelyCleared(string_copy); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - AssertSecurelyCleared(old_string_copy_area, string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); @@ -260,11 +261,12 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_string_view, std::string_view(string)); if (secret_from_string_view.data() == old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_string_area, secret_from_string.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, + secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -295,8 +297,8 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - AssertSecurelyCleared(old_secret_string_area, - old_secret_from_move_secret_value); + 
ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_string_area, old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); @@ -305,12 +307,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_move_secret_area, - old_secret_from_move_secret_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_from_move_secret_area, old_secret_from_move_secret_value)); } } } @@ -342,12 +344,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size()); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - AssertSecurelyCleared(old_secret_from_copy_secret_area, - old_secret_from_copy_secret_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( + old_secret_from_copy_secret_area, old_secret_from_copy_secret_value)); } } } @@ -355,7 +357,9 @@ TEST(TestSecureString, Assign) { } TEST(TestSecureString, Deconstruct) { -#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + GTEST_SKIP() << "Test accesses deallocated memory"; +#else // We use a very short and a very long string as memory 
management of short and long // strings behaves differently. std::vector<std::string> strings = {"short secret", std::string(1024, 'x')}; @@ -371,9 +375,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - AssertSecurelyCleared(view, old_string_value); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - AssertSecurelyCleared(string); + ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); } #endif } From 8d9c4f98ad3008641ce006d2cf07f94e83a93595 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 10:36:35 +0200 Subject: [PATCH 27/44] Use testing::AssertionResult rather than capturing assertions through TestPartResultReporterInterface --- cpp/src/arrow/util/secure_string_test.cc | 153 +++++++++-------------- 1 file changed, 62 insertions(+), 91 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 0a7eef4511d..11e6dd37b66 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -21,33 +21,37 @@ #include "arrow/util/secure_string.h" -#include - namespace arrow::util::test { std::string_view StringArea(const std::string& string) { return {string.data(), string.capacity()}; } -void AssertSecurelyCleared(const std::string_view area) { +// same as GTest ASSERT_PRED_FORMAT2 macro, but without the outer GTEST_ASSERT_ +#define COMPARE(val1, val2) \ + ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) + +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); - ASSERT_EQ(area, std::string_view(zeros)); + return COMPARE(area, std::string_view(zeros)); } -void AssertSecurelyCleared(const std::string& string) { - AssertSecurelyCleared(StringArea(string)); +::testing::AssertionResult 
AssertSecurelyCleared(const std::string& string) { + return AssertSecurelyCleared(StringArea(string)); } /** * Checks the area has been securely cleared after some position. */ -void AssertSecurelyCleared(const std::string_view area, const size_t pos) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, + const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { std::string zeros(area.size() - pos, '\0'); - ASSERT_EQ(area.substr(pos), std::string_view(zeros)); + return COMPARE(area.substr(pos), std::string_view(zeros)); } + return ::testing::AssertionSuccess(); } /** @@ -58,7 +62,8 @@ void AssertSecurelyCleared(const std::string_view area, const size_t pos) { * right position, this will be false negative / flaky. Therefore, we check for three * consecutive secret characters before we fail. */ -void AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, + const std::string& secret_value) { auto leaks = 0; for (size_t i = 0; i < secret_value.size(); i++) { if (area[i] == secret_value[i]) { @@ -71,87 +76,53 @@ void AssertSecurelyCleared(const std::string_view area, const std::string& secre } } if (leaks >= 3) { - FAIL() << leaks << " characters of secret leaked into " << area; + return ::testing::AssertionFailure() + << leaks << " characters of secret leaked into " << area; } + return ::testing::AssertionSuccess(); } -// GTest test result reporter that captures the result but does not hand it to the unit -// test instance. This effectively hides the result from the GTest test framework. 
-class Reporter : public testing::TestPartResultReporterInterface { - public: - explicit Reporter(testing::TestInfo* test_info) - : result_(testing::TestPartResult::kSuccess, test_info->file(), test_info->line(), - "") {} - void ReportTestPartResult(const testing::TestPartResult& result) override { - result_ = result; - } - const testing::TestPartResult& result() const { return result_; } - - private: - testing::TestPartResult result_; -}; - -#define GET_TEST_RESULT_REPORTER() \ - testing::internal::GetUnitTestImpl()->GetTestPartResultReporterForCurrentThread() - -#define SET_TEST_RESULT_REPORTER(reporter) \ - testing::internal::GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( \ - reporter); - -#define CAPTURE_TEST_RESULT(capture, body) \ - { \ - auto report = GET_TEST_RESULT_REPORTER(); \ - SET_TEST_RESULT_REPORTER(&capture); \ - body; \ - SET_TEST_RESULT_REPORTER(report); \ - } - TEST(TestSecureString, AssertSecurelyCleared) { // This tests AssertSecurelyCleared helper methods is actually able to identify secret - // leakage. It captures test results emitted by ASSERT_EQ and then asserts result type - // and message. - auto capture = Reporter(test_info_); - + // leakage. It retrieves assertion results and asserts result type and message. 
auto short_zeros = std::string(10, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(std::string_view(short_zeros))); + ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); auto large_zeros = std::string(1000, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large_zeros)); + ASSERT_TRUE(AssertSecurelyCleared(large_zeros)); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(no_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - ASSERT_EQ(std::string(capture.result().message()), + auto result = AssertSecurelyCleared(no_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\"\n"); + "0\\0\""); auto some_zeros = no_zeros; some_zeros = std::string(10, '\0'); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(some_zeros, 10)); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(some_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - ASSERT_EQ(std::string(capture.result().message()), + ASSERT_TRUE(AssertSecurelyCleared(some_zeros, 10)); + result = AssertSecurelyCleared(some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\"\n"); - - ASSERT_NO_FATAL_FAILURE( - AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); - CAPTURE_TEST_RESULT(capture, AssertSecurelyCleared(StringArea(some_zeros), no_zeros)); - ASSERT_EQ(capture.result().type(), testing::TestPartResult::kFatalFailure); - 
ASSERT_EQ(std::string(capture.result().message()), - "Failed\n" + "0\\0\""); + + ASSERT_TRUE(AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); + result = AssertSecurelyCleared(StringArea(some_zeros), no_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\n"); + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); } TEST(TestSecureString, SecureClearString) { @@ -160,8 +131,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(tiny)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(tiny)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } // long string @@ -170,8 +141,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(large)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(large)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } // empty string @@ -182,8 +153,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(empty)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(AssertSecurelyCleared(empty)); + ASSERT_TRUE(AssertSecurelyCleared(old_area)); } } @@ -192,8 +163,8 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string)); + ASSERT_TRUE(AssertSecurelyCleared(string)); + 
ASSERT_TRUE(AssertSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); // move-constructing from a secure string securely clears that secure string @@ -201,7 +172,7 @@ TEST(TestSecureString, Construct) { auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_view)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_view)); ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.as_view(), std::string_view(old_secret_from_string_value)); @@ -248,12 +219,12 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string_copy)); + ASSERT_TRUE(AssertSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_string_copy_area, string)); + ASSERT_TRUE(AssertSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); @@ -261,12 +232,12 @@ TEST(TestSecureString, Assign) { ASSERT_EQ(secret_from_string_view, std::string_view(string)); if (secret_from_string_view.data() == old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, - secret_from_string.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, + secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - 
ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -297,8 +268,8 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_string_area, old_secret_from_move_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); @@ -307,12 +278,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_from_move_secret_area, old_secret_from_move_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value)); } } } @@ -344,12 +315,12 @@ TEST(TestSecureString, Assign) { if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size())); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } 
else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared( - old_secret_from_copy_secret_area, old_secret_from_copy_secret_value)); + ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value)); } } } @@ -375,9 +346,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(view, old_string_value)); + ASSERT_TRUE(AssertSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - ASSERT_NO_FATAL_FAILURE(AssertSecurelyCleared(string)); + ASSERT_TRUE(AssertSecurelyCleared(string)); } #endif } From 4297f0d5fe50e315be6bcc830949ba9132a29318 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 11:38:12 +0200 Subject: [PATCH 28/44] Expect string buffers larger than requested size --- cpp/src/arrow/util/secure_string_test.cc | 41 +++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 11e6dd37b66..f4e16abc78c 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -85,14 +85,47 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, TEST(TestSecureString, AssertSecurelyCleared) { // This tests AssertSecurelyCleared helper methods is actually able to identify secret // leakage. It retrieves assertion results and asserts result type and message. 
- auto short_zeros = std::string(10, '\0'); + testing::AssertionResult result = testing::AssertionSuccess(); + + // check short string with all zeros + auto short_zeros = std::string(8, '\0'); + short_zeros.resize(short_zeros.capacity(), '\0'); // for string buffers longer than 8 + short_zeros.resize(8); // now the entire string buffer has zeros + // checks the entire string buffer (capacity) + ASSERT_TRUE(AssertSecurelyCleared(short_zeros)); + // checks only 10 bytes (length) ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); - auto large_zeros = std::string(1000, '\0'); - ASSERT_TRUE(AssertSecurelyCleared(large_zeros)); + // check long string with all zeros + auto long_zeros = std::string(1000, '\0'); + long_zeros.resize(long_zeros.capacity(), '\0'); // for longer string buffers + long_zeros.resize(1000); // now the entire string buffer has zeros + // checks the entire string buffer (capacity) + ASSERT_TRUE(AssertSecurelyCleared(long_zeros)); + // checks only 1000 bytes (length) + ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); + + // check short string with zeros and non-zeros after string length + auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); + short_some_zeros = short_zeros; + result = AssertSecurelyCleared(short_some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\\0\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + + // check long string with zeros and non-zeros after string length + auto long_some_zeros = std::string(long_zeros.length() + 10, '*'); + long_some_zeros = long_zeros; + result = AssertSecurelyCleared(long_some_zeros); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), ""); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - auto result = AssertSecurelyCleared(no_zeros); + result 
= AssertSecurelyCleared(no_zeros); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" From 064dfe77bc3a90e05aca40e2b94d4c815b1bb486 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 12:17:38 +0200 Subject: [PATCH 29/44] Handle string buffers larger than init size --- cpp/src/arrow/util/secure_string_test.cc | 74 +++++++++++++++++++----- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f4e16abc78c..c43e5575601 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -65,7 +65,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { auto leaks = 0; - for (size_t i = 0; i < secret_value.size(); i++) { + for (size_t i = 0; i < std::min(area.length(), secret_value.length()); i++) { if (area[i] == secret_value[i]) { leaks++; } else { @@ -108,24 +108,45 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check short string with zeros and non-zeros after string length auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); short_some_zeros = short_zeros; - result = AssertSecurelyCleared(short_some_zeros); + // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 + // assert only the area that we can control + auto short_some_zeros_view = + std::string_view(short_some_zeros.data(), short_zeros.length() + 3); + result = AssertSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\\0\\0\\0\\0\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\"\n" " std::string_view(zeros)\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " 
Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); // check long string with zeros and non-zeros after string length - auto long_some_zeros = std::string(long_zeros.length() + 10, '*'); - long_some_zeros = long_zeros; - result = AssertSecurelyCleared(long_some_zeros); + auto zeros = std::string(32, '\0'); + auto long_some_zeros = std::string(zeros.length() + 10, '*'); + long_some_zeros = zeros; + // string buffer in long_some_zeros can be larger than zeros.length() + 10 + // assert only the area that we can control + auto long_some_zeros_view = + std::string_view(long_some_zeros.data(), zeros.length() + 10); + result = AssertSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); - ASSERT_EQ(std::string(result.message()), ""); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); - result = AssertSecurelyCleared(no_zeros); + // string buffer in no_zeros can be larger than no_zeros.length() + // assert only the area that we can control + auto no_zeros_view = std::string_view(no_zeros); + result = AssertSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -136,10 +157,14 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); - auto some_zeros = no_zeros; - some_zeros = std::string(10, '\0'); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros, 10)); - result = AssertSecurelyCleared(some_zeros); + // check string with zeros and non-zeros after string length + auto 
some_zeros_front = no_zeros; + some_zeros_front = std::string(10, '\0'); + // string buffer in some_zeros_front can be larger than no_zeros.length() + // assert only the area that we can control + auto some_zeros_fronts_view = + std::string_view(some_zeros_front.data(), no_zeros.length()); + result = AssertSecurelyCleared(some_zeros_fronts_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -150,12 +175,31 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros, "12345678901234567890123456")); - result = AssertSecurelyCleared(StringArea(some_zeros), no_zeros); + ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); + result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); + + // check string with non-zeros and zeros after string length + auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); + some_zeros_back = no_zeros; + // string buffer in some_zeros_back can be larger than no_zeros.length() + 3 + // assert only the area that we can control + auto some_zeros_back_view = + std::string_view(some_zeros_back.data(), no_zeros.length() + 3); + ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); + result = AssertSecurelyCleared(some_zeros_back_view); + ASSERT_FALSE(result); + ASSERT_EQ(std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { From d4faa4fabf8098dcbfc5b7b3a0ad01c9985c56d5 Mon Sep 17 
00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 15:24:47 +0200 Subject: [PATCH 30/44] Don't access deallocated memory in ASAN / Valgrind mode --- cpp/src/arrow/util/secure_string_test.cc | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index c43e5575601..a4e1b0e1886 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -56,14 +56,18 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, /** * Checks the area has been securely cleared from the secret value. - * Assumes the area has been released, so it might have been reclaimed and changed after - * cleaning. We cannot check for all-zeros, best we can check here is no secret character - * has leaked. If by any chance the modification produced a former key character at the - * right position, this will be false negative / flaky. Therefore, we check for three - * consecutive secret characters before we fail. + * Assumes the area has been deallocated, so it might have been reclaimed and changed + * after cleaning. We cannot check for all-zeros, best we can check here is no secret + * character has leaked. If by any chance the modification produced a former key character + * at the right position, this will be false negative / flaky. Therefore, we check for + * three consecutive secret characters before we fail. 
*/ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, const std::string& secret_value) { +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + return testing::AssertionSuccess() << "Not checking deallocated memory"; +#else + // accessing deallocated memory will fail when running with Address Sanitizer enabled auto leaks = 0; for (size_t i = 0; i < std::min(area.length(), secret_value.length()); i++) { if (area[i] == secret_value[i]) { @@ -80,6 +84,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, << leaks << " characters of secret leaked into " << area; } return ::testing::AssertionSuccess(); +#endif } TEST(TestSecureString, AssertSecurelyCleared) { @@ -177,10 +182,14 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); +#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) + ASSERT_TRUE(result); +#else ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "15 characters of secret leaked into " "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); +#endif // check string with non-zeros and zeros after string length auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); From 267626c6e32a25b1499a2289e0e5e78b0722f50c Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 4 Jun 2025 15:52:15 +0200 Subject: [PATCH 31/44] Fix SecureClear for non-local strings, stabalize mem assertions --- cpp/src/arrow/util/secure_string.cc | 5 ++++- cpp/src/arrow/util/secure_string_test.cc | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index ade822c84f7..a82e6d018c4 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -139,8 +139,11 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } void 
SecureString::SecureClear(std::string* secret) { - secret->clear(); + // in case of non-local strings (long strings), this order is vital + // first clear the string buffer SecureClear(reinterpret_cast(secret->data()), secret->capacity()); + // then reset the string size (moves from the non-local to the local string buffer) + secret->clear(); } inline void SecureString::SecureClear(uint8_t* data, size_t size) { diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index a4e1b0e1886..8528aa78bd8 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -112,6 +112,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check short string with zeros and non-zeros after string length auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); + short_zeros.resize(short_zeros.capacity(), '*'); // for string buffers longer than 8 + short_zeros.resize(8); // now the string buffer is filled with '*' after the string short_some_zeros = short_zeros; // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 // assert only the area that we can control @@ -129,6 +131,8 @@ TEST(TestSecureString, AssertSecurelyCleared) { // check long string with zeros and non-zeros after string length auto zeros = std::string(32, '\0'); auto long_some_zeros = std::string(zeros.length() + 10, '*'); + zeros.resize(zeros.capacity(), '*'); // for string buffers longer than 32 + zeros.resize(32); // now the string buffer is filled with '*' after the string long_some_zeros = zeros; // string buffer in long_some_zeros can be larger than zeros.length() + 10 // assert only the area that we can control From 6995d36836ab100d0ab32be5b773f281f98fc958 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 07:09:35 +0200 Subject: [PATCH 32/44] Avoid assigning short string to long string in test --- cpp/src/arrow/util/secure_string_test.cc | 39 ++++++++++++------------ 1 file 
changed, 20 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 8528aa78bd8..6e4472031d6 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -151,7 +151,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz123"); // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control auto no_zeros_view = std::string_view(no_zeros); @@ -160,16 +160,16 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz123\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\""); + "0\\0\\0\\0\\0\""); // check string with zeros and non-zeros after string length auto some_zeros_front = no_zeros; - some_zeros_front = std::string(10, '\0'); - // string buffer in some_zeros_front can be larger than no_zeros.length() + some_zeros_front = std::string(no_zeros.length() - 3, '\0'); + // string buffer in some_zeros_front can be larger than no_zeros.length() - 3 // assert only the area that we can control auto some_zeros_fronts_view = std::string_view(some_zeros_front.data(), no_zeros.length()); @@ -178,21 +178,21 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" + 
"\\0\\0\\0\\0\\0\\0123\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\""); ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); +#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) - ASSERT_TRUE(result); -#else ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), - "15 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0lmnopqrstuvwxyz"); + "3 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" + "\\0123"); #endif // check string with non-zeros and zeros after string length @@ -205,14 +205,15 @@ TEST(TestSecureString, AssertSecurelyCleared) { ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); result = AssertSecurelyCleared(some_zeros_back_view); ASSERT_FALSE(result); - ASSERT_EQ(std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz\\0\\0\\0\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\""); + ASSERT_EQ( + std::string(result.message()), + "Expected equality of these values:\n" + " area\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz123\\0\\0\\0\"\n" + " std::string_view(zeros)\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { From 03b1fef927f68949a1d6c7600c1e24a3d57dfaf5 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 07:16:01 +0200 Subject: [PATCH 33/44] Fix memory issues in tests --- cpp/src/arrow/util/secure_string_test.cc | 135 ++++++++++------------- 1 file changed, 60 insertions(+), 75 deletions(-) diff 
--git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index 6e4472031d6..f56ef97ffd0 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -31,7 +31,7 @@ std::string_view StringArea(const std::string& string) { #define COMPARE(val1, val2) \ ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area) { +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); return COMPARE(area, std::string_view(zeros)); @@ -44,7 +44,7 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string& string) { /** * Checks the area has been securely cleared after some position. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { @@ -62,9 +62,9 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, * at the right position, this will be false negative / flaky. Therefore, we check for * three consecutive secret characters before we fail. 
*/ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view area, +::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, const std::string& secret_value) { -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) return testing::AssertionSuccess() << "Not checking deallocated memory"; #else // accessing deallocated memory will fail when running with Address Sanitizer enabled @@ -110,110 +110,87 @@ TEST(TestSecureString, AssertSecurelyCleared) { // checks only 1000 bytes (length) ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); - // check short string with zeros and non-zeros after string length - auto short_some_zeros = std::string(short_zeros.length() + 3, '*'); - short_zeros.resize(short_zeros.capacity(), '*'); // for string buffers longer than 8 - short_zeros.resize(8); // now the string buffer is filled with '*' after the string - short_some_zeros = short_zeros; - // string buffer in short_some_zeros can be larger than short_zeros.length() + 3 + auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); + // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control - auto short_some_zeros_view = - std::string_view(short_some_zeros.data(), short_zeros.length() + 3); - result = AssertSecurelyCleared(short_some_zeros_view); + auto no_zeros_view = std::string_view(no_zeros); + result = AssertSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0**\"\n" + " Which is: \"abcdefghijklmnopqrstuvwxyz\"\n" " std::string_view(zeros)\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\""); - // check long string with zeros and non-zeros after string length - auto zeros = 
std::string(32, '\0'); - auto long_some_zeros = std::string(zeros.length() + 10, '*'); - zeros.resize(zeros.capacity(), '*'); // for string buffers longer than 32 - zeros.resize(32); // now the string buffer is filled with '*' after the string - long_some_zeros = zeros; - // string buffer in long_some_zeros can be larger than zeros.length() + 10 + // check short string with zeros and non-zeros after string length + auto stars = std::string(12, '*'); + auto short_some_zeros = stars; + memset(short_some_zeros.data(), '\0', 8); + short_some_zeros.resize(8); + // string buffer in short_some_zeros can be larger than 12 // assert only the area that we can control - auto long_some_zeros_view = - std::string_view(long_some_zeros.data(), zeros.length() + 10); - result = AssertSecurelyCleared(long_some_zeros_view); + auto short_some_zeros_view = std::string_view(short_some_zeros.data(), 12); + result = AssertSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0***\"\n" " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); + " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz123"); - // string buffer in no_zeros can be larger than no_zeros.length() - // assert only the area that we can control - auto no_zeros_view = std::string_view(no_zeros); - result = AssertSecurelyCleared(no_zeros_view); + ASSERT_TRUE(AssertSecurelyCleared(short_some_zeros, stars)); +#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) + result = AssertSecurelyCleared(short_some_zeros_view, stars); ASSERT_FALSE(result); 
ASSERT_EQ(std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz123\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\""); + "3 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0***"); +#endif - // check string with zeros and non-zeros after string length - auto some_zeros_front = no_zeros; - some_zeros_front = std::string(no_zeros.length() - 3, '\0'); - // string buffer in some_zeros_front can be larger than no_zeros.length() - 3 + // check long string with zeros and non-zeros after string length + stars = std::string(42, '*'); + auto long_some_zeros = stars; + memset(long_some_zeros.data(), '\0', 32); + long_some_zeros.resize(32); + // string buffer in long_some_zeros can be larger than 42 // assert only the area that we can control - auto some_zeros_fronts_view = - std::string_view(some_zeros_front.data(), no_zeros.length()); - result = AssertSecurelyCleared(some_zeros_fronts_view); + auto long_some_zeros_view = std::string_view(long_some_zeros.data(), 42); + result = AssertSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" " area\n" - " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" - "\\0\\0\\0\\0\\0\\0123\"\n" + " Which is: " + "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********\"\n" " std::string_view(zeros)\n" " Which is: " "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\""); + "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros_front, no_zeros)); -#if !defined(ARROW_VALGRIND) && !defined(ARROW_USE_ASAN) - result = AssertSecurelyCleared(some_zeros_fronts_view, no_zeros); + 
ASSERT_TRUE(AssertSecurelyCleared(long_some_zeros, stars)); +#if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) + result = AssertSecurelyCleared(long_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), - "3 characters of secret leaked into " - "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0" - "\\0123"); + "9 characters of secret leaked into " + "\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" + "0\\0\\0\\0\\0\\0\\0\\0\\0*********"); #endif // check string with non-zeros and zeros after string length auto some_zeros_back = std::string(no_zeros.length() + 3, '\0'); some_zeros_back = no_zeros; + memset(some_zeros_back.data() + no_zeros.length() * sizeof(char), '\0', 3 + 1); // string buffer in some_zeros_back can be larger than no_zeros.length() + 3 // assert only the area that we can control auto some_zeros_back_view = std::string_view(some_zeros_back.data(), no_zeros.length() + 3); ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); - result = AssertSecurelyCleared(some_zeros_back_view); - ASSERT_FALSE(result); - ASSERT_EQ( - std::string(result.message()), - "Expected equality of these values:\n" - " area\n" - " Which is: \"abcdefghijklmnopqrstuvwxyz123\\0\\0\\0\"\n" - " std::string_view(zeros)\n" - " Which is: " - "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" - "0\\0\\0\\0\\0\\0\\0\\0\""); } TEST(TestSecureString, SecureClearString) { @@ -281,8 +258,16 @@ TEST(TestSecureString, Assign) { // We initialize with the first string and iteratively assign the subsequent values. // The first two values are local (very short strings), the remainder are non-local // strings. Memory management of short and long strings behaves differently. 
- std::vector test_strings = { - "secret", "another secret", "a much longer secret", std::string(1024, 'x')}; + std::vector test_strings = {"secret", "another secret", + std::string(128, 'x'), std::string(1024, 'y')}; + for (auto& string : test_strings) { + // string buffer might be longer than string.length with arbitrary bytes + // secure string does not have to protect that garbage bytes + // zeroing here so we get expected results + auto length = string.length(); + string.resize(string.capacity(), '\0'); + string.resize(length); + } std::vector reverse_strings = std::vector(test_strings); std::reverse(reverse_strings.begin(), reverse_strings.end()); @@ -384,9 +369,9 @@ TEST(TestSecureString, Assign) { std::string init_string_copy(init_string); SecureString secret_from_copy_secret(std::move(init_string_copy)); + // copy-assigning from a secure string does not modify that secure string + // the earlier value of the secure string is securely cleared for (const auto& string : strings) { - // copy-assigning from a secure string does not modify that secure string - // the earlier value of the secure string is securely cleared auto string_copy = std::string(string); SecureString secret_string(std::move(string_copy)); ASSERT_FALSE(string.empty()); @@ -419,7 +404,7 @@ TEST(TestSecureString, Assign) { } TEST(TestSecureString, Deconstruct) { -#if defined(ARROW_VALGRIND) || defined(ARROW_USE_ASAN) +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) GTEST_SKIP() << "Test accesses deallocated memory"; #else // We use a very short and a very long string as memory management of short and long From e7470cd676a8a9b51f25272aa1a251cb33fa052c Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 5 Jun 2025 15:26:33 +0200 Subject: [PATCH 34/44] Improve comments --- cpp/src/arrow/util/secure_string.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index a82e6d018c4..3364c096057 
100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -38,7 +38,7 @@ namespace arrow::util { /// Note: -/// A string std::string is securely moved into a SecureString in two steps: +/// A std::string is securely moved into a SecureString in two steps: /// 1. the std::string is moved via std::move(string) /// 2. the std::string is securely cleared /// @@ -139,10 +139,8 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } void SecureString::SecureClear(std::string* secret) { - // in case of non-local strings (long strings), this order is vital - // first clear the string buffer + // call SecureClear first just in case secret->clear() frees some memory SecureClear(reinterpret_cast(secret->data()), secret->capacity()); - // then reset the string size (moves from the non-local to the local string buffer) secret->clear(); } From 863923563802fcf2b72d6fefff263808d1151cc5 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 08:06:38 +0200 Subject: [PATCH 35/44] Apply code review comments - Rename secure_move to SecureMove - Move SecureMove into anonymous namespace - Move SecureClear up in source file - Rename AssertSecurelyCleared to IsSecurelyCleared - Remove std::string_view(std::string) from tests where not needed --- cpp/src/arrow/util/secure_string.cc | 116 ++++++++++++----------- cpp/src/arrow/util/secure_string_test.cc | 105 ++++++++++---------- 2 files changed, 111 insertions(+), 110 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index 3364c096057..d2aaf6bccaa 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -57,24 +57,75 @@ namespace arrow::util { /// must move the pointer of long string into SecureString (which later clears the /// string). Otherwise, the content of the string cannot be securely cleared. /// -/// This condition is checked by secure_move. 
+/// This condition is checked by SecureMove. -void secure_move(std::string& string, std::string& dst) { +namespace { +void SecureMove(std::string& string, std::string& dst) { auto ptr = string.data(); dst = std::move(string); - // We require the buffer address string.data() to remain (not be freed), - // or reused by dst. Otherwise, we cannot securely clear string after this move + // We require the buffer address string.data() to remain (not be freed) as is, + // or to be reused by dst. Otherwise, we cannot securely clear string after std::move ARROW_CHECK(string.data() == ptr || dst.data() == ptr); } +} // namespace + +inline void SecureString::SecureClear(uint8_t* data, size_t size) { + // There is various prior art for this: + // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ + // - libb2's `secure_zero_memory` at + // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 + // - libsodium's `sodium_memzero` at + // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 + // Note: + // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html +#if defined(_WIN32) + // SecureZeroMemory is meant to not be optimized away + SecureZeroMemory(data, size); +#elif defined(__STDC_LIB_EXT1__) + // memset_s is meant to not be optimized away + memset_s(data, size, 0, size); +#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 + // rely on some implementation in OpenSSL cryptographic library + OPENSSL_cleanse(data, size); +#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) + // explicit_bzero is meant to not be optimized away + explicit_bzero(data, size); +#else + // Volatile pointer to memset function is an attempt to avoid + // that the compiler optimizes away the memset function call. 
+ // pretty much what OPENSSL_cleanse above does + // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 + static const volatile auto memset_v = &memset; + memset_v(data, 0, size); + +# if defined(__GNUC__) || defined(__clang__) + // __asm__ only supported by GCC and Clang + // not supported by MSVC on the ARM and x64 processors + // https://en.cppreference.com/w/c/language/asm.html + // https://en.cppreference.com/w/cpp/language/asm.html + + // Additional attempt on top of volatile memset_v above + // to avoid that the compiler optimizes away the memset function call. + // Assembler code that tells the compiler 'data' has side effects. + // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: + // - "volatile": the asm produces side effects + // - "memory": effectively forms a read/write memory barrier for the compiler + __asm__ __volatile__("" /* no actual code */ + : /* no output */ + : "r"(data) /* input */ + : "memory" /* memory side effects beyond input and output */); +# endif +#endif +} SecureString::SecureString(SecureString&& other) noexcept { - secure_move(other.secret_, secret_); + SecureMove(other.secret_, secret_); other.Dispose(); } SecureString::SecureString(std::string&& secret) noexcept { - secure_move(secret, secret_); + SecureMove(secret, secret_); SecureClear(&secret); } @@ -86,7 +137,7 @@ SecureString& SecureString::operator=(SecureString&& other) noexcept { return *this; } Dispose(); - secure_move(other.secret_, secret_); + SecureMove(other.secret_, secret_); other.Dispose(); return *this; } @@ -103,7 +154,7 @@ SecureString& SecureString::operator=(const SecureString& other) { SecureString& SecureString::operator=(std::string&& secret) noexcept { Dispose(); - secure_move(secret, secret_); + SecureMove(secret, secret_); SecureClear(&secret); return *this; } @@ -144,53 +195,4 @@ void SecureString::SecureClear(std::string* secret) { secret->clear(); } -inline void 
SecureString::SecureClear(uint8_t* data, size_t size) { - // There is various prior art for this: - // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ - // - libb2's `secure_zero_memory` at - // https://github.com/BLAKE2/libb2/blob/30d45a17c59dc7dbf853da3085b71d466275bd0a/src/blake2-impl.h#L140-L160 - // - libsodium's `sodium_memzero` at - // https://github.com/jedisct1/libsodium/blob/be58b2e6664389d9c7993b55291402934b43b3ca/src/libsodium/sodium/utils.c#L78:L101 - // Note: - // https://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html -#if defined(_WIN32) - // SecureZeroMemory is meant to not be optimized away - SecureZeroMemory(data, size); -#elif defined(__STDC_LIB_EXT1__) - // memset_s is meant to not be optimized away - memset_s(data, size, 0, size); -#elif defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER >= 0x30000000 - // rely on some implementation in OpenSSL cryptographic library - OPENSSL_cleanse(data, size); -#elif defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25)) - // explicit_bzero is meant to not be optimized away - explicit_bzero(data, size); -#else - // Volatile pointer to memset function is an attempt to avoid - // that the compiler optimizes away the memset function call. - // pretty much what OPENSSL_cleanse above does - // https://github.com/openssl/openssl/blob/3423c30db3aa044f46e1f0270e2ecd899415bf5f/crypto/mem_clr.c#L22 - static const volatile auto memset_v = &memset; - memset_v(data, 0, size); - -# if defined(__GNUC__) || defined(__clang__) - // __asm__ only supported by GCC and Clang - // not supported by MSVC on the ARM and x64 processors - // https://en.cppreference.com/w/c/language/asm.html - // https://en.cppreference.com/w/cpp/language/asm.html - - // Additional attempt on top of volatile memset_v above - // to avoid that the compiler optimizes away the memset function call. 
- // Assembler code that tells the compiler 'data' has side effects. - // https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: - // - "volatile": the asm produces side effects - // - "memory": effectively forms a read/write memory barrier for the compiler - __asm__ __volatile__("" /* no actual code */ - : /* no output */ - : "r"(data) /* input */ - : "memory" /* memory side effects beyond input and output */); -# endif -#endif -} - } // namespace arrow::util diff --git a/cpp/src/arrow/util/secure_string_test.cc b/cpp/src/arrow/util/secure_string_test.cc index f56ef97ffd0..0863751caa5 100644 --- a/cpp/src/arrow/util/secure_string_test.cc +++ b/cpp/src/arrow/util/secure_string_test.cc @@ -31,21 +31,21 @@ std::string_view StringArea(const std::string& string) { #define COMPARE(val1, val2) \ ::testing::internal::EqHelper::Compare(#val1, #val2, val1, val2) -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area) { // the entire area is filled with zeros std::string zeros(area.size(), '\0'); return COMPARE(area, std::string_view(zeros)); } -::testing::AssertionResult AssertSecurelyCleared(const std::string& string) { - return AssertSecurelyCleared(StringArea(string)); +::testing::AssertionResult IsSecurelyCleared(const std::string& string) { + return IsSecurelyCleared(StringArea(string)); } /** * Checks the area has been securely cleared after some position. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, - const size_t pos) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, + const size_t pos) { // the area after pos is filled with zeros if (pos < area.size()) { std::string zeros(area.size() - pos, '\0'); @@ -62,8 +62,8 @@ ::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, * at the right position, this will be false negative / flaky. 
Therefore, we check for * three consecutive secret characters before we fail. */ -::testing::AssertionResult AssertSecurelyCleared(const std::string_view& area, - const std::string& secret_value) { +::testing::AssertionResult IsSecurelyCleared(const std::string_view& area, + const std::string& secret_value) { #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) return testing::AssertionSuccess() << "Not checking deallocated memory"; #else @@ -97,24 +97,24 @@ TEST(TestSecureString, AssertSecurelyCleared) { short_zeros.resize(short_zeros.capacity(), '\0'); // for string buffers longer than 8 short_zeros.resize(8); // now the entire string buffer has zeros // checks the entire string buffer (capacity) - ASSERT_TRUE(AssertSecurelyCleared(short_zeros)); + ASSERT_TRUE(IsSecurelyCleared(short_zeros)); // checks only 10 bytes (length) - ASSERT_TRUE(AssertSecurelyCleared(std::string_view(short_zeros))); + ASSERT_TRUE(IsSecurelyCleared(std::string_view(short_zeros))); // check long string with all zeros auto long_zeros = std::string(1000, '\0'); long_zeros.resize(long_zeros.capacity(), '\0'); // for longer string buffers long_zeros.resize(1000); // now the entire string buffer has zeros // checks the entire string buffer (capacity) - ASSERT_TRUE(AssertSecurelyCleared(long_zeros)); + ASSERT_TRUE(IsSecurelyCleared(long_zeros)); // checks only 1000 bytes (length) - ASSERT_TRUE(AssertSecurelyCleared(std::string_view(long_zeros))); + ASSERT_TRUE(IsSecurelyCleared(std::string_view(long_zeros))); auto no_zeros = std::string("abcdefghijklmnopqrstuvwxyz"); // string buffer in no_zeros can be larger than no_zeros.length() // assert only the area that we can control auto no_zeros_view = std::string_view(no_zeros); - result = AssertSecurelyCleared(no_zeros_view); + result = IsSecurelyCleared(no_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -133,7 +133,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // 
string buffer in short_some_zeros can be larger than 12 // assert only the area that we can control auto short_some_zeros_view = std::string_view(short_some_zeros.data(), 12); - result = AssertSecurelyCleared(short_some_zeros_view); + result = IsSecurelyCleared(short_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -142,9 +142,9 @@ TEST(TestSecureString, AssertSecurelyCleared) { " std::string_view(zeros)\n" " Which is: \"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(short_some_zeros, stars)); + ASSERT_TRUE(IsSecurelyCleared(short_some_zeros, stars)); #if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) - result = AssertSecurelyCleared(short_some_zeros_view, stars); + result = IsSecurelyCleared(short_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "3 characters of secret leaked into " @@ -159,7 +159,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // string buffer in long_some_zeros can be larger than 42 // assert only the area that we can control auto long_some_zeros_view = std::string_view(long_some_zeros.data(), 42); - result = AssertSecurelyCleared(long_some_zeros_view); + result = IsSecurelyCleared(long_some_zeros_view); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "Expected equality of these values:\n" @@ -172,9 +172,9 @@ TEST(TestSecureString, AssertSecurelyCleared) { "\"\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\" "0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\""); - ASSERT_TRUE(AssertSecurelyCleared(long_some_zeros, stars)); + ASSERT_TRUE(IsSecurelyCleared(long_some_zeros, stars)); #if !defined(ARROW_VALGRIND) && !defined(ADDRESS_SANITIZER) - result = AssertSecurelyCleared(long_some_zeros_view, stars); + result = IsSecurelyCleared(long_some_zeros_view, stars); ASSERT_FALSE(result); ASSERT_EQ(std::string(result.message()), "9 characters of secret 
leaked into " @@ -190,7 +190,7 @@ TEST(TestSecureString, AssertSecurelyCleared) { // assert only the area that we can control auto some_zeros_back_view = std::string_view(some_zeros_back.data(), no_zeros.length() + 3); - ASSERT_TRUE(AssertSecurelyCleared(some_zeros_back_view, no_zeros.length())); + ASSERT_TRUE(IsSecurelyCleared(some_zeros_back_view, no_zeros.length())); } TEST(TestSecureString, SecureClearString) { @@ -199,8 +199,8 @@ TEST(TestSecureString, SecureClearString) { std::string tiny("abc"); auto old_area = StringArea(tiny); SecureString::SecureClear(&tiny); - ASSERT_TRUE(AssertSecurelyCleared(tiny)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(tiny)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } // long string @@ -209,8 +209,8 @@ TEST(TestSecureString, SecureClearString) { large.resize(512, 'y'); auto old_area = StringArea(large); SecureString::SecureClear(&large); - ASSERT_TRUE(AssertSecurelyCleared(large)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(large)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } // empty string @@ -221,8 +221,8 @@ TEST(TestSecureString, SecureClearString) { empty.resize(0); auto old_area = StringArea(empty); SecureString::SecureClear(&empty); - ASSERT_TRUE(AssertSecurelyCleared(empty)); - ASSERT_TRUE(AssertSecurelyCleared(old_area)); + ASSERT_TRUE(IsSecurelyCleared(empty)); + ASSERT_TRUE(IsSecurelyCleared(old_area)); } } @@ -231,25 +231,24 @@ TEST(TestSecureString, Construct) { std::string string("hello world"); auto old_string = StringArea(string); SecureString secret_from_string(std::move(string)); - ASSERT_TRUE(AssertSecurelyCleared(string)); - ASSERT_TRUE(AssertSecurelyCleared(old_string)); + ASSERT_TRUE(IsSecurelyCleared(string)); + ASSERT_TRUE(IsSecurelyCleared(old_string)); ASSERT_FALSE(secret_from_string.empty()); + ASSERT_EQ(secret_from_string.as_view(), "hello world"); // move-constructing from a secure string securely clears that secure string 
auto old_secret_from_string_view = secret_from_string.as_view(); auto old_secret_from_string_value = std::string(secret_from_string.as_view()); SecureString secret_from_move_secret(std::move(secret_from_string)); ASSERT_TRUE(secret_from_string.empty()); - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_view)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_view)); ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); // copy-constructing from a secure string does not modify that secure string SecureString secret_from_secret(secret_from_move_secret); ASSERT_FALSE(secret_from_move_secret.empty()); - ASSERT_EQ(secret_from_move_secret.as_view(), - std::string_view(old_secret_from_string_value)); + ASSERT_EQ(secret_from_move_secret.as_view(), old_secret_from_string_value); ASSERT_FALSE(secret_from_secret.empty()); ASSERT_EQ(secret_from_secret, secret_from_move_secret); } @@ -295,25 +294,25 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(string.empty()); ASSERT_TRUE(string_copy.empty()); - ASSERT_TRUE(AssertSecurelyCleared(string_copy)); + ASSERT_TRUE(IsSecurelyCleared(string_copy)); auto secret_from_string_view = secret_from_string.as_view(); // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (secret_from_string_view.data() != old_string_copy_area.data()) { - ASSERT_TRUE(AssertSecurelyCleared(old_string_copy_area, string)); + ASSERT_TRUE(IsSecurelyCleared(old_string_copy_area, string)); } ASSERT_FALSE(secret_from_string.empty()); ASSERT_EQ(secret_from_string.size(), string.size()); ASSERT_EQ(secret_from_string.length(), string.length()); - ASSERT_EQ(secret_from_string_view, std::string_view(string)); + ASSERT_EQ(secret_from_string_view, string); if (secret_from_string_view.data() == 
old_secret_from_string_area.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, - secret_from_string.size())); + ASSERT_TRUE( + IsSecurelyCleared(old_secret_from_string_area, secret_from_string.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_string_area, - old_secret_from_string_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_string_area, + old_secret_from_string_value)); } } } @@ -344,22 +343,22 @@ TEST(TestSecureString, Assign) { // the secure string can reuse the string_copy's string buffer after assignment // then, string_copy's string buffer is obviously not cleared if (old_secret_string_area.data() != secret_from_move_secret_view.data()) { - ASSERT_TRUE(AssertSecurelyCleared(old_secret_string_area, - old_secret_from_move_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_string_area, + old_secret_from_move_secret_value)); } ASSERT_FALSE(secret_from_move_secret.empty()); ASSERT_EQ(secret_from_move_secret.size(), string.size()); ASSERT_EQ(secret_from_move_secret.length(), string.length()); - ASSERT_EQ(secret_from_move_secret_view, std::string_view(string)); + ASSERT_EQ(secret_from_move_secret_view, string); if (old_secret_from_move_secret_area.data() == secret_from_move_secret_view.data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, - secret_from_move_secret.size())); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_move_secret_area, + secret_from_move_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_move_secret_area, - old_secret_from_move_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_move_secret_area, + old_secret_from_move_secret_value)); } } } 
@@ -387,16 +386,16 @@ TEST(TestSecureString, Assign) { ASSERT_FALSE(secret_from_copy_secret.empty()); ASSERT_EQ(secret_from_copy_secret.size(), string.size()); ASSERT_EQ(secret_from_copy_secret.length(), string.length()); - ASSERT_EQ(secret_from_copy_secret.as_view(), std::string_view(string)); + ASSERT_EQ(secret_from_copy_secret.as_view(), string); if (old_secret_from_copy_secret_area.data() == secret_from_copy_secret.as_view().data()) { // when secure string reuses the buffer, the old value must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - secret_from_copy_secret.size())); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_copy_secret_area, + secret_from_copy_secret.size())); } else { // when secure string has a new buffer, the old buffer must be cleared - ASSERT_TRUE(AssertSecurelyCleared(old_secret_from_copy_secret_area, - old_secret_from_copy_secret_value)); + ASSERT_TRUE(IsSecurelyCleared(old_secret_from_copy_secret_area, + old_secret_from_copy_secret_value)); } } } @@ -422,9 +421,9 @@ TEST(TestSecureString, Deconstruct) { // deconstruct secret on leaving this context } // assert secret memory is cleared on deconstruction - ASSERT_TRUE(AssertSecurelyCleared(view, old_string_value)); + ASSERT_TRUE(IsSecurelyCleared(view, old_string_value)); // so is the string (tested more thoroughly elsewhere) - ASSERT_TRUE(AssertSecurelyCleared(string)); + ASSERT_TRUE(IsSecurelyCleared(string)); } #endif } From 0ec848ced0e851fb876f7e6c5de5c1d8f93307d2 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 08:16:17 +0200 Subject: [PATCH 36/44] Move SecureClear(std::string*) up in source file as well --- cpp/src/arrow/util/secure_string.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/util/secure_string.cc b/cpp/src/arrow/util/secure_string.cc index d2aaf6bccaa..bd52c55f312 100644 --- a/cpp/src/arrow/util/secure_string.cc +++ b/cpp/src/arrow/util/secure_string.cc @@ -70,6 +70,12 @@ 
void SecureMove(std::string& string, std::string& dst) { } } // namespace +void SecureString::SecureClear(std::string* secret) { + // call SecureClear first just in case secret->clear() frees some memory + SecureClear(reinterpret_cast<uint8_t*>(secret->data()), secret->capacity()); + secret->clear(); +} + inline void SecureString::SecureClear(uint8_t* data, size_t size) { // There is various prior art for this: // https://www.cryptologie.net/article/419/zeroing-memory-compiler-optimizations-and-memset_s/ @@ -189,10 +195,4 @@ std::string_view SecureString::as_view() const { void SecureString::Dispose() { SecureClear(&secret_); } -void SecureString::SecureClear(std::string* secret) { - // call SecureClear first just in case secret->clear() frees some memory - SecureClear(reinterpret_cast<uint8_t*>(secret->data()), secret->capacity()); - secret->clear(); -} - } // namespace arrow::util From e11d223341b30aeb6e6ac1dc2ddec0dd9c6708e1 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 16:21:17 +0200 Subject: [PATCH 37/44] Add back std::string methods as deprecated --- cpp/src/parquet/encryption/encryption.cc | 12 +++- cpp/src/parquet/encryption/encryption.h | 55 ++++++++++++++++--- .../parquet/encryption/file_key_unwrapper.cc | 6 +- .../parquet/encryption/file_key_unwrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 4 +- .../parquet/encryption/key_wrapping_test.cc | 4 +- cpp/src/parquet/encryption/kms_client.h | 43 +++++++++++++-- .../encryption/local_wrap_kms_client.cc | 2 +- .../encryption/local_wrap_kms_client.h | 2 +- cpp/src/parquet/encryption/properties_test.cc | 6 +- .../parquet/encryption/test_in_memory_kms.cc | 2 +- .../parquet/encryption/test_in_memory_kms.h | 2 +- 12 files changed, 111 insertions(+), 29 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 06734079cbf..a27e4e7d0a7 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -40,10 
+40,15 @@ void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); } -SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) { return key_map_.at(key_id); } +ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( + std::string column_key) { + return key(SecureString(std::move(column_key))); +} + ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( SecureString column_key) { if (column_key.empty()) return this; @@ -86,6 +91,11 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key return this; } +FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( + std::string footer_key) { + return this->footer_key(SecureString(std::move(footer_key))); +} + FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( SecureString footer_key) { if (footer_key.empty()) { diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 7e681c28d1a..259c3947e78 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -48,7 +48,24 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: - virtual ::arrow::util::SecureString GetKey(const std::string& key_metadata) = 0; + /// \brief Retrieve a key. + /// \deprecated Deprecated since 21.0.0. + /// Implement GetKeyById(const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. " + "Implement GetKeyById(const std::string&) instead.") + virtual std::string GetKey(const std::string& key_id) { + throw ParquetException("Not implemented"); + } + + /// \brief Retrieve a key by its id. 
+ virtual ::arrow::util::SecureString GetKeyById(const std::string& key_id) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(GetKey(key_id)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } + virtual ~DecryptionKeyRetriever() {} }; @@ -56,17 +73,18 @@ class PARQUET_EXPORT DecryptionKeyRetriever { class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKey(const std::string& key_metadata) override { - // key_metadata is string but for IntegerKeyIdRetriever it encodes + + ::arrow::util::SecureString GetKeyById(const std::string& key_id_string) override { + // key_id_string is string but for IntegerKeyIdRetriever it encodes // a native-endian 32 bit unsigned integer key_id uint32_t key_id; - assert(key_metadata.size() == sizeof(key_id)); - memcpy(&key_id, key_metadata.data(), sizeof(key_id)); + assert(key_id_string.size() == sizeof(key_id)); + memcpy(&key_id, key_id_string.data(), sizeof(key_id)); - return GetKey(key_id); + return GetKeyById(key_id); } - ::arrow::util::SecureString GetKey(uint32_t key_id) { return key_map_.at(key_id); } + ::arrow::util::SecureString GetKeyById(uint32_t key_id) { return key_map_.at(key_id); } private: std::map<uint32_t, ::arrow::util::SecureString> key_map_; @@ -76,7 +94,7 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(std::string key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKey(const std::string& key_metadata) override; + ::arrow::util::SecureString GetKeyById(const std::string& key_id) override; private: std::map<std::string, ::arrow::util::SecureString> key_map_; @@ -125,6 +143,11 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. 
+ /// \deprecated "Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead." + ARROW_DEPRECATED("Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead.") + Builder* key(std::string column_key); + + /// \copydoc key(std::string) Builder* key(::arrow::util::SecureString column_key); /// Set a key retrieval metadata. @@ -245,6 +268,14 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. + /// \deprecated Deprecated since 21.0.0. + /// Use footer_key(arrow::util::SecureString) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. " + "Use footer_key(arrow::util::SecureString) instead.") + Builder* footer_key(std::string footer_key); + + /// \copydoc footer_key(std::string footer_key) Builder* footer_key(::arrow::util::SecureString footer_key); /// Set explicit column keys (decryption properties). @@ -359,6 +390,14 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: + /// \deprecated Deprecated since 21.0.0. Use Builder(arrow::util::SecureString) + /// instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. 
" + "Use Builder(arrow::util::SecureString) instead") + explicit Builder(std::string footer_key) + : Builder(::arrow::util::SecureString(std::move(footer_key))) {} + explicit Builder(::arrow::util::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter), diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 4dc1492a0b7..d7463590358 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -69,7 +69,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKeyById(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -110,7 +110,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma SecureString data_key; if (!double_wrapping) { - data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); + data_key = kms_client->UnWrapKey(encoded_wrapped_dek, master_key_id); } else { // Get Key Encryption Key const std::string& encoded_kek_id = key_material.kek_id(); @@ -118,7 +118,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { - return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); + return kms_client->UnWrapKey(encoded_wrapped_kek, master_key_id); }); // Decrypt the data key diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index d2c35d54b88..d674b5cf2ac 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ 
b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -65,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - ::arrow::util::SecureString GetKey(const std::string& key_metadata) override; + ::arrow::util::SecureString GetKeyById(const std::string& key_metadata_bytes) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index b90d3158559..efd1ec8067c 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -78,7 +78,7 @@ const SecureString& InternalFileDecryptor::GetFooterKey() { if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key_ = properties_->key_retriever()->GetKey(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKeyById(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; @@ -117,7 +117,7 @@ SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, if (column_key.empty() && !column_key_metadata.empty() && properties_->key_retriever() != nullptr) { try { - column_key = properties_->key_retriever()->GetKey(column_key_metadata); + column_key = properties_->key_retriever()->GetKeyById(column_key_metadata); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 04494d8cc21..4ff3903fb7e 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ 
b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,10 +86,10 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - SecureString footer_key = unwrapper.GetKey(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKeyById(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - SecureString column_key = unwrapper.GetKey(key_metadata_json_column); + SecureString column_key = unwrapper.GetKeyById(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 524476a64a8..0639197fb37 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -80,14 +80,47 @@ class PARQUET_EXPORT KmsClient { static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT"; static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT"; - /// Wraps a key - encrypts it with the master key, encodes the result + /// \brief Wraps a key. + /// + /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. + /// + /// \deprecated Deprecated since 21.0.0. Implement + /// WrapKey(const SecureString&, const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. 
" + "Implement WrapKey(const SecureString&, const std::string&) instead.") + virtual std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc WrapKey(const std::string&, const std::string&) virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, - const std::string& master_key_identifier) = 0; + const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = WrapKey(std::string(key_bytes.as_view()), master_key_identifier); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } + + /// \brief Decrypts (unwraps) a key with the master key. + /// \deprecated Deprecated since 21.0.0. Implement UnWrapKey instead. + ARROW_DEPRECATED("Deprecated in 21.0.0. Implement UnWrapKey instead.") + virtual std::string UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc UnwrapKey(const std::string&, const std::string&) + virtual ::arrow::util::SecureString UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(UnwrapKey(wrapped_key, master_key_identifier)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } - /// Decrypts (unwraps) a key with the master key. 
- virtual ::arrow::util::SecureString UnwrapKey( - const std::string& wrapped_key, const std::string& master_key_identifier) = 0; virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 80543c2932a..6bd80479a03 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -84,7 +84,7 @@ std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, return LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, +SecureString LocalWrapKmsClient::UnWrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 607c75a4c2e..7eedbaaf77e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -38,7 +38,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnwrapKey( + ::arrow::util::SecureString UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 895cf6c6343..ed41f312842 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -224,9 +224,9 @@ TEST(TestDecryptionProperties, UseKeyRetriever) { std::shared_ptr props = builder.build(); auto out_key_retriever = props->key_retriever(); - ASSERT_EQ(kFooterEncryptionKey, 
out_key_retriever->GetKey("kf")); - ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKey("kc1")); - ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKey("kc2")); + ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKeyById("kf")); + ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKeyById("kc1")); + ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKeyById("kc2")); } TEST(TestDecryptionProperties, SupplyAadPrefix) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 6af15d177fd..969d6df858c 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -79,7 +79,7 @@ std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -SecureString TestOnlyInServerWrapKms::UnwrapKey( +SecureString TestOnlyInServerWrapKms::UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index b9d4169c634..df63984546a 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -56,7 +56,7 @@ class TestOnlyInServerWrapKms : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnwrapKey( + ::arrow::util::SecureString UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( From fb41244cffa6252cb78cba5d32e011bba2f4e90a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 2 Jul 2025 09:15:09 +0200 Subject: [PATCH 38/44] Fix merge --- cpp/src/parquet/encryption/encryption.cc | 1 + 1 file 
changed, 1 insertion(+) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 97930b327eb..287c6ccd900 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -204,6 +204,7 @@ ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, if (encrypted_with_footer_key_) { DCHECK(key_metadata_.empty()); } +} ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path, SecureString key) From fa9abed7c3a6e8e6ca9c23b7742ab88e4ac9d560 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 7 Jul 2025 16:57:46 +0200 Subject: [PATCH 39/44] Use const in favour of inline strings --- cpp/src/parquet/encryption/test_encryption_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index 7c34c501580..43ac25744bc 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -45,9 +45,9 @@ using ::arrow::util::SecureString; constexpr int kFixedLength = 10; -inline SecureString kFooterEncryptionKey("0123456789012345"); -inline SecureString kColumnEncryptionKey1("1234567890123450"); -inline SecureString kColumnEncryptionKey2("1234567890123451"); +const SecureString kFooterEncryptionKey("0123456789012345"); +const SecureString kColumnEncryptionKey1("1234567890123450"); +const SecureString kColumnEncryptionKey2("1234567890123451"); const char kFileName[] = "tester"; // Get the path of file inside parquet test data directory From 290449a36c4b45fc5e6be931a6a12b378d80cdf4 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 7 Jul 2025 16:44:04 +0200 Subject: [PATCH 40/44] Remove deprecations to move users to more secure methods --- cpp/src/parquet/encryption/encryption.cc | 12 +---- cpp/src/parquet/encryption/encryption.h | 46 ++----------------- 
.../parquet/encryption/file_key_unwrapper.cc | 6 +-- .../parquet/encryption/file_key_unwrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 4 +- .../parquet/encryption/key_wrapping_test.cc | 4 +- cpp/src/parquet/encryption/kms_client.h | 36 ++------------- .../encryption/local_wrap_kms_client.cc | 2 +- .../encryption/local_wrap_kms_client.h | 2 +- cpp/src/parquet/encryption/properties_test.cc | 6 +-- .../parquet/encryption/test_in_memory_kms.cc | 2 +- .../parquet/encryption/test_in_memory_kms.h | 2 +- 12 files changed, 23 insertions(+), 101 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 287c6ccd900..88a057a2e80 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -40,15 +40,10 @@ void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); } -SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { return key_map_.at(key_id); } -ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( - std::string column_key) { - return key(SecureString(std::move(column_key))); -} - ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( SecureString column_key) { if (column_key.empty()) return this; @@ -90,11 +85,6 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key return this; } -FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( - std::string footer_key) { - return this->footer_key(SecureString(std::move(footer_key))); -} - FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( SecureString footer_key) { if (footer_key.empty()) { diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index e56b39f6d27..5b40ab2b76f 100644 --- 
a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -49,22 +49,7 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: /// \brief Retrieve a key. - /// \deprecated Deprecated since 21.0.0. - /// Implement GetKeyById(const std::string&) instead. - ARROW_DEPRECATED( - "Deprecated in 21.0.0. " - "Implement GetKeyById(const std::string&) instead.") - virtual std::string GetKey(const std::string& key_id) { - throw ParquetException("Not implemented"); - } - - /// \brief Retrieve a key by its id. - virtual ::arrow::util::SecureString GetKeyById(const std::string& key_id) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = ::arrow::util::SecureString(GetKey(key_id)); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0; virtual ~DecryptionKeyRetriever() {} }; @@ -74,18 +59,16 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKeyById(const std::string& key_id_string) override { + ::arrow::util::SecureString GetKey(const std::string& key_id_string) override { // key_id_string is string but for IntegerKeyIdRetriever it encodes // a native-endian 32 bit unsigned integer key_id uint32_t key_id; assert(key_id_string.size() == sizeof(key_id)); memcpy(&key_id, key_id_string.data(), sizeof(key_id)); - return GetKeyById(key_id); + return key_map_.at(key_id); } - ::arrow::util::SecureString GetKeyById(uint32_t key_id) { return key_map_.at(key_id); } - private: std::map key_map_; }; @@ -94,7 +77,7 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(std::string key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKeyById(const std::string& key_id) 
override; + ::arrow::util::SecureString GetKey(const std::string& key_id) override; private: std::map key_map_; @@ -143,11 +126,6 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. - /// \deprecated "Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead." - ARROW_DEPRECATED("Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead.") - Builder* key(std::string column_key); - - /// \copydoc key(std::string) Builder* key(::arrow::util::SecureString column_key); /// Set a key retrieval metadata. @@ -259,14 +237,6 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - /// \deprecated Deprecated since 21.0.0. - /// Use footer_key(arrow::util::SecureString) instead. - ARROW_DEPRECATED( - "Deprecated in 21.0.0. " - "Use footer_key(arrow::util::SecureString) instead.") - Builder* footer_key(std::string footer_key); - - /// \copydoc footer_key(std::string footer_key) Builder* footer_key(::arrow::util::SecureString footer_key); /// Set explicit column keys (decryption properties). @@ -381,14 +351,6 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - /// \deprecated Deprecated since 21.0.0. Use Builder(arrow::util::SecureString) - /// instead. - ARROW_DEPRECATED( - "Deprecated in 21.0.0. 
" - "Use Builder(arrow::util::SecureString) instead") - explicit Builder(std::string footer_key) - : Builder(::arrow::util::SecureString(std::move(footer_key))) {} - explicit Builder(::arrow::util::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter), diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index d7463590358..4dc1492a0b7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -69,7 +69,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -SecureString FileKeyUnwrapper::GetKeyById(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -110,7 +110,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma SecureString data_key; if (!double_wrapping) { - data_key = kms_client->UnWrapKey(encoded_wrapped_dek, master_key_id); + data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); } else { // Get Key Encryption Key const std::string& encoded_kek_id = key_material.kek_id(); @@ -118,7 +118,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { - return kms_client->UnWrapKey(encoded_wrapped_kek, master_key_id); + return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); }); // Decrypt the data key diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index d674b5cf2ac..c86f68121c8 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ 
b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -65,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - ::arrow::util::SecureString GetKeyById(const std::string& key_metadata_bytes) override; + ::arrow::util::SecureString GetKey(const std::string& key_metadata_bytes) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index efd1ec8067c..b90d3158559 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -78,7 +78,7 @@ const SecureString& InternalFileDecryptor::GetFooterKey() { if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key_ = properties_->key_retriever()->GetKeyById(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKey(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; @@ -117,7 +117,7 @@ SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, if (column_key.empty() && !column_key_metadata.empty() && properties_->key_retriever() != nullptr) { try { - column_key = properties_->key_retriever()->GetKeyById(column_key_metadata); + column_key = properties_->key_retriever()->GetKey(column_key_metadata); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 4ff3903fb7e..04494d8cc21 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ 
b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,10 +86,10 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - SecureString footer_key = unwrapper.GetKeyById(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKey(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - SecureString column_key = unwrapper.GetKeyById(key_metadata_json_column); + SecureString column_key = unwrapper.GetKey(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 0639197fb37..9c67e7cae49 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -84,42 +84,12 @@ class PARQUET_EXPORT KmsClient { /// /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. - /// - /// \deprecated Deprecated since 21.0.0. Implement - /// WrapKey(const SecureString&, const std::string&) instead. - ARROW_DEPRECATED( - "Deprecated in 21.0.0. " - "Implement WrapKey(const SecureString&, const std::string&) instead.") - virtual std::string WrapKey(const std::string& key_bytes, - const std::string& master_key_identifier) { - throw ParquetException("Not implemented"); - } - - /// \copydoc WrapKey(const std::string&, const std::string&) virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, - const std::string& master_key_identifier) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = WrapKey(std::string(key_bytes.as_view()), master_key_identifier); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + const std::string& master_key_identifier) = 0; /// \brief Decrypts (unwraps) a key with the master key. - /// \deprecated Deprecated since 21.0.0. Implement UnWrapKey instead. 
- ARROW_DEPRECATED("Deprecated in 21.0.0. Implement UnWrapKey instead.") - virtual std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { - throw ParquetException("Not implemented"); - } - - /// \copydoc UnwrapKey(const std::string&, const std::string&) - virtual ::arrow::util::SecureString UnWrapKey( - const std::string& wrapped_key, const std::string& master_key_identifier) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = ::arrow::util::SecureString(UnwrapKey(wrapped_key, master_key_identifier)); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + virtual ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) = 0; virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 6bd80479a03..80543c2932a 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -84,7 +84,7 @@ std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, return LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -SecureString LocalWrapKmsClient::UnWrapKey(const std::string& wrapped_key, +SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 7eedbaaf77e..607c75a4c2e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -38,7 +38,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - 
::arrow::util::SecureString UnWrapKey( + ::arrow::util::SecureString UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 3f39cc8eb64..1ceda7ac032 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -224,9 +224,9 @@ TEST(TestDecryptionProperties, UseKeyRetriever) { std::shared_ptr props = builder.build(); auto out_key_retriever = props->key_retriever(); - ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKeyById("kf")); - ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKeyById("kc1")); - ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKeyById("kc2")); + ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKey("kf")); + ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKey("kc1")); + ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKey("kc2")); } TEST(TestDecryptionProperties, SupplyAadPrefix) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 969d6df858c..6af15d177fd 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -79,7 +79,7 @@ std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -SecureString TestOnlyInServerWrapKms::UnWrapKey( +SecureString TestOnlyInServerWrapKms::UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index df63984546a..b9d4169c634 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ 
b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -56,7 +56,7 @@ class TestOnlyInServerWrapKms : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnWrapKey( + ::arrow::util::SecureString UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( From 184afcfd6da7e0d0b97ed7c03a110c2caa83af3a Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 9 Jul 2025 14:49:37 +0200 Subject: [PATCH 41/44] Move no_key_ into encryption.cc --- cpp/src/parquet/encryption/encryption.cc | 4 ++++ cpp/src/parquet/encryption/encryption.h | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 88a057a2e80..aeb4d51b2e1 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -30,6 +30,10 @@ using ::arrow::util::SecureString; namespace parquet { +// any empty SecureString key is interpreted as if no key is given +// this instance is used when a SecureString reference is returned +const SecureString no_key_ = SecureString(); + // integer key retriever void IntegerKeyIdRetriever::PutKey(uint32_t key_id, SecureString key) { key_map_.insert({key_id, std::move(key)}); diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 5b40ab2b76f..d822cc3c184 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -328,12 +328,7 @@ class PARQUET_EXPORT FileDecryptionProperties { ::arrow::util::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; - - // any empty SecureString key is interpreted as if no key is given - // this instance is used if a SecureString reference is returned - const ::arrow::util::SecureString no_key_ = 
::arrow::util::SecureString(); ColumnPathToDecryptionPropertiesMap column_decryption_properties_; - std::shared_ptr key_retriever_; bool check_plaintext_footer_integrity_; bool plaintext_files_allowed_; From edd97b877dd31e465ec14c90d3561331898cdbcc Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 9 Jul 2025 18:43:15 +0200 Subject: [PATCH 42/44] Rename no_key_ --- cpp/src/parquet/encryption/encryption.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index aeb4d51b2e1..701b21ee7f1 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -32,7 +32,7 @@ namespace parquet { // any empty SecureString key is interpreted as if no key is given // this instance is used when a SecureString reference is returned -const SecureString no_key_ = SecureString(); +static SecureString kNoKey = SecureString(); // integer key retriever void IntegerKeyIdRetriever::PutKey(uint32_t key_id, SecureString key) { @@ -219,7 +219,7 @@ const SecureString& FileDecryptionProperties::column_key( return column_prop->key(); } } - return no_key_; + return kNoKey; } FileDecryptionProperties::FileDecryptionProperties( From 0a31f5044b71282e269b2b98e0b6f70dcc9534fe Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 10 Jul 2025 13:45:53 +0200 Subject: [PATCH 43/44] Revert "Remove deprecations to move users to more secure methods" This reverts commit 290449a36c4b45fc5e6be931a6a12b378d80cdf4. 
--- cpp/src/parquet/encryption/encryption.cc | 12 ++++- cpp/src/parquet/encryption/encryption.h | 46 +++++++++++++++++-- .../parquet/encryption/file_key_unwrapper.cc | 6 +-- .../parquet/encryption/file_key_unwrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 4 +- .../parquet/encryption/key_wrapping_test.cc | 4 +- cpp/src/parquet/encryption/kms_client.h | 36 +++++++++++++-- .../encryption/local_wrap_kms_client.cc | 2 +- .../encryption/local_wrap_kms_client.h | 2 +- cpp/src/parquet/encryption/properties_test.cc | 6 +-- .../parquet/encryption/test_in_memory_kms.cc | 2 +- .../parquet/encryption/test_in_memory_kms.h | 2 +- 12 files changed, 101 insertions(+), 23 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 701b21ee7f1..e156ec00446 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -44,10 +44,15 @@ void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); } -SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) { return key_map_.at(key_id); } +ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( + std::string column_key) { + return key(SecureString(std::move(column_key))); +} + ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( SecureString column_key) { if (column_key.empty()) return this; @@ -89,6 +94,11 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key return this; } +FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( + std::string footer_key) { + return this->footer_key(SecureString(std::move(footer_key))); +} + FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( SecureString footer_key) { if (footer_key.empty()) { diff --git 
a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index d822cc3c184..b49d0746c28 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -49,7 +49,22 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: /// \brief Retrieve a key. - virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0; + /// \deprecated Deprecated since 21.0.0. + /// Implement GetKeyById(const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. " + "Implement GetKeyById(const std::string&) instead.") + virtual std::string GetKey(const std::string& key_id) { + throw ParquetException("Not implemented"); + } + + /// \brief Retrieve a key by its id. + virtual ::arrow::util::SecureString GetKeyById(const std::string& key_id) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(GetKey(key_id)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } virtual ~DecryptionKeyRetriever() {} }; @@ -59,16 +74,18 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKey(const std::string& key_id_string) override { + ::arrow::util::SecureString GetKeyById(const std::string& key_id_string) override { // key_id_string is string but for IntegerKeyIdRetriever it encodes // a native-endian 32 bit unsigned integer key_id uint32_t key_id; assert(key_id_string.size() == sizeof(key_id)); memcpy(&key_id, key_id_string.data(), sizeof(key_id)); - return key_map_.at(key_id); + return GetKeyById(key_id); } + ::arrow::util::SecureString GetKeyById(uint32_t key_id) { return key_map_.at(key_id); } + private: std::map key_map_; }; @@ -77,7 +94,7 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: void 
PutKey(std::string key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKey(const std::string& key_id) override; + ::arrow::util::SecureString GetKeyById(const std::string& key_id) override; private: std::map key_map_; @@ -126,6 +143,11 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. + /// \deprecated "Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead." + ARROW_DEPRECATED("Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead.") + Builder* key(std::string column_key); + + /// \copydoc key(std::string) Builder* key(::arrow::util::SecureString column_key); /// Set a key retrieval metadata. @@ -237,6 +259,14 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. + /// \deprecated Deprecated since 21.0.0. + /// Use footer_key(arrow::util::SecureString) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. " + "Use footer_key(arrow::util::SecureString) instead.") + Builder* footer_key(std::string footer_key); + + /// \copydoc footer_key(std::string footer_key) Builder* footer_key(::arrow::util::SecureString footer_key); /// Set explicit column keys (decryption properties). @@ -346,6 +376,14 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: + /// \deprecated Deprecated since 21.0.0. Use Builder(arrow::util::SecureString) + /// instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. 
" + "Use Builder(arrow::util::SecureString) instead") + explicit Builder(std::string footer_key) + : Builder(::arrow::util::SecureString(std::move(footer_key))) {} + explicit Builder(::arrow::util::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter), diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 4dc1492a0b7..d7463590358 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -69,7 +69,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKeyById(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -110,7 +110,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma SecureString data_key; if (!double_wrapping) { - data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); + data_key = kms_client->UnWrapKey(encoded_wrapped_dek, master_key_id); } else { // Get Key Encryption Key const std::string& encoded_kek_id = key_material.kek_id(); @@ -118,7 +118,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { - return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); + return kms_client->UnWrapKey(encoded_wrapped_kek, master_key_id); }); // Decrypt the data key diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index c86f68121c8..d674b5cf2ac 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ 
b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -65,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - ::arrow::util::SecureString GetKey(const std::string& key_metadata_bytes) override; + ::arrow::util::SecureString GetKeyById(const std::string& key_metadata_bytes) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index b90d3158559..efd1ec8067c 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -78,7 +78,7 @@ const SecureString& InternalFileDecryptor::GetFooterKey() { if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key_ = properties_->key_retriever()->GetKey(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKeyById(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; @@ -117,7 +117,7 @@ SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, if (column_key.empty() && !column_key_metadata.empty() && properties_->key_retriever() != nullptr) { try { - column_key = properties_->key_retriever()->GetKey(column_key_metadata); + column_key = properties_->key_retriever()->GetKeyById(column_key_metadata); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 04494d8cc21..4ff3903fb7e 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ 
b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,10 +86,10 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - SecureString footer_key = unwrapper.GetKey(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKeyById(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - SecureString column_key = unwrapper.GetKey(key_metadata_json_column); + SecureString column_key = unwrapper.GetKeyById(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 9c67e7cae49..0639197fb37 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -84,12 +84,42 @@ class PARQUET_EXPORT KmsClient { /// /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. + /// + /// \deprecated Deprecated since 21.0.0. Implement + /// WrapKey(const SecureString&, const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 21.0.0. " + "Implement WrapKey(const SecureString&, const std::string&) instead.") + virtual std::string WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc WrapKey(const std::string&, const std::string&) virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, - const std::string& master_key_identifier) = 0; + const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = WrapKey(std::string(key_bytes.as_view()), master_key_identifier); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } /// \brief Decrypts (unwraps) a key with the master key. 
- virtual ::arrow::util::SecureString UnwrapKey( - const std::string& wrapped_key, const std::string& master_key_identifier) = 0; + /// \deprecated Deprecated since 21.0.0. Implement UnWrapKey instead. + ARROW_DEPRECATED("Deprecated in 21.0.0. Implement UnWrapKey instead.") + virtual std::string UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc UnwrapKey(const std::string&, const std::string&) + virtual ::arrow::util::SecureString UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(UnwrapKey(wrapped_key, master_key_identifier)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 80543c2932a..6bd80479a03 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -84,7 +84,7 @@ std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, return LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, +SecureString LocalWrapKmsClient::UnWrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 607c75a4c2e..7eedbaaf77e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -38,7 +38,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& 
key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnwrapKey( + ::arrow::util::SecureString UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 1ceda7ac032..3f39cc8eb64 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -224,9 +224,9 @@ TEST(TestDecryptionProperties, UseKeyRetriever) { std::shared_ptr props = builder.build(); auto out_key_retriever = props->key_retriever(); - ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKey("kf")); - ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKey("kc1")); - ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKey("kc2")); + ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKeyById("kf")); + ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKeyById("kc1")); + ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKeyById("kc2")); } TEST(TestDecryptionProperties, SupplyAadPrefix) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 6af15d177fd..969d6df858c 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -79,7 +79,7 @@ std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -SecureString TestOnlyInServerWrapKms::UnwrapKey( +SecureString TestOnlyInServerWrapKms::UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index b9d4169c634..df63984546a 100644 --- 
a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -56,7 +56,7 @@ class TestOnlyInServerWrapKms : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnwrapKey( + ::arrow::util::SecureString UnWrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( From 04b15e2bfc94970670605e3fe21bc45680b29b67 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 15 Jul 2025 17:11:55 +0200 Subject: [PATCH 44/44] Update deprecation notices --- cpp/src/parquet/encryption/encryption.h | 16 ++++++++-------- cpp/src/parquet/encryption/kms_client.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index b49d0746c28..8d7bb9489af 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -49,10 +49,10 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: /// \brief Retrieve a key. - /// \deprecated Deprecated since 21.0.0. + /// \deprecated Deprecated since 22.0.0. /// Implement GetKeyById(const std::string&) instead. ARROW_DEPRECATED( - "Deprecated in 21.0.0. " + "Deprecated in 22.0.0. " "Implement GetKeyById(const std::string&) instead.") virtual std::string GetKey(const std::string& key_id) { throw ParquetException("Not implemented"); @@ -143,8 +143,8 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. - /// \deprecated "Deprecated in 21.0.0. Use key(arrow::util::SecureString) instead." - ARROW_DEPRECATED("Deprecated in 21.0.0. 
Use key(arrow::util::SecureString) instead.") + /// \deprecated "Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead." + ARROW_DEPRECATED("Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead.") Builder* key(std::string column_key); /// \copydoc key(std::string) @@ -259,10 +259,10 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - /// \deprecated Deprecated since 21.0.0. + /// \deprecated Deprecated since 22.0.0. /// Use footer_key(arrow::util::SecureString) instead. ARROW_DEPRECATED( - "Deprecated in 21.0.0. " + "Deprecated in 22.0.0. " "Use footer_key(arrow::util::SecureString) instead.") Builder* footer_key(std::string footer_key); @@ -376,10 +376,10 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - /// \deprecated Deprecated since 21.0.0. Use Builder(arrow::util::SecureString) + /// \deprecated Deprecated since 22.0.0. Use Builder(arrow::util::SecureString) /// instead. ARROW_DEPRECATED( - "Deprecated in 21.0.0. " + "Deprecated in 22.0.0. " "Use Builder(arrow::util::SecureString) instead") explicit Builder(std::string footer_key) : Builder(::arrow::util::SecureString(std::move(footer_key))) {} diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 0639197fb37..09133439da3 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -85,10 +85,10 @@ class PARQUET_EXPORT KmsClient { /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. /// - /// \deprecated Deprecated since 21.0.0. Implement + /// \deprecated Deprecated since 22.0.0. Implement /// WrapKey(const SecureString&, const std::string&) instead. ARROW_DEPRECATED( - "Deprecated in 21.0.0. " + "Deprecated in 22.0.0. 
" "Implement WrapKey(const SecureString&, const std::string&) instead.") virtual std::string WrapKey(const std::string& key_bytes, const std::string& master_key_identifier) { @@ -105,8 +105,8 @@ class PARQUET_EXPORT KmsClient { } /// \brief Decrypts (unwraps) a key with the master key. - /// \deprecated Deprecated since 21.0.0. Implement UnWrapKey instead. - ARROW_DEPRECATED("Deprecated in 21.0.0. Implement UnWrapKey instead.") + /// \deprecated Deprecated since 22.0.0. Implement UnWrapKey instead. + ARROW_DEPRECATED("Deprecated in 22.0.0. Implement UnWrapKey instead.") virtual std::string UnwrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) { throw ParquetException("Not implemented");